summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/Makefile.template6
-rw-r--r--src/gallium/SConscript24
-rw-r--r--src/gallium/auxiliary/Makefile181
-rw-r--r--src/gallium/auxiliary/SConscript185
-rw-r--r--src/gallium/auxiliary/cso_cache/Makefile11
-rw-r--r--src/gallium/auxiliary/cso_cache/SConscript11
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c40
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h7
-rw-r--r--src/gallium/auxiliary/draw/Makefile46
-rw-r--r--src/gallium/auxiliary/draw/SConscript46
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c84
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h22
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c338
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.h76
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c12
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c12
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_cull.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_offset.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_stipple.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c14
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h35
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c9
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h8
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c35
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c56
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c65
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c16
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c5
-rw-r--r--src/gallium/auxiliary/gallivm/Makefile92
-rw-r--r--src/gallium/auxiliary/gallivm/SConscript16
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp4
-rw-r--r--src/gallium/auxiliary/indices/Makefile16
-rw-r--r--src/gallium/auxiliary/indices/SConscript28
-rw-r--r--src/gallium/auxiliary/pipebuffer/Makefile19
-rw-r--r--src/gallium/auxiliary/pipebuffer/SConscript19
-rw-r--r--src/gallium/auxiliary/rbug/Makefile14
-rw-r--r--src/gallium/auxiliary/rbug/SConscript14
-rw-r--r--src/gallium/auxiliary/rbug/rbug_context.h2
-rw-r--r--src/gallium/auxiliary/rbug/rbug_proto.h2
-rw-r--r--src/gallium/auxiliary/rtasm/Makefile13
-rw-r--r--src/gallium/auxiliary/rtasm/SConscript13
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_execmem.c32
-rw-r--r--src/gallium/auxiliary/sct/Makefile9
-rw-r--r--src/gallium/auxiliary/sct/SConscript9
-rw-r--r--src/gallium/auxiliary/sct/sct.c453
-rw-r--r--src/gallium/auxiliary/sct/sct.h123
-rw-r--r--src/gallium/auxiliary/sct/usage.c61
-rw-r--r--src/gallium/auxiliary/tgsi/Makefile22
-rw-r--r--src/gallium/auxiliary/tgsi/SConscript23
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c94
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c1898
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h47
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c30
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h23
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c22
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c287
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c22
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c380
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c152
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h94
-rw-r--r--src/gallium/auxiliary/translate/Makefile12
-rw-r--r--src/gallium/auxiliary/translate/SConscript12
-rw-r--r--src/gallium/auxiliary/util/Makefile48
-rw-r--r--src/gallium/auxiliary/util/SConscript61
-rw-r--r--src/gallium/auxiliary/util/u_bitmask.c32
-rw-r--r--src/gallium/auxiliary/util/u_debug.c33
-rw-r--r--src/gallium/auxiliary/util/u_debug.h2
-rw-r--r--src/gallium/auxiliary/util/u_debug_memory.c8
-rw-r--r--src/gallium/auxiliary/util/u_dl.h12
-rw-r--r--src/gallium/auxiliary/util/u_format.h8
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c11
-rw-r--r--src/gallium/auxiliary/util/u_math.h12
-rw-r--r--src/gallium/auxiliary/util/u_network.c8
-rw-r--r--src/gallium/auxiliary/util/u_network.h2
-rw-r--r--src/gallium/auxiliary/util/u_prim.h35
-rw-r--r--src/gallium/auxiliary/util/u_stream_stdc.c2
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.h2
-rw-r--r--src/gallium/auxiliary/vl/Makefile13
-rw-r--r--src/gallium/auxiliary/vl/SConscript13
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c11
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c30
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_derived.c8
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c2
-rw-r--r--src/gallium/drivers/failover/fo_context.c42
-rw-r--r--src/gallium/drivers/failover/fo_winsys.h3
-rw-r--r--src/gallium/drivers/i915/i915_buffer.c1
-rw-r--r--src/gallium/drivers/i915/i915_context.c20
-rw-r--r--src/gallium/drivers/i915/i915_state.c11
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c10
-rw-r--r--src/gallium/drivers/i965/Makefile74
-rw-r--r--src/gallium/drivers/i965/SConscript77
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.c202
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.h148
-rw-r--r--src/gallium/drivers/i965/brw_cc.c111
-rw-r--r--src/gallium/drivers/i965/brw_clip.c224
-rw-r--r--src/gallium/drivers/i965/brw_clip.h199
-rw-r--r--src/gallium/drivers/i965/brw_clip_line.c271
-rw-r--r--src/gallium/drivers/i965/brw_clip_point.c48
-rw-r--r--src/gallium/drivers/i965/brw_clip_state.c209
-rw-r--r--src/gallium/drivers/i965/brw_clip_tri.c595
-rw-r--r--src/gallium/drivers/i965/brw_clip_unfilled.c497
-rw-r--r--src/gallium/drivers/i965/brw_clip_util.c388
-rw-r--r--src/gallium/drivers/i965/brw_context.c154
-rw-r--r--src/gallium/drivers/i965/brw_context.h858
-rw-r--r--src/gallium/drivers/i965/brw_curbe.c390
-rw-r--r--src/gallium/drivers/i965/brw_debug.h43
-rw-r--r--src/gallium/drivers/i965/brw_defines.h847
-rw-r--r--src/gallium/drivers/i965/brw_disasm.c922
-rw-r--r--src/gallium/drivers/i965/brw_disasm.h36
-rw-r--r--src/gallium/drivers/i965/brw_draw.c291
-rw-r--r--src/gallium/drivers/i965/brw_draw.h39
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c542
-rw-r--r--src/gallium/drivers/i965/brw_eu.c262
-rw-r--r--src/gallium/drivers/i965/brw_eu.h992
-rw-r--r--src/gallium/drivers/i965/brw_eu_debug.c94
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c1433
-rw-r--r--src/gallium/drivers/i965/brw_eu_util.c126
-rw-r--r--src/gallium/drivers/i965/brw_gs.c216
-rw-r--r--src/gallium/drivers/i965/brw_gs.h76
-rw-r--r--src/gallium/drivers/i965/brw_gs_emit.c181
-rw-r--r--src/gallium/drivers/i965/brw_gs_state.c169
-rw-r--r--src/gallium/drivers/i965/brw_misc_state.c513
-rw-r--r--src/gallium/drivers/i965/brw_pipe_blend.c208
-rw-r--r--src/gallium/drivers/i965/brw_pipe_clear.c218
-rw-r--r--src/gallium/drivers/i965/brw_pipe_depth.c172
-rw-r--r--src/gallium/drivers/i965/brw_pipe_fb.c84
-rw-r--r--src/gallium/drivers/i965/brw_pipe_flush.c83
-rw-r--r--src/gallium/drivers/i965/brw_pipe_misc.c54
-rw-r--r--src/gallium/drivers/i965/brw_pipe_query.c263
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.c161
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.h16
-rw-r--r--src/gallium/drivers/i965/brw_pipe_sampler.c233
-rw-r--r--src/gallium/drivers/i965/brw_pipe_shader.c303
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c71
-rw-r--r--src/gallium/drivers/i965/brw_reg.h115
-rw-r--r--src/gallium/drivers/i965/brw_screen.c403
-rw-r--r--src/gallium/drivers/i965/brw_screen.h199
-rw-r--r--src/gallium/drivers/i965/brw_screen_buffers.c202
-rw-r--r--src/gallium/drivers/i965/brw_screen_surface.c262
-rw-r--r--src/gallium/drivers/i965/brw_screen_tex_layout.c414
-rw-r--r--src/gallium/drivers/i965/brw_screen_texture.c573
-rw-r--r--src/gallium/drivers/i965/brw_sf.c216
-rw-r--r--src/gallium/drivers/i965/brw_sf.h122
-rw-r--r--src/gallium/drivers/i965/brw_sf_emit.c765
-rw-r--r--src/gallium/drivers/i965/brw_sf_state.c333
-rw-r--r--src/gallium/drivers/i965/brw_state.h174
-rw-r--r--src/gallium/drivers/i965/brw_state_batch.c98
-rw-r--r--src/gallium/drivers/i965/brw_state_cache.c617
-rw-r--r--src/gallium/drivers/i965/brw_state_debug.c153
-rw-r--r--src/gallium/drivers/i965/brw_state_upload.c270
-rw-r--r--src/gallium/drivers/i965/brw_structs.h1576
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.c1247
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.h276
-rwxr-xr-xsrc/gallium/drivers/i965/brw_structs_dump.py291
-rw-r--r--src/gallium/drivers/i965/brw_swtnl.c95
-rw-r--r--src/gallium/drivers/i965/brw_types.h21
-rw-r--r--src/gallium/drivers/i965/brw_urb.c263
-rw-r--r--src/gallium/drivers/i965/brw_util.c38
-rw-r--r--src/gallium/drivers/i965/brw_util.h44
-rw-r--r--src/gallium/drivers/i965/brw_vs.c131
-rw-r--r--src/gallium/drivers/i965/brw_vs.h106
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c1654
-rw-r--r--src/gallium/drivers/i965/brw_vs_state.c201
-rw-r--r--src/gallium/drivers/i965/brw_vs_surface_state.c232
-rw-r--r--src/gallium/drivers/i965/brw_winsys.h309
-rw-r--r--src/gallium/drivers/i965/brw_winsys_debug.c87
-rw-r--r--src/gallium/drivers/i965/brw_wm.c319
-rw-r--r--src/gallium/drivers/i965/brw_wm.h344
-rw-r--r--src/gallium/drivers/i965/brw_wm_constant_buffer.c165
-rw-r--r--src/gallium/drivers/i965/brw_wm_debug.c256
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c1521
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c1224
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c2032
-rw-r--r--src/gallium/drivers/i965/brw_wm_iz.c156
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass0.c366
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass1.c292
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass2.c334
-rw-r--r--src/gallium/drivers/i965/brw_wm_sampler_state.c229
-rw-r--r--src/gallium/drivers/i965/brw_wm_state.c339
-rw-r--r--src/gallium/drivers/i965/brw_wm_surface_state.c294
-rw-r--r--src/gallium/drivers/i965/intel_decode.c1790
-rw-r--r--src/gallium/drivers/i965/intel_decode.h29
-rw-r--r--src/gallium/drivers/i965/intel_structs.h132
-rw-r--r--src/gallium/drivers/identity/id_context.c52
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c33
-rw-r--r--src/gallium/drivers/llvmpipe/lp_prim_vbuf.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_blend.c18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_rasterizer.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_surface.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vs.c12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_cache.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_c.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_winsys.h2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h3
-rw-r--r--src/gallium/drivers/nv04/nv04_context.c24
-rw-r--r--src/gallium/drivers/nv04/nv04_context.h4
-rw-r--r--src/gallium/drivers/nv04/nv04_fragtex.c16
-rw-r--r--src/gallium/drivers/nv04/nv04_prim_vbuf.c16
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.c6
-rw-r--r--src/gallium/drivers/nv04/nv04_state.c60
-rw-r--r--src/gallium/drivers/nv04/nv04_state_emit.c10
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.c66
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.h4
-rw-r--r--src/gallium/drivers/nv04/nv04_transfer.c24
-rw-r--r--src/gallium/drivers/nv04/nv04_vbo.c12
-rw-r--r--src/gallium/drivers/nv10/nv10_context.c6
-rw-r--r--src/gallium/drivers/nv10/nv10_context.h4
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.c3
-rw-r--r--src/gallium/drivers/nv10/nv10_transfer.c24
-rw-r--r--src/gallium/drivers/nv10/nv10_vbo.c11
-rw-r--r--src/gallium/drivers/nv20/nv20_context.c6
-rw-r--r--src/gallium/drivers/nv20/nv20_context.h4
-rw-r--r--src/gallium/drivers/nv20/nv20_miptree.c22
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.c3
-rw-r--r--src/gallium/drivers/nv20/nv20_state_emit.c16
-rw-r--r--src/gallium/drivers/nv20/nv20_transfer.c26
-rw-r--r--src/gallium/drivers/nv20/nv20_vbo.c9
-rw-r--r--src/gallium/drivers/nv20/nv20_vertprog.c3
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c6
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h5
-rw-r--r--src/gallium/drivers/nv30/nv30_fragprog.c38
-rw-r--r--src/gallium/drivers/nv30/nv30_miptree.c22
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c9
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c11
-rw-r--r--src/gallium/drivers/nv30/nv30_transfer.c26
-rw-r--r--src/gallium/drivers/nv30/nv30_vbo.c21
-rw-r--r--src/gallium/drivers/nv30/nv30_vertprog.c3
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c6
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h7
-rw-r--r--src/gallium/drivers/nv40/nv40_draw.c11
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c46
-rw-r--r--src/gallium/drivers/nv40/nv40_miptree.c22
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.c7
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c11
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c1
-rw-r--r--src/gallium/drivers/nv40/nv40_transfer.c26
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c28
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c39
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h4
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c613
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h1
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c62
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c15
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c20
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c12
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c95
-rw-r--r--src/gallium/drivers/r300/SConscript7
-rw-r--r--src/gallium/drivers/r300/r300_emit.c4
-rw-r--r--src/gallium/drivers/r300/r300_reg.h16
-rw-r--r--src/gallium/drivers/r300/r300_render.c75
-rw-r--r--src/gallium/drivers/r300/r300_render.h60
-rw-r--r--src/gallium/drivers/r300/r300_state.c9
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c6
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c2
-rw-r--r--src/gallium/drivers/r300/r300_vs.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c22
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h6
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c53
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c26
-rw-r--r--src/gallium/drivers/softpipe/sp_query.h4
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h28
-rw-r--r--src/gallium/drivers/softpipe/sp_state_blend.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c10
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c75
-rw-r--r--src/gallium/drivers/softpipe/sp_state_rasterizer.c9
-rw-r--r--src/gallium/drivers/softpipe/sp_state_surface.c2
-rw-r--r--src/gallium/drivers/svga/svga_context.c44
-rw-r--r--src/gallium/drivers/svga/svga_context.h16
-rw-r--r--src/gallium/drivers/svga/svga_draw.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c28
-rw-r--r--src/gallium/drivers/svga/svga_pipe_fs.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vertex.c13
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vs.c3
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c44
-rw-r--r--src/gallium/drivers/svga/svga_state_need_swtnl.c8
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c27
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_draw.c2
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_state.c11
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.c2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c2
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_dump.c606
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_dump.h3
-rwxr-xr-xsrc/gallium/drivers/svga/svgadump/svga_dump.py94
-rw-r--r--src/gallium/drivers/trace/README5
-rw-r--r--src/gallium/drivers/trace/tr_context.c61
-rw-r--r--src/gallium/drivers/trace/tr_dump.c4
-rw-r--r--src/gallium/drivers/trace/tr_rbug.c6
-rw-r--r--src/gallium/drivers/trace/tr_screen.c2
-rw-r--r--src/gallium/drivers/trace/tr_state.h2
-rw-r--r--src/gallium/include/pipe/p_context.h50
-rw-r--r--src/gallium/include/pipe/p_defines.h38
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h54
-rw-r--r--src/gallium/state_trackers/dri/dri_context.c14
-rw-r--r--src/gallium/state_trackers/dri/dri_context.h22
-rw-r--r--src/gallium/state_trackers/dri/dri_drawable.c24
-rw-r--r--src/gallium/state_trackers/dri/dri_drawable.h18
-rw-r--r--src/gallium/state_trackers/dri/dri_screen.c16
-rw-r--r--src/gallium/state_trackers/dri/dri_screen.h4
-rw-r--r--src/gallium/state_trackers/egl/egl_surface.c4
-rw-r--r--src/gallium/state_trackers/egl/egl_tracker.c3
-rw-r--r--src/gallium/state_trackers/glx/xlib/glx_api.c1
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.c23
-rw-r--r--src/gallium/state_trackers/python/SConscript4
-rw-r--r--src/gallium/state_trackers/python/gallium.i1
-rw-r--r--src/gallium/state_trackers/python/p_context.i46
-rw-r--r--src/gallium/state_trackers/python/p_device.i4
-rw-r--r--src/gallium/state_trackers/python/p_texture.i8
-rwxr-xr-xsrc/gallium/state_trackers/python/retrace/interpreter.py26
-rw-r--r--src/gallium/state_trackers/python/samples/gs.py254
-rw-r--r--src/gallium/state_trackers/python/samples/tri.py2
-rw-r--r--src/gallium/state_trackers/python/st_device.c11
-rw-r--r--src/gallium/state_trackers/python/st_device.h4
-rw-r--r--src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py2
-rw-r--r--src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py2
-rwxr-xr-xsrc/gallium/state_trackers/python/tests/texture_render.py4
-rwxr-xr-xsrc/gallium/state_trackers/python/tests/texture_sample.py8
-rw-r--r--src/gallium/state_trackers/vega/Makefile9
-rw-r--r--src/gallium/state_trackers/vega/api_path.c3
-rw-r--r--src/gallium/state_trackers/vega/arc.c2
-rw-r--r--src/gallium/state_trackers/vega/bezier.c1
-rw-r--r--src/gallium/state_trackers/vega/renderer.c2
-rw-r--r--src/gallium/state_trackers/vega/stroker.c4
-rw-r--r--src/gallium/state_trackers/wgl/SConscript3
-rw-r--r--src/gallium/state_trackers/xorg/xorg_crtc.c10
-rw-r--r--src/gallium/state_trackers/xorg/xorg_driver.c95
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa.c35
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa_tgsi.c33
-rw-r--r--src/gallium/state_trackers/xorg/xorg_renderer.c1
-rw-r--r--src/gallium/winsys/drm/SConscript5
-rw-r--r--src/gallium/winsys/drm/i965/Makefile12
-rw-r--r--src/gallium/winsys/drm/i965/SConscript7
-rw-r--r--src/gallium/winsys/drm/i965/dri/Makefile26
-rw-r--r--src/gallium/winsys/drm/i965/dri/SConscript19
-rw-r--r--src/gallium/winsys/drm/i965/egl/Makefile29
-rw-r--r--src/gallium/winsys/drm/i965/gem/Makefile14
-rw-r--r--src/gallium/winsys/drm/i965/gem/SConscript15
-rw-r--r--src/gallium/winsys/drm/i965/gem/i965_drm_api.c243
-rw-r--r--src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c427
-rw-r--r--src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h64
-rw-r--r--src/gallium/winsys/drm/i965/xlib/Makefile97
-rw-r--r--src/gallium/winsys/drm/i965/xlib/xlib_i965.c522
-rw-r--r--src/gallium/winsys/drm/i965/xorg/Makefile57
-rw-r--r--src/gallium/winsys/drm/i965/xorg/intel_xorg.c147
-rw-r--r--src/gallium/winsys/drm/intel/dri/Makefile1
-rw-r--r--src/gallium/winsys/drm/intel/dri/SConscript2
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_drm_api.c1
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_drm.c4
-rw-r--r--src/gallium/winsys/drm/radeon/dri/SConscript2
-rw-r--r--src/gallium/winsys/drm/radeon/python/SConscript2
-rw-r--r--src/gallium/winsys/drm/radeon/xorg/Makefile26
-rw-r--r--src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c16
-rw-r--r--src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c9
-rw-r--r--src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h78
-rw-r--r--src/gallium/winsys/drm/vmware/dri/SConscript2
-rw-r--r--src/gallium/winsys/drm/vmware/xorg/SConscript3
-rw-r--r--src/gallium/winsys/drm/vmware/xorg/vmw_driver.h11
-rw-r--r--src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c56
-rw-r--r--src/gallium/winsys/drm/vmware/xorg/vmw_video.c21
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/Makefile8
-rw-r--r--src/gallium/winsys/g3dvl/xlib/Makefile8
-rw-r--r--src/gallium/winsys/gdi/SConscript4
-rw-r--r--src/gallium/winsys/xlib/Makefile15
-rw-r--r--src/gallium/winsys/xlib/SConscript9
-rw-r--r--src/gallium/winsys/xlib/xlib.c9
-rw-r--r--src/gallium/winsys/xlib/xlib.h1
-rw-r--r--src/gallium/winsys/xlib/xlib_brw_context.c209
-rw-r--r--src/gallium/winsys/xlib/xlib_trace.c113
395 files changed, 42352 insertions, 4273 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template
index 63983c5220..136423513c 100644
--- a/src/gallium/Makefile.template
+++ b/src/gallium/Makefile.template
@@ -54,13 +54,13 @@ install:
##### RULES #####
.c.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+ $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
.cpp.o:
- $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
+ $(CXX) -c $(INCLUDES) $(DEFINES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
.S.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+ $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
sinclude depend
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index 8be84cddbe..eea32b1314 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -2,29 +2,7 @@ import os
Import('*')
-env = env.Clone()
-
-auxiliaries = []
-
-Export('auxiliaries')
-
-
-if llvm:
- SConscript(['auxiliary/gallivm/SConscript'])
-
-SConscript([
- # NOTE: order matters!
- 'auxiliary/util/SConscript',
- 'auxiliary/rtasm/SConscript',
- 'auxiliary/tgsi/SConscript',
- 'auxiliary/cso_cache/SConscript',
- 'auxiliary/translate/SConscript',
- 'auxiliary/draw/SConscript',
- 'auxiliary/pipebuffer/SConscript',
- 'auxiliary/indices/SConscript',
- 'auxiliary/rbug/SConscript',
- 'auxiliary/vl/SConscript',
-])
+SConscript('auxiliary/SConscript')
for driver in env['drivers']:
SConscript(os.path.join('drivers', driver, 'SConscript'))
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 5446eb68a9..e3af41c6e0 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -1,12 +1,177 @@
-# src/gallium/auxiliary/Makefile
TOP = ../../..
include $(TOP)/configs/current
-SUBDIRS = $(GALLIUM_AUXILIARY_DIRS)
+LIBNAME = gallium
-default install clean:
- @for dir in $(SUBDIRS) ; do \
- if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE) $@) || exit 1; \
- fi \
- done
+C_SOURCES = \
+ cso_cache/cso_context.c \
+ cso_cache/cso_cache.c \
+ cso_cache/cso_hash.c \
+ draw/draw_context.c \
+ draw/draw_gs.c \
+ draw/draw_pipe.c \
+ draw/draw_pipe_aaline.c \
+ draw/draw_pipe_aapoint.c \
+ draw/draw_pipe_clip.c \
+ draw/draw_pipe_cull.c \
+ draw/draw_pipe_flatshade.c \
+ draw/draw_pipe_offset.c \
+ draw/draw_pipe_pstipple.c \
+ draw/draw_pipe_stipple.c \
+ draw/draw_pipe_twoside.c \
+ draw/draw_pipe_unfilled.c \
+ draw/draw_pipe_util.c \
+ draw/draw_pipe_validate.c \
+ draw/draw_pipe_vbuf.c \
+ draw/draw_pipe_wide_line.c \
+ draw/draw_pipe_wide_point.c \
+ draw/draw_pt.c \
+ draw/draw_pt_elts.c \
+ draw/draw_pt_emit.c \
+ draw/draw_pt_fetch.c \
+ draw/draw_pt_fetch_emit.c \
+ draw/draw_pt_fetch_shade_emit.c \
+ draw/draw_pt_fetch_shade_pipeline.c \
+ draw/draw_pt_post_vs.c \
+ draw/draw_pt_util.c \
+ draw/draw_pt_varray.c \
+ draw/draw_pt_vcache.c \
+ draw/draw_vertex.c \
+ draw/draw_vs.c \
+ draw/draw_vs_varient.c \
+ draw/draw_vs_aos.c \
+ draw/draw_vs_aos_io.c \
+ draw/draw_vs_aos_machine.c \
+ draw/draw_vs_exec.c \
+ draw/draw_vs_llvm.c \
+ draw/draw_vs_ppc.c \
+ draw/draw_vs_sse.c \
+ indices/u_indices_gen.c \
+ indices/u_unfilled_gen.c \
+ pipebuffer/pb_buffer_fenced.c \
+ pipebuffer/pb_buffer_malloc.c \
+ pipebuffer/pb_bufmgr_alt.c \
+ pipebuffer/pb_bufmgr_cache.c \
+ pipebuffer/pb_bufmgr_debug.c \
+ pipebuffer/pb_bufmgr_fenced.c \
+ pipebuffer/pb_bufmgr_mm.c \
+ pipebuffer/pb_bufmgr_ondemand.c \
+ pipebuffer/pb_bufmgr_pool.c \
+ pipebuffer/pb_bufmgr_slab.c \
+ pipebuffer/pb_validate.c \
+ rbug/rbug_connection.c \
+ rbug/rbug_core.c \
+ rbug/rbug_texture.c \
+ rbug/rbug_context.c \
+ rbug/rbug_shader.c \
+ rbug/rbug_demarshal.c \
+ rtasm/rtasm_cpu.c \
+ rtasm/rtasm_execmem.c \
+ rtasm/rtasm_x86sse.c \
+ rtasm/rtasm_ppc.c \
+ rtasm/rtasm_ppc_spe.c \
+ tgsi/tgsi_sanity.c \
+ tgsi/tgsi_build.c \
+ tgsi/tgsi_dump.c \
+ tgsi/tgsi_exec.c \
+ tgsi/tgsi_info.c \
+ tgsi/tgsi_iterate.c \
+ tgsi/tgsi_parse.c \
+ tgsi/tgsi_ppc.c \
+ tgsi/tgsi_scan.c \
+ tgsi/tgsi_sse2.c \
+ tgsi/tgsi_text.c \
+ tgsi/tgsi_transform.c \
+ tgsi/tgsi_ureg.c \
+ tgsi/tgsi_util.c \
+ translate/translate_generic.c \
+ translate/translate_sse.c \
+ translate/translate.c \
+ translate/translate_cache.c \
+ util/u_debug.c \
+ util/u_debug_dump.c \
+ util/u_debug_symbol.c \
+ util/u_debug_stack.c \
+ util/u_blit.c \
+ util/u_blitter.c \
+ util/u_cache.c \
+ util/u_cpu_detect.c \
+ util/u_dl.c \
+ util/u_draw_quad.c \
+ util/u_format.c \
+ util/u_format_access.c \
+ util/u_format_table.c \
+ util/u_gen_mipmap.c \
+ util/u_handle_table.c \
+ util/u_hash_table.c \
+ util/u_hash.c \
+ util/u_keymap.c \
+ util/u_linear.c \
+ util/u_network.c \
+ util/u_math.c \
+ util/u_mm.c \
+ util/u_rect.c \
+ util/u_simple_shaders.c \
+ util/u_snprintf.c \
+ util/u_stream_stdc.c \
+ util/u_stream_wd.c \
+ util/u_surface.c \
+ util/u_texture.c \
+ util/u_tile.c \
+ util/u_time.c \
+ util/u_timed_winsys.c \
+ util/u_upload_mgr.c \
+ util/u_simple_screen.c \
+ vl/vl_bitstream_parser.c \
+ vl/vl_mpeg12_mc_renderer.c \
+ vl/vl_compositor.c \
+ vl/vl_csc.c \
+ vl/vl_shader_build.c
+
+GALLIVM_SOURCES = \
+ gallivm/gallivm.cpp \
+ gallivm/gallivm_cpu.cpp \
+ gallivm/instructions.cpp \
+ gallivm/loweringpass.cpp \
+ gallivm/tgsitollvm.cpp \
+ gallivm/storage.cpp \
+ gallivm/storagesoa.cpp \
+ gallivm/instructionssoa.cpp
+
+INC_SOURCES = \
+ gallivm/gallivm_builtins.cpp \
+ gallivm/gallivmsoabuiltins.cpp
+
+# XXX: gallivm doesn't build correctly so disable for now
+#ifeq ($(MESA_LLVM),1)
+#DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+#CPP_SOURCES += \
+# $(GALLIVM_SOURCES)
+#endif
+
+
+include ../Makefile.template
+
+
+gallivm/gallivm_builtins.cpp: gallivm/llvm_builtins.c
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin
+ (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
+ rm temp1.bin
+
+gallivm/gallivmsoabuiltins.cpp: gallivm/soabuiltins.c
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin
+ (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
+ rm temp2.bin
+
+
+indices/u_indices_gen.c: indices/u_indices_gen.py
+ python $< > $@
+
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+ python $< > $@
+
+util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv
+ python util/u_format_table.py util/u_format.csv > $@
+
+util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv
+ python util/u_format_access.py util/u_format.csv > $@
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
new file mode 100644
index 0000000000..782eb53386
--- /dev/null
+++ b/src/gallium/auxiliary/SConscript
@@ -0,0 +1,185 @@
+Import('*')
+
+from sys import executable as python_cmd
+
+env.Append(CPPPATH = [
+ 'indices',
+ 'util',
+])
+
+env.CodeGenerate(
+ target = 'indices/u_indices_gen.c',
+ script = 'indices/u_indices_gen.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'indices/u_unfilled_gen.c',
+ script = 'indices/u_unfilled_gen.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(  # emits util/u_format_table.c from util/u_format.csv
+    target = 'util/u_format_table.c',
+    script = 'util/u_format_table.py',
+    source = ['util/u_format.csv'],
+    command = python_cmd + ' $SCRIPT $SOURCE > $TARGET'  # python_cmd is sys.executable, same as the indices/ generators; avoids relying on a 'python' found on PATH
+)
+
+env.CodeGenerate(  # emits util/u_format_access.c from util/u_format.csv
+    target = 'util/u_format_access.c',
+    script = 'util/u_format_access.py',
+    source = ['util/u_format.csv'],
+    command = python_cmd + ' $SCRIPT $SOURCE > $TARGET'  # python_cmd is sys.executable, same as the indices/ generators; avoids relying on a 'python' found on PATH
+)
+
+source = [
+ 'cso_cache/cso_context.c',
+ 'cso_cache/cso_cache.c',
+ 'cso_cache/cso_hash.c',
+ 'draw/draw_context.c',
+ 'draw/draw_pipe.c',
+ 'draw/draw_pipe_aaline.c',
+ 'draw/draw_pipe_aapoint.c',
+ 'draw/draw_pipe_clip.c',
+ 'draw/draw_pipe_cull.c',
+ 'draw/draw_pipe_flatshade.c',
+ 'draw/draw_pipe_offset.c',
+ 'draw/draw_pipe_pstipple.c',
+ 'draw/draw_pipe_stipple.c',
+ 'draw/draw_pipe_twoside.c',
+ 'draw/draw_pipe_unfilled.c',
+ 'draw/draw_pipe_util.c',
+ 'draw/draw_pipe_validate.c',
+ 'draw/draw_pipe_vbuf.c',
+ 'draw/draw_pipe_wide_line.c',
+ 'draw/draw_pipe_wide_point.c',
+ 'draw/draw_pt.c',
+ 'draw/draw_pt_elts.c',
+ 'draw/draw_pt_emit.c',
+ 'draw/draw_pt_fetch.c',
+ 'draw/draw_pt_fetch_emit.c',
+ 'draw/draw_pt_fetch_shade_emit.c',
+ 'draw/draw_pt_fetch_shade_pipeline.c',
+ 'draw/draw_pt_post_vs.c',
+ 'draw/draw_pt_util.c',
+ 'draw/draw_pt_varray.c',
+ 'draw/draw_pt_vcache.c',
+ 'draw/draw_vertex.c',
+ 'draw/draw_vs.c',
+ 'draw/draw_vs_aos.c',
+ 'draw/draw_vs_aos_io.c',
+ 'draw/draw_vs_aos_machine.c',
+ 'draw/draw_vs_exec.c',
+ 'draw/draw_vs_llvm.c',
+ 'draw/draw_vs_ppc.c',
+ 'draw/draw_vs_sse.c',
+ 'draw/draw_vs_varient.c',
+ 'draw/draw_gs.c',
+ #'indices/u_indices.c',
+ #'indices/u_unfilled_indices.c',
+ 'indices/u_indices_gen.c',
+ 'indices/u_unfilled_gen.c',
+ 'pipebuffer/pb_buffer_fenced.c',
+ 'pipebuffer/pb_buffer_malloc.c',
+ 'pipebuffer/pb_bufmgr_alt.c',
+ 'pipebuffer/pb_bufmgr_cache.c',
+ 'pipebuffer/pb_bufmgr_debug.c',
+ 'pipebuffer/pb_bufmgr_fenced.c',
+ 'pipebuffer/pb_bufmgr_mm.c',
+ 'pipebuffer/pb_bufmgr_ondemand.c',
+ 'pipebuffer/pb_bufmgr_pool.c',
+ 'pipebuffer/pb_bufmgr_slab.c',
+ 'pipebuffer/pb_validate.c',
+ 'rbug/rbug_core.c',
+ 'rbug/rbug_shader.c',
+ 'rbug/rbug_context.c',
+ 'rbug/rbug_texture.c',
+ 'rbug/rbug_demarshal.c',
+ 'rbug/rbug_connection.c',
+ 'rtasm/rtasm_cpu.c',
+ 'rtasm/rtasm_execmem.c',
+ 'rtasm/rtasm_x86sse.c',
+ 'rtasm/rtasm_ppc.c',
+ 'rtasm/rtasm_ppc_spe.c',
+ 'tgsi/tgsi_build.c',
+ 'tgsi/tgsi_dump.c',
+ 'tgsi/tgsi_dump_c.c',
+ 'tgsi/tgsi_exec.c',
+ 'tgsi/tgsi_info.c',
+ 'tgsi/tgsi_iterate.c',
+ 'tgsi/tgsi_parse.c',
+ 'tgsi/tgsi_sanity.c',
+ 'tgsi/tgsi_scan.c',
+ 'tgsi/tgsi_ppc.c',
+ 'tgsi/tgsi_sse2.c',
+ 'tgsi/tgsi_text.c',
+ 'tgsi/tgsi_transform.c',
+ 'tgsi/tgsi_ureg.c',
+ 'tgsi/tgsi_util.c',
+ 'translate/translate_generic.c',
+ 'translate/translate_sse.c',
+ 'translate/translate.c',
+ 'translate/translate_cache.c',
+ 'util/u_bitmask.c',
+ 'util/u_blit.c',
+ 'util/u_blitter.c',
+ 'util/u_cache.c',
+ 'util/u_cpu_detect.c',
+ 'util/u_debug.c',
+ 'util/u_debug_dump.c',
+ 'util/u_debug_memory.c',
+ 'util/u_debug_stack.c',
+ 'util/u_debug_symbol.c',
+ 'util/u_dl.c',
+ 'util/u_draw_quad.c',
+ 'util/u_format.c',
+ 'util/u_format_access.c',
+ 'util/u_format_table.c',
+ 'util/u_gen_mipmap.c',
+ 'util/u_handle_table.c',
+ 'util/u_hash.c',
+ 'util/u_hash_table.c',
+ 'util/u_keymap.c',
+ 'util/u_network.c',
+ 'util/u_math.c',
+ 'util/u_mm.c',
+ 'util/u_rect.c',
+ 'util/u_simple_shaders.c',
+ 'util/u_snprintf.c',
+ 'util/u_stream_stdc.c',
+ 'util/u_stream_wd.c',
+ 'util/u_surface.c',
+ 'util/u_texture.c',
+ 'util/u_tile.c',
+ 'util/u_time.c',
+ 'util/u_timed_winsys.c',
+ 'util/u_upload_mgr.c',
+ 'util/u_simple_screen.c',
+ 'vl/vl_bitstream_parser.c',
+ 'vl/vl_mpeg12_mc_renderer.c',
+ 'vl/vl_compositor.c',
+ 'vl/vl_csc.c',
+ 'vl/vl_shader_build.c',
+]
+
+if env['llvm']:
+ source += [
+ 'gallivm/gallivm.cpp',
+ 'gallivm/gallivm_cpu.cpp',
+ 'gallivm/instructions.cpp',
+ 'gallivm/loweringpass.cpp',
+ 'gallivm/tgsitollvm.cpp',
+ 'gallivm/storage.cpp',
+ 'gallivm/storagesoa.cpp',
+ 'gallivm/instructionssoa.cpp',
+ ]
+
+gallium = env.ConvenienceLibrary(
+ target = 'gallium',
+ source = source,
+)
+
+Export('gallium')
diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile
deleted file mode 100644
index 8726afcd94..0000000000
--- a/src/gallium/auxiliary/cso_cache/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = cso_cache
-
-C_SOURCES = \
- cso_context.c \
- cso_cache.c \
- cso_hash.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript
deleted file mode 100644
index 651e68a191..0000000000
--- a/src/gallium/auxiliary/cso_cache/SConscript
+++ /dev/null
@@ -1,11 +0,0 @@
-Import('*')
-
-cso_cache = env.ConvenienceLibrary(
- target = 'cso_cache',
- source = [
- 'cso_context.c',
- 'cso_cache.c',
- 'cso_hash.c',
- ])
-
-auxiliaries.insert(0, cso_cache)
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 80bd0c91db..2b16332e14 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -42,6 +42,7 @@
#include "cso_cache/cso_context.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"
+#include "cso_context.h"
struct cso_context {
struct pipe_context *pipe;
@@ -85,8 +86,8 @@ struct cso_context {
void *blend, *blend_saved;
void *depth_stencil, *depth_stencil_saved;
void *rasterizer, *rasterizer_saved;
- void *fragment_shader, *fragment_shader_saved;
- void *vertex_shader, *vertex_shader_saved;
+ void *fragment_shader, *fragment_shader_saved, *geometry_shader;
+ void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
struct pipe_framebuffer_state fb, fb_saved;
struct pipe_viewport_state vp, vp_saved;
@@ -1027,3 +1028,38 @@ enum pipe_error cso_set_blend_color(struct cso_context *ctx,
}
return PIPE_OK;
}
+
+/**
+ * Make \p handle the current geometry shader.
+ *
+ * The driver is only called when the handle differs from the one the CSO
+ * context already tracks.  Always returns PIPE_OK.
+ */
+enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx,
+                                               void *handle)
+{
+   /* nothing to do when this shader is already current */
+   if (ctx->geometry_shader == handle)
+      return PIPE_OK;
+
+   ctx->geometry_shader = handle;
+   ctx->pipe->bind_gs_state(ctx->pipe, handle);
+
+   return PIPE_OK;
+}
+
+/**
+ * Delete a geometry shader through the driver.
+ *
+ * If the shader being deleted is the one currently tracked as bound, a NULL
+ * shader is bound first and the tracked handle is cleared.
+ */
+void cso_delete_geometry_shader(struct cso_context *ctx, void *handle)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   if (ctx->geometry_shader == handle) {
+      /* the shader is current: unbind it before it goes away */
+      pipe->bind_gs_state(pipe, NULL);
+      ctx->geometry_shader = NULL;
+   }
+
+   pipe->delete_gs_state(pipe, handle);
+}
+
+/**
+ * Remember the current geometry shader so that it can be put back later by
+ * cso_restore_geometry_shader().  A previous save must not still be pending
+ * (asserted).
+ */
+void cso_save_geometry_shader(struct cso_context *ctx)
+{
+   void *current = ctx->geometry_shader;
+
+   assert(!ctx->geometry_shader_saved);
+   ctx->geometry_shader_saved = current;
+}
+
+/**
+ * Re-bind the geometry shader remembered by cso_save_geometry_shader().
+ *
+ * The driver is only called when the saved shader differs from the current
+ * one.  The saved slot is cleared in either case.
+ */
+void cso_restore_geometry_shader(struct cso_context *ctx)
+{
+   void *saved = ctx->geometry_shader_saved;
+
+   if (saved != ctx->geometry_shader) {
+      ctx->pipe->bind_gs_state(ctx->pipe, saved);
+      ctx->geometry_shader = saved;
+   }
+
+   ctx->geometry_shader_saved = NULL;
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index e5b92177cf..b9e313e32d 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -146,6 +146,13 @@ void cso_save_vertex_shader(struct cso_context *cso);
void cso_restore_vertex_shader(struct cso_context *cso);
+enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx,
+ void *handle);
+void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
+void cso_save_geometry_shader(struct cso_context *cso);
+void cso_restore_geometry_shader(struct cso_context *cso);
+
+
enum pipe_error cso_set_framebuffer(struct cso_context *cso,
const struct pipe_framebuffer_state *fb);
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
deleted file mode 100644
index 5041dcc072..0000000000
--- a/src/gallium/auxiliary/draw/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = draw
-
-C_SOURCES = \
- draw_context.c \
- draw_pipe.c \
- draw_pipe_aaline.c \
- draw_pipe_aapoint.c \
- draw_pipe_clip.c \
- draw_pipe_cull.c \
- draw_pipe_flatshade.c \
- draw_pipe_offset.c \
- draw_pipe_pstipple.c \
- draw_pipe_stipple.c \
- draw_pipe_twoside.c \
- draw_pipe_unfilled.c \
- draw_pipe_util.c \
- draw_pipe_validate.c \
- draw_pipe_vbuf.c \
- draw_pipe_wide_line.c \
- draw_pipe_wide_point.c \
- draw_pt.c \
- draw_pt_elts.c \
- draw_pt_emit.c \
- draw_pt_fetch.c \
- draw_pt_fetch_emit.c \
- draw_pt_fetch_shade_emit.c \
- draw_pt_fetch_shade_pipeline.c \
- draw_pt_post_vs.c \
- draw_pt_util.c \
- draw_pt_varray.c \
- draw_pt_vcache.c \
- draw_vertex.c \
- draw_vs.c \
- draw_vs_varient.c \
- draw_vs_aos.c \
- draw_vs_aos_io.c \
- draw_vs_aos_machine.c \
- draw_vs_exec.c \
- draw_vs_llvm.c \
- draw_vs_ppc.c \
- draw_vs_sse.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
deleted file mode 100644
index 5f05aa324a..0000000000
--- a/src/gallium/auxiliary/draw/SConscript
+++ /dev/null
@@ -1,46 +0,0 @@
-Import('*')
-
-draw = env.ConvenienceLibrary(
- target = 'draw',
- source = [
- 'draw_context.c',
- 'draw_pipe.c',
- 'draw_pipe_aaline.c',
- 'draw_pipe_aapoint.c',
- 'draw_pipe_clip.c',
- 'draw_pipe_cull.c',
- 'draw_pipe_flatshade.c',
- 'draw_pipe_offset.c',
- 'draw_pipe_pstipple.c',
- 'draw_pipe_stipple.c',
- 'draw_pipe_twoside.c',
- 'draw_pipe_unfilled.c',
- 'draw_pipe_util.c',
- 'draw_pipe_validate.c',
- 'draw_pipe_vbuf.c',
- 'draw_pipe_wide_line.c',
- 'draw_pipe_wide_point.c',
- 'draw_pt.c',
- 'draw_pt_elts.c',
- 'draw_pt_emit.c',
- 'draw_pt_fetch.c',
- 'draw_pt_fetch_emit.c',
- 'draw_pt_fetch_shade_emit.c',
- 'draw_pt_fetch_shade_pipeline.c',
- 'draw_pt_post_vs.c',
- 'draw_pt_util.c',
- 'draw_pt_varray.c',
- 'draw_pt_vcache.c',
- 'draw_vertex.c',
- 'draw_vs.c',
- 'draw_vs_aos.c',
- 'draw_vs_aos_io.c',
- 'draw_vs_aos_machine.c',
- 'draw_vs_exec.c',
- 'draw_vs_llvm.c',
- 'draw_vs_ppc.c',
- 'draw_vs_sse.c',
- 'draw_vs_varient.c'
- ])
-
-auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index a4f1fcddc1..667aa46b20 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -36,6 +36,7 @@
#include "draw_context.h"
#include "draw_vbuf.h"
#include "draw_vs.h"
+#include "draw_gs.h"
#include "draw_pt.h"
#include "draw_pipe.h"
@@ -67,6 +68,9 @@ struct draw_context *draw_create( void )
if (!draw_vs_init( draw ))
goto fail;
+ if (!draw_gs_init( draw ))
+ goto fail;
+
return draw;
fail:
@@ -231,11 +235,19 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw,
void
draw_set_mapped_constant_buffer(struct draw_context *draw,
- const void *buffer,
+ unsigned shader_type,
+ const void *buffer,
unsigned size )
{
- draw->pt.user.constants = buffer;
- draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
+ debug_assert(shader_type == PIPE_SHADER_VERTEX ||
+ shader_type == PIPE_SHADER_GEOMETRY);
+ if (shader_type == PIPE_SHADER_VERTEX) {
+ draw->pt.user.vs_constants = buffer;
+ draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
+ } else if (shader_type == PIPE_SHADER_GEOMETRY) {
+ draw->pt.user.gs_constants = buffer;
+ draw_gs_set_constants( draw, (const float (*)[4])buffer, size );
+ }
}
@@ -298,7 +310,7 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
* a post-transformed vertex.
*
* With this function, drivers that use the draw module should have no reason
- * to track the current vertex shader.
+ * to track the current vertex/geometry shader.
*
* Note that the draw module may sometimes generate vertices with extra
* attributes (such as texcoords for AA lines). The driver can call this
@@ -309,43 +321,59 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
* work for the drivers.
*/
int
-draw_find_vs_output(const struct draw_context *draw,
- uint semantic_name, uint semantic_index)
+draw_find_shader_output(const struct draw_context *draw,
+ uint semantic_name, uint semantic_index)
{
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ const struct draw_geometry_shader *gs = draw->gs.geometry_shader;
uint i;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == semantic_name &&
- vs->info.output_semantic_index[i] == semantic_index)
+ const struct tgsi_shader_info *info = &vs->info;
+
+ if (gs)
+ info = &gs->info;
+
+ for (i = 0; i < info->num_outputs; i++) {
+ if (info->output_semantic_name[i] == semantic_name &&
+ info->output_semantic_index[i] == semantic_index)
return i;
}
/* XXX there may be more than one extra vertex attrib.
* For example, simulated gl_FragCoord and gl_PointCoord.
*/
- if (draw->extra_vp_outputs.semantic_name == semantic_name &&
- draw->extra_vp_outputs.semantic_index == semantic_index) {
- return draw->extra_vp_outputs.slot;
+ if (draw->extra_shader_outputs.semantic_name == semantic_name &&
+ draw->extra_shader_outputs.semantic_index == semantic_index) {
+ return draw->extra_shader_outputs.slot;
}
+
return 0;
}
/**
- * Return number of vertex shader outputs.
+ * Return the number of shader outputs.
+ *
+ * If a geometry shader is present, its output count is returned;
+ * otherwise the vertex shader's output count is used.
*/
uint
-draw_num_vs_outputs(const struct draw_context *draw)
+draw_num_shader_outputs(const struct draw_context *draw)
{
uint count = draw->vs.vertex_shader->info.num_outputs;
- if (draw->extra_vp_outputs.slot > 0)
+
+ /* if a geometry shader is present, its outputs go to the
+ * driver, not the vertex shader's */
+ if (draw->gs.geometry_shader)
+ count = draw->gs.geometry_shader->info.num_outputs;
+
+ if (draw->extra_shader_outputs.slot > 0)
count++;
return count;
}
/**
- * Provide TGSI sampler objects for vertex shaders that use texture fetches.
+ * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches.
* This might only be used by software drivers for the time being.
*/
void
@@ -355,6 +383,8 @@ draw_texture_samplers(struct draw_context *draw,
{
draw->vs.num_samplers = num_samplers;
draw->vs.samplers = samplers;
+ draw->gs.num_samplers = num_samplers;
+ draw->gs.samplers = samplers;
}
@@ -366,13 +396,6 @@ void draw_set_render( struct draw_context *draw,
draw->render = render;
}
-void draw_set_edgeflags( struct draw_context *draw,
- const unsigned *edgeflag )
-{
- draw->pt.user.edgeflag = edgeflag;
-}
-
-
/**
@@ -428,3 +451,18 @@ void draw_do_flush( struct draw_context *draw, unsigned flags )
draw->flushing = FALSE;
}
}
+
+
+/**
+ * Return the output count of the last enabled shader stage: the bound
+ * geometry shader's count if there is one, else the vertex shader's.
+ */
+int draw_current_shader_outputs(struct draw_context *draw)
+{
+   int num_outputs = draw->vs.num_vs_outputs;
+
+   if (draw->gs.geometry_shader)
+      num_outputs = draw->gs.num_gs_outputs;
+
+   return num_outputs;
+}
+
+/**
+ * Return the position output slot of the last enabled shader stage: the
+ * bound geometry shader's slot if there is one, else the vertex shader's.
+ */
+int draw_current_shader_position_output(struct draw_context *draw)
+{
+   int position = draw->vs.position_output;
+
+   if (draw->gs.geometry_shader)
+      position = draw->gs.position_output;
+
+   return position;
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index d529e4e9a2..b716209df2 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -45,6 +45,7 @@ struct pipe_context;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
+struct draw_geometry_shader;
struct tgsi_sampler;
@@ -85,11 +86,11 @@ draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe
int
-draw_find_vs_output(const struct draw_context *draw,
- uint semantic_name, uint semantic_index);
+draw_find_shader_output(const struct draw_context *draw,
+ uint semantic_name, uint semantic_index);
uint
-draw_num_vs_outputs(const struct draw_context *draw);
+draw_num_shader_outputs(const struct draw_context *draw);
void
@@ -112,6 +113,17 @@ void draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs);
+/*
+ * Geometry shader functions
+ */
+
+/** Create a draw-module geometry shader from the given shader state. */
+struct draw_geometry_shader *
+draw_create_geometry_shader(struct draw_context *draw,
+                            const struct pipe_shader_state *shader);
+
+/** Make \p dgs the current geometry shader; NULL disables the stage. */
+void draw_bind_geometry_shader(struct draw_context *draw,
+                               struct draw_geometry_shader *dgs);
+
+/** Destroy a shader returned by draw_create_geometry_shader(). */
+void draw_delete_geometry_shader(struct draw_context *draw,
+                                 struct draw_geometry_shader *dgs);
+
/*
* Vertex data functions
@@ -140,12 +152,10 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
void draw_set_mapped_constant_buffer(struct draw_context *draw,
+ unsigned shader_type,
const void *buffer,
unsigned size );
-void draw_set_edgeflags( struct draw_context *draw,
- const unsigned *edgeflag );
-
/***********************************************************************
* draw_prim.c
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
new file mode 100644
index 0000000000..5db2e75542
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -0,0 +1,338 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMWare Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "draw_gs.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#define MAX_PRIM_VERTICES 6
+/* fixme: move it from here */
+#define MAX_PRIMITIVES 64
+
+/**
+ * Set up the geometry shader state of a draw context.
+ *
+ * Creates the TGSI interpreter used for geometry shaders and gives it a
+ * zero-filled, 16-byte aligned Primitives array with room for
+ * MAX_PRIMITIVES entries.
+ *
+ * \return TRUE on success, FALSE if either allocation failed
+ */
+boolean
+draw_gs_init( struct draw_context *draw )
+{
+   const size_t prim_bytes = MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector);
+   struct tgsi_exec_machine *machine = tgsi_exec_machine_create();
+
+   draw->gs.machine = machine;
+   if (!machine)
+      return FALSE;
+
+   machine->Primitives = align_malloc(prim_bytes, 16);
+   if (!machine->Primitives)
+      return FALSE;
+
+   memset(machine->Primitives, 0, prim_bytes);
+
+   return TRUE;
+}
+
+/**
+ * Set the constant buffer for the geometry shader.
+ *
+ * Currently a stub: the body is empty, so none of the arguments are used
+ * and nothing is stored.
+ */
+void draw_gs_set_constants( struct draw_context *draw,
+ const float (*constants)[4],
+ unsigned size )
+{
+}
+
+
+/**
+ * Create the draw module's private geometry shader object.
+ *
+ * The TGSI tokens in \p state are duplicated, so the new object does not
+ * keep a reference to the caller's token array.  The shader is scanned and
+ * its GS_INPUT_PRIM / GS_OUTPUT_PRIM / GS_MAX_VERTICES properties override
+ * the defaults set below when present.
+ *
+ * \param draw   draw context whose geometry shader interpreter will be used
+ * \param state  shader state holding the TGSI tokens
+ * \return the new shader, or NULL if an allocation failed
+ */
+struct draw_geometry_shader *
+draw_create_geometry_shader(struct draw_context *draw,
+                            const struct pipe_shader_state *state)
+{
+   struct draw_geometry_shader *gs;
+   unsigned i;
+
+   gs = CALLOC_STRUCT(draw_geometry_shader);
+   if (!gs)
+      return NULL;
+
+   gs->state = *state;
+   gs->state.tokens = tgsi_dup_tokens(state->tokens);
+   if (!gs->state.tokens) {
+      FREE(gs);
+      return NULL;
+   }
+
+   tgsi_scan_shader(state->tokens, &gs->info);
+
+   /* setup the defaults */
+   gs->input_primitive = PIPE_PRIM_TRIANGLES;
+   gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP;
+   gs->max_output_vertices = 32;
+
+   /* let the properties declared by the shader override the defaults */
+   for (i = 0; i < gs->info.num_properties; ++i) {
+      switch (gs->info.properties[i].name) {
+      case TGSI_PROPERTY_GS_INPUT_PRIM:
+         gs->input_primitive = gs->info.properties[i].data[0];
+         break;
+      case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+         gs->output_primitive = gs->info.properties[i].data[0];
+         break;
+      case TGSI_PROPERTY_GS_MAX_VERTICES:
+         gs->max_output_vertices = gs->info.properties[i].data[0];
+         break;
+      default:
+         break;
+      }
+   }
+
+   gs->machine = draw->gs.machine;
+
+   /* Remember which output carries POSITION[0].  When the shader declares
+    * none, position_output keeps the value left by CALLOC_STRUCT
+    * (presumably 0).
+    */
+   for (i = 0; i < gs->info.num_outputs; i++) {
+      if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+          gs->info.output_semantic_index[i] == 0)
+         gs->position_output = i;
+   }
+
+   return gs;
+}
+
+/**
+ * Make \p dgs the current geometry shader of the draw context.
+ *
+ * Pending primitives are flushed first.  Binding NULL disables the stage
+ * and resets the cached output count; binding a shader caches its output
+ * count and position slot and prepares the interpreter for it.
+ */
+void draw_bind_geometry_shader(struct draw_context *draw,
+                               struct draw_geometry_shader *dgs)
+{
+   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+
+   draw->gs.geometry_shader = dgs;
+
+   if (!dgs) {
+      draw->gs.num_gs_outputs = 0;
+      return;
+   }
+
+   draw->gs.num_gs_outputs = dgs->info.num_outputs;
+   draw->gs.position_output = dgs->position_output;
+   draw_geometry_shader_prepare(dgs, draw);
+}
+
+/**
+ * Destroy a geometry shader created by draw_create_geometry_shader().
+ *
+ * Releases both the private copy of the TGSI tokens made at creation time
+ * and the shader object itself.  A NULL shader is ignored.
+ *
+ * \param draw  the draw context (not used)
+ * \param dgs   shader to destroy, may be NULL
+ */
+void draw_delete_geometry_shader(struct draw_context *draw,
+                                 struct draw_geometry_shader *dgs)
+{
+   if (!dgs)
+      return;
+
+   /* state.tokens was allocated by tgsi_dup_tokens(); freeing only the
+    * struct leaked it on every shader deletion.
+    * NOTE(review): draw_geometry_shader_prepare() compares machine->Tokens
+    * by address -- confirm a stale pointer to these tokens cannot cause a
+    * later bind to be skipped.
+    */
+   FREE((void *) dgs->state.tokens);
+   FREE(dgs);
+}
+
+/**
+ * Return how many vertices make up one geometry shader input primitive of
+ * type \p prim.  Unknown types assert and yield 0.
+ */
+static INLINE int num_vertices_for_prim(int prim)
+{
+   int count = 0;
+
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      count = 1;
+      break;
+   case PIPE_PRIM_LINES:
+   case PIPE_PRIM_LINE_LOOP:
+   case PIPE_PRIM_LINE_STRIP:
+      count = 2;
+      break;
+   case PIPE_PRIM_TRIANGLES:
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_TRIANGLE_FAN:
+      count = 3;
+      break;
+   case PIPE_PRIM_LINES_ADJACENCY:
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      count = 4;
+      break;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      count = 6;
+      break;
+   default:
+      assert(!"Bad geometry shader input");
+      break;
+   }
+
+   return count;
+}
+/**
+ * Load the vertices of a run of input primitives into the TGSI machine's
+ * input registers.
+ *
+ * For every shader input slot:
+ *  - a TGSI_SEMANTIC_PRIMID input receives the primitive number
+ *    (start_primitive + j) in all four channels of machine->Inputs[idx],
+ *    for each primitive j;
+ *  - any other input is copied from attribute vs_slot of vertex k of
+ *    primitive j into
+ *    machine->Inputs[k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot].
+ * The primitive index j selects the lane (.f[j]) inside each channel.
+ *
+ * \param shader               supplies the machine, the input primitive type
+ *                             and the scanned input semantics
+ * \param start_primitive      number of the first primitive (used for PRIMID)
+ * \param num_primitives       how many primitives to load
+ * \param input_ptr            first vertex of the first primitive
+ * \param input_vertex_stride  distance in bytes between consecutive vertices
+ * \param inputs_from_vs       not referenced by this function
+ */
+static void draw_fetch_geometry_input(struct draw_geometry_shader *shader,
+ int start_primitive,
+ int num_primitives,
+ const float (*input_ptr)[4],
+ unsigned input_vertex_stride,
+ unsigned inputs_from_vs)
+{
+ struct tgsi_exec_machine *machine = shader->machine;
+ unsigned slot, vs_slot, k, j;
+ unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
+ int idx = 0; /* running register index; only the PRIMID path indexes Inputs with it */
+
+ for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) {
+ /*debug_printf("Slot = %d (semantic = %d)\n", slot,
+ shader->info.input_semantic_name[slot]);*/
+ if (shader->info.input_semantic_name[slot] ==
+ TGSI_SEMANTIC_PRIMID) {
+ for (j = 0; j < num_primitives; ++j) {
+ machine->Inputs[idx].xyzw[0].f[j] = (float)start_primitive + j;
+ machine->Inputs[idx].xyzw[1].f[j] = (float)start_primitive + j;
+ machine->Inputs[idx].xyzw[2].f[j] = (float)start_primitive + j;
+ machine->Inputs[idx].xyzw[3].f[j] = (float)start_primitive + j;
+ }
+ ++idx;
+ } else {
+ for (j = 0; j < num_primitives; ++j) {
+ int vidx = idx; /* overwritten below before it is first used */
+ const float (*prim_ptr)[4];
+ /*debug_printf(" %d) Prim (num_verts = %d)\n", start_primitive + j,
+ num_vertices);*/
+ prim_ptr = (const float (*)[4])(
+ (const char *)input_ptr +
+ (j * num_vertices * input_vertex_stride));
+
+ for (k = 0; k < num_vertices; ++k, ++vidx) { /* ++vidx has no effect: vidx is recomputed each iteration */
+ const float (*input)[4];
+ input = (const float (*)[4])(
+ (const char *)prim_ptr + (k * input_vertex_stride));
+ vidx = k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; /* one block of registers per vertex, indexed by slot */
+ /*debug_printf("\t%d)(%d) Input vert:\n", vidx, k);*/
+#if 1
+ assert(!util_is_inf_or_nan(input[vs_slot][0]));
+ assert(!util_is_inf_or_nan(input[vs_slot][1]));
+ assert(!util_is_inf_or_nan(input[vs_slot][2]));
+ assert(!util_is_inf_or_nan(input[vs_slot][3]));
+#endif
+ machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0];
+ machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1];
+ machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2];
+ machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3];
+#if 0
+ debug_printf("\t\t%d %f %f %f %f\n", slot,
+ machine->Inputs[vidx].xyzw[0].f[j],
+ machine->Inputs[vidx].xyzw[1].f[j],
+ machine->Inputs[vidx].xyzw[2].f[j],
+ machine->Inputs[vidx].xyzw[3].f[j]);
+#endif
+ }
+ }
+ ++vs_slot; /* only non-PRIMID inputs consume a vertex attribute */
+ idx += num_vertices;
+ }
+ }
+}
+/**
+ * Copy the vertices produced by the geometry shader out of the TGSI machine.
+ *
+ * For each of the num_primitives lanes, machine->Primitives[0] vertices are
+ * read; every vertex consists of shader->info.num_outputs attributes taken
+ * from consecutive machine->Outputs registers.  After each vertex the local
+ * destination pointer is advanced by vertex_size bytes; \p output is passed
+ * by value, so the caller's pointer is not changed.
+ *
+ * \param shader          geometry shader whose machine holds the results
+ * \param num_primitives  number of lanes (prim_idx) to read back
+ * \param output          destination of the first vertex
+ * \param vertex_size     size in bytes of one destination vertex
+ */
+static INLINE void
+draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
+ int num_primitives,
+ float (*output)[4],
+ unsigned vertex_size)
+{
+ struct tgsi_exec_machine *machine = shader->machine;
+ unsigned prim_idx, j, slot;
+
+ /* Unswizzle all output results.
+ */
+ /* FIXME: handle all the primitives produced by the gs, not just
+ * the first one
+ unsigned prim_count =
+ mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/
+ for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
+ unsigned num_verts_per_prim = machine->Primitives[0]; /* entry 0 is used for every prim_idx, see FIXME above */
+ for (j = 0; j < num_verts_per_prim; j++) {
+ int idx = (prim_idx * num_verts_per_prim + j) *
+ shader->info.num_outputs;
+#ifdef DEBUG_OUTPUTS
+ debug_printf("%d) Output vert:\n", idx);
+#endif
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx];
+ output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx];
+ output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx];
+ output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx];
+#ifdef DEBUG_OUTPUTS
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ output[slot][0],
+ output[slot][1],
+ output[slot][2],
+ output[slot][3]);
+#endif
+ debug_assert(!util_is_inf_or_nan(output[slot][0])); /* only the x component is checked */
+ }
+ output = (float (*)[4])((char *)output + vertex_size);
+ }
+ }
+}
+
+/**
+ * Run the geometry shader over a list of input vertices.
+ *
+ * The \p count vertices are grouped into primitives of the shader's input
+ * primitive type.  Each primitive is loaded into the TGSI machine on its
+ * own, the interpreter is run with only the first lane enabled, and the
+ * emitted vertices are copied to \p output.
+ *
+ * \param shader        geometry shader to execute
+ * \param input         first input vertex
+ * \param output        destination for the emitted vertices
+ * \param constants     constant buffer handed to the interpreter
+ * \param count         number of input vertices
+ * \param input_stride  distance in bytes between consecutive input vertices
+ * \param vertex_size   size in bytes of one output vertex
+ */
+void draw_geometry_shader_run(struct draw_geometry_shader *shader,
+                              const float (*input)[4],
+                              float (*output)[4],
+                              const float (*constants)[4],
+                              unsigned count,
+                              unsigned input_stride,
+                              unsigned vertex_size)
+{
+   struct tgsi_exec_machine *machine = shader->machine;
+   unsigned int i;
+   unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
+   /* num_vertices_for_prim() yields 0 for an unknown primitive type; avoid
+    * dividing by it and simply process nothing in that case.
+    */
+   unsigned num_primitives = num_vertices ? count / num_vertices : 0;
+   unsigned inputs_from_vs = 0;
+
+   machine->Consts = constants;
+
+   for (i = 0; i < shader->info.num_inputs; ++i) {
+      if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID)
+         ++inputs_from_vs;
+   }
+
+   for (i = 0; i < num_primitives; ++i) {
+      unsigned int max_primitives = 1;
+      /* draw_fetch_geometry_input() reads starting at the pointer it is
+       * given, so step to the first vertex of primitive i here; passing
+       * the unmodified base pointer reloaded primitive 0 every time.
+       */
+      const float (*prim_input)[4] = (const float (*)[4])
+         ((const char *)input + i * num_vertices * input_stride);
+
+      draw_fetch_geometry_input(shader, i, max_primitives, prim_input,
+                                input_stride, inputs_from_vs);
+
+      tgsi_set_exec_mask(machine,
+                         1,
+                         max_primitives > 1,
+                         max_primitives > 2,
+                         max_primitives > 3);
+
+      /* run interpreter */
+      tgsi_exec_machine_run(machine);
+
+      /* NOTE(review): output is not advanced between primitives, so each
+       * iteration writes over the vertices of the previous one.  Advancing
+       * it needs the capacity of the caller's buffer, which is not visible
+       * here -- confirm against the caller before changing.
+       */
+      draw_geometry_fetch_outputs(shader, max_primitives,
+                                  output, vertex_size);
+   }
+}
+
+/**
+ * Free a geometry shader together with its token array.
+ */
+void draw_geometry_shader_delete(struct draw_geometry_shader *shader)
+{
+   void *tokens = (void*) shader->state.tokens;
+
+   FREE(tokens);
+   FREE(shader);
+}
+
+/**
+ * Make sure the interpreter is set up to run \p shader.
+ *
+ * The shader's tokens are bound to its machine, along with the draw
+ * context's geometry shader samplers, unless the machine already refers to
+ * the same token array.
+ */
+void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
+                                  struct draw_context *draw)
+{
+   struct tgsi_exec_machine *machine = shader->machine;
+
+   /* same token pointer: the machine is already bound to this shader */
+   if (machine->Tokens == shader->state.tokens)
+      return;
+
+   tgsi_exec_machine_bind_shader(machine,
+                                 shader->state.tokens,
+                                 draw->gs.num_samplers,
+                                 draw->gs.samplers);
+}
diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h
new file mode 100644
index 0000000000..d6a97d9c4e
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -0,0 +1,76 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMWare Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRAW_GS_H
+#define DRAW_GS_H
+
+#include "draw_context.h"
+#include "draw_private.h"
+
+
+#define MAX_TGSI_PRIMITIVES 4
+
+struct draw_context;
+
+/**
+ * Private version of the compiled geometry shader.
+ *
+ * Filled in by draw_create_geometry_shader() in draw_gs.c, which
+ * duplicates the TGSI tokens, scans them into 'info' and reads the
+ * primitive types and vertex limit from the shader's properties.
+ */
+struct draw_geometry_shader {
+ struct draw_context *draw; /* NOTE(review): not assigned by draw_create_geometry_shader() */
+
+ struct tgsi_exec_machine *machine; /* interpreter taken from draw->gs.machine at creation */
+
+ /* This member will disappear shortly:*/
+ struct pipe_shader_state state; /* copy of the creation state; tokens is a private duplicate */
+
+ struct tgsi_shader_info info; /* result of tgsi_scan_shader() on the tokens */
+ unsigned position_output; /* index of the output with semantic POSITION, index 0 */
+
+ unsigned max_output_vertices; /* GS_MAX_VERTICES property, 32 if absent */
+ unsigned input_primitive; /* GS_INPUT_PRIM property, PIPE_PRIM_TRIANGLES if absent */
+ unsigned output_primitive; /* GS_OUTPUT_PRIM property, PIPE_PRIM_TRIANGLE_STRIP if absent */
+
+ /* Extracted from shader:
+ */
+ const float (*immediates)[4]; /* NOTE(review): not referenced in draw_gs.c as shown */
+};
+/**
+ * Run the shader over 'count' input vertices spaced 'input_stride' bytes
+ * apart and copy the emitted vertices to 'output'.  'output_stride' is the
+ * byte size of one output vertex (named vertex_size in the definition).
+ */
+void draw_geometry_shader_run(struct draw_geometry_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride);
+/** Bind the shader's tokens to its interpreter unless they are already bound. */
+void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
+ struct draw_context *draw);
+/** Free the shader's token array and the shader itself. */
+void draw_geometry_shader_delete(struct draw_geometry_shader *shader);
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 14375426ed..4585dcdb48 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -660,13 +660,13 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
}
/* update vertex attrib info */
- aaline->tex_slot = draw->vs.num_vs_outputs;
- aaline->pos_slot = draw->vs.position_output;
+ aaline->tex_slot = draw_current_shader_outputs(draw);
+ aaline->pos_slot = draw_current_shader_position_output(draw);;
/* advertise the extra post-transformed vertex attribute */
- draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib;
- draw->extra_vp_outputs.slot = aaline->tex_slot;
+ draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+ draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib;
+ draw->extra_shader_outputs.slot = aaline->tex_slot;
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -707,7 +707,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
aaline->state.texture);
draw->suspend_flushing = FALSE;
- draw->extra_vp_outputs.slot = 0;
+ draw->extra_shader_outputs.slot = 0;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 75130a8fb0..d86717e518 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -687,14 +687,14 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
bind_aapoint_fragment_shader(aapoint);
/* update vertex attrib info */
- aapoint->tex_slot = draw->vs.num_vs_outputs;
+ aapoint->tex_slot = draw_current_shader_outputs(draw);
assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
- aapoint->pos_slot = draw->vs.position_output;
+ aapoint->pos_slot = draw_current_shader_position_output(draw);
- draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
- draw->extra_vp_outputs.slot = aapoint->tex_slot;
+ draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+ draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib;
+ draw->extra_shader_outputs.slot = aapoint->tex_slot;
/* find psize slot in post-transform vertex */
aapoint->psize_slot = -1;
@@ -731,7 +731,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
draw->suspend_flushing = FALSE;
- draw->extra_vp_outputs.slot = 0;
+ draw->extra_shader_outputs.slot = 0;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 0670268a19..205cda5eab 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -114,8 +114,8 @@ static void interp( const struct clipper *clip,
const struct vertex_header *out,
const struct vertex_header *in )
{
- const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs;
- const unsigned pos_attr = clip->stage.draw->vs.position_output;
+ const unsigned nr_attrs = draw_current_shader_outputs(clip->stage.draw);
+ const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw);
unsigned j;
/* Vertex header.
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index 0a70483858..11b39db599 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -55,7 +55,7 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
static void cull_tri( struct draw_stage *stage,
struct prim_header *header )
{
- const unsigned pos = stage->draw->vs.position_output;
+ const unsigned pos = draw_current_shader_position_output(stage->draw);
/* Window coords: */
const float *v0 = header->v[0]->data[pos];
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index 40798a5d6e..e829492423 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -63,7 +63,7 @@ static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
static void do_offset_tri( struct draw_stage *stage,
struct prim_header *header )
{
- const unsigned pos = stage->draw->vs.position_output;
+ const unsigned pos = draw_current_shader_position_output(stage->draw);
struct offset_stage *offset = offset_stage(stage);
float inv_det = 1.0f / header->det;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 6e921bac27..70fbab9ea7 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -73,7 +73,8 @@ screen_interp( struct draw_context *draw,
const struct vertex_header *v1 )
{
uint attr;
- for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) {
+ int num_outputs = draw_current_shader_outputs(draw);
+ for (attr = 0; attr < num_outputs; attr++) {
const float *val0 = v0->data[attr];
const float *val1 = v1->data[attr];
float *newv = dst->data[attr];
@@ -121,7 +122,7 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
struct stipple_stage *stipple = stipple_stage(stage);
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
- const unsigned pos = stage->draw->vs.position_output;
+ const unsigned pos = draw_current_shader_position_output(stage->draw);
const float *pos0 = v0->data[pos];
const float *pos1 = v1->data[pos];
float start = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index f32cbef983..3073c87082 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -59,7 +59,7 @@ static void wideline_line( struct draw_stage *stage,
struct prim_header *header )
{
/*const struct wideline_stage *wide = wideline_stage(stage);*/
- const unsigned pos = stage->draw->vs.position_output;
+ const unsigned pos = draw_current_shader_position_output(stage->draw);
const float half_width = 0.5f * stage->draw->rasterizer->line_width;
struct prim_header tri;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 7d76a7dbf3..8dc50c0ab4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -112,7 +112,7 @@ static void set_texcoords(const struct widepoint_stage *wide,
if (wide->point_coord_fs_input >= 0) {
/* put gl_PointCoord into the extra vertex slot */
- uint slot = wide->stage.draw->extra_vp_outputs.slot;
+ uint slot = wide->stage.draw->extra_shader_outputs.slot;
v->data[slot][0] = tc[0];
v->data[slot][1] = tc[1];
v->data[slot][2] = 0.0F;
@@ -130,7 +130,7 @@ static void widepoint_point( struct draw_stage *stage,
struct prim_header *header )
{
const struct widepoint_stage *wide = widepoint_stage(stage);
- const unsigned pos = stage->draw->vs.position_output;
+ const unsigned pos = draw_current_shader_position_output(stage->draw);
const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
float half_size;
float left_adj, right_adj, bot_adj, top_adj;
@@ -257,13 +257,13 @@ static void widepoint_first_point( struct draw_stage *stage,
wide->point_coord_fs_input = find_pntc_input_attrib(draw);
/* setup extra vp output (point coord implemented as a texcoord) */
- draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_vp_outputs.semantic_index = 0;
- draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs;
+ draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+ draw->extra_shader_outputs.semantic_index = 0;
+ draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw);
}
else {
wide->point_coord_fs_input = -1;
- draw->extra_vp_outputs.slot = 0;
+ draw->extra_shader_outputs.slot = 0;
}
wide->psize_slot = -1;
@@ -287,7 +287,7 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags )
{
stage->point = widepoint_first_point;
stage->next->flush( stage->next, flags );
- stage->draw->extra_vp_outputs.slot = 0;
+ stage->draw->extra_shader_outputs.slot = 0;
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 41fcb16a0a..e49041556b 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -142,8 +142,6 @@ struct draw_context
/* user-space vertex data, buffers */
struct {
- const unsigned *edgeflag;
-
/** vertex element/index buffer (ex: glDrawElements) */
const void *elts;
/** bytes per index (0, 1, 2 or 4) */
@@ -154,8 +152,9 @@ struct draw_context
/** vertex arrays */
const void *vbuffer[PIPE_MAX_ATTRIBS];
- /** constant buffer (for vertex shader) */
- const void *constants;
+ /** constant buffer (for vertex/geometry shader) */
+ const void *vs_constants;
+ const void *gs_constants;
} user;
boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
@@ -184,6 +183,7 @@ struct draw_context
struct draw_vertex_shader *vertex_shader;
uint num_vs_outputs; /**< convenience, from vertex_shader */
uint position_output;
+ uint edgeflag_output;
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine *machine;
@@ -212,6 +212,18 @@ struct draw_context
struct translate_cache *emit_cache;
} vs;
+ struct {
+ struct draw_geometry_shader *geometry_shader;
+ uint num_gs_outputs; /**< convenience, from geometry_shader */
+ uint position_output;
+
+ /** TGSI program interpreter runtime state */
+ struct tgsi_exec_machine *machine;
+
+ uint num_samplers;
+ struct tgsi_sampler **samplers;
+ } gs;
+
/* Clip derived state:
*/
float plane[12][4];
@@ -223,7 +235,7 @@ struct draw_context
uint semantic_name;
uint semantic_index;
int slot;
- } extra_vp_outputs;
+ } extra_shader_outputs;
unsigned reduced_prim;
@@ -246,6 +258,19 @@ void draw_vs_set_constants( struct draw_context *,
+/*******************************************************************************
+ * Geometry shading code:
+ */
+boolean draw_gs_init( struct draw_context *draw );
+void draw_gs_set_constants( struct draw_context *,
+ const float (*constants)[4],
+ unsigned size );
+
+/*******************************************************************************
+ * Common shading code:
+ */
+int draw_current_shader_outputs(struct draw_context *draw);
+int draw_current_shader_position_output(struct draw_context *draw);
/*******************************************************************************
* Vertex processing (was passthrough) code:
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 4865a2d854..2801dbafe4 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -314,12 +314,3 @@ draw_arrays(struct draw_context *draw, unsigned prim,
/* drawing done here: */
draw_pt_arrays(draw, prim, start, count);
}
-
-boolean draw_pt_get_edgeflag( struct draw_context *draw,
- unsigned idx )
-{
- if (draw->pt.user.edgeflag)
- return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0;
- else
- return 1;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 7a17a9fb6b..20edf7a227 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -149,11 +149,6 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
-/* More helpers:
- */
-boolean draw_pt_get_edgeflag( struct draw_context *draw,
- unsigned idx );
-
/*******************************************************************************
* HW vertex emit:
@@ -217,7 +212,8 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
boolean bypass_clipping,
boolean bypass_viewport,
- boolean opengl );
+ boolean opengl,
+ boolean need_edgeflags );
struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 65c3a34c34..305bfef435 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -42,11 +42,11 @@ struct pt_fetch {
struct translate *translate;
unsigned vertex_size;
- boolean need_edgeflags;
struct translate_cache *cache;
};
+
/* Perform the fetch from API vertex elements & vertex buffers, to a
* contiguous set of float[4] attributes as required for the
* vertex_shader->run_linear() method.
@@ -120,7 +120,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
fetch->translate = translate_cache_find(fetch->cache, &key);
{
- static struct vertex_header vh = { 0, 1, 0, UNDEFINED_VERTEX_ID, { .0f, .0f, .0f, .0f } };
+ static struct vertex_header vh = { 0,
+ 1,
+ 0,
+ UNDEFINED_VERTEX_ID,
+ { .0f, .0f, .0f, .0f } };
+
fetch->translate->set_buffer(fetch->translate,
draw->pt.nr_vertex_buffers,
&vh,
@@ -128,9 +133,6 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
}
}
- fetch->need_edgeflags = ((draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) &&
- draw->pt.user.edgeflag);
}
@@ -158,17 +160,6 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
count,
verts );
- /* Edgeflags are hard to fit into a translate program, populate
- * them separately if required. In the setup above they are
- * defaulted to one, so only need this if there is reason to change
- * that default:
- */
- if (fetch->need_edgeflags) {
- for (i = 0; i < count; i++) {
- struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size);
- vh->edgeflag = draw_pt_get_edgeflag( draw, elts[i] );
- }
- }
}
@@ -193,18 +184,6 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
start,
count,
verts );
-
- /* Edgeflags are hard to fit into a translate program, populate
- * them separately if required. In the setup above they are
- * defaulted to one, so only need this if there is reason to change
- * that default:
- */
- if (fetch->need_edgeflags) {
- for (i = 0; i < count; i++) {
- struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size);
- vh->edgeflag = draw_pt_get_edgeflag( draw, start + i );
- }
- }
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index df6c265b7e..1a9df4cac5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -32,6 +32,7 @@
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
#include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
#include "translate/translate.h"
@@ -85,9 +86,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
draw_pt_post_vs_prepare( fpme->post_vs,
(boolean)draw->bypass_clipping,
(boolean)(draw->identity_viewport ||
- draw->rasterizer->bypass_vs_clip_and_viewport),
- (boolean)draw->rasterizer->gl_rasterization_rules );
-
+ draw->rasterizer->bypass_vs_clip_and_viewport),
+ (boolean)draw->rasterizer->gl_rasterization_rules,
+ (draw->vs.edgeflag_output ? true : false) );
if (!(opt & PT_PIPELINE)) {
draw_pt_emit_prepare( fpme->emit,
@@ -119,7 +120,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
+ struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align( fetch_count, 4 );
@@ -147,13 +149,21 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
*/
if (opt & PT_SHADE)
{
- shader->run_linear(shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- (const float (*)[4])draw->pt.user.constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
+ vshader->run_linear(vshader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.vs_constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ if (gshader)
+ draw_geometry_shader_run(gshader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.gs_constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
}
if (draw_pt_post_vs_run( fpme->post_vs,
@@ -196,6 +206,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align( count, 4 );
@@ -226,10 +237,19 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
shader->run_linear(shader,
(const float (*)[4])pipeline_verts->data,
( float (*)[4])pipeline_verts->data,
- (const float (*)[4])draw->pt.user.constants,
+ (const float (*)[4])draw->pt.user.vs_constants,
count,
fpme->vertex_size,
fpme->vertex_size);
+
+ if (geometry_shader)
+ draw_geometry_shader_run(geometry_shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.gs_constants,
+ count,
+ fpme->vertex_size,
+ fpme->vertex_size);
}
if (draw_pt_post_vs_run( fpme->post_vs,
@@ -270,6 +290,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align( count, 4 );
@@ -296,10 +317,19 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
shader->run_linear(shader,
(const float (*)[4])pipeline_verts->data,
( float (*)[4])pipeline_verts->data,
- (const float (*)[4])draw->pt.user.constants,
+ (const float (*)[4])draw->pt.user.vs_constants,
count,
fpme->vertex_size,
fpme->vertex_size);
+
+ if (geometry_shader)
+ draw_geometry_shader_run(geometry_shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.gs_constants,
+ count,
+ fpme->vertex_size,
+ fpme->vertex_size);
}
if (draw_pt_post_vs_run( fpme->post_vs,
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 6c1cb48e8b..55151823a1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -100,7 +100,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
struct vertex_header *out = vertices;
const float *scale = pvs->draw->viewport.scale;
const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = pvs->draw->vs.position_output;
+ const unsigned pos = draw_current_shader_position_output(pvs->draw);
unsigned clipped = 0;
unsigned j;
@@ -147,6 +147,39 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
+/* Variant of post_vs_cliptest_viewport_gl() that also handles edgeflags:
+ * runs the plain version, then mirrors the VS edgeflag output per vertex. */
+static boolean
+post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
+                                      struct vertex_header *vertices,
+                                      unsigned count,
+                                      unsigned stride )
+{
+   /* Initialisers run in order: cliptest/viewport first, slot lookup after. */
+   boolean run_pipeline = post_vs_cliptest_viewport_gl( pvs, vertices, count, stride);
+   const int slot = pvs->draw->vs.edgeflag_output;
+   char *cursor = (char *)vertices;
+   unsigned i;
+
+   /* Slot 0 is treated as "no edgeflag output": headers stay untouched. */
+   if (!slot)
+      return run_pipeline;
+
+   for (i = 0; i < count; i++, cursor += stride) {
+      struct vertex_header *vh = (struct vertex_header *)cursor;
+
+      /* Only an exact 1.0f in the first component sets the flag. */
+      vh->edgeflag = (vh->data[slot][0] == 1.0f);
+
+      /* A cleared edgeflag also raises the returned "pipeline needed" flag. */
+      run_pipeline |= !vh->edgeflag;
+   }
+   return run_pipeline;
+}
+
+
+
+
/* If bypass_clipping is set, skip cliptest and rhw divide.
*/
static boolean post_vs_viewport( struct pt_post_vs *pvs,
@@ -157,7 +190,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs,
struct vertex_header *out = vertices;
const float *scale = pvs->draw->viewport.scale;
const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = pvs->draw->vs.position_output;
+ const unsigned pos = draw_current_shader_position_output(pvs->draw);
unsigned j;
if (0) debug_printf("%s\n", __FUNCTION__);
@@ -201,17 +234,29 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
boolean bypass_clipping,
boolean bypass_viewport,
- boolean opengl )
+ boolean opengl,
+ boolean need_edgeflags )
{
- if (bypass_clipping) {
- if (bypass_viewport)
- pvs->run = post_vs_none;
- else
- pvs->run = post_vs_viewport;
+ if (!need_edgeflags) { /* no edgeflag copy: pick pvs->run from the bypass flags alone */
+ if (bypass_clipping) {
+ if (bypass_viewport)
+ pvs->run = post_vs_none;
+ else
+ pvs->run = post_vs_viewport;
+ }
+ else {
+ /* 'opengl' is not consulted: the GL cliptest is always selected */
+ pvs->run = post_vs_cliptest_viewport_gl;
+ }
}
else {
- /* if (opengl) */
- pvs->run = post_vs_cliptest_viewport_gl;
+ /* Copying edgeflags into the vertex header is only expected when
+ * the primitive pipeline runs, so both bypass flags must be false
+ * here; the edgeflag variant runs post_vs_cliptest_viewport_gl().
+ */
+ assert(!bypass_clipping);
+ assert(!bypass_viewport);
+ pvs->run = post_vs_cliptest_viewport_gl_edgeflag;
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index b61fa29143..17c3b8cec2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -50,16 +50,32 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
*first = 2;
*incr = 1;
break;
+ case PIPE_PRIM_LINES_ADJACENCY:
+ *first = 4;
+ *incr = 4; /* independent primitive: every line consumes 4 vertices */
+ break;
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ *first = 4;
+ *incr = 1; /* each further segment adds one vertex */
+ break;
case PIPE_PRIM_TRIANGLES:
*first = 3;
*incr = 3;
break;
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ *first = 6;
+ *incr = 6; /* independent primitive: every triangle consumes 6 vertices */
+ break;
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_TRIANGLE_FAN:
case PIPE_PRIM_POLYGON:
*first = 3;
*incr = 1;
break;
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ *first = 6;
+ *incr = 2; /* each further triangle adds 2 vertices (corner + adjacent) */
+ break;
case PIPE_PRIM_QUADS:
*first = 4;
*incr = 4;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index 010c7a18a7..f0aec5feba 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -36,6 +36,10 @@ static void FUNC(struct draw_pt_front_end *frontend,
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_QUADS:
case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
for (j = 0; j < count;) {
unsigned remaining = count - j;
unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 790e89ed82..3553689532 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -101,6 +101,9 @@ draw_create_vertex_shader(struct draw_context *draw,
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
vs->info.output_semantic_index[i] == 0)
vs->position_output = i;
+ else if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_EDGEFLAG &&
+ vs->info.output_semantic_index[i] == 0)
+ vs->edgeflag_output = i;
}
}
@@ -120,6 +123,7 @@ draw_bind_vertex_shader(struct draw_context *draw,
draw->vs.vertex_shader = dvs;
draw->vs.num_vs_outputs = dvs->info.num_outputs;
draw->vs.position_output = dvs->position_output;
+ draw->vs.edgeflag_output = dvs->edgeflag_output;
dvs->prepare( dvs, draw );
}
else {
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 89ae158751..e3b807ebd0 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -107,6 +107,7 @@ struct draw_vertex_shader {
struct tgsi_shader_info info;
unsigned position_output;
+ unsigned edgeflag_output;
/* Extracted from shader:
*/
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 7ee567d478..d16692584e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -147,11 +147,12 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants,
count,
temp_vertex_stride,
temp_vertex_stride);
+ /* FIXME: geometry shading? */
if (vsvg->base.key.clip) {
/* not really handling clipping, just do the rhw so we can
@@ -207,7 +208,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants,
count,
temp_vertex_stride,
temp_vertex_stride);
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile
deleted file mode 100644
index 5a96d94ec3..0000000000
--- a/src/gallium/auxiliary/gallivm/Makefile
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*-makefile-*-
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = gallivm
-
-
-GALLIVM_SOURCES = \
- gallivm.cpp \
- gallivm_cpu.cpp \
- instructions.cpp \
- loweringpass.cpp \
- tgsitollvm.cpp \
- storage.cpp \
- storagesoa.cpp \
- instructionssoa.cpp
-
-INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp
-
-CPP_SOURCES = \
- $(GALLIVM_SOURCES)
-
-C_SOURCES =
-ASM_SOURCES =
-
-OBJECTS = $(C_SOURCES:.c=.o) \
- $(CPP_SOURCES:.cpp=.o) \
- $(ASM_SOURCES:.S=.o)
-
-### Include directories
-INCLUDES = \
- -I. \
- -I$(TOP)/src/gallium/drivers \
- -I$(TOP)/src/gallium/auxiliary \
- -I$(TOP)/src/gallium/include \
- -I$(TOP)/src/mesa \
- -I$(TOP)/include
-
-
-##### RULES #####
-
-.c.o:
- $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.cpp.o:
- $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.S.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-##### TARGETS #####
-
-default:: depend symlinks $(LIBNAME)
-
-
-$(LIBNAME): $(OBJECTS) Makefile
- $(TOP)/bin/mklib -o $@ -static $(OBJECTS)
-
-
-depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
- rm -f depend
- touch depend
- $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \
- $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null
-
-
-gallivm_builtins.cpp: llvm_builtins.c
- clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin
- (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
- rm temp1.bin
-
-gallivmsoabuiltins.cpp: soabuiltins.c
- clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin
- (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
- rm temp2.bin
-
-# Emacs tags
-tags:
- etags `find . -name \*.[ch]` `find ../include`
-
-
-# Remove .o and backup files
-clean:
- -rm -f *.o */*.o *~ *.so *~ server/*.o
- -rm -f depend depend.bak
- -rm -f gallivm_builtins.cpp
- -rm -f gallivmsoabuiltins.cpp
-
-symlinks:
-
-
-include depend
diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript
deleted file mode 100644
index c0aa51b90a..0000000000
--- a/src/gallium/auxiliary/gallivm/SConscript
+++ /dev/null
@@ -1,16 +0,0 @@
-Import('*')
-
-gallivm = env.ConvenienceLibrary(
- target = 'gallivm',
- source = [
- 'gallivm.cpp',
- 'gallivm_cpu.cpp',
- 'instructions.cpp',
- 'loweringpass.cpp',
- 'tgsitollvm.cpp',
- 'storage.cpp',
- 'storagesoa.cpp',
- 'instructionssoa.cpp',
- ])
-
-auxiliaries.insert(0, gallivm)
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 5cafe8c3f0..8f7d3b7100 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -552,7 +552,7 @@ translate_instruction(llvm::Module *module,
break;
case TGSI_OPCODE_SHL:
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
break;
case TGSI_OPCODE_AND:
break;
@@ -919,7 +919,7 @@ translate_instructionir(llvm::Module *module,
break;
case TGSI_OPCODE_SHL:
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
break;
case TGSI_OPCODE_AND:
break;
diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile
deleted file mode 100644
index f2ebc3f410..0000000000
--- a/src/gallium/auxiliary/indices/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = indices
-
-C_SOURCES = \
- u_indices_gen.c \
- u_unfilled_gen.c
-
-include ../../Makefile.template
-
-u_indices_gen.c: u_indices_gen.py
- python $< > $@
-
-u_unfilled_gen.c: u_unfilled_gen.py
- python $< > $@
diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript
deleted file mode 100644
index 712e215534..0000000000
--- a/src/gallium/auxiliary/indices/SConscript
+++ /dev/null
@@ -1,28 +0,0 @@
-Import('*')
-
-from sys import executable as python_cmd
-
-env.CodeGenerate(
- target = 'u_indices_gen.c',
- script = 'u_indices_gen.py',
- source = [],
- command = python_cmd + ' $SCRIPT > $TARGET'
-)
-
-env.CodeGenerate(
- target = 'u_unfilled_gen.c',
- script = 'u_unfilled_gen.py',
- source = [],
- command = python_cmd + ' $SCRIPT > $TARGET'
-)
-
-indices = env.ConvenienceLibrary(
- target = 'indices',
- source = [
-# 'u_indices.c',
-# 'u_unfilled_indices.c',
- 'u_indices_gen.c',
- 'u_unfilled_gen.c',
- ])
-
-auxiliaries.insert(0, indices)
diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile
deleted file mode 100644
index 1c00ba8d98..0000000000
--- a/src/gallium/auxiliary/pipebuffer/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = pipebuffer
-
-C_SOURCES = \
- pb_buffer_fenced.c \
- pb_buffer_malloc.c \
- pb_bufmgr_alt.c \
- pb_bufmgr_cache.c \
- pb_bufmgr_debug.c \
- pb_bufmgr_fenced.c \
- pb_bufmgr_mm.c \
- pb_bufmgr_ondemand.c \
- pb_bufmgr_pool.c \
- pb_bufmgr_slab.c \
- pb_validate.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
deleted file mode 100644
index 8e9f06abe4..0000000000
--- a/src/gallium/auxiliary/pipebuffer/SConscript
+++ /dev/null
@@ -1,19 +0,0 @@
-Import('*')
-
-pipebuffer = env.ConvenienceLibrary(
- target = 'pipebuffer',
- source = [
- 'pb_buffer_fenced.c',
- 'pb_buffer_malloc.c',
- 'pb_bufmgr_alt.c',
- 'pb_bufmgr_cache.c',
- 'pb_bufmgr_debug.c',
- 'pb_bufmgr_fenced.c',
- 'pb_bufmgr_mm.c',
- 'pb_bufmgr_ondemand.c',
- 'pb_bufmgr_pool.c',
- 'pb_bufmgr_slab.c',
- 'pb_validate.c',
- ])
-
-auxiliaries.insert(0, pipebuffer)
diff --git a/src/gallium/auxiliary/rbug/Makefile b/src/gallium/auxiliary/rbug/Makefile
deleted file mode 100644
index cd12e8468f..0000000000
--- a/src/gallium/auxiliary/rbug/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = rbug
-
-C_SOURCES = \
- rbug_connection.c \
- rbug_core.c \
- rbug_texture.c \
- rbug_context.c \
- rbug_shader.c \
- rbug_demarshal.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/rbug/SConscript b/src/gallium/auxiliary/rbug/SConscript
deleted file mode 100644
index 4a9afb45d3..0000000000
--- a/src/gallium/auxiliary/rbug/SConscript
+++ /dev/null
@@ -1,14 +0,0 @@
-Import('*')
-
-rbug = env.ConvenienceLibrary(
- target = 'rbug',
- source = [
- 'rbug_core.c',
- 'rbug_shader.c',
- 'rbug_context.c',
- 'rbug_texture.c',
- 'rbug_demarshal.c',
- 'rbug_connection.c',
- ])
-
-auxiliaries.insert(0, rbug)
diff --git a/src/gallium/auxiliary/rbug/rbug_context.h b/src/gallium/auxiliary/rbug/rbug_context.h
index da61c2365b..03126d6b12 100644
--- a/src/gallium/auxiliary/rbug/rbug_context.h
+++ b/src/gallium/auxiliary/rbug/rbug_context.h
@@ -46,7 +46,7 @@ typedef enum
RBUG_BLOCK_BEFORE = 1,
RBUG_BLOCK_AFTER = 2,
RBUG_BLOCK_RULE = 4,
- RBUG_BLOCK_MASK = 7,
+ RBUG_BLOCK_MASK = 7
} rbug_block_t;
struct rbug_proto_context_list
diff --git a/src/gallium/auxiliary/rbug/rbug_proto.h b/src/gallium/auxiliary/rbug/rbug_proto.h
index d273be0166..4f3eb75dc4 100644
--- a/src/gallium/auxiliary/rbug/rbug_proto.h
+++ b/src/gallium/auxiliary/rbug/rbug_proto.h
@@ -65,7 +65,7 @@ enum rbug_opcode
RBUG_OP_SHADER_DISABLE = 770,
RBUG_OP_SHADER_REPLACE = 771,
RBUG_OP_SHADER_LIST_REPLY = -768,
- RBUG_OP_SHADER_INFO_REPLY = -769,
+ RBUG_OP_SHADER_INFO_REPLY = -769
};
/**
diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile
deleted file mode 100644
index ab8ea464c6..0000000000
--- a/src/gallium/auxiliary/rtasm/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = rtasm
-
-C_SOURCES = \
- rtasm_cpu.c \
- rtasm_execmem.c \
- rtasm_x86sse.c \
- rtasm_ppc.c \
- rtasm_ppc_spe.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript
deleted file mode 100644
index eb48368acc..0000000000
--- a/src/gallium/auxiliary/rtasm/SConscript
+++ /dev/null
@@ -1,13 +0,0 @@
-Import('*')
-
-rtasm = env.ConvenienceLibrary(
- target = 'rtasm',
- source = [
- 'rtasm_cpu.c',
- 'rtasm_execmem.c',
- 'rtasm_x86sse.c',
- 'rtasm_ppc.c',
- 'rtasm_ppc_spe.c',
- ])
-
-auxiliaries.insert(0, rtasm)
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index 01811d5011..ffed768f97 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -41,6 +41,12 @@
#define MAP_ANONYMOUS MAP_ANON
#endif
+#if defined(PIPE_OS_WINDOWS)
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
+#endif
+#include <windows.h>
+#endif
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
@@ -118,7 +124,29 @@ rtasm_exec_free(void *addr)
}
-#else /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */
+#elif defined(PIPE_OS_WINDOWS)
+
+
+/*
+ * Windows: allocate read/write/execute pages so that code placed in them
+ * can run under Data Execution Prevention.  Yields NULL on failure.
+ */
+void *
+rtasm_exec_malloc(size_t size)
+{
+   return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+}
+
+
+void
+rtasm_exec_free(void *addr)
+{
+ VirtualFree(addr, 0, MEM_RELEASE); /* releases the whole region; size must be 0 with MEM_RELEASE */
+}
+
+
+#else
+
/*
* Just use regular memory.
@@ -138,4 +166,4 @@ rtasm_exec_free(void *addr)
}
-#endif /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */
+#endif
diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile
deleted file mode 100644
index a7d111b689..0000000000
--- a/src/gallium/auxiliary/sct/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = sct
-
-C_SOURCES = \
- sct.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript
deleted file mode 100644
index 76927d973f..0000000000
--- a/src/gallium/auxiliary/sct/SConscript
+++ /dev/null
@@ -1,9 +0,0 @@
-Import('*')
-
-sct = env.ConvenienceLibrary(
- target = 'sct',
- source = [
- 'sct.c'
- ])
-
-auxiliaries.insert(0, sct)
diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c
deleted file mode 100644
index 722d2b7e66..0000000000
--- a/src/gallium/auxiliary/sct/sct.c
+++ /dev/null
@@ -1,453 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "util/u_memory.h"
-#include "pipe/p_state.h"
-#include "sct.h"
-
-
-struct texture_list
-{
- struct pipe_texture *texture;
- struct texture_list *next;
-};
-
-
-
-#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1)
-
-struct sct_context
-{
- const struct pipe_context *context;
-
- /** surfaces the context is drawing into */
- struct pipe_surface *surfaces[MAX_SURFACES];
-
- /** currently bound textures */
- struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
-
- /** previously bound textures, used but not flushed */
- struct texture_list *textures_used;
-
- boolean needs_flush;
-
- struct sct_context *next;
-};
-
-
-
-struct sct_surface
-{
- const struct pipe_surface *surface;
-
- /** list of contexts drawing to this surface */
- struct sct_context_list *contexts;
-
- struct sct_surface *next;
-};
-
-
-
-/**
- * Find the surface_info for the given pipe_surface
- */
-static struct sct_surface *
-find_surface_info(struct surface_context_tracker *sct,
- const struct pipe_surface *surface)
-{
- struct sct_surface *si;
- for (si = sct->surfaces; si; si = si->next)
- if (si->surface == surface)
- return si;
- return NULL;
-}
-
-
-/**
- * As above, but create new surface_info if surface is new.
- */
-static struct sct_surface *
-find_create_surface_info(struct surface_context_tracker *sct,
- const struct pipe_surface *surface)
-{
- struct sct_surface *si = find_surface_info(sct, surface);
- if (si)
- return si;
-
- /* alloc new */
- si = CALLOC_STRUCT(sct_surface);
- if (si) {
- si->surface = surface;
-
- /* insert at head */
- si->next = sct->surfaces;
- sct->surfaces = si;
- }
-
- return si;
-}
-
-
-/**
- * Find a context_info for the given context.
- */
-static struct sct_context *
-find_context_info(struct surface_context_tracker *sct,
- const struct pipe_context *context)
-{
- struct sct_context *ci;
- for (ci = sct->contexts; ci; ci = ci->next)
- if (ci->context == context)
- return ci;
- return NULL;
-}
-
-
-/**
- * As above, but create new context_info if context is new.
- */
-static struct sct_context *
-find_create_context_info(struct surface_context_tracker *sct,
- const struct pipe_context *context)
-{
- struct sct_context *ci = find_context_info(sct, context);
- if (ci)
- return ci;
-
- /* alloc new */
- ci = CALLOC_STRUCT(sct_context);
- if (ci) {
- ci->context = context;
-
- /* insert at head */
- ci->next = sct->contexts;
- sct->contexts = ci;
- }
-
- return ci;
-}
-
-
-/**
- * Is the context already bound to the surface?
- */
-static boolean
-find_surface_context(const struct sct_surface *si,
- const struct pipe_context *context)
-{
- const struct sct_context_list *cl;
- for (cl = si->contexts; cl; cl = cl->next) {
- if (cl->context == context) {
- return TRUE;
- }
- }
- return FALSE;
-}
-
-
-/**
- * Add a context to the list of contexts associated with a surface.
- */
-static void
-add_context_to_surface(struct sct_surface *si,
- const struct pipe_context *context)
-{
- struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list);
- if (cl) {
- cl->context = context;
- /* insert at head of list of contexts */
- cl->next = si->contexts;
- si->contexts = cl;
- }
-}
-
-
-/**
- * Remove a context from the list of contexts associated with a surface.
- */
-static void
-remove_context_from_surface(struct sct_surface *si,
- const struct pipe_context *context)
-{
- struct sct_context_list *prev = NULL, *curr, *next;
-
- for (curr = si->contexts; curr; curr = next) {
- if (curr->context == context) {
- /* remove */
- if (prev)
- prev->next = curr->next;
- else
- si->contexts = curr->next;
- next = curr->next;
- FREE(curr);
- }
- else {
- prev = curr;
- next = curr->next;
- }
- }
-}
-
-
-/**
- * Unbind context from surface.
- */
-static void
-unbind_context_surface(struct surface_context_tracker *sct,
- struct pipe_context *context,
- struct pipe_surface *surface)
-{
- struct sct_surface *si = find_surface_info(sct, surface);
- if (si) {
- remove_context_from_surface(si, context);
- }
-}
-
-
-/**
- * Bind context to a set of surfaces (color + Z).
- * Like MakeCurrent().
- */
-void
-sct_bind_surfaces(struct surface_context_tracker *sct,
- struct pipe_context *context,
- uint num_surf,
- struct pipe_surface **surfaces)
-{
- struct sct_context *ci = find_create_context_info(sct, context);
- uint i;
-
- if (!ci) {
- return; /* out of memory */
- }
-
- /* unbind currently bound surfaces */
- for (i = 0; i < MAX_SURFACES; i++) {
- if (ci->surfaces[i]) {
- unbind_context_surface(sct, context, ci->surfaces[i]);
- }
- }
-
- /* bind new surfaces */
- for (i = 0; i < num_surf; i++) {
- struct sct_surface *si = find_create_surface_info(sct, surfaces[i]);
- if (!find_surface_context(si, context)) {
- add_context_to_surface(si, context);
- }
- }
-}
-
-
-/**
- * Return list of contexts bound to a surface.
- */
-const struct sct_context_list *
-sct_get_surface_contexts(struct surface_context_tracker *sct,
- const struct pipe_surface *surface)
-{
- const struct sct_surface *si = find_surface_info(sct, surface);
- return si->contexts;
-}
-
-
-
-static boolean
-find_texture(const struct sct_context *ci,
- const struct pipe_texture *texture)
-{
- const struct texture_list *tl;
-
- for (tl = ci->textures_used; tl; tl = tl->next) {
- if (tl->texture == texture) {
- return TRUE;
- }
- }
- return FALSE;
-}
-
-
-/**
- * Add the given texture to the context's list of used textures.
- */
-static void
-add_texture_used(struct sct_context *ci,
- struct pipe_texture *texture)
-{
- if (!find_texture(ci, texture)) {
- /* add to list */
- struct texture_list *tl = CALLOC_STRUCT(texture_list);
- if (tl) {
- pipe_texture_reference(&tl->texture, texture);
- /* insert at head */
- tl->next = ci->textures_used;
- ci->textures_used = tl;
- }
- }
-}
-
-
-/**
- * Bind a texture to a rendering context.
- */
-void
-sct_bind_texture(struct surface_context_tracker *sct,
- struct pipe_context *context,
- uint unit,
- struct pipe_texture *tex)
-{
- struct sct_context *ci = find_context_info(sct, context);
-
- if (ci->textures[unit] != tex) {
- /* put texture on the 'used' list */
- add_texture_used(ci, tex);
- /* bind new */
- pipe_texture_reference(&ci->textures[unit], tex);
- }
-}
-
-
-/**
- * Check if the given texture has been used by the rendering context
- * since the last call to sct_flush_textures().
- */
-boolean
-sct_is_texture_used(struct surface_context_tracker *sct,
- const struct pipe_context *context,
- const struct pipe_texture *texture)
-{
- const struct sct_context *ci = find_context_info(sct, context);
- return find_texture(ci, texture);
-}
-
-
-/**
- * To be called when the image contents of a texture are changed, such
- * as for gl[Copy]TexSubImage().
- * XXX this may not be needed
- */
-void
-sct_update_texture(struct pipe_texture *tex)
-{
-
-}
-
-
-/**
- * When a scene is flushed/rendered we can release the list of
- * used textures.
- */
-void
-sct_flush_textures(struct surface_context_tracker *sct,
- struct pipe_context *context)
-{
- struct sct_context *ci = find_context_info(sct, context);
- struct texture_list *tl, *next;
- uint i;
-
- for (tl = ci->textures_used; tl; tl = next) {
- next = tl->next;
- pipe_texture_reference(&tl->texture, NULL);
- FREE(tl);
- }
- ci->textures_used = NULL;
-
- /* put the currently bound textures on the 'used' list */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- add_texture_used(ci, ci->textures[i]);
- }
-}
-
-
-
-void
-sct_destroy_context(struct surface_context_tracker *sct,
- struct pipe_context *context)
-{
- /* XXX should we require an unbinding first? */
- {
- struct sct_surface *si;
- for (si = sct->surfaces; si; si = si->next) {
- remove_context_from_surface(si, context);
- }
- }
-
- /* remove context from context_info list */
- {
- struct sct_context *ci, *next, *prev = NULL;
- for (ci = sct->contexts; ci; ci = next) {
- next = ci->next;
- if (ci->context == context) {
- if (prev)
- prev->next = ci->next;
- else
- sct->contexts = ci->next;
- FREE(ci);
- }
- else {
- prev = ci;
- }
- }
- }
-
-}
-
-
-void
-sct_destroy_surface(struct surface_context_tracker *sct,
- struct pipe_surface *surface)
-{
- if (1) {
- /* debug/sanity: no context should be bound to surface */
- struct sct_context *ci;
- uint i;
- for (ci = sct->contexts; ci; ci = ci->next) {
- for (i = 0; i < MAX_SURFACES; i++) {
- assert(ci->surfaces[i] != surface);
- }
- }
- }
-
- /* remove surface from sct_surface list */
- {
- struct sct_surface *si, *next, *prev = NULL;
- for (si = sct->surfaces; si; si = next) {
- next = si->next;
- if (si->surface == surface) {
- /* unlink */
- if (prev)
- prev->next = si->next;
- else
- sct->surfaces = si->next;
- FREE(si);
- }
- else {
- prev = si;
- }
- }
- }
-}
diff --git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h
deleted file mode 100644
index cf7c4d3bdf..0000000000
--- a/src/gallium/auxiliary/sct/sct.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Surface/Context Tracking
- *
- * For some drivers, we need to monitor the binding between contexts and
- * surfaces/textures.
- * This code may evolve quite a bit...
- */
-
-
-#ifndef SCT_H
-#define SCT_H
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-struct pipe_context;
-struct pipe_surface;
-
-struct sct_context;
-struct sct_surface;
-
-
-/**
- * Per-device info, basically
- */
-struct surface_context_tracker
-{
- struct sct_context *contexts;
- struct sct_surface *surfaces;
-};
-
-
-
-/**
- * Simple linked list of contexts
- */
-struct sct_context_list
-{
- const struct pipe_context *context;
- struct sct_context_list *next;
-};
-
-
-
-extern void
-sct_bind_surfaces(struct surface_context_tracker *sct,
- struct pipe_context *context,
- uint num_surf,
- struct pipe_surface **surfaces);
-
-
-extern void
-sct_bind_texture(struct surface_context_tracker *sct,
- struct pipe_context *context,
- uint unit,
- struct pipe_texture *texture);
-
-
-extern void
-sct_update_texture(struct pipe_texture *tex);
-
-
-extern boolean
-sct_is_texture_used(struct surface_context_tracker *sct,
- const struct pipe_context *context,
- const struct pipe_texture *texture);
-
-extern void
-sct_flush_textures(struct surface_context_tracker *sct,
- struct pipe_context *context);
-
-
-extern const struct sct_context_list *
-sct_get_surface_contexts(struct surface_context_tracker *sct,
- const struct pipe_surface *surf);
-
-
-extern void
-sct_destroy_context(struct surface_context_tracker *sct,
- struct pipe_context *context);
-
-
-extern void
-sct_destroy_surface(struct surface_context_tracker *sct,
- struct pipe_surface *surface);
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* SCT_H */
diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c
deleted file mode 100644
index 6227f19962..0000000000
--- a/src/gallium/auxiliary/sct/usage.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* surface / context tracking */
-
-
-/*
-
-context A:
- render to texture T
-
-context B:
- texture from T
-
------------------------
-
-flush surface:
- which contexts are bound to the surface?
-
------------------------
-
-glTexSubImage():
- which contexts need to be flushed?
-
- */
-
-
-/*
-
-in MakeCurrent():
-
- call sct_bind_surfaces(context, list of surfaces) to update the
- dependencies between context and surfaces
-
-
-in SurfaceFlush(), or whatever it is in D3D:
-
- call sct_get_surface_contexts(surface) to get a list of contexts
- which are currently bound to the surface.
-
-
-
-in BindTexture():
-
- call sct_bind_texture(context, texture) to indicate that the texture
- is used in the scene.
-
-
-in glTexSubImage() or RenderToTexture():
-
- call sct_is_texture_used(context, texture) to determine if the texture
- has been used in the scene, but the scene's not flushed. If TRUE is
- returned it means the scene has to be rendered/flushed before the contents
- of the texture can be changed.
-
-
-in psb_scene_flush/terminate():
-
- call sct_flush_textures(context) to tell the SCT that the textures which
- were used in the scene can be released.
-
-
-
-*/
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
deleted file mode 100644
index 5f0a580b09..0000000000
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = tgsi
-
-C_SOURCES = \
- tgsi_sanity.c \
- tgsi_build.c \
- tgsi_dump.c \
- tgsi_exec.c \
- tgsi_info.c \
- tgsi_iterate.c \
- tgsi_parse.c \
- tgsi_ppc.c \
- tgsi_scan.c \
- tgsi_sse2.c \
- tgsi_text.c \
- tgsi_transform.c \
- tgsi_ureg.c \
- tgsi_util.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
deleted file mode 100644
index b6bc2924f0..0000000000
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ /dev/null
@@ -1,23 +0,0 @@
-Import('*')
-
-tgsi = env.ConvenienceLibrary(
- target = 'tgsi',
- source = [
- 'tgsi_build.c',
- 'tgsi_dump.c',
- 'tgsi_dump_c.c',
- 'tgsi_exec.c',
- 'tgsi_info.c',
- 'tgsi_iterate.c',
- 'tgsi_parse.c',
- 'tgsi_sanity.c',
- 'tgsi_scan.c',
- 'tgsi_ppc.c',
- 'tgsi_sse2.c',
- 'tgsi_text.c',
- 'tgsi_transform.c',
- 'tgsi_ureg.c',
- 'tgsi_util.c',
- ])
-
-auxiliaries.insert(0, tgsi)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 92903fe57f..de9cbc8630 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -1025,7 +1025,7 @@ tgsi_build_full_property(
size++;
*property = tgsi_build_property(
- TGSI_PROPERTY_GS_INPUT_PRIM,
+ full_prop->Property.PropertyName,
header );
assert( full_prop->Property.NrTokens <= 8 + 1 );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index ba1357697d..e2e5394f86 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -121,12 +121,16 @@ static const char *semantic_names[] =
"PSIZE",
"GENERIC",
"NORMAL",
- "FACE"
+ "FACE",
+ "EDGEFLAG",
+ "PRIM_ID"
};
static const char *immediate_type_names[] =
{
- "FLT32"
+ "FLT32",
+ "UINT32",
+ "INT32"
};
static const char *swizzle_names[] =
@@ -173,13 +177,19 @@ static const char *primitive_names[] =
static void
-_dump_register(
+_dump_register_decl(
struct dump_ctx *ctx,
uint file,
int first,
int last )
{
ENM( file, file_names );
+
+ /* all geometry shader inputs are two dimensional */
+ if (file == TGSI_FILE_INPUT &&
+ ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY)
+ TXT("[]");
+
CHR( '[' );
SID( first );
if (first != last) {
@@ -190,6 +200,52 @@ _dump_register(
}
static void
+_dump_register_dst(
+ struct dump_ctx *ctx,
+ uint file,
+ int index)
+{
+ ENM( file, file_names );
+
+ CHR( '[' );
+ SID( index );
+ CHR( ']' );
+}
+
+
+static void
+_dump_register_src(
+ struct dump_ctx *ctx,
+ const struct tgsi_full_src_register *src )
+{
+ if (src->Register.Indirect) {
+ ENM( src->Register.File, file_names );
+ CHR( '[' );
+ ENM( src->Indirect.File, file_names );
+ CHR( '[' );
+ SID( src->Indirect.Index );
+ TXT( "]." );
+ ENM( src->Indirect.SwizzleX, swizzle_names );
+ if (src->Register.Index != 0) {
+ if (src->Register.Index > 0)
+ CHR( '+' );
+ SID( src->Register.Index );
+ }
+ CHR( ']' );
+ } else {
+ ENM( src->Register.File, file_names );
+ CHR( '[' );
+ SID( src->Register.Index );
+ CHR( ']' );
+ }
+ if (src->Register.Dimension) {
+ CHR( '[' );
+ SID( src->Dimension.Index );
+ CHR( ']' );
+ }
+}
+
+static void
_dump_register_ind(
struct dump_ctx *ctx,
uint file,
@@ -243,7 +299,7 @@ iter_declaration(
TXT( "DCL " );
- _dump_register(
+ _dump_register_decl(
ctx,
decl->Declaration.File,
decl->Range.First,
@@ -358,6 +414,12 @@ iter_immediate(
case TGSI_IMM_FLOAT32:
FLT( imm->u[i].Float );
break;
+ case TGSI_IMM_UINT32:
+ UID(imm->u[i].Uint);
+ break;
+ case TGSI_IMM_INT32:
+ SID(imm->u[i].Int);
+ break;
default:
assert( 0 );
}
@@ -434,10 +496,9 @@ iter_instruction(
dst->Indirect.SwizzleX );
}
else {
- _dump_register(
+ _dump_register_dst(
ctx,
dst->Register.File,
- dst->Register.Index,
dst->Register.Index );
}
_dump_writemask( ctx, dst->Register.WriteMask );
@@ -453,26 +514,11 @@ iter_instruction(
CHR( ' ' );
if (src->Register.Negate)
- TXT( "-(" );
+ CHR( '-' );
if (src->Register.Absolute)
CHR( '|' );
- if (src->Register.Indirect) {
- _dump_register_ind(
- ctx,
- src->Register.File,
- src->Register.Index,
- src->Indirect.File,
- src->Indirect.Index,
- src->Indirect.SwizzleX );
- }
- else {
- _dump_register(
- ctx,
- src->Register.File,
- src->Register.Index,
- src->Register.Index );
- }
+ _dump_register_src(ctx, src);
if (src->Register.SwizzleX != TGSI_SWIZZLE_X ||
src->Register.SwizzleY != TGSI_SWIZZLE_Y ||
@@ -487,8 +533,6 @@ iter_instruction(
if (src->Register.Absolute)
CHR( '|' );
- if (src->Register.Negate)
- CHR( ')' );
first_reg = FALSE;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 717358620c..f43233bdb4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2,6 +2,7 @@
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009-2010 VMware, Inc. All rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -60,6 +61,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
+
#define FAST_MATH 1
#define TILE_TOP_LEFT 0
@@ -67,11 +69,329 @@
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
+static void
+micro_abs(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = fabsf(src->f[0]);
+ dst->f[1] = fabsf(src->f[1]);
+ dst->f[2] = fabsf(src->f[2]);
+ dst->f[3] = fabsf(src->f[3]);
+}
+
+static void
+micro_arl(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = (int)floorf(src->f[0]);
+ dst->i[1] = (int)floorf(src->f[1]);
+ dst->i[2] = (int)floorf(src->f[2]);
+ dst->i[3] = (int)floorf(src->f[3]);
+}
+
+static void
+micro_arr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = (int)floorf(src->f[0] + 0.5f);
+ dst->i[1] = (int)floorf(src->f[1] + 0.5f);
+ dst->i[2] = (int)floorf(src->f[2] + 0.5f);
+ dst->i[3] = (int)floorf(src->f[3] + 0.5f);
+}
+
+static void
+micro_ceil(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = ceilf(src->f[0]);
+ dst->f[1] = ceilf(src->f[1]);
+ dst->f[2] = ceilf(src->f[2]);
+ dst->f[3] = ceilf(src->f[3]);
+}
+
+static void
+micro_cos(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = cosf(src->f[0]);
+ dst->f[1] = cosf(src->f[1]);
+ dst->f[2] = cosf(src->f[2]);
+ dst->f[3] = cosf(src->f[3]);
+}
+
+static void
+micro_ddx(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] =
+ dst->f[1] =
+ dst->f[2] =
+ dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_ddy(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] =
+ dst->f[1] =
+ dst->f[2] =
+ dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
+}
+
+static void
+micro_exp2(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+#if FAST_MATH
+ dst->f[0] = util_fast_exp2(src->f[0]);
+ dst->f[1] = util_fast_exp2(src->f[1]);
+ dst->f[2] = util_fast_exp2(src->f[2]);
+ dst->f[3] = util_fast_exp2(src->f[3]);
+#else
+#if DEBUG
+ /* Inf is okay for this instruction, so clamp it to silence assertions. */
+ uint i;
+ union tgsi_exec_channel clamped;
+
+ for (i = 0; i < 4; i++) {
+ if (src->f[i] > 127.99999f) {
+ clamped.f[i] = 127.99999f;
+ } else if (src->f[i] < -126.99999f) {
+ clamped.f[i] = -126.99999f;
+ } else {
+ clamped.f[i] = src->f[i];
+ }
+ }
+ src = &clamped;
+#endif /* DEBUG */
+
+ dst->f[0] = powf(2.0f, src->f[0]);
+ dst->f[1] = powf(2.0f, src->f[1]);
+ dst->f[2] = powf(2.0f, src->f[2]);
+ dst->f[3] = powf(2.0f, src->f[3]);
+#endif /* FAST_MATH */
+}
+
+static void
+micro_flr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = floorf(src->f[0]);
+ dst->f[1] = floorf(src->f[1]);
+ dst->f[2] = floorf(src->f[2]);
+ dst->f[3] = floorf(src->f[3]);
+}
+
+static void
+micro_frc(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src->f[0] - floorf(src->f[0]);
+ dst->f[1] = src->f[1] - floorf(src->f[1]);
+ dst->f[2] = src->f[2] - floorf(src->f[2]);
+ dst->f[3] = src->f[3] - floorf(src->f[3]);
+}
+
+static void
+micro_iabs(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
+ dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
+ dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
+ dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
+}
+
+static void
+micro_ineg(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = -src->i[0];
+ dst->i[1] = -src->i[1];
+ dst->i[2] = -src->i[2];
+ dst->i[3] = -src->i[3];
+}
+
+static void
+micro_lg2(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+#if FAST_MATH
+ dst->f[0] = util_fast_log2(src->f[0]);
+ dst->f[1] = util_fast_log2(src->f[1]);
+ dst->f[2] = util_fast_log2(src->f[2]);
+ dst->f[3] = util_fast_log2(src->f[3]);
+#else
+ dst->f[0] = logf(src->f[0]) * 1.442695f;
+ dst->f[1] = logf(src->f[1]) * 1.442695f;
+ dst->f[2] = logf(src->f[2]) * 1.442695f;
+ dst->f[3] = logf(src->f[3]) * 1.442695f;
+#endif
+}
+
+static void
+micro_lrp(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0];
+ dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1];
+ dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2];
+ dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3];
+}
+
+static void
+micro_mad(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0];
+ dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1];
+ dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2];
+ dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3];
+}
+
+static void
+micro_mov(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src->u[0];
+ dst->u[1] = src->u[1];
+ dst->u[2] = src->u[2];
+ dst->u[3] = src->u[3];
+}
+
+static void
+micro_rcp(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = 1.0f / src->f[0];
+ dst->f[1] = 1.0f / src->f[1];
+ dst->f[2] = 1.0f / src->f[2];
+ dst->f[3] = 1.0f / src->f[3];
+}
+
+static void
+micro_rnd(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = floorf(src->f[0] + 0.5f);
+ dst->f[1] = floorf(src->f[1] + 0.5f);
+ dst->f[2] = floorf(src->f[2] + 0.5f);
+ dst->f[3] = floorf(src->f[3] + 0.5f);
+}
+
+static void
+micro_rsq(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
+ dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
+ dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
+ dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
+}
+
+static void
+micro_seq(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sge(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sgn(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
+ dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
+ dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
+ dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
+}
+
+static void
+micro_sgt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sin(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = sinf(src->f[0]);
+ dst->f[1] = sinf(src->f[1]);
+ dst->f[2] = sinf(src->f[2]);
+ dst->f[3] = sinf(src->f[3]);
+}
+
+static void
+micro_sle(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_slt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sne(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_trunc(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = (float)(int)src->f[0];
+ dst->f[1] = (float)(int)src->f[1];
+ dst->f[2] = (float)(int)src->f[2];
+ dst->f[3] = (float)(int)src->f[3];
+}
+
+
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
+enum tgsi_exec_datatype {
+ TGSI_EXEC_DATA_FLOAT,
+ TGSI_EXEC_DATA_INT,
+ TGSI_EXEC_DATA_UINT
+};
+
/*
* Shorthand locations of various utility registers (_I = Index, _C = Channel)
*/
@@ -123,23 +443,19 @@
/** The execution mask depends on the conditional mask and the loop mask */
#define UPDATE_EXEC_MASK(MACH) \
- MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+ MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask
static const union tgsi_exec_channel ZeroVec =
{ { 0.0, 0.0, 0.0, 0.0 } };
-#ifdef DEBUG
-static void
-check_inf_or_nan(const union tgsi_exec_channel *chan)
-{
- assert(!util_is_inf_or_nan(chan->f[0]));
- assert(!util_is_inf_or_nan(chan->f[1]));
- assert(!util_is_inf_or_nan(chan->f[2]));
- assert(!util_is_inf_or_nan(chan->f[3]));
-}
-#endif
+#define CHECK_INF_OR_NAN(chan) do {\
+ assert(!util_is_inf_or_nan((chan)->f[0]));\
+ assert(!util_is_inf_or_nan((chan)->f[1]));\
+ assert(!util_is_inf_or_nan((chan)->f[2]));\
+ assert(!util_is_inf_or_nan((chan)->f[3]));\
+ } while (0)
#ifdef DEBUG
@@ -292,6 +608,14 @@ tgsi_exec_machine_bind_shader(
* sizeof(struct tgsi_full_declaration));
maxDeclarations += 10;
}
+ if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
+ unsigned reg;
+ for (reg = parse.FullToken.FullDeclaration.Range.First;
+ reg <= parse.FullToken.FullDeclaration.Range.Last;
+ ++reg) {
+ ++mach->NumOutputs;
+ }
+ }
memcpy(declarations + numDeclarations,
&parse.FullToken.FullDeclaration,
sizeof(declarations[0]));
@@ -372,6 +696,7 @@ tgsi_exec_machine_create( void )
memset(mach, 0, sizeof(*mach));
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
+ mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
/* Setup constants. */
@@ -413,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
align_free(mach);
}
-
-static void
-micro_abs(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = fabsf( src->f[0] );
- dst->f[1] = fabsf( src->f[1] );
- dst->f[2] = fabsf( src->f[2] );
- dst->f[3] = fabsf( src->f[3] );
-}
-
static void
micro_add(
union tgsi_exec_channel *dst,
@@ -437,76 +750,6 @@ micro_add(
dst->f[3] = src0->f[3] + src1->f[3];
}
-#if 0
-static void
-micro_iadd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] + src1->i[0];
- dst->i[1] = src0->i[1] + src1->i[1];
- dst->i[2] = src0->i[2] + src1->i[2];
- dst->i[3] = src0->i[3] + src1->i[3];
-}
-#endif
-
-static void
-micro_and(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] & src1->u[0];
- dst->u[1] = src0->u[1] & src1->u[1];
- dst->u[2] = src0->u[2] & src1->u[2];
- dst->u[3] = src0->u[3] & src1->u[3];
-}
-
-static void
-micro_ceil(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = ceilf( src->f[0] );
- dst->f[1] = ceilf( src->f[1] );
- dst->f[2] = ceilf( src->f[2] );
- dst->f[3] = ceilf( src->f[3] );
-}
-
-static void
-micro_cos(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = cosf( src->f[0] );
- dst->f[1] = cosf( src->f[1] );
- dst->f[2] = cosf( src->f[2] );
- dst->f[3] = cosf( src->f[3] );
-}
-
-static void
-micro_ddx(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_ddy(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
-}
-
static void
micro_div(
union tgsi_exec_channel *dst,
@@ -527,99 +770,6 @@ micro_div(
}
}
-#if 0
-static void
-micro_udiv(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] / src1->u[0];
- dst->u[1] = src0->u[1] / src1->u[1];
- dst->u[2] = src0->u[2] / src1->u[2];
- dst->u[3] = src0->u[3] / src1->u[3];
-}
-#endif
-
-static void
-micro_eq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-#if 0
-static void
-micro_ieq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
-}
-#endif
-
-static void
-micro_exp2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
-{
-#if FAST_MATH
- dst->f[0] = util_fast_exp2( src->f[0] );
- dst->f[1] = util_fast_exp2( src->f[1] );
- dst->f[2] = util_fast_exp2( src->f[2] );
- dst->f[3] = util_fast_exp2( src->f[3] );
-#else
-
-#if DEBUG
- /* Inf is okay for this instruction, so clamp it to silence assertions. */
- uint i;
- union tgsi_exec_channel clamped;
-
- for (i = 0; i < 4; i++) {
- if (src->f[i] > 127.99999f) {
- clamped.f[i] = 127.99999f;
- } else if (src->f[i] < -126.99999f) {
- clamped.f[i] = -126.99999f;
- } else {
- clamped.f[i] = src->f[i];
- }
- }
- src = &clamped;
-#endif
-
- dst->f[0] = powf( 2.0f, src->f[0] );
- dst->f[1] = powf( 2.0f, src->f[1] );
- dst->f[2] = powf( 2.0f, src->f[2] );
- dst->f[3] = powf( 2.0f, src->f[3] );
-#endif
-}
-
-#if 0
-static void
-micro_f2ut(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = (uint) src->f[0];
- dst->u[1] = (uint) src->f[1];
- dst->u[2] = (uint) src->f[2];
- dst->u[3] = (uint) src->f[3];
-}
-#endif
-
static void
micro_float_clamp(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
@@ -647,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst,
}
static void
-micro_flr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = floorf( src->f[0] );
- dst->f[1] = floorf( src->f[1] );
- dst->f[2] = floorf( src->f[2] );
- dst->f[3] = floorf( src->f[3] );
-}
-
-static void
-micro_frc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = src->f[0] - floorf( src->f[0] );
- dst->f[1] = src->f[1] - floorf( src->f[1] );
- dst->f[2] = src->f[2] - floorf( src->f[2] );
- dst->f[3] = src->f[3] - floorf( src->f[3] );
-}
-
-static void
-micro_i2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->i[0];
- dst->f[1] = (float) src->i[1];
- dst->f[2] = (float) src->i[2];
- dst->f[3] = (float) src->i[3];
-}
-
-static void
-micro_lg2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
-#if FAST_MATH
- dst->f[0] = util_fast_log2( src->f[0] );
- dst->f[1] = util_fast_log2( src->f[1] );
- dst->f[2] = util_fast_log2( src->f[2] );
- dst->f[3] = util_fast_log2( src->f[3] );
-#else
- dst->f[0] = logf( src->f[0] ) * 1.442695f;
- dst->f[1] = logf( src->f[1] ) * 1.442695f;
- dst->f[2] = logf( src->f[2] ) * 1.442695f;
- dst->f[3] = logf( src->f[3] ) * 1.442695f;
-#endif
-}
-
-static void
-micro_le(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
micro_lt(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
@@ -725,38 +810,6 @@ micro_lt(
dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
}
-#if 0
-static void
-micro_ilt(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
-}
-#endif
-
-#if 0
-static void
-micro_ult(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
-}
-#endif
-
static void
micro_max(
union tgsi_exec_channel *dst,
@@ -769,34 +822,6 @@ micro_max(
dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
}
-#if 0
-static void
-micro_imax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
-}
-#endif
-
-#if 0
-static void
-micro_umax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
-}
-#endif
-
static void
micro_min(
union tgsi_exec_channel *dst,
@@ -809,48 +834,6 @@ micro_min(
dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
}
-#if 0
-static void
-micro_imin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
-}
-#endif
-
-#if 0
-static void
-micro_umin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
-}
-#endif
-
-#if 0
-static void
-micro_umod(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] % src1->u[0];
- dst->u[1] = src0->u[1] % src1->u[1];
- dst->u[2] = src0->u[2] % src1->u[2];
- dst->u[3] = src0->u[3] % src1->u[3];
-}
-#endif
-
static void
micro_mul(
union tgsi_exec_channel *dst,
@@ -865,20 +848,6 @@ micro_mul(
#if 0
static void
-micro_imul(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] * src1->i[0];
- dst->i[1] = src0->i[1] * src1->i[1];
- dst->i[2] = src0->i[2] * src1->i[2];
- dst->i[3] = src0->i[3] * src1->i[3];
-}
-#endif
-
-#if 0
-static void
micro_imul64(
union tgsi_exec_channel *dst0,
union tgsi_exec_channel *dst1,
@@ -942,42 +911,6 @@ micro_neg(
dst->f[3] = -src->f[3];
}
-#if 0
-static void
-micro_ineg(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = -src->i[0];
- dst->i[1] = -src->i[1];
- dst->i[2] = -src->i[2];
- dst->i[3] = -src->i[3];
-}
-#endif
-
-static void
-micro_not(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = ~src->u[0];
- dst->u[1] = ~src->u[1];
- dst->u[2] = ~src->u[2];
- dst->u[3] = ~src->u[3];
-}
-
-static void
-micro_or(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] | src1->u[0];
- dst->u[1] = src0->u[1] | src1->u[1];
- dst->u[2] = src0->u[2] | src1->u[2];
- dst->u[3] = src0->u[3] | src1->u[3];
-}
-
static void
micro_pow(
union tgsi_exec_channel *dst,
@@ -998,88 +931,6 @@ micro_pow(
}
static void
-micro_rnd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = floorf( src->f[0] + 0.5f );
- dst->f[1] = floorf( src->f[1] + 0.5f );
- dst->f[2] = floorf( src->f[2] + 0.5f );
- dst->f[3] = floorf( src->f[3] + 0.5f );
-}
-
-static void
-micro_sgn(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
- dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
- dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
- dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
-}
-
-static void
-micro_shl(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] << src1->i[0];
- dst->i[1] = src0->i[1] << src1->i[1];
- dst->i[2] = src0->i[2] << src1->i[2];
- dst->i[3] = src0->i[3] << src1->i[3];
-}
-
-static void
-micro_ishr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] >> src1->i[0];
- dst->i[1] = src0->i[1] >> src1->i[1];
- dst->i[2] = src0->i[2] >> src1->i[2];
- dst->i[3] = src0->i[3] >> src1->i[3];
-}
-
-static void
-micro_trunc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0 )
-{
- dst->f[0] = (float) (int) src0->f[0];
- dst->f[1] = (float) (int) src0->f[1];
- dst->f[2] = (float) (int) src0->f[2];
- dst->f[3] = (float) (int) src0->f[3];
-}
-
-#if 0
-static void
-micro_ushr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] >> src1->u[0];
- dst->u[1] = src0->u[1] >> src1->u[1];
- dst->u[2] = src0->u[2] >> src1->u[2];
- dst->u[3] = src0->u[3] >> src1->u[3];
-}
-#endif
-
-static void
-micro_sin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = sinf( src->f[0] );
- dst->f[1] = sinf( src->f[1] );
- dst->f[2] = sinf( src->f[2] );
- dst->f[3] = sinf( src->f[3] );
-}
-
-static void
micro_sqrt( union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
@@ -1101,31 +952,6 @@ micro_sub(
dst->f[3] = src0->f[3] - src1->f[3];
}
-#if 0
-static void
-micro_u2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->u[0];
- dst->f[1] = (float) src->u[1];
- dst->f[2] = (float) src->u[2];
- dst->f[3] = (float) src->u[3];
-}
-#endif
-
-static void
-micro_xor(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] ^ src1->u[0];
- dst->u[1] = src0->u[1] ^ src1->u[1];
- dst->u[2] = src0->u[2] ^ src1->u[2];
- dst->u[3] = src0->u[3] ^ src1->u[3];
-}
-
static void
fetch_src_file_channel(
const struct tgsi_exec_machine *mach,
@@ -1224,11 +1050,11 @@ fetch_src_file_channel(
}
static void
-fetch_source(
- const struct tgsi_exec_machine *mach,
- union tgsi_exec_channel *chan,
- const struct tgsi_full_src_register *reg,
- const uint chan_index )
+fetch_source(const struct tgsi_exec_machine *mach,
+ union tgsi_exec_channel *chan,
+ const struct tgsi_full_src_register *reg,
+ const uint chan_index,
+ enum tgsi_exec_datatype src_datatype)
{
union tgsi_exec_channel index;
uint swizzle;
@@ -1277,10 +1103,10 @@ fetch_source(
&indir_index );
/* add value of address register to the offset */
- index.i[0] += (int) indir_index.f[0];
- index.i[1] += (int) indir_index.f[1];
- index.i[2] += (int) indir_index.f[2];
- index.i[3] += (int) indir_index.f[3];
+ index.i[0] += indir_index.i[0];
+ index.i[1] += indir_index.i[1];
+ index.i[2] += indir_index.i[2];
+ index.i[3] += indir_index.i[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
@@ -1357,10 +1183,10 @@ fetch_source(
&index2,
&indir_index );
- index.i[0] += (int) indir_index.f[0];
- index.i[1] += (int) indir_index.f[1];
- index.i[2] += (int) indir_index.f[2];
- index.i[3] += (int) indir_index.f[3];
+ index.i[0] += indir_index.i[0];
+ index.i[1] += indir_index.i[1];
+ index.i[2] += indir_index.i[2];
+ index.i[3] += indir_index.i[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
@@ -1385,32 +1211,30 @@ fetch_source(
&index,
chan );
- switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
- case TGSI_UTIL_SIGN_CLEAR:
- micro_abs( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_SET:
- micro_abs( chan, chan );
- micro_neg( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- micro_neg( chan, chan );
- break;
+ if (reg->Register.Absolute) {
+ if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
+ micro_abs(chan, chan);
+ } else {
+ micro_iabs(chan, chan);
+ }
+ }
- case TGSI_UTIL_SIGN_KEEP:
- break;
+ if (reg->Register.Negate) {
+ if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
+ micro_neg(chan, chan);
+ } else {
+ micro_ineg(chan, chan);
+ }
}
}
static void
-store_dest(
- struct tgsi_exec_machine *mach,
- const union tgsi_exec_channel *chan,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- uint chan_index )
+store_dest(struct tgsi_exec_machine *mach,
+ const union tgsi_exec_channel *chan,
+ const struct tgsi_full_dst_register *reg,
+ const struct tgsi_full_instruction *inst,
+ uint chan_index,
+ enum tgsi_exec_datatype dst_datatype)
{
uint i;
union tgsi_exec_channel null;
@@ -1419,9 +1243,9 @@ store_dest(
int offset = 0; /* indirection offset */
int index;
-#ifdef DEBUG
- check_inf_or_nan(chan);
-#endif
+ if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
+ CHECK_INF_OR_NAN(chan);
+ }
/* There is an extra source register that indirectly subscripts
* a register file. The direct index now becomes an offset
@@ -1456,7 +1280,7 @@ store_dest(
&indir_index );
/* save indirection offset */
- offset = (int) indir_index.f[0];
+ offset = indir_index.i[0];
}
switch (reg->Register.File) {
@@ -1468,6 +1292,15 @@ store_dest(
index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+ reg->Register.Index;
dst = &mach->Outputs[offset + index].xyzw[chan_index];
+#if 0
+ if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+ fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (execmask & (1 << i))
+ fprintf(stderr, "%f, ", chan->f[i]);
+ fprintf(stderr, ")\n");
+ }
+#endif
break;
case TGSI_FILE_TEMPORARY:
@@ -1577,10 +1410,10 @@ store_dest(
}
#define FETCH(VAL,INDEX,CHAN)\
- fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
+ fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
#define STORE(VAL,INDEX,CHAN)\
- store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
+ store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
/**
@@ -1638,6 +1471,35 @@ exec_kilp(struct tgsi_exec_machine *mach,
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
}
+static void
+emit_vertex(struct tgsi_exec_machine *mach)
+{
+ /* FIXME: check for exec mask correctly
+ unsigned i;
+ for (i = 0; i < QUAD_SIZE; ++i) {
+ if ((mach->ExecMask & (1 << i)))
+ */
+ if (mach->ExecMask) {
+ mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
+ mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+ }
+}
+
+static void
+emit_primitive(struct tgsi_exec_machine *mach)
+{
+ unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
+ /* FIXME: check for exec mask correctly
+ unsigned i;
+ for (i = 0; i < QUAD_SIZE; ++i) {
+ if ((mach->ExecMask & (1 << i)))
+ */
+ if (mach->ExecMask) {
+ ++(*prim_count);
+ debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
+ mach->Primitives[*prim_count] = 0;
+ }
+}
/*
* Fetch a four texture samples using STR texture coordinates.
@@ -1908,7 +1770,7 @@ exec_declaration(struct tgsi_exec_machine *mach,
if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
assert(decl->Semantic.Index == 0);
assert(first == last);
- assert(mask = TGSI_WRITEMASK_XYZW);
+ assert(mask == TGSI_WRITEMASK_XYZW);
mach->Inputs[first] = mach->QuadPos;
} else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
@@ -1954,6 +1816,461 @@ exec_declaration(struct tgsi_exec_machine *mach,
}
}
+typedef void (* micro_op)(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src);
+
+static void
+exec_scalar_unary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ union tgsi_exec_channel src;
+ union tgsi_exec_channel dst;
+
+ fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
+ op(&dst, &src);
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_vector_unary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ struct tgsi_exec_vector dst;
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel src;
+
+ fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
+ op(&dst.xyzw[chan], &src);
+ }
+ }
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_vector_binary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ struct tgsi_exec_vector dst;
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel src[2];
+
+ fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
+ fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
+ op(&dst.xyzw[chan], src);
+ }
+ }
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_vector_trinary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ struct tgsi_exec_vector dst;
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel src[3];
+
+ fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
+ fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
+ fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
+ op(&dst.xyzw[chan], src);
+ }
+ }
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_break(struct tgsi_exec_machine *mach)
+{
+ if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
+ /* turn off loop channels for each enabled exec channel */
+ mach->LoopMask &= ~mach->ExecMask;
+ /* Todo: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
+ } else {
+ assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
+
+ mach->Switch.mask = 0x0;
+
+ UPDATE_EXEC_MASK(mach);
+ }
+}
+
+static void
+exec_switch(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
+ assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
+
+ mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
+ fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
+ mach->Switch.mask = 0x0;
+ mach->Switch.defaultMask = 0x0;
+
+ mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
+ mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+exec_case(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
+ union tgsi_exec_channel src;
+ uint mask = 0;
+
+ fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
+
+ if (mach->Switch.selector.u[0] == src.u[0]) {
+ mask |= 0x1;
+ }
+ if (mach->Switch.selector.u[1] == src.u[1]) {
+ mask |= 0x2;
+ }
+ if (mach->Switch.selector.u[2] == src.u[2]) {
+ mask |= 0x4;
+ }
+ if (mach->Switch.selector.u[3] == src.u[3]) {
+ mask |= 0x8;
+ }
+
+ mach->Switch.defaultMask |= mask;
+
+ mach->Switch.mask |= mask & prevMask;
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+exec_default(struct tgsi_exec_machine *mach)
+{
+ uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
+
+ mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+exec_endswitch(struct tgsi_exec_machine *mach)
+{
+ mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
+ mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+micro_i2f(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = (float)src->i[0];
+ dst->f[1] = (float)src->i[1];
+ dst->f[2] = (float)src->i[2];
+ dst->f[3] = (float)src->i[3];
+}
+
+static void
+micro_not(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = ~src->u[0];
+ dst->u[1] = ~src->u[1];
+ dst->u[2] = ~src->u[2];
+ dst->u[3] = ~src->u[3];
+}
+
+static void
+micro_shl(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] << src[1].u[0];
+ dst->u[1] = src[0].u[1] << src[1].u[1];
+ dst->u[2] = src[0].u[2] << src[1].u[2];
+ dst->u[3] = src[0].u[3] << src[1].u[3];
+}
+
+static void
+micro_and(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] & src[1].u[0];
+ dst->u[1] = src[0].u[1] & src[1].u[1];
+ dst->u[2] = src[0].u[2] & src[1].u[2];
+ dst->u[3] = src[0].u[3] & src[1].u[3];
+}
+
+static void
+micro_or(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] | src[1].u[0];
+ dst->u[1] = src[0].u[1] | src[1].u[1];
+ dst->u[2] = src[0].u[2] | src[1].u[2];
+ dst->u[3] = src[0].u[3] | src[1].u[3];
+}
+
+static void
+micro_xor(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] ^ src[1].u[0];
+ dst->u[1] = src[0].u[1] ^ src[1].u[1];
+ dst->u[2] = src[0].u[2] ^ src[1].u[2];
+ dst->u[3] = src[0].u[3] ^ src[1].u[3];
+}
+
+static void
+micro_f2i(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = (int)src->f[0];
+ dst->i[1] = (int)src->f[1];
+ dst->i[2] = (int)src->f[2];
+ dst->i[3] = (int)src->f[3];
+}
+
+static void
+micro_idiv(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] / src[1].i[0];
+ dst->i[1] = src[0].i[1] / src[1].i[1];
+ dst->i[2] = src[0].i[2] / src[1].i[2];
+ dst->i[3] = src[0].i[3] / src[1].i[3];
+}
+
+static void
+micro_imax(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
+ dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
+ dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
+ dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
+}
+
+static void
+micro_imin(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
+ dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
+ dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
+ dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
+}
+
+static void
+micro_isge(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
+ dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
+ dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
+ dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
+}
+
+static void
+micro_ishr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] >> src[1].i[0];
+ dst->i[1] = src[0].i[1] >> src[1].i[1];
+ dst->i[2] = src[0].i[2] >> src[1].i[2];
+ dst->i[3] = src[0].i[3] >> src[1].i[3];
+}
+
+static void
+micro_islt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
+ dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
+ dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
+ dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
+}
+
+static void
+micro_f2u(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = (uint)src->f[0];
+ dst->u[1] = (uint)src->f[1];
+ dst->u[2] = (uint)src->f[2];
+ dst->u[3] = (uint)src->f[3];
+}
+
+static void
+micro_u2f(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = (float)src->u[0];
+ dst->f[1] = (float)src->u[1];
+ dst->f[2] = (float)src->u[2];
+ dst->f[3] = (float)src->u[3];
+}
+
+static void
+micro_uadd(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] + src[1].u[0];
+ dst->u[1] = src[0].u[1] + src[1].u[1];
+ dst->u[2] = src[0].u[2] + src[1].u[2];
+ dst->u[3] = src[0].u[3] + src[1].u[3];
+}
+
+static void
+micro_udiv(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] / src[1].u[0];
+ dst->u[1] = src[0].u[1] / src[1].u[1];
+ dst->u[2] = src[0].u[2] / src[1].u[2];
+ dst->u[3] = src[0].u[3] / src[1].u[3];
+}
+
+static void
+micro_umad(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
+ dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
+ dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
+ dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
+}
+
+static void
+micro_umax(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
+ dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
+ dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
+ dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
+}
+
+static void
+micro_umin(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
+ dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
+ dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
+ dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
+}
+
+static void
+micro_umod(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] % src[1].u[0];
+ dst->u[1] = src[0].u[1] % src[1].u[1];
+ dst->u[2] = src[0].u[2] % src[1].u[2];
+ dst->u[3] = src[0].u[3] % src[1].u[3];
+}
+
+static void
+micro_umul(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] * src[1].u[0];
+ dst->u[1] = src[0].u[1] * src[1].u[1];
+ dst->u[2] = src[0].u[2] * src[1].u[2];
+ dst->u[3] = src[0].u[3] * src[1].u[3];
+}
+
+static void
+micro_useq(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
+}
+
+static void
+micro_usge(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
+}
+
+static void
+micro_ushr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] >> src[1].u[0];
+ dst->u[1] = src[0].u[1] >> src[1].u[1];
+ dst->u[2] = src[0].u[2] >> src[1].u[2];
+ dst->u[3] = src[0].u[3] >> src[1].u[3];
+}
+
+static void
+micro_uslt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
+}
+
+static void
+micro_usne(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
+}
+
static void
exec_instruction(
struct tgsi_exec_machine *mach,
@@ -1968,23 +2285,11 @@ exec_instruction(
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- case TGSI_OPCODE_FLR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_flr(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_MOV:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&d[chan_index], 0, chan_index);
- }
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_LIT:
@@ -2021,23 +2326,11 @@ exec_instruction(
break;
case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- FETCH( &r[0], 0, CHAN_X );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- FETCH( &r[0], 0, CHAN_X );
- micro_abs( &r[0], &r[0] );
- micro_sqrt( &r[0], &r[0] );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_EXP:
@@ -2208,41 +2501,15 @@ exec_instruction(
break;
case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_mul( &r[0], &r[0], &r[1] );
- FETCH( &r[1], 2, chan_index );
- micro_add(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SUB:
@@ -2257,17 +2524,7 @@ exec_instruction(
break;
case TGSI_OPCODE_LRP:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
- micro_sub( &r[1], &r[1], &r[2] );
- micro_mul( &r[0], &r[0], &r[1] );
- micro_add(&d[chan_index], &r[0], &r[2]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_CND:
@@ -2301,13 +2558,7 @@ exec_instruction(
break;
case TGSI_OPCODE_FRC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_frc(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_CLAMP:
@@ -2323,33 +2574,20 @@ exec_instruction(
}
break;
+ case TGSI_OPCODE_FLR:
+ exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
case TGSI_OPCODE_ROUND:
- case TGSI_OPCODE_ARR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_rnd(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_EX2:
- FETCH(&r[0], 0, CHAN_X);
-
- micro_exp2( &r[0], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_LG2:
- FETCH( &r[0], 0, CHAN_X );
- micro_lg2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_POW:
@@ -2402,15 +2640,9 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_ABS:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- micro_abs(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
- break;
+ case TGSI_OPCODE_ABS:
+ exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
+ break;
case TGSI_OPCODE_RCC:
FETCH(&r[0], 0, CHAN_X);
@@ -2449,33 +2681,15 @@ exec_instruction(
break;
case TGSI_OPCODE_COS:
- FETCH(&r[0], 0, CHAN_X);
-
- micro_cos( &r[0], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DDX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddx(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DDY:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddy(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_KILP:
@@ -2552,14 +2766,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SEQ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SFL:
@@ -2569,44 +2776,19 @@ exec_instruction(
break;
case TGSI_OPCODE_SGT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SIN:
- FETCH( &r[0], 0, CHAN_X );
- micro_sin( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SLE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SNE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_STR:
@@ -2711,6 +2893,10 @@ exec_instruction(
assert (0);
break;
+ case TGSI_OPCODE_ARR:
+ exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
case TGSI_OPCODE_BRA:
assert (0);
break;
@@ -2730,6 +2916,8 @@ exec_instruction(
mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
+ mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
+ mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
/* note that PC was already incremented above */
mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
@@ -2737,12 +2925,17 @@ exec_instruction(
/* Second, push the Cond, Loop, Cont, Func stacks */
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
+ assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+
+ mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
+ mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
/* Finally, jump to the subroutine */
@@ -2775,6 +2968,12 @@ exec_instruction(
mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
mach->ContMask = mach->ContStack[mach->ContStackTop];
+ mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
+ mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
+
+ mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
+ mach->BreakType = mach->BreakStack[mach->BreakStackTop];
+
assert(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
@@ -2785,14 +2984,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SSG:
- /* TGSI_OPCODE_SGN */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_sgn(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_CMP:
@@ -2976,87 +3168,31 @@ exec_instruction(
break;
case TGSI_OPCODE_CEIL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ceil(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_I2F:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_i2f(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_NOT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_not(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_TRUNC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_trunc(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SHL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_shl(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SHR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_ishr(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_AND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_and(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_OR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_or(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_MOD:
@@ -3064,14 +3200,7 @@ exec_instruction(
break;
case TGSI_OPCODE_XOR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_xor(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_SAD:
@@ -3087,13 +3216,11 @@ exec_instruction(
break;
case TGSI_OPCODE_EMIT:
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+ emit_vertex(mach);
break;
case TGSI_OPCODE_ENDPRIM:
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+ emit_primitive(mach);
break;
case TGSI_OPCODE_BGNFOR:
@@ -3122,11 +3249,15 @@ exec_instruction(
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
+
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
+ mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
+ mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
break;
case TGSI_OPCODE_ENDFOR:
@@ -3173,6 +3304,8 @@ exec_instruction(
--mach->LoopLabelStackTop;
assert(mach->LoopCounterStackTop > 0);
--mach->LoopCounterStackTop;
+
+ mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
}
UPDATE_EXEC_MASK(mach);
break;
@@ -3196,15 +3329,14 @@ exec_instruction(
mach->ContMask = mach->ContStack[--mach->ContStackTop];
assert(mach->LoopLabelStackTop > 0);
--mach->LoopLabelStackTop;
+
+ mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
}
UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_BRK:
- /* turn off loop channels for each enabled exec channel */
- mach->LoopMask &= ~mach->ExecMask;
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
+ exec_break(mach);
break;
case TGSI_OPCODE_CONT:
@@ -3235,6 +3367,12 @@ exec_instruction(
mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
mach->ContMask = mach->ContStack[mach->ContStackTop];
+ mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
+ mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
+
+ mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
+ mach->BreakType = mach->BreakStack[mach->BreakStackTop];
+
assert(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
@@ -3246,11 +3384,135 @@ exec_instruction(
case TGSI_OPCODE_NOP:
break;
+ case TGSI_OPCODE_BREAKC:
+ FETCH(&r[0], 0, CHAN_X);
+ /* update CondMask */
+ if (r[0].u[0] && (mach->ExecMask & 0x1)) {
+ mach->LoopMask &= ~0x1;
+ }
+ if (r[0].u[1] && (mach->ExecMask & 0x2)) {
+ mach->LoopMask &= ~0x2;
+ }
+ if (r[0].u[2] && (mach->ExecMask & 0x4)) {
+ mach->LoopMask &= ~0x4;
+ }
+ if (r[0].u[3] && (mach->ExecMask & 0x8)) {
+ mach->LoopMask &= ~0x8;
+ }
+ /* Todo: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_F2I:
+ exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_IDIV:
+ exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_IMAX:
+ exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_IMIN:
+ exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_INEG:
+ exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_ISGE:
+ exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_ISHR:
+ exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_ISLT:
+ exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_F2U:
+ exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_U2F:
+ exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UADD:
+ exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UDIV:
+ exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMAD:
+ exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMAX:
+ exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMIN:
+ exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMOD:
+ exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMUL:
+ exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USEQ:
+ exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USGE:
+ exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USHR:
+ exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USLT:
+ exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USNE:
+ exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_SWITCH:
+ exec_switch(mach, inst);
+ break;
+
+ case TGSI_OPCODE_CASE:
+ exec_case(mach, inst);
+ break;
+
+ case TGSI_OPCODE_DEFAULT:
+ exec_default(mach);
+ break;
+
+ case TGSI_OPCODE_ENDSWITCH:
+ exec_endswitch(mach);
+ break;
+
default:
assert( 0 );
}
}
+
#define DEBUG_EXECUTION 0
@@ -3270,9 +3532,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
mach->FuncMask = 0xf;
mach->ExecMask = 0xf;
+ mach->Switch.mask = 0xf;
+
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
+ assert(mach->SwitchStackTop == 0);
+ assert(mach->BreakStackTop == 0);
assert(mach->CallStackTop == 0);
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
@@ -3329,11 +3595,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
if (j > 0) {
debug_printf(" ");
}
- debug_printf("(%6f, %6f, %6f, %6f)\n",
- temps[i].xyzw[0].f[j],
- temps[i].xyzw[1].f[j],
- temps[i].xyzw[2].f[j],
- temps[i].xyzw[3].f[j]);
+ debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
+ temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
+ temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
+ temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
+ temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
}
}
}
@@ -3347,11 +3613,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
if (j > 0) {
debug_printf(" ");
}
- debug_printf("{%6f, %6f, %6f, %6f}\n",
- outputs[i].xyzw[0].f[j],
- outputs[i].xyzw[1].f[j],
- outputs[i].xyzw[2].f[j],
- outputs[i].xyzw[3].f[j]);
+ debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
+ outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
+ outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
+ outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
+ outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
}
}
}
@@ -3373,6 +3639,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
+ assert(mach->SwitchStackTop == 0);
+ assert(mach->BreakStackTop == 0);
assert(mach->CallStackTop == 0);
return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fd94c1bc44..aa3a98d7f1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -179,6 +179,7 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MAX_COND_NESTING 32
#define TGSI_EXEC_MAX_LOOP_NESTING 32
+#define TGSI_EXEC_MAX_SWITCH_NESTING 32
#define TGSI_EXEC_MAX_CALL_NESTING 32
/* The maximum number of input attributes per vertex. For 2D
@@ -191,6 +192,14 @@ struct tgsi_exec_labels
*/
#define TGSI_EXEC_MAX_CONST_BUFFER 4096
+/* The maximum number of vertices per primitive */
+#define TGSI_MAX_PRIM_VERTICES 6
+
+/* The maximum number of primitives to be generated */
+#define TGSI_MAX_PRIMITIVES 64
+
+/* The maximum total number of output vectors: one per attribute of every vertex of every primitive */
+#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
/** function call/activation record */
struct tgsi_call_record
@@ -198,10 +207,29 @@ struct tgsi_call_record
uint CondStackTop;
uint LoopStackTop;
uint ContStackTop;
+ int SwitchStackTop;
+ int BreakStackTop;
uint ReturnAddr;
};
+/*
+ * Switch-case block state.
+ *
+ * tgsi_exec_machine holds the current record in its Switch member and
+ * saved records in SwitchStack[].
+ */
+struct tgsi_switch_record {
+ uint mask; /**< execution mask */
+ union tgsi_exec_channel selector; /**< a value case statements are compared to */
+ uint defaultMask; /**< non-execute mask for default case */
+};
+
+
+/*
+ * Value kept in tgsi_exec_machine::BreakType and saved on BreakStack[].
+ * NOTE(review): presumably selects what a BRK terminates -- confirm in
+ * exec_break().
+ */
+enum tgsi_break_type {
+ TGSI_EXEC_BREAK_INSIDE_LOOP, /* assigned to BreakType by BGNLOOP */
+ TGSI_EXEC_BREAK_INSIDE_SWITCH /* presumably assigned when a switch is entered -- confirm in exec_switch() */
+};
+
+
+#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING)
+
+
/**
* Run-time virtual machine state for executing TGSI shader.
*/
@@ -214,8 +242,8 @@ struct tgsi_exec_machine
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
- struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS];
- struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS];
+ struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS];
+ struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
struct tgsi_exec_vector *Addrs;
struct tgsi_exec_vector *Predicates;
@@ -229,6 +257,8 @@ struct tgsi_exec_machine
/* GEOMETRY processor only. */
unsigned *Primitives;
+ unsigned NumOutputs;
+ unsigned MaxGeometryShaderOutputs;
/* FRAGMENT processor only. */
const struct tgsi_interp_coef *InterpCoefs;
@@ -242,6 +272,12 @@ struct tgsi_exec_machine
uint FuncMask; /**< For function calls */
uint ExecMask; /**< = CondMask & LoopMask */
+ /* Current switch-case state. */
+ struct tgsi_switch_record Switch;
+
+ /* Current break type. */
+ enum tgsi_break_type BreakType;
+
/** Condition mask stack (for nested conditionals) */
uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
int CondStackTop;
@@ -262,6 +298,13 @@ struct tgsi_exec_machine
uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
int ContStackTop;
+ /** Switch case stack */
+ struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING];
+ int SwitchStackTop;
+
+ enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK];
+ int BreakStackTop;
+
/** Function execution mask stack (for executing subroutine code) */
uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
int FuncStackTop;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index be375cabb8..de0e09cdba 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT },
{ 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC },
{ 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL },
- { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR },
+ { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */
{ 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND },
{ 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR },
{ 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD },
@@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL },
{ 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END },
- { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */
+ { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */
+ { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I },
+ { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV },
+ { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX },
+ { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN },
+ { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG },
+ { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE },
+ { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR },
+ { 1, 2, 0, 0, 0, 0, "ISLT", TGSI_OPCODE_ISLT },
+ { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U },
+ { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F },
+ { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD },
+ { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV },
+ { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD },
+ { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX },
+ { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN },
+ { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD },
+ { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL },
+ { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ },
+ { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE },
+ { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR },
+ { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT },
+ { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE },
+ { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH },
+ { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE },
+ { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT },
+ { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }
};
const struct tgsi_opcode_info *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index b34263da48..e4af15c156 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -124,7 +124,6 @@ OP11(I2F)
OP11(NOT)
OP11(TRUNC)
OP12(SHL)
-OP12(SHR)
OP12(AND)
OP12(OR)
OP12(MOD)
@@ -146,6 +145,28 @@ OP01(IFC)
OP01(BREAKC)
OP01(KIL)
OP00(END)
+OP11(F2I)
+OP12(IDIV)
+OP12(IMAX)
+OP12(IMIN)
+OP11(INEG)
+OP12(ISGE)
+OP12(ISHR)
+OP12(ISLT)
+OP11(F2U)
+OP11(U2F)
+OP12(UADD)
+OP12(UDIV)
+OP13(UMAD)
+OP12(UMAX)
+OP12(UMIN)
+OP12(UMOD)
+OP12(UMUL)
+OP12(USEQ)
+OP12(USGE)
+OP12(USHR)
+OP12(USLT)
+OP12(USNE)
#undef OP00
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index fa65ecb997..8c7062d850 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -119,17 +119,29 @@ tgsi_parse_token(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
+ uint imm_count;
memset(imm, 0, sizeof *imm);
copy_token(&imm->Immediate, &token);
+ imm_count = imm->Immediate.NrTokens - 1;
+
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- {
- uint imm_count = imm->Immediate.NrTokens - 1;
- for (i = 0; i < imm_count; i++) {
- next_token(ctx, &imm->u[i]);
- }
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i].Float);
+ }
+ break;
+
+ case TGSI_IMM_UINT32:
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i].Uint);
+ }
+ break;
+
+ case TGSI_IMM_INT32:
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i].Int);
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index c27579e794..9b0644465a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -26,32 +26,112 @@
**************************************************************************/
#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+#include "cso_cache/cso_hash.h"
#include "tgsi_sanity.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
-typedef uint reg_flag;
-
-#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8)
-
-#define MAX_REGISTERS 1024
-#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG)
+/* One register reference: a register file plus up to two indices. */
+typedef struct {
+ uint file : 28; /* register file, as passed to fill_scan_register1d/2d */
+ /* max 2 dimensions */
+ uint dimensions : 4; /* set to 1 or 2 by fill_scan_register1d/2d */
+ uint indices[2]; /* [0] first index, [1] second index (0 for 1d registers) */
+} scan_register;
struct sanity_check_ctx
{
struct tgsi_iterate_context iter;
+ struct cso_hash *regs_decl; /* searched by is_register_declared() with scan_register_key() */
+ struct cso_hash *regs_used; /* searched by is_register_used() with scan_register_key() */
+ struct cso_hash *regs_ind_used; /* keyed by register file, see is_ind_register_used() */
- reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS];
- reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS];
- boolean regs_ind_used[TGSI_FILE_COUNT];
uint num_imms;
uint num_instructions;
uint index_of_END;
uint errors;
uint warnings;
+ uint implied_array_size;
};
+/**
+ * Build the key used for the cso_hash lookups of a register: the file
+ * value OR-ed with indices[0] shifted left by 4 and indices[1] shifted
+ * left by 18.  The shifted fields can overlap, so distinct registers may
+ * share a key.
+ */
+static INLINE unsigned
+scan_register_key(const scan_register *reg)
+{
+   const unsigned file_bits = reg->file;
+   const unsigned index0_bits = reg->indices[0] << 4;
+   const unsigned index1_bits = reg->indices[1] << 18;
+
+   return file_bits | index0_bits | index1_bits;
+}
+
+/** Initialise reg as a one-dimensional reference: file plus a single index. */
+static void
+fill_scan_register1d(scan_register *reg,
+                     uint file, uint index)
+{
+   reg->dimensions = 1;
+   reg->file = file;
+   reg->indices[1] = 0;   /* no second dimension */
+   reg->indices[0] = index;
+}
+
+/** Initialise reg as a two-dimensional reference: file plus two indices. */
+static void
+fill_scan_register2d(scan_register *reg,
+                     uint file, uint index1, uint index2)
+{
+   /* Start from the one-dimensional form, then add the second dimension. */
+   fill_scan_register1d(reg, file, index1);
+   reg->dimensions = 2;
+   reg->indices[1] = index2;
+}
+
+/** Fill reg from a destination operand (always one-dimensional here). */
+static void
+scan_register_dst(scan_register *reg,
+                  struct tgsi_full_dst_register *dst)
+{
+   const uint file = dst->Register.File;
+   const uint index = dst->Register.Index;
+
+   fill_scan_register1d(reg, file, index);
+}
+
+/**
+ * Fill reg from a source operand: two-dimensional when the operand has its
+ * Dimension flag set, one-dimensional otherwise.
+ */
+static void
+scan_register_src(scan_register *reg,
+                  struct tgsi_full_src_register *src)
+{
+   if (!src->Register.Dimension) {
+      fill_scan_register1d(reg, src->Register.File, src->Register.Index);
+      return;
+   }
+
+   /* FIXME: indirect multidimensional addressing is not supported yet. */
+   debug_assert(!src->Dimension.Indirect);
+   fill_scan_register2d(reg,
+                        src->Register.File,
+                        src->Register.Index,
+                        src->Dimension.Index);
+}
+
+/**
+ * Allocate a scan_register with MALLOC and fill it from a source operand.
+ * NOTE(review): the MALLOC result is used without a NULL check.
+ */
+static scan_register *
+create_scan_register_src(struct tgsi_full_src_register *src)
+{
+   scan_register *new_reg;
+
+   new_reg = MALLOC(sizeof *new_reg);
+   scan_register_src(new_reg, src);
+   return new_reg;
+}
+
+/**
+ * Allocate a scan_register with MALLOC and fill it from a destination
+ * operand.
+ * NOTE(review): the MALLOC result is used without a NULL check.
+ */
+static scan_register *
+create_scan_register_dst(struct tgsi_full_dst_register *dst)
+{
+   scan_register *new_reg;
+
+   new_reg = MALLOC(sizeof *new_reg);
+   scan_register_dst(new_reg, dst);
+   return new_reg;
+}
+
static void
report_error(
struct sanity_check_ctx *ctx,
@@ -99,12 +179,12 @@ check_file_name(
static boolean
is_register_declared(
struct sanity_check_ctx *ctx,
- uint file,
- int index )
+ const scan_register *reg)
{
- assert( index >= 0 && index < MAX_REGISTERS );
-
- return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+   /* Search regs_decl under the register's key, passing reg and its size
+    * as the template to match. */
+   if (cso_hash_find_data_from_template(ctx->regs_decl,
+                                        scan_register_key(reg),
+                                        (void*)reg,
+                                        sizeof(scan_register)))
+      return TRUE;
+
+   return FALSE;
}
static boolean
@@ -112,23 +192,37 @@ is_any_register_declared(
struct sanity_check_ctx *ctx,
uint file )
{
- uint i;
+ struct cso_hash_iter iter =
+ cso_hash_first_node(ctx->regs_decl);
- for (i = 0; i < MAX_REG_FLAGS; i++)
- if (ctx->regs_decl[file][i])
+ while (!cso_hash_iter_is_null(iter)) {
+ scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
+ if (reg->file == file)
return TRUE;
+ iter = cso_hash_iter_next(iter);
+ }
+
return FALSE;
}
static boolean
is_register_used(
struct sanity_check_ctx *ctx,
- uint file,
- int index )
+ scan_register *reg)
{
- assert( index < MAX_REGISTERS );
+   /* Search regs_used under the register's key, passing reg and its size
+    * as the template to match. */
+   if (cso_hash_find_data_from_template(ctx->regs_used,
+                                        scan_register_key(reg),
+                                        reg,
+                                        sizeof(scan_register)))
+      return TRUE;
+
+   return FALSE;
+}
- return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+
+/*
+ * Return whether ctx->regs_ind_used contains an entry keyed by reg->file.
+ * Only the file of reg is consulted; its indices are not looked at.
+ */
+static boolean
+is_ind_register_used(
+ struct sanity_check_ctx *ctx,
+ scan_register *reg)
+{
+ return cso_hash_contains(ctx->regs_ind_used, reg->file);
}
static const char *file_names[TGSI_FILE_COUNT] =
@@ -148,31 +242,40 @@ static const char *file_names[TGSI_FILE_COUNT] =
static boolean
check_register_usage(
struct sanity_check_ctx *ctx,
- uint file,
- int index,
+ scan_register *reg,
const char *name,
boolean indirect_access )
{
- if (!check_file_name( ctx, file ))
+ if (!check_file_name( ctx, reg->file )) {
+ FREE(reg);
return FALSE;
+ }
if (indirect_access) {
/* Note that 'index' is an offset relative to the value of the
- * address register. No range checking done here.
- */
- if (!is_any_register_declared( ctx, file ))
- report_error( ctx, "%s: Undeclared %s register", file_names[file], name );
- ctx->regs_ind_used[file] = TRUE;
+ * address register. No range checking done here.*/
+ reg->indices[0] = 0;
+ reg->indices[1] = 0;
+ if (!is_any_register_declared( ctx, reg->file ))
+ report_error( ctx, "%s: Undeclared %s register", file_names[reg->file], name );
+ if (!is_ind_register_used(ctx, reg))
+ cso_hash_insert(ctx->regs_ind_used, reg->file, reg);
+ else
+ FREE(reg);
}
else {
- if (index < 0 || index >= MAX_REGISTERS) {
- report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name );
- return FALSE;
- }
-
- if (!is_register_declared( ctx, file, index ))
- report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name );
- ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+ if (!is_register_declared( ctx, reg )) {
+ if (reg->dimensions == 2)
+ report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file],
+ reg->indices[0], reg->indices[1], name );
+ else
+ report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file],
+ reg->indices[0], name );
+ }
+ if (!is_register_used( ctx, reg ))
+ cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg);
+ else
+ FREE(reg);
}
return TRUE;
}
@@ -210,33 +313,33 @@ iter_instruction(
* Mark the registers as used.
*/
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ scan_register *reg = create_scan_register_dst(&inst->Dst[i]);
check_register_usage(
ctx,
- inst->Dst[i].Register.File,
- inst->Dst[i].Register.Index,
+ reg,
"destination",
FALSE );
}
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ scan_register *reg = create_scan_register_src(&inst->Src[i]);
check_register_usage(
ctx,
- inst->Src[i].Register.File,
- inst->Src[i].Register.Index,
+ reg,
"source",
(boolean)inst->Src[i].Register.Indirect );
if (inst->Src[i].Register.Indirect) {
- uint file;
- int index;
+ scan_register *ind_reg = MALLOC(sizeof(scan_register));
- file = inst->Src[i].Indirect.File;
- index = inst->Src[i].Indirect.Index;
+ fill_scan_register1d(ind_reg,
+ inst->Src[i].Indirect.File,
+ inst->Src[i].Indirect.Index);
check_register_usage(
ctx,
- file,
- index,
+ reg,
"indirect",
FALSE );
- if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) {
+ if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) ||
+ reg->indices[0] != 0) {
report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]");
}
}
@@ -266,6 +369,19 @@ iter_instruction(
return TRUE;
}
+/*
+ * Record reg as declared in ctx->regs_decl.
+ *
+ * If an equal register is already declared, an error is reported and reg
+ * is freed instead of being stored a second time, so each declared
+ * register appears in the hash once.  Otherwise the hash keeps reg.
+ * Either way the caller must not use reg after this call.
+ */
+static void
+check_and_declare(struct sanity_check_ctx *ctx,
+ scan_register *reg)
+{
+ if (is_register_declared( ctx, reg)) {
+ /* Name both indices for two-dimensional registers, as
+ * check_register_usage() does in its own messages. */
+ if (reg->dimensions == 2) {
+ report_error( ctx, "%s[%u][%u]: The same register declared more than once",
+ file_names[reg->file], reg->indices[0], reg->indices[1] );
+ } else {
+ report_error( ctx, "%s[%u]: The same register declared more than once",
+ file_names[reg->file], reg->indices[0] );
+ }
+ /* An equal entry is already stored; drop the duplicate. */
+ FREE(reg);
+ return;
+ }
+ cso_hash_insert(ctx->regs_decl,
+ scan_register_key(reg),
+ reg);
+}
+
+
static boolean
iter_declaration(
struct tgsi_iterate_context *iter,
@@ -287,9 +403,21 @@ iter_declaration(
if (!check_file_name( ctx, file ))
return TRUE;
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
- if (is_register_declared( ctx, file, i ))
- report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i );
- ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
+ /* declared TGSI_FILE_INPUT's for geometry processor
+ * have an implied second dimension */
+ if (file == TGSI_FILE_INPUT &&
+ ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) {
+ uint vert;
+ for (vert = 0; vert < ctx->implied_array_size; ++vert) {
+ scan_register *reg = MALLOC(sizeof(scan_register));
+ fill_scan_register2d(reg, file, vert, i);
+ check_and_declare(ctx, reg);
+ }
+ } else {
+ scan_register *reg = MALLOC(sizeof(scan_register));
+ fill_scan_register1d(reg, file, i);
+ check_and_declare(ctx, reg);
+ }
}
return TRUE;
@@ -301,8 +429,7 @@ iter_immediate(
struct tgsi_full_immediate *imm )
{
struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
-
- assert( ctx->num_imms < MAX_REGISTERS );
+ scan_register *reg;
/* No immediates allowed after the first instruction.
*/
@@ -311,12 +438,16 @@ iter_immediate(
/* Mark the register as declared.
*/
- ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG));
+ reg = MALLOC(sizeof(scan_register));
+ fill_scan_register1d(reg, TGSI_FILE_IMMEDIATE, ctx->num_imms);
+ cso_hash_insert(ctx->regs_decl, scan_register_key(reg), reg);
ctx->num_imms++;
/* Check data type validity.
*/
- if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
+ if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 &&
+ imm->Immediate.DataType != TGSI_IMM_UINT32 &&
+ imm->Immediate.DataType != TGSI_IMM_INT32) {
report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType );
return TRUE;
}
@@ -330,8 +461,12 @@ iter_property(
struct tgsi_iterate_context *iter,
struct tgsi_full_property *prop )
{
- /*struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;*/
+ struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+ if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY &&
+ prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) {
+ ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data);
+ }
return TRUE;
}
@@ -340,7 +475,6 @@ epilog(
struct tgsi_iterate_context *iter )
{
struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
- uint file;
/* There must be an END instruction somewhere.
*/
@@ -350,13 +484,17 @@ epilog(
/* Check if all declared registers were used.
*/
- for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) {
- uint i;
-
- for (i = 0; i < MAX_REGISTERS; i++) {
- if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) {
- report_warning( ctx, "%s[%u]: Register never used", file_names[file], i );
+ {
+ struct cso_hash_iter iter =
+ cso_hash_first_node(ctx->regs_decl);
+
+ while (!cso_hash_iter_is_null(iter)) {
+ scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
+ if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) {
+ report_warning( ctx, "%s[%u]: Register never used",
+ file_names[reg->file], reg->indices[0] );
}
+ iter = cso_hash_iter_next(iter);
}
}
@@ -368,6 +506,18 @@ epilog(
return TRUE;
}
+/*
+ * Release a register hash: FREE every scan_register stored as node data
+ * in hash, then delete the hash itself.
+ */
+static void
+regs_hash_destroy(struct cso_hash *hash)
+{
+ struct cso_hash_iter iter = cso_hash_first_node(hash);
+ while (!cso_hash_iter_is_null(iter)) {
+ scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
+ /* the loop continues from the iterator returned by the erase call */
+ iter = cso_hash_erase(hash, iter);
+ FREE(reg);
+ }
+ cso_hash_delete(hash);
+}
+
boolean
tgsi_sanity_check(
const struct tgsi_token *tokens )
@@ -381,18 +531,23 @@ tgsi_sanity_check(
ctx.iter.iterate_property = iter_property;
ctx.iter.epilog = epilog;
- memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
- memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) );
- memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) );
+ ctx.regs_decl = cso_hash_create();
+ ctx.regs_used = cso_hash_create();
+ ctx.regs_ind_used = cso_hash_create();
+
ctx.num_imms = 0;
ctx.num_instructions = 0;
ctx.index_of_END = ~0;
ctx.errors = 0;
ctx.warnings = 0;
+ ctx.implied_array_size = 0;
if (!tgsi_iterate_shader( tokens, &ctx.iter ))
return FALSE;
+ regs_hash_destroy(ctx.regs_decl);
+ regs_hash_destroy(ctx.regs_used);
+ regs_hash_destroy(ctx.regs_ind_used);
return ctx.errors == 0;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 5f5c95bfbd..a6cc773003 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -139,15 +139,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
info->num_outputs++;
- }
- /* special case */
- if (procType == TGSI_PROCESSOR_FRAGMENT &&
- file == TGSI_FILE_OUTPUT &&
- fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
- info->writes_z = TRUE;
+ /* extra info for special outputs */
+ if (procType == TGSI_PROCESSOR_FRAGMENT &&
+ fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+ info->writes_z = TRUE;
+ }
+ if (procType == TGSI_PROCESSOR_VERTEX &&
+ fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) {
+ info->writes_edgeflag = TRUE;
+ }
}
- }
+
+ }
}
break;
@@ -225,8 +229,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
/* Do a whole bunch of checks for a simple move */
if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
- src->Register.File != TGSI_FILE_INPUT ||
- src->Register.File != TGSI_FILE_SYSTEM_VALUE ||
+ (src->Register.File != TGSI_FILE_INPUT &&
+ src->Register.File != TGSI_FILE_SYSTEM_VALUE) ||
dst->Register.File != TGSI_FILE_OUTPUT ||
src->Register.Index != dst->Register.Index ||
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index a1e8a4f6bb..dae5376c24 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -57,6 +57,7 @@ struct tgsi_shader_info
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
boolean writes_z; /**< does fragment shader write Z value? */
+ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */
boolean uses_fogcoord; /**< fragment shader uses fog coord? */
boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index d63c75dafb..118059ace9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2578,7 +2578,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
return 0;
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index f000958bfc..9fcffeda36 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -27,7 +27,9 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
#include "tgsi_info.h"
@@ -61,18 +63,20 @@ static boolean uprcase( char c )
}
/*
- * Ignore case of str1 and assume str2 is already uppercase.
+ * Compare str1 with str2, ignoring the case of str2; str1 is assumed
+ * to be already uppercase.
* Return TRUE iff str1 and str2 are equal.
*/
static int
streq_nocase_uprcase(const char *str1,
const char *str2)
{
- while (*str1 && uprcase(*str1) == *str2) {
+ while (*str1 && *str2) {
+ if (*str1 != uprcase(*str2))
+ return FALSE;
str1++;
str2++;
}
- return *str1 == *str2;
+ /* Both strings must end together: one being a mere prefix of the
+ * other is not equality. */
+ return *str1 == 0 && *str2 == 0;
}
static boolean str_match_no_case( const char **pcur, const char *str )
@@ -193,11 +197,26 @@ struct translate_ctx
struct tgsi_token *tokens_cur;
struct tgsi_token *tokens_end;
struct tgsi_header *header;
+ unsigned processor : 4;
+ int implied_array_size : 5;
};
+/*
+ * Print msg together with the 1-based line and column of the current
+ * parse position (ctx->cur), found by scanning from ctx->text.
+ */
static void report_error( struct translate_ctx *ctx, const char *msg )
{
- debug_printf( "\nError: %s", msg );
+ int line = 1;
+ int column = 1;
+ const char *itr = ctx->text;
+
+ while (itr != ctx->cur) {
+ if (*itr == '\n') {
+ /* the character after a newline is column 1 of the next line */
+ column = 1;
+ ++line;
+ } else {
+ ++column;
+ }
+ ++itr;
+ }
+
+ debug_printf( "\nTGSI asm error: %s [%d : %d] \n", msg, line, column );
}
/* Parse shader header.
@@ -229,6 +248,7 @@ static boolean parse_header( struct translate_ctx *ctx )
if (ctx->tokens_cur >= ctx->tokens_end)
return FALSE;
*(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header );
+ ctx->processor = processor;
return TRUE;
}
@@ -325,92 +345,36 @@ parse_opt_writemask(
return TRUE;
}
-/* <register_file_bracket> ::= <file> `['
- */
static boolean
-parse_register_file_bracket(
- struct translate_ctx *ctx,
- uint *file )
-{
- if (!parse_file( &ctx->cur, file )) {
- report_error( ctx, "Unknown register file" );
- return FALSE;
- }
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != '[') {
- report_error( ctx, "Expected `['" );
- return FALSE;
- }
- ctx->cur++;
- return TRUE;
-}
+parse_register_dst( struct translate_ctx *ctx,
+ uint *file,
+ int *index );
-/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
- */
-static boolean
-parse_register_file_bracket_index(
- struct translate_ctx *ctx,
- uint *file,
- int *index )
-{
- uint uindex;
+struct parsed_src_bracket {
+ int index;
- if (!parse_register_file_bracket( ctx, file ))
- return FALSE;
- eat_opt_white( &ctx->cur );
- if (!parse_uint( &ctx->cur, &uindex )) {
- report_error( ctx, "Expected literal unsigned integer" );
- return FALSE;
- }
- *index = (int) uindex;
- return TRUE;
-}
+ uint ind_file;
+ int ind_index;
+ uint ind_comp;
+};
-/* Parse destination register operand.
- * <register_dst> ::= <register_file_bracket_index> `]'
- */
-static boolean
-parse_register_dst(
- struct translate_ctx *ctx,
- uint *file,
- int *index )
-{
- if (!parse_register_file_bracket_index( ctx, file, index ))
- return FALSE;
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != ']') {
- report_error( ctx, "Expected `]'" );
- return FALSE;
- }
- ctx->cur++;
- return TRUE;
-}
-/* Parse source register operand.
- * <register_src> ::= <register_file_bracket_index> `]' |
- * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' |
- * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' |
- * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]'
- */
static boolean
-parse_register_src(
+parse_register_src_bracket(
struct translate_ctx *ctx,
- uint *file,
- int *index,
- uint *ind_file,
- int *ind_index,
- uint *ind_comp)
+ struct parsed_src_bracket *brackets)
{
const char *cur;
uint uindex;
- *ind_comp = TGSI_SWIZZLE_X;
- if (!parse_register_file_bracket( ctx, file ))
- return FALSE;
+ memset(brackets, 0, sizeof(struct parsed_src_bracket));
+
eat_opt_white( &ctx->cur );
+
cur = ctx->cur;
- if (parse_file( &cur, ind_file )) {
- if (!parse_register_dst( ctx, ind_file, ind_index ))
+ if (parse_file( &cur, &brackets->ind_file )) {
+ if (!parse_register_dst( ctx, &brackets->ind_file,
+ &brackets->ind_index ))
return FALSE;
eat_opt_white( &ctx->cur );
@@ -420,16 +384,16 @@ parse_register_src(
switch (uprcase(*ctx->cur)) {
case 'X':
- *ind_comp = TGSI_SWIZZLE_X;
+ brackets->ind_comp = TGSI_SWIZZLE_X;
break;
case 'Y':
- *ind_comp = TGSI_SWIZZLE_Y;
+ brackets->ind_comp = TGSI_SWIZZLE_Y;
break;
case 'Z':
- *ind_comp = TGSI_SWIZZLE_Z;
+ brackets->ind_comp = TGSI_SWIZZLE_Z;
break;
case 'W':
- *ind_comp = TGSI_SWIZZLE_W;
+ brackets->ind_comp = TGSI_SWIZZLE_W;
break;
default:
report_error(ctx, "Expected indirect register swizzle component `x', `y', `z' or `w'");
@@ -450,12 +414,12 @@ parse_register_src(
return FALSE;
}
if (negate)
- *index = -(int) uindex;
+ brackets->index = -(int) uindex;
else
- *index = (int) uindex;
+ brackets->index = (int) uindex;
}
else {
- *index = 0;
+ brackets->index = 0;
}
}
else {
@@ -463,9 +427,9 @@ parse_register_src(
report_error( ctx, "Expected literal unsigned integer" );
return FALSE;
}
- *index = (int) uindex;
- *ind_file = TGSI_FILE_NULL;
- *ind_index = 0;
+ brackets->index = (int) uindex;
+ brackets->ind_file = TGSI_FILE_NULL;
+ brackets->ind_index = 0;
}
eat_opt_white( &ctx->cur );
if (*ctx->cur != ']') {
@@ -476,20 +440,123 @@ parse_register_src(
return TRUE;
}
-/* Parse register declaration.
- * <register_dcl> ::= <register_file_bracket_index> `]' |
- * <register_file_bracket_index> `..' <index> `]'
+/*
+ * Parse an optional bracketed source index at the cursor.
+ *
+ * If the next non-white character is `[', it is consumed and the rest of
+ * the bracket is parsed into brackets by parse_register_src_bracket();
+ * *parsed_brackets is then set to 1.  If there is no `[', ctx->cur is
+ * left untouched and *parsed_brackets stays 0.
+ *
+ * Returns FALSE only when a bracket was opened but failed to parse.
+ */
+static boolean
+parse_opt_register_src_bracket(
+ struct translate_ctx *ctx,
+ struct parsed_src_bracket *brackets,
+ int *parsed_brackets)
+{
+ const char *cur = ctx->cur;
+
+ *parsed_brackets = 0;
+
+ /* look ahead on a local copy so the cursor only moves on a match */
+ eat_opt_white( &cur );
+ if (cur[0] == '[') {
+ ++cur;
+ ctx->cur = cur;
+
+ if (!parse_register_src_bracket(ctx, brackets))
+ return FALSE;
+
+ *parsed_brackets = 1;
+ }
+
+ return TRUE;
+}
+
+/* <register_file_bracket> ::= <file> `['
*/
static boolean
-parse_register_dcl(
+parse_register_file_bracket(
+ struct translate_ctx *ctx,
+ uint *file )
+{
+ if (!parse_file( &ctx->cur, file )) {
+ report_error( ctx, "Unknown register file" );
+ return FALSE;
+ }
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != '[') {
+ report_error( ctx, "Expected `['" );
+ return FALSE;
+ }
+ ctx->cur++;
+ return TRUE;
+}
+
+/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
+ *
+ * Parse a register file name, the opening `[' and a literal unsigned
+ * index.  On success *file holds the file and *index the index (cast to
+ * int); the closing `]' is not consumed here.  Reports an error and
+ * returns FALSE if the index is missing.
+ */
+static boolean
+parse_register_file_bracket_index(
+ struct translate_ctx *ctx,
+ uint *file,
+ int *index )
+{
+ uint uindex;
+
+ if (!parse_register_file_bracket( ctx, file ))
+ return FALSE;
+ eat_opt_white( &ctx->cur );
+ if (!parse_uint( &ctx->cur, &uindex )) {
+ report_error( ctx, "Expected literal unsigned integer" );
+ return FALSE;
+ }
+ *index = (int) uindex;
+ return TRUE;
+}
+
+/* Parse source register operand.
+ * <register_src> ::= <register_file_bracket_index> `]' |
+ * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' |
+ * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' |
+ * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]'
+ */
+static boolean
+parse_register_src(
struct translate_ctx *ctx,
uint *file,
- int *first,
- int *last )
+ struct parsed_src_bracket *brackets)
+{
+
+ brackets->ind_comp = TGSI_SWIZZLE_X;
+ if (!parse_register_file_bracket( ctx, file ))
+ return FALSE;
+ if (!parse_register_src_bracket( ctx, brackets ))
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Index range parsed from one bracket group of a declaration. */
+struct parsed_dcl_bracket {
+ uint first; /* first index of the range */
+ uint last; /* last index of the range */
+};
+
+static boolean
+parse_register_dcl_bracket(
+ struct translate_ctx *ctx,
+ struct parsed_dcl_bracket *bracket)
{
- if (!parse_register_file_bracket_index( ctx, file, first ))
+ uint uindex;
+ memset(bracket, 0, sizeof(struct parsed_dcl_bracket));
+
+ eat_opt_white( &ctx->cur );
+
+ if (!parse_uint( &ctx->cur, &uindex )) {
+ /* it can be an empty bracket [] which means its range
+ * is from 0 to some implied size */
+ if (ctx->cur[0] == ']' && ctx->implied_array_size != 0) {
+ bracket->first = 0;
+ bracket->last = ctx->implied_array_size - 1;
+ goto cleanup;
+ }
+ report_error( ctx, "Expected literal unsigned integer" );
return FALSE;
+ }
+ bracket->first = (int) uindex;
+
eat_opt_white( &ctx->cur );
+
if (ctx->cur[0] == '.' && ctx->cur[1] == '.') {
uint uindex;
@@ -499,12 +566,14 @@ parse_register_dcl(
report_error( ctx, "Expected literal integer" );
return FALSE;
}
- *last = (int) uindex;
+ bracket->last = (int) uindex;
eat_opt_white( &ctx->cur );
}
else {
- *last = *first;
+ bracket->last = bracket->first;
}
+
+cleanup:
if (*ctx->cur != ']') {
report_error( ctx, "Expected `]' or `..'" );
return FALSE;
@@ -513,6 +582,70 @@ parse_register_dcl(
return TRUE;
}
+/* Parse register declaration.
+ * <register_dcl> ::= <register_file_bracket_index> `]' |
+ * <register_file_bracket_index> `..' <index> `]'
+ *
+ * A second bracket group may follow the first one.  *num_brackets is set
+ * to the number of groups parsed: 1 or 2 on success, and 0 if parsing
+ * fails before the first group is complete.  brackets must therefore
+ * have room for two entries.
+ */
+static boolean
+parse_register_dcl(
+ struct translate_ctx *ctx,
+ uint *file,
+ struct parsed_dcl_bracket *brackets,
+ int *num_brackets)
+{
+ const char *cur;
+
+ *num_brackets = 0;
+
+ if (!parse_register_file_bracket( ctx, file ))
+ return FALSE;
+ if (!parse_register_dcl_bracket( ctx, &brackets[0] ))
+ return FALSE;
+
+ *num_brackets = 1;
+
+ /* look ahead on a local copy; the cursor only moves if `[' follows */
+ cur = ctx->cur;
+ eat_opt_white( &cur );
+
+ if (cur[0] == '[') {
+ ++cur;
+ ctx->cur = cur;
+ if (!parse_register_dcl_bracket( ctx, &brackets[1] ))
+ return FALSE;
+ /* for geometry shader we don't really care about
+ * the first brackets it's always the size of the
+ * input primitive. so we want to declare just
+ * the index relevant to the semantics which is in
+ * the second bracket */
+ if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) {
+ brackets[0] = brackets[1];
+ }
+ *num_brackets = 2;
+ }
+
+ return TRUE;
+}
+
+
+/* Parse destination register operand.
+ * <register_dst> ::= <register_file_bracket_index> `]'
+ *
+ * On success *file and *index hold the parsed file and index and the
+ * cursor is past the closing `]'.  Reports an error and returns FALSE
+ * when the closing `]' is missing.
+ */
+static boolean
+parse_register_dst(
+ struct translate_ctx *ctx,
+ uint *file,
+ int *index )
+{
+ if (!parse_register_file_bracket_index( ctx, file, index ))
+ return FALSE;
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != ']') {
+ report_error( ctx, "Expected `]'" );
+ return FALSE;
+ }
+ ctx->cur++;
+ return TRUE;
+}
static boolean
parse_dst_operand(
@@ -582,37 +715,44 @@ parse_src_operand(
struct tgsi_full_src_register *src )
{
uint file;
- int index;
- uint ind_file;
- int ind_index;
- uint ind_comp;
uint swizzle[4];
boolean parsed_swizzle;
+ struct parsed_src_bracket bracket[2];
+ int parsed_opt_brackets;
if (*ctx->cur == '-') {
ctx->cur++;
eat_opt_white( &ctx->cur );
src->Register.Negate = 1;
}
-
+
if (*ctx->cur == '|') {
ctx->cur++;
eat_opt_white( &ctx->cur );
src->Register.Absolute = 1;
}
- if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp))
+ if (!parse_register_src(ctx, &file, &bracket[0]))
+ return FALSE;
+ if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets))
return FALSE;
+
src->Register.File = file;
- src->Register.Index = index;
- if (ind_file != TGSI_FILE_NULL) {
+ src->Register.Index = bracket[0].index;
+ if (bracket[0].ind_file != TGSI_FILE_NULL) {
src->Register.Indirect = 1;
- src->Indirect.File = ind_file;
- src->Indirect.Index = ind_index;
- src->Indirect.SwizzleX = ind_comp;
- src->Indirect.SwizzleY = ind_comp;
- src->Indirect.SwizzleZ = ind_comp;
- src->Indirect.SwizzleW = ind_comp;
+ src->Indirect.File = bracket[0].ind_file;
+ src->Indirect.Index = bracket[0].ind_index;
+ src->Indirect.SwizzleX = bracket[0].ind_comp;
+ src->Indirect.SwizzleY = bracket[0].ind_comp;
+ src->Indirect.SwizzleZ = bracket[0].ind_comp;
+ src->Indirect.SwizzleW = bracket[0].ind_comp;
+ }
+ if (parsed_opt_brackets) {
+ src->Register.Dimension = 1;
+ src->Dimension.Indirect = 0;
+ src->Dimension.Dimension = 0;
+ src->Dimension.Index = bracket[1].index;
}
/* Parse optional swizzle.
@@ -791,7 +931,9 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
"PSIZE",
"GENERIC",
"NORMAL",
- "FACE"
+ "FACE",
+ "EDGEFLAG",
+ "PRIM_ID"
};
static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
@@ -805,8 +947,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
{
struct tgsi_full_declaration decl;
uint file;
- int first;
- int last;
+ struct parsed_dcl_bracket brackets[2];
+ int num_brackets;
uint writemask;
const char *cur;
uint advance;
@@ -818,7 +960,7 @@ static boolean parse_declaration( struct translate_ctx *ctx )
report_error( ctx, "Syntax error" );
return FALSE;
}
- if (!parse_register_dcl( ctx, &file, &first, &last ))
+ if (!parse_register_dcl( ctx, &file, brackets, &num_brackets))
return FALSE;
if (!parse_opt_writemask( ctx, &writemask ))
return FALSE;
@@ -826,8 +968,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
decl = tgsi_default_full_declaration();
decl.Declaration.File = file;
decl.Declaration.UsageMask = writemask;
- decl.Range.First = first;
- decl.Range.Last = last;
+ decl.Range.First = brackets[0].first;
+ decl.Range.Last = brackets[0].last;
cur = ctx->cur;
eat_opt_white( &cur );
@@ -1027,7 +1169,7 @@ static boolean parse_property( struct translate_ctx *ctx )
}
for (property_name = 0; property_name < TGSI_PROPERTY_COUNT;
++property_name) {
- if (streq_nocase_uprcase(id, property_names[property_name])) {
+ if (streq_nocase_uprcase(property_names[property_name], id)) {
break;
}
}
@@ -1044,6 +1186,10 @@ static boolean parse_property( struct translate_ctx *ctx )
report_error( ctx, "Unknown primitive name as property!" );
return FALSE;
}
+ if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM &&
+ ctx->processor == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->implied_array_size = u_vertices_per_prim(values[0]);
+ }
break;
default:
if (!parse_uint(&ctx->cur, &values[0] )) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 1e730e5342..e64e2b731d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -101,8 +101,13 @@ struct ureg_program
unsigned nr_outputs;
struct {
- float v[4];
+ union {
+ float f[4];
+ unsigned u[4];
+ int i[4];
+ } value;
unsigned nr;
+ unsigned type;
} immediate[UREG_MAX_IMMEDIATE];
unsigned nr_immediates;
@@ -486,22 +491,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
}
-
-
-static int match_or_expand_immediate( const float *v,
- unsigned nr,
- float *v2,
- unsigned *nr2,
- unsigned *swizzle )
+static int
+match_or_expand_immediate( const unsigned *v,
+ unsigned nr,
+ unsigned *v2,
+ unsigned *pnr2,
+ unsigned *swizzle )
{
+ unsigned nr2 = *pnr2;
unsigned i, j;
-
+
*swizzle = 0;
for (i = 0; i < nr; i++) {
boolean found = FALSE;
- for (j = 0; j < *nr2 && !found; j++) {
+ for (j = 0; j < nr2 && !found; j++) {
if (v[i] == v2[j]) {
*swizzle |= j << (i * 2);
found = TRUE;
@@ -509,24 +514,28 @@ static int match_or_expand_immediate( const float *v,
}
if (!found) {
- if (*nr2 >= 4)
+ if (nr2 >= 4) {
return FALSE;
+ }
- v2[*nr2] = v[i];
- *swizzle |= *nr2 << (i * 2);
- (*nr2)++;
+ v2[nr2] = v[i];
+ *swizzle |= nr2 << (i * 2);
+ nr2++;
}
}
+ /* Actually expand immediate only when fully succeeded.
+ */
+ *pnr2 = nr2;
return TRUE;
}
-
-
-struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
- const float *v,
- unsigned nr )
+static struct ureg_src
+decl_immediate( struct ureg_program *ureg,
+ const unsigned *v,
+ unsigned nr,
+ unsigned type )
{
unsigned i, j;
unsigned swizzle;
@@ -536,38 +545,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
*/
for (i = 0; i < ureg->nr_immediates; i++) {
- if (match_or_expand_immediate( v,
- nr,
- ureg->immediate[i].v,
- &ureg->immediate[i].nr,
- &swizzle ))
+ if (ureg->immediate[i].type != type) {
+ continue;
+ }
+ if (match_or_expand_immediate(v,
+ nr,
+ ureg->immediate[i].value.u,
+ &ureg->immediate[i].nr,
+ &swizzle)) {
goto out;
+ }
}
if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
i = ureg->nr_immediates++;
- if (match_or_expand_immediate( v,
- nr,
- ureg->immediate[i].v,
- &ureg->immediate[i].nr,
- &swizzle ))
+ ureg->immediate[i].type = type;
+ if (match_or_expand_immediate(v,
+ nr,
+ ureg->immediate[i].value.u,
+ &ureg->immediate[i].nr,
+ &swizzle)) {
goto out;
+ }
}
- set_bad( ureg );
+ set_bad(ureg);
out:
/* Make sure that all referenced elements are from this immediate.
* Has the effect of making size-one immediates into scalars.
*/
- for (j = nr; j < 4; j++)
+ for (j = nr; j < 4; j++) {
swizzle |= (swizzle & 0x3) << (j * 2);
+ }
+
+ return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
+ (swizzle >> 0) & 0x3,
+ (swizzle >> 2) & 0x3,
+ (swizzle >> 4) & 0x3,
+ (swizzle >> 6) & 0x3);
+}
+
+
+/*
+ * Declare a float immediate with nr components taken from v.
+ * The values are copied into a float/unsigned union so that
+ * decl_immediate() receives them as unsigned words, tagged
+ * TGSI_IMM_FLOAT32.
+ * NOTE(review): the union holds four components, so this assumes
+ * nr <= 4 -- confirm against callers.
+ */
+struct ureg_src
+ureg_DECL_immediate( struct ureg_program *ureg,
+ const float *v,
+ unsigned nr )
+{
+ union {
+ float f[4];
+ unsigned u[4];
+ } fu;
+ unsigned int i;
+
+ for (i = 0; i < nr; i++) {
+ fu.f[i] = v[i];
+ }
+
+ return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
+}
+
- return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ),
- (swizzle >> 0) & 0x3,
- (swizzle >> 2) & 0x3,
- (swizzle >> 4) & 0x3,
- (swizzle >> 6) & 0x3);
+/*
+ * Declare an unsigned integer immediate with nr components taken from v;
+ * forwards to decl_immediate() with type TGSI_IMM_UINT32.
+ */
+struct ureg_src
+ureg_DECL_immediate_uint( struct ureg_program *ureg,
+ const unsigned *v,
+ unsigned nr )
+{
+ return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
+}
+
+
+/*
+ * Declare a signed integer immediate with nr components taken from v;
+ * forwards to decl_immediate() with type TGSI_IMM_INT32.  The values are
+ * handed over through a pointer cast to const unsigned *.
+ */
+struct ureg_src
+ureg_DECL_immediate_int( struct ureg_program *ureg,
+ const int *v,
+ unsigned nr )
+{
+ return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
}
@@ -791,8 +844,8 @@ ureg_insn(struct ureg_program *ureg,
unsigned i;
boolean saturate;
boolean predicate;
- boolean negate;
- unsigned swizzle[4];
+ boolean negate = FALSE;
+ unsigned swizzle[4] = { 0 };
saturate = nr_dst ? dst[0].Saturate : FALSE;
predicate = nr_dst ? dst[0].Predicate : FALSE;
@@ -838,8 +891,8 @@ ureg_tex_insn(struct ureg_program *ureg,
unsigned i;
boolean saturate;
boolean predicate;
- boolean negate;
- unsigned swizzle[4];
+ boolean negate = FALSE;
+ unsigned swizzle[4] = { 0 };
saturate = nr_dst ? dst[0].Saturate : FALSE;
predicate = nr_dst ? dst[0].Predicate : FALSE;
@@ -955,21 +1008,23 @@ static void emit_decl_range( struct ureg_program *ureg,
out[1].decl_range.Last = first + count - 1;
}
-static void emit_immediate( struct ureg_program *ureg,
- const float *v )
+static void
+emit_immediate( struct ureg_program *ureg,
+ const unsigned *v,
+ unsigned type )
{
union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
out[0].value = 0;
out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
out[0].imm.NrTokens = 5;
- out[0].imm.DataType = TGSI_IMM_FLOAT32;
+ out[0].imm.DataType = type;
out[0].imm.Padding = 0;
- out[1].imm_data.Float = v[0];
- out[2].imm_data.Float = v[1];
- out[3].imm_data.Float = v[2];
- out[4].imm_data.Float = v[3];
+ out[1].imm_data.Uint = v[0];
+ out[2].imm_data.Uint = v[1];
+ out[3].imm_data.Uint = v[2];
+ out[4].imm_data.Uint = v[3];
}
@@ -1055,7 +1110,8 @@ static void emit_decls( struct ureg_program *ureg )
for (i = 0; i < ureg->nr_immediates; i++) {
emit_immediate( ureg,
- ureg->immediate[i].v );
+ ureg->immediate[i].value.u,
+ ureg->immediate[i].type );
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 7e3e7bcf1d..6f11273320 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -148,6 +148,16 @@ ureg_DECL_immediate( struct ureg_program *,
unsigned nr );
struct ureg_src
+ureg_DECL_immediate_uint( struct ureg_program *,
+ const unsigned *v,
+ unsigned nr );
+
+struct ureg_src
+ureg_DECL_immediate_int( struct ureg_program *,
+ const int *v,
+ unsigned nr );
+
+struct ureg_src
ureg_DECL_constant( struct ureg_program *,
unsigned index );
@@ -221,6 +231,90 @@ ureg_imm1f( struct ureg_program *ureg,
return ureg_DECL_immediate( ureg, v, 1 );
}
+/* Convenience wrapper: declare the 4-component unsigned immediate (a, b, c, d). */
+static INLINE struct ureg_src
+ureg_imm4u( struct ureg_program *ureg,
+ unsigned a, unsigned b,
+ unsigned c, unsigned d)
+{
+ unsigned v[4];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ v[3] = d;
+ return ureg_DECL_immediate_uint( ureg, v, 4 );
+}
+
+/* Convenience wrapper: declare the 3-component unsigned immediate (a, b, c). */
+static INLINE struct ureg_src
+ureg_imm3u( struct ureg_program *ureg,
+ unsigned a, unsigned b,
+ unsigned c)
+{
+ unsigned v[3];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ return ureg_DECL_immediate_uint( ureg, v, 3 );
+}
+
+/* Convenience wrapper: declare the 2-component unsigned immediate (a, b). */
+static INLINE struct ureg_src
+ureg_imm2u( struct ureg_program *ureg,
+ unsigned a, unsigned b)
+{
+ unsigned v[2];
+ v[0] = a;
+ v[1] = b;
+ return ureg_DECL_immediate_uint( ureg, v, 2 );
+}
+
+/* Convenience wrapper: declare the 1-component unsigned immediate (a). */
+static INLINE struct ureg_src
+ureg_imm1u( struct ureg_program *ureg,
+ unsigned a)
+{
+ return ureg_DECL_immediate_uint( ureg, &a, 1 );
+}
+
+/* Convenience wrapper: declare the 4-component signed immediate (a, b, c, d). */
+static INLINE struct ureg_src
+ureg_imm4i( struct ureg_program *ureg,
+ int a, int b,
+ int c, int d)
+{
+ int v[4];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ v[3] = d;
+ return ureg_DECL_immediate_int( ureg, v, 4 );
+}
+
+/* Convenience wrapper: declare the 3-component signed immediate (a, b, c). */
+static INLINE struct ureg_src
+ureg_imm3i( struct ureg_program *ureg,
+ int a, int b,
+ int c)
+{
+ int v[3];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ return ureg_DECL_immediate_int( ureg, v, 3 );
+}
+
+/* Convenience wrapper: declare the 2-component signed immediate (a, b). */
+static INLINE struct ureg_src
+ureg_imm2i( struct ureg_program *ureg,
+ int a, int b)
+{
+ int v[2];
+ v[0] = a;
+ v[1] = b;
+ return ureg_DECL_immediate_int( ureg, v, 2 );
+}
+
+/* Convenience wrapper: declare the 1-component signed immediate (a). */
+static INLINE struct ureg_src
+ureg_imm1i( struct ureg_program *ureg,
+ int a)
+{
+ return ureg_DECL_immediate_int( ureg, &a, 1 );
+}
+
/***********************************************************************
* Functions for patching up labels
*/
diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile
deleted file mode 100644
index 3c82f8ae03..0000000000
--- a/src/gallium/auxiliary/translate/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = translate
-
-C_SOURCES = \
- translate_generic.c \
- translate_sse.c \
- translate.c \
- translate_cache.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript
deleted file mode 100644
index 9553a67537..0000000000
--- a/src/gallium/auxiliary/translate/SConscript
+++ /dev/null
@@ -1,12 +0,0 @@
-Import('*')
-
-translate = env.ConvenienceLibrary(
- target = 'translate',
- source = [
- 'translate_generic.c',
- 'translate_sse.c',
- 'translate.c',
- 'translate_cache.c',
- ])
-
-auxiliaries.insert(0, translate)
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
deleted file mode 100644
index 3ed90fd1b7..0000000000
--- a/src/gallium/auxiliary/util/Makefile
+++ /dev/null
@@ -1,48 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = util
-
-C_SOURCES = \
- u_debug.c \
- u_debug_dump.c \
- u_debug_symbol.c \
- u_debug_stack.c \
- u_blit.c \
- u_blitter.c \
- u_cache.c \
- u_cpu_detect.c \
- u_dl.c \
- u_draw_quad.c \
- u_format.c \
- u_format_access.c \
- u_format_table.c \
- u_gen_mipmap.c \
- u_handle_table.c \
- u_hash_table.c \
- u_hash.c \
- u_keymap.c \
- u_linear.c \
- u_network.c \
- u_math.c \
- u_mm.c \
- u_rect.c \
- u_simple_shaders.c \
- u_snprintf.c \
- u_stream_stdc.c \
- u_stream_wd.c \
- u_surface.c \
- u_texture.c \
- u_tile.c \
- u_time.c \
- u_timed_winsys.c \
- u_upload_mgr.c \
- u_simple_screen.c
-
-include ../../Makefile.template
-
-u_format_table.c: u_format_table.py u_format_parse.py u_format.csv
- python u_format_table.py u_format.csv > $@
-
-u_format_access.c: u_format_access.py u_format_parse.py u_format.csv
- python u_format_access.py u_format.csv > $@
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
deleted file mode 100644
index 2a546d19dc..0000000000
--- a/src/gallium/auxiliary/util/SConscript
+++ /dev/null
@@ -1,61 +0,0 @@
-Import('*')
-
-env.Clone()
-
-env.Append(CPPPATH = ['.'])
-
-env.CodeGenerate(
- target = 'u_format_table.c',
- script = 'u_format_table.py',
- source = ['u_format.csv'],
- command = 'python $SCRIPT $SOURCE > $TARGET'
-)
-
-env.CodeGenerate(
- target = 'u_format_access.c',
- script = 'u_format_access.py',
- source = ['u_format.csv'],
- command = 'python $SCRIPT $SOURCE > $TARGET'
-)
-
-util = env.ConvenienceLibrary(
- target = 'util',
- source = [
- 'u_bitmask.c',
- 'u_blit.c',
- 'u_blitter.c',
- 'u_cache.c',
- 'u_cpu_detect.c',
- 'u_debug.c',
- 'u_debug_dump.c',
- 'u_debug_memory.c',
- 'u_debug_stack.c',
- 'u_debug_symbol.c',
- 'u_dl.c',
- 'u_draw_quad.c',
- 'u_format.c',
- 'u_format_access.c',
- 'u_format_table.c',
- 'u_gen_mipmap.c',
- 'u_handle_table.c',
- 'u_hash.c',
- 'u_hash_table.c',
- 'u_keymap.c',
- 'u_network.c',
- 'u_math.c',
- 'u_mm.c',
- 'u_rect.c',
- 'u_simple_shaders.c',
- 'u_snprintf.c',
- 'u_stream_stdc.c',
- 'u_stream_wd.c',
- 'u_surface.c',
- 'u_texture.c',
- 'u_tile.c',
- 'u_time.c',
- 'u_timed_winsys.c',
- 'u_upload_mgr.c',
- 'u_simple_screen.c',
- ])
-
-auxiliaries.insert(0, util)
diff --git a/src/gallium/auxiliary/util/u_bitmask.c b/src/gallium/auxiliary/util/u_bitmask.c
index 77587c07ec..23c93a3ebc 100644
--- a/src/gallium/auxiliary/util/u_bitmask.c
+++ b/src/gallium/auxiliary/util/u_bitmask.c
@@ -97,12 +97,12 @@ util_bitmask_resize(struct util_bitmask *bm,
if(!minimum_size)
return FALSE;
- if(bm->size > minimum_size)
+ if(bm->size >= minimum_size)
return TRUE;
assert(bm->size % UTIL_BITMASK_BITS_PER_WORD == 0);
new_size = bm->size;
- while(!(new_size > minimum_size)) {
+ while(new_size < minimum_size) {
new_size *= 2;
/* Check integer overflow */
if(new_size < bm->size)
@@ -136,7 +136,7 @@ util_bitmask_filled_set(struct util_bitmask *bm,
unsigned index)
{
assert(bm->filled <= bm->size);
- assert(index <= bm->size);
+ assert(index < bm->size);
if(index == bm->filled) {
++bm->filled;
@@ -149,7 +149,7 @@ util_bitmask_filled_unset(struct util_bitmask *bm,
unsigned index)
{
assert(bm->filled <= bm->size);
- assert(index <= bm->size);
+ assert(index < bm->size);
if(index < bm->filled)
bm->filled = index;
@@ -182,7 +182,7 @@ util_bitmask_add(struct util_bitmask *bm)
mask = 1;
}
found:
-
+
/* grow the bitmask if necessary */
if(!util_bitmask_resize(bm, bm->filled))
return UTIL_BITMASK_INVALID_INDEX;
@@ -198,9 +198,9 @@ unsigned
util_bitmask_set(struct util_bitmask *bm,
unsigned index)
{
- unsigned word = index / UTIL_BITMASK_BITS_PER_WORD;
- unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD;
- util_bitmask_word mask = 1 << bit;
+ unsigned word;
+ unsigned bit;
+ util_bitmask_word mask;
assert(bm);
@@ -208,6 +208,10 @@ util_bitmask_set(struct util_bitmask *bm,
if(!util_bitmask_resize(bm, index))
return UTIL_BITMASK_INVALID_INDEX;
+ word = index / UTIL_BITMASK_BITS_PER_WORD;
+ bit = index % UTIL_BITMASK_BITS_PER_WORD;
+ mask = 1 << bit;
+
bm->words[word] |= mask;
util_bitmask_filled_set(bm, index);
@@ -220,15 +224,19 @@ void
util_bitmask_clear(struct util_bitmask *bm,
unsigned index)
{
- unsigned word = index / UTIL_BITMASK_BITS_PER_WORD;
- unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD;
- util_bitmask_word mask = 1 << bit;
+ unsigned word;
+ unsigned bit;
+ util_bitmask_word mask;
assert(bm);
if(index >= bm->size)
return;
+ word = index / UTIL_BITMASK_BITS_PER_WORD;
+ bit = index % UTIL_BITMASK_BITS_PER_WORD;
+ mask = 1 << bit;
+
bm->words[word] &= ~mask;
util_bitmask_filled_unset(bm, index);
@@ -250,7 +258,7 @@ util_bitmask_get(struct util_bitmask *bm,
return TRUE;
}
- if(index > bm->size)
+ if(index >= bm->size)
return FALSE;
if(bm->words[word] & mask) {
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 27e0b0d159..9b4e6ca2a7 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -70,6 +70,7 @@
#include "util/u_stream.h"
#include "util/u_math.h"
#include "util/u_tile.h"
+#include "util/u_prim.h"
#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
@@ -453,7 +454,8 @@ debug_dump_flags(const struct debug_named_value *names,
util_strncat(output, "|", sizeof(output));
else
first = 0;
- util_strncat(output, names->name, sizeof(output));
+ util_strncat(output, names->name, sizeof(output) - 1);
+ output[sizeof(output) - 1] = '\0';
value &= ~names->value;
}
++names;
@@ -466,7 +468,8 @@ debug_dump_flags(const struct debug_named_value *names,
first = 0;
util_snprintf(rest, sizeof(rest), "0x%08lx", value);
- util_strncat(output, rest, sizeof(output));
+ util_strncat(output, rest, sizeof(output) - 1);
+ output[sizeof(output) - 1] = '\0';
}
if(first)
@@ -601,6 +604,32 @@ const char *pf_name( enum pipe_format format )
}
+
+static const struct debug_named_value pipe_prim_names[] = {
+#ifdef DEBUG
+ DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_LINES),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON),
+#endif
+ DEBUG_NAMED_VALUE_END
+};
+
+
+const char *u_prim_name( unsigned prim )
+{
+ return debug_dump_enum(pipe_prim_names, prim);
+}
+
+
+
+
#ifdef DEBUG
void debug_dump_image(const char *prefix,
unsigned format, unsigned cpp,
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index abd834c741..facc30a553 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -188,7 +188,7 @@ void _debug_assert_fail(const char *expr,
#ifdef DEBUG
#define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__))
#else
-#define debug_assert(expr) ((void)0)
+#define debug_assert(expr) do { } while (0 && (expr))
#endif
diff --git a/src/gallium/auxiliary/util/u_debug_memory.c b/src/gallium/auxiliary/util/u_debug_memory.c
index 7623cb9398..d6484f4ad5 100644
--- a/src/gallium/auxiliary/util/u_debug_memory.c
+++ b/src/gallium/auxiliary/util/u_debug_memory.c
@@ -297,9 +297,9 @@ debug_memory_end(unsigned long start_no)
if((start_no <= hdr->no && hdr->no < last_no) ||
(last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) {
- debug_printf("%s:%u:%s: %u bytes at %p not freed\n",
+ debug_printf("%s:%u:%s: %lu bytes at %p not freed\n",
hdr->file, hdr->line, hdr->function,
- hdr->size, ptr);
+ (unsigned long) hdr->size, ptr);
#if DEBUG_MEMORY_STACK
debug_backtrace_dump(hdr->backtrace, DEBUG_MEMORY_STACK);
#endif
@@ -315,8 +315,8 @@ debug_memory_end(unsigned long start_no)
}
if(total_size) {
- debug_printf("Total of %u KB of system memory apparently leaked\n",
- (total_size + 1023)/1024);
+ debug_printf("Total of %lu KB of system memory apparently leaked\n",
+ (unsigned long) (total_size + 1023)/1024);
}
else {
debug_printf("No memory leaks detected.\n");
diff --git a/src/gallium/auxiliary/util/u_dl.h b/src/gallium/auxiliary/util/u_dl.h
index 018b38543b..85296c58af 100644
--- a/src/gallium/auxiliary/util/u_dl.h
+++ b/src/gallium/auxiliary/util/u_dl.h
@@ -30,6 +30,18 @@
#define U_DL_H_
+#include "pipe/p_config.h"
+
+
+#if defined(PIPE_OS_WINDOWS)
+# define UTIL_DL_EXT ".dll"
+#elif defined(PIPE_OS_APPLE)
+# define UTIL_DL_EXT ".dylib"
+#else
+# define UTIL_DL_EXT ".so"
+#endif
+
+
struct util_dl_library;
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index c344c4201b..a558923b2e 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -119,7 +119,7 @@ enum util_format_colorspace {
UTIL_FORMAT_COLORSPACE_RGB = 0,
UTIL_FORMAT_COLORSPACE_SRGB = 1,
UTIL_FORMAT_COLORSPACE_YUV = 2,
- UTIL_FORMAT_COLORSPACE_ZS = 3,
+ UTIL_FORMAT_COLORSPACE_ZS = 3
};
@@ -201,7 +201,7 @@ util_format_is_depth_and_stencil(enum pipe_format format)
/**
- * Return total bits needed for the pixel format.
+ * Return total bits needed for the pixel format per block.
*/
static INLINE uint
util_format_get_blocksizebits(enum pipe_format format)
@@ -213,11 +213,11 @@ util_format_get_blocksizebits(enum pipe_format format)
return 0;
}
- return desc->block.bits / (desc->block.width * desc->block.height);
+ return desc->block.bits;
}
/**
- * Return bytes per pixel for the given format.
+ * Return bytes per block (not pixel) for the given format.
*/
static INLINE uint
util_format_get_blocksize(enum pipe_format format)
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 0dad6ccbc0..76023794dc 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -63,7 +63,7 @@ struct gen_mipmap_state
struct pipe_sampler_state sampler;
void *vs;
- void *fs;
+ void *fs2d, *fsCube;
struct pipe_buffer *vbuf; /**< quad vertices */
unsigned vbuf_slot;
@@ -1319,7 +1319,8 @@ util_create_gen_mipmap(struct pipe_context *pipe,
}
/* fragment shader */
- ctx->fs = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
+ ctx->fs2d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
+ ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE);
/* vertex data that doesn't change */
for (i = 0; i < 4; i++) {
@@ -1427,7 +1428,8 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
struct pipe_context *pipe = ctx->pipe;
pipe->delete_vs_state(pipe, ctx->vs);
- pipe->delete_fs_state(pipe, ctx->fs);
+ pipe->delete_fs_state(pipe, ctx->fs2d);
+ pipe->delete_fs_state(pipe, ctx->fsCube);
pipe_buffer_reference(&ctx->vbuf, NULL);
@@ -1465,6 +1467,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
struct pipe_context *pipe = ctx->pipe;
struct pipe_screen *screen = pipe->screen;
struct pipe_framebuffer_state fb;
+ void *fs = (pt->target == PIPE_TEXTURE_CUBE) ? ctx->fsCube : ctx->fs2d;
uint dstLevel;
uint zslice = 0;
uint offset;
@@ -1502,7 +1505,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
- cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+ cso_set_fragment_shader_handle(ctx->cso, fs);
cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* init framebuffer state */
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index b76592d1ec..b2969a210a 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -583,6 +583,18 @@ do { \
#endif
+static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits)
+{
+ return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits));
+}
+
+static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits)
+{
+ return (int32_t)(value * (1<<frac_bits));
+}
+
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c
index 6269c72e12..87ee0e4768 100644
--- a/src/gallium/auxiliary/util/u_network.c
+++ b/src/gallium/auxiliary/util/u_network.c
@@ -6,7 +6,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# include <winsock2.h>
# include <windows.h>
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
# include <sys/socket.h>
# include <netinet/in.h>
# include <unistd.h>
@@ -54,7 +54,7 @@ u_socket_close(int s)
if (s < 0)
return;
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
shutdown(s, SHUT_RDWR);
close(s);
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
@@ -117,7 +117,7 @@ u_socket_connect(const char *hostname, uint16_t port)
if (!host)
return -1;
- memcpy((char *)&sa.sin_addr,host->h_addr,host->h_length);
+ memcpy((char *)&sa.sin_addr,host->h_addr_list[0],host->h_length);
sa.sin_family= host->h_addrtype;
sa.sin_port = htons(port);
@@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum)
void
u_socket_block(int s, boolean block)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
int old = fcntl(s, F_GETFL, 0);
if (old == -1)
return;
diff --git a/src/gallium/auxiliary/util/u_network.h b/src/gallium/auxiliary/util/u_network.h
index 0aa898b967..187dcab86e 100644
--- a/src/gallium/auxiliary/util/u_network.h
+++ b/src/gallium/auxiliary/util/u_network.h
@@ -6,7 +6,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# define PIPE_HAVE_SOCKETS
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
# define PIPE_HAVE_SOCKETS
#endif
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index a9b533eea7..10a874f341 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -135,4 +135,39 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim )
}
}
+static INLINE unsigned
+u_vertices_per_prim(int primitive)
+{
+ switch(primitive) {
+ case PIPE_PRIM_POINTS:
+ return 1;
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_LOOP:
+ case PIPE_PRIM_LINE_STRIP:
+ return 2;
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return 3;
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ return 4;
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ return 6;
+
+ /* following primitives should never be used
+ * with geometry shaders and their size is
+ * undefined */
+ case PIPE_PRIM_POLYGON:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ default:
+ debug_printf("Unrecognized geometry shader primitive");
+ return 3;
+ }
+}
+
+const char *u_prim_name( unsigned pipe_prim );
+
#endif
diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c
index 5cd05b2904..4d976d6dca 100644
--- a/src/gallium/auxiliary/util/u_stream_stdc.c
+++ b/src/gallium/auxiliary/util/u_stream_stdc.c
@@ -32,7 +32,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
#include <stdio.h>
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index 745b5834af..e158bed9d0 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,6 +32,8 @@
#ifndef U_UPLOAD_MGR_H
#define U_UPLOAD_MGR_H
+#include "pipe/p_defines.h"
+
struct pipe_screen;
struct pipe_buffer;
struct u_upload_mgr;
diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile
deleted file mode 100644
index 4314c1e8d6..0000000000
--- a/src/gallium/auxiliary/vl/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = vl
-
-C_SOURCES = \
- vl_bitstream_parser.c \
- vl_mpeg12_mc_renderer.c \
- vl_compositor.c \
- vl_csc.c \
- vl_shader_build.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript
deleted file mode 100644
index aed69f5efe..0000000000
--- a/src/gallium/auxiliary/vl/SConscript
+++ /dev/null
@@ -1,13 +0,0 @@
-Import('*')
-
-vl = env.ConvenienceLibrary(
- target = 'vl',
- source = [
- 'vl_bitstream_parser.c',
- 'vl_mpeg12_mc_renderer.c',
- 'vl_compositor.c',
- 'vl_csc.c',
- 'vl_shader_build.c',
- ])
-
-auxiliaries.insert(0, vl)
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ab196c21f8..caf581aca6 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -334,11 +334,13 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
free(tokens);
}
+#if 0
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
assert(false);
}
+#endif
static void
create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
@@ -442,11 +444,13 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
free(tokens);
}
+#if 0
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
assert(false);
}
+#endif
static void
create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
@@ -532,11 +536,13 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
free(tokens);
}
+#if 0
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
assert(false);
}
+#endif
static void
create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
@@ -658,11 +664,13 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
free(tokens);
}
+#if 0
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
assert(false);
}
+#endif
static void
xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
@@ -1081,6 +1089,9 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
assert(ycbcr_vb);
assert(pos < r->macroblocks_per_batch);
+ mo_vec[1].x = 0;
+ mo_vec[1].y = 0;
+
switch (mb->mb_type) {
case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
{
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 644496db40..3fa8b975d3 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp)
}
}
- draw_set_mapped_constant_buffer(sp->draw,
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
sp->mapped_constants[PIPE_SHADER_VERTEX],
sp->constants[PIPE_SHADER_VERTEX].buffer->size);
}
@@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp)
*
* XXX should the element buffer be specified/bound with a separate function?
*/
-static boolean
+static void
cell_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -145,47 +145,35 @@ cell_draw_range_elements(struct pipe_context *pipe,
/* Note: leave drawing surfaces mapped */
cell_unmap_constant_buffers(sp);
-
- return TRUE;
}
-static boolean
+static void
cell_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return cell_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
+ cell_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
-static boolean
+static void
cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return cell_draw_elements(pipe, NULL, 0, mode, start, count);
-}
-
-
-static void
-cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
- struct cell_context *cell = cell_context(pipe);
- draw_set_edgeflags(cell->draw, edgeflags);
+ cell_draw_elements(pipe, NULL, 0, mode, start, count);
}
-
void
cell_init_draw_functions(struct cell_context *cell)
{
cell->pipe.draw_arrays = cell_draw_arrays;
cell->pipe.draw_elements = cell_draw_elements;
cell->pipe.draw_range_elements = cell_draw_range_elements;
- cell->pipe.set_edgeflags = cell_set_edgeflags;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
index efc4f78364..b723e794e7 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell )
vinfo->num_attribs = 0;
/* we always want to emit vertex pos */
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
@@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell )
break;
case TGSI_SEMANTIC_COLOR:
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR,
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
break;
case TGSI_SEMANTIC_FOG:
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
#if 1
if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
src = 0;
@@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell )
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC,
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index ac5fafec1a..5b87286d4c 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -331,7 +331,7 @@ cell_emit_state(struct cell_context *cell)
const struct draw_context *const draw = cell->draw;
struct cell_shader_info info;
- info.num_outputs = draw_num_vs_outputs(draw);
+ info.num_outputs = draw_num_shader_outputs(draw);
info.declarations = (uintptr_t) draw->vs.machine.Declarations;
info.num_declarations = draw->vs.machine.NumDeclarations;
info.instructions = (uintptr_t) draw->vs.machine.Instructions;
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 5ed330aa6e..d86d8e09a5 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -1681,7 +1681,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index 37184eac7b..46e4338d98 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe )
}
+void failover_fail_over( struct failover_context *failover )
+{
+ failover->dirty = TRUE;
+ failover->mode = FO_SW;
+}
+
-static boolean failover_draw_elements( struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned prim, unsigned start, unsigned count)
+static void failover_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim,
+ unsigned start,
+ unsigned count)
{
struct failover_context *failover = failover_context( pipe );
@@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
/* Try hardware:
*/
if (failover->mode == FO_HW) {
- if (!failover->hw->draw_elements( failover->hw,
- indexBuffer,
- indexSize,
- prim,
- start,
- count )) {
-
- failover->hw->flush( failover->hw, ~0, NULL );
- failover->mode = FO_SW;
- }
+ failover->hw->draw_elements( failover->hw,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count );
}
/* Possibly try software:
*/
if (failover->mode == FO_SW) {
- if (failover->dirty)
+ if (failover->dirty) {
+ failover->hw->flush( failover->hw, ~0, NULL );
failover_state_emit( failover );
+ }
failover->sw->draw_elements( failover->sw,
indexBuffer,
@@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
*/
failover->sw->flush( failover->sw, ~0, NULL );
}
-
- return TRUE;
}
-static boolean failover_draw_arrays( struct pipe_context *pipe,
+static void failover_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return failover_draw_elements(pipe, NULL, 0, prim, start, count);
+ failover_draw_elements(pipe, NULL, 0, prim, start, count);
}
static unsigned int
diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h
index a8ce997a1f..533122b69d 100644
--- a/src/gallium/drivers/failover/fo_winsys.h
+++ b/src/gallium/drivers/failover/fo_winsys.h
@@ -36,10 +36,13 @@
struct pipe_context;
+struct failover_context;
struct pipe_context *failover_create( struct pipe_context *hw,
struct pipe_context *sw );
+void failover_fail_over( struct failover_context *failover );
+
#endif /* FO_WINSYS_H */
diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c
index effeba1297..669964770d 100644
--- a/src/gallium/drivers/i915/i915_buffer.c
+++ b/src/gallium/drivers/i915/i915_buffer.c
@@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen,
{
struct i915_buffer *buf = i915_buffer(buffer);
assert(!buf->ibuf);
+ (void) buf;
}
static void
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 94c8aee30f..89feeade75 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -45,7 +45,7 @@
*/
-static boolean
+static void
i915_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe,
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
i915->current.constants[PIPE_SHADER_VERTEX],
(i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
4 * sizeof(float)));
@@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe,
pipe_buffer_unmap(pipe->screen, indexBuffer);
draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
}
-
- return TRUE;
}
-static boolean
+static void
i915_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
{
- return i915_draw_range_elements(pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- prim, start, count);
+ i915_draw_range_elements(pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ prim, start, count);
}
-static boolean
+static void
i915_draw_arrays(struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return i915_draw_elements(pipe, NULL, 0, prim, start, count);
+ i915_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 9103847f1c..1528afc859 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -58,10 +58,10 @@ translate_wrap_mode(unsigned wrap)
return TEXCOORDMODE_CLAMP_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return TEXCOORDMODE_CLAMP_BORDER;
-/*
+ /*
case PIPE_TEX_WRAP_MIRRORED_REPEAT:
return TEXCOORDMODE_MIRROR;
-*/
+ */
default:
return TEXCOORDMODE_WRAP;
}
@@ -752,16 +752,9 @@ static void i915_set_vertex_elements(struct pipe_context *pipe,
}
-static void i915_set_edgeflags(struct pipe_context *pipe,
- const unsigned *bitfield)
-{
- /* TODO do something here */
-}
-
void
i915_init_state_functions( struct i915_context *i915 )
{
- i915->base.set_edgeflags = i915_set_edgeflags;
i915->base.create_blend_state = i915_create_blend_state;
i915->base.bind_blend_state = i915_bind_blend_state;
i915->base.delete_blend_state = i915_delete_blend_state;
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 178d4e8781..03dd5091a6 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
/* pos */
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
@@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 )
/* primary color */
if (colors[0]) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
vinfo.hwfmt[0] |= S4_VFMT_COLOR;
}
/* secondary color */
if (colors[1]) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
}
/* fog coord, not fog blend factor */
if (fog) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
}
@@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
uint hwtc;
if (texCoords[i]) {
hwtc = TEXCOORDFMT_4D;
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
else {
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
new file mode 100644
index 0000000000..95fd3cd69b
--- /dev/null
+++ b/src/gallium/drivers/i965/Makefile
@@ -0,0 +1,74 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965
+
+C_SOURCES = \
+ brw_cc.c \
+ brw_clip.c \
+ brw_clip_line.c \
+ brw_clip_point.c \
+ brw_clip_state.c \
+ brw_clip_tri.c \
+ brw_clip_unfilled.c \
+ brw_clip_util.c \
+ brw_context.c \
+ brw_curbe.c \
+ brw_disasm.c \
+ brw_draw.c \
+ brw_draw_upload.c \
+ brw_eu.c \
+ brw_eu_debug.c \
+ brw_eu_emit.c \
+ brw_eu_util.c \
+ brw_gs.c \
+ brw_gs_emit.c \
+ brw_gs_state.c \
+ brw_misc_state.c \
+ brw_pipe_blend.c \
+ brw_pipe_depth.c \
+ brw_pipe_fb.c \
+ brw_pipe_query.c \
+ brw_pipe_shader.c \
+ brw_pipe_flush.c \
+ brw_pipe_misc.c \
+ brw_pipe_sampler.c \
+ brw_pipe_vertex.c \
+ brw_pipe_clear.c \
+ brw_pipe_rast.c \
+ brw_sf.c \
+ brw_sf_emit.c \
+ brw_sf_state.c \
+ brw_state_batch.c \
+ brw_state_debug.c \
+ brw_state_cache.c \
+ brw_state_upload.c \
+ brw_structs_dump.c \
+ brw_swtnl.c \
+ brw_urb.c \
+ brw_util.c \
+ brw_vs.c \
+ brw_vs_emit.c \
+ brw_vs_state.c \
+ brw_vs_surface_state.c \
+ brw_wm.c \
+ brw_wm_debug.c \
+ brw_wm_emit.c \
+ brw_wm_fp.c \
+ brw_wm_iz.c \
+ brw_wm_pass0.c \
+ brw_wm_pass1.c \
+ brw_wm_pass2.c \
+ brw_wm_sampler_state.c \
+ brw_wm_state.c \
+ brw_wm_surface_state.c \
+ brw_screen.c \
+ brw_screen_buffers.c \
+ brw_screen_tex_layout.c \
+ brw_screen_texture.c \
+ brw_screen_surface.c \
+ brw_batchbuffer.c \
+ brw_winsys_debug.c \
+ intel_decode.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript
new file mode 100644
index 0000000000..9c2faaf4b4
--- /dev/null
+++ b/src/gallium/drivers/i965/SConscript
@@ -0,0 +1,77 @@
+Import('*')
+
+env = env.Clone()
+
+i965 = env.ConvenienceLibrary(
+ target = 'i965',
+ source = [
+ 'brw_batchbuffer.c',
+ 'brw_cc.c',
+ 'brw_clip.c',
+ 'brw_clip_line.c',
+ 'brw_clip_point.c',
+ 'brw_clip_state.c',
+ 'brw_clip_tri.c',
+ 'brw_clip_unfilled.c',
+ 'brw_clip_util.c',
+ 'brw_context.c',
+ 'brw_curbe.c',
+ 'brw_disasm.c',
+ 'brw_draw.c',
+ 'brw_draw_upload.c',
+ 'brw_eu.c',
+ 'brw_eu_debug.c',
+ 'brw_eu_emit.c',
+ 'brw_eu_util.c',
+ 'brw_gs.c',
+ 'brw_gs_emit.c',
+ 'brw_gs_state.c',
+ 'brw_misc_state.c',
+ 'brw_pipe_blend.c',
+ 'brw_pipe_clear.c',
+ 'brw_pipe_depth.c',
+ 'brw_pipe_fb.c',
+ 'brw_pipe_flush.c',
+ 'brw_pipe_misc.c',
+ 'brw_pipe_query.c',
+ 'brw_pipe_rast.c',
+ 'brw_pipe_sampler.c',
+ 'brw_pipe_shader.c',
+ 'brw_pipe_vertex.c',
+ 'brw_screen_buffers.c',
+ 'brw_screen.c',
+ 'brw_screen_surface.c',
+ 'brw_screen_tex_layout.c',
+ 'brw_screen_texture.c',
+ 'brw_structs_dump.c',
+ 'brw_sf.c',
+ 'brw_sf_emit.c',
+ 'brw_sf_state.c',
+ 'brw_state_batch.c',
+ 'brw_state_cache.c',
+# 'brw_state_debug.c',
+ 'brw_state_upload.c',
+ 'brw_swtnl.c',
+ 'brw_urb.c',
+ 'brw_util.c',
+ 'brw_vs.c',
+ 'brw_vs_emit.c',
+ 'brw_vs_state.c',
+ 'brw_vs_surface_state.c',
+ 'brw_wm.c',
+# 'brw_wm_constant_buffer.c',
+ 'brw_wm_debug.c',
+ 'brw_wm_emit.c',
+ 'brw_wm_fp.c',
+# 'brw_wm_glsl.c',
+ 'brw_wm_iz.c',
+ 'brw_wm_pass0.c',
+ 'brw_wm_pass1.c',
+ 'brw_wm_pass2.c',
+ 'brw_wm_sampler_state.c',
+ 'brw_wm_state.c',
+ 'brw_wm_surface_state.c',
+ 'intel_decode.c',
+ ])
+
+Export('i965')
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
new file mode 100644
index 0000000000..22607dc608
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -0,0 +1,202 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_reg.h"
+#include "brw_winsys.h"
+#include "brw_debug.h"
+#include "brw_structs.h"
+
+#define ALWAYS_EMIT_MI_FLUSH 1
+
+/**
+ * Allocate a fresh BRW_BATCH_SIZE buffer for the batch, map it and rewind
+ * the write pointer to the start of the mapping.
+ *
+ * \param batch  batchbuffer to (re)initialise; batch->sws must already be set.
+ * \return PIPE_OK on success, the error reported by bo_alloc() if the
+ *         allocation fails, or PIPE_ERROR_OUT_OF_MEMORY if the new buffer
+ *         could not be mapped.
+ */
+enum pipe_error
+brw_batchbuffer_reset(struct brw_batchbuffer *batch)
+{
+   enum pipe_error ret;
+
+   ret = batch->sws->bo_alloc( batch->sws,
+                               BRW_BUFFER_TYPE_BATCH,
+                               BRW_BATCH_SIZE, 4096,
+                               &batch->buf );
+   if (ret)
+      return ret;
+
+   batch->size = BRW_BATCH_SIZE;
+
+   /* With map_range semantics, the winsys can decide whether to
+    * inject a malloc'ed bounce buffer instead of mapping directly.
+    */
+   batch->map = batch->sws->bo_map(batch->buf,
+                                   BRW_DATA_BATCH_BUFFER,
+                                   0, batch->size,
+                                   GL_TRUE,
+                                   GL_TRUE,
+                                   GL_TRUE);
+
+   batch->ptr = batch->map;
+
+   /* Do not report success for an unusable batch.  The buffer itself is
+    * kept so that brw_batchbuffer_free() can still release it.
+    * NOTE(review): assumes bo_map() signals failure by returning NULL.
+    */
+   if (batch->map == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   return PIPE_OK;
+}
+
+/**
+ * Create a batchbuffer bound to the given winsys screen and chipset and
+ * give it an initial, mapped buffer.
+ *
+ * \param sws      winsys screen used for all buffer operations.
+ * \param chipset  chipset description copied into the batch.
+ * \return the new batchbuffer, or NULL if the structure could not be
+ *         allocated or the initial brw_batchbuffer_reset() failed.
+ */
+struct brw_batchbuffer *
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
+                      struct brw_chipset chipset)
+{
+   struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
+
+   if (batch == NULL)
+      return NULL;
+
+   batch->sws = sws;
+   batch->chipset = chipset;
+
+   if (brw_batchbuffer_reset(batch) != PIPE_OK) {
+      /* Never hand out a batch without a usable buffer: release whatever
+       * the failed reset left behind and report the failure.
+       */
+      if (batch->buf)
+         bo_reference(&batch->buf, NULL);
+      FREE(batch);
+      return NULL;
+   }
+
+   return batch;
+}
+
+/**
+ * Destroy a batchbuffer: unmap its buffer if it is still mapped, drop the
+ * batch's reference on the buffer and free the structure itself.
+ *
+ * \param batch  batchbuffer to destroy; must not be NULL (it is
+ *               dereferenced unconditionally).
+ */
+void
+brw_batchbuffer_free(struct brw_batchbuffer *batch)
+{
+ if (batch->map) {
+ batch->sws->bo_unmap(batch->buf);
+ batch->map = NULL;
+ }
+
+ bo_reference(&batch->buf, NULL);
+ FREE(batch);
+}
+
+
+/**
+ * Terminate the current batch, submit it through the winsys and start a
+ * new one.  Does nothing if no bytes have been emitted.
+ *
+ * \param batch  batchbuffer to flush.
+ * \param file   caller's source file, only used in the debug message.
+ * \param line   caller's source line, only used in the debug message.
+ *
+ * Up to three trailing dwords (flush, padding noop, end marker) are written
+ * here without a space check.
+ * NOTE(review): presumably this is what BATCH_RESERVED is for - confirm.
+ * The result of the final brw_batchbuffer_reset() is not checked.
+ */
+void
+_brw_batchbuffer_flush(struct brw_batchbuffer *batch,
+ const char *file,
+ int line)
+{
+ GLuint used = batch->ptr - batch->map;
+
+ if (used == 0)
+ return;
+
+ /* Post-swap throttling done by the state tracker.
+ */
+
+ if (BRW_DEBUG & DEBUG_BATCH)
+ debug_printf("%s:%d: Batchbuffer flush with %db used\n",
+ file, line, used);
+
+ if (ALWAYS_EMIT_MI_FLUSH) {
+ *(GLuint *) (batch->ptr) = MI_FLUSH | BRW_FLUSH_STATE_CACHE;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+ }
+
+ /* Round batchbuffer usage to 2 DWORDs.
+ *
+ * The end marker below adds one more dword, so a noop is inserted when
+ * the current size is already a multiple of 8 bytes; the final size
+ * then ends up 8-byte aligned (given that 'used' is dword aligned).
+ */
+ if ((used & 4) == 0) {
+ *(GLuint *) (batch->ptr) = 0; /* noop */
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+ }
+
+ /* Mark the end of the buffer.
+ */
+ *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+
+ /* Hand the written range back to the winsys and drop the mapping
+ * before the buffer is executed.
+ */
+ batch->sws->bo_flush_range(batch->buf, 0, used);
+ batch->sws->bo_unmap(batch->buf);
+ batch->map = NULL;
+ batch->ptr = NULL;
+
+ batch->sws->bo_exec(batch->buf, used );
+
+ if (BRW_DEBUG & DEBUG_SYNC) {
+ /* Debug aid: wait for the buffer to go idle before continuing.
+ *
+ * XXX: hide this inside the winsys and export a fence
+ * interface.
+ */
+ debug_printf("waiting for idle\n");
+ batch->sws->bo_wait_idle(batch->buf);
+ }
+
+ /* Reset the buffer:
+ */
+ brw_batchbuffer_reset(batch);
+}
+
+
+/**
+ * The OUT_RELOC() macro ends up here, generating a relocation within
+ * the batch buffer.
+ *
+ * \param batch   batchbuffer receiving the relocation at its current
+ *                write position.
+ * \param buffer  buffer the relocation refers to.
+ * \param usage   usage value passed through to the winsys.
+ * \param delta   delta value passed through to the winsys.
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the relocation
+ *         dword would not fit inside the batch buffer, or the non-zero
+ *         result of the winsys bo_emit_reloc() hook.
+ */
+enum pipe_error
+brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+                           struct brw_winsys_buffer *buffer,
+                           uint32_t usage,
+                           uint32_t delta)
+{
+   const size_t offset = batch->ptr - batch->map;
+   int ret;
+
+   /* The relocation occupies one dword starting at 'offset', so the whole
+    * dword has to lie inside the buffer, not just its first byte.
+    */
+   if (offset + 4 > batch->buf->size) {
+      debug_printf("bad relocation ptr %p map %p offset %d size %d\n",
+                   (void *) batch->ptr, (void *) batch->map,
+                   (int) offset, (int) batch->buf->size);
+
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   ret = batch->sws->bo_emit_reloc(batch->buf,
+                                   usage,
+                                   delta,
+                                   offset,
+                                   buffer);
+   if (ret != 0)
+      return ret;
+
+   /* bo_emit_reloc was responsible for writing a zero into the
+    * batchbuffer if necessary.  Just need to update our pointer.
+    */
+   batch->ptr += 4;
+
+   return PIPE_OK;
+}
+
+/**
+ * Append a block of pre-built command dwords to the batch.
+ *
+ * \param batch          batchbuffer to append to; its buffer must be mapped.
+ * \param data           source bytes to copy.
+ * \param bytes          number of bytes to copy; must be a multiple of 4.
+ * \param cliprect_mode  accepted for interface compatibility only; unused.
+ * \return PIPE_OK on success or the error from
+ *         brw_batchbuffer_require_space() when the data does not fit.
+ */
+enum pipe_error
+brw_batchbuffer_data(struct brw_batchbuffer *batch,
+                     const void *data, GLuint bytes,
+                     enum cliprect_mode cliprect_mode)
+{
+   enum pipe_error err;
+
+   assert((bytes & 3) == 0);
+
+   err = brw_batchbuffer_require_space(batch, bytes);
+   if (err != PIPE_OK)
+      return err;
+
+   memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+
+   return PIPE_OK;
+}
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
new file mode 100644
index 0000000000..7473f5bea4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -0,0 +1,148 @@
+#ifndef BRW_BATCHBUFFER_H
+#define BRW_BATCHBUFFER_H
+
+#include "util/u_debug.h"
+
+#include "brw_types.h"
+#include "brw_winsys.h"
+#include "brw_reg.h"
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+/* Cliprect modes accepted by the batch emission interfaces
+ * (BEGIN_BATCH(), brw_batchbuffer_data()).  All ignored: the visible
+ * users never look at the value.
+ */
+enum cliprect_mode {
+ IGNORE_CLIPRECTS,
+ LOOP_CLIPRECTS,
+ NO_LOOP_CLIPRECTS,
+ REFERENCES_CLIPRECTS
+};
+
+
+
+
+/* State of the command batch currently being built. */
+struct brw_batchbuffer {
+
+ struct brw_winsys_screen *sws; /* winsys used for all buffer operations */
+ struct brw_winsys_buffer *buf; /* buffer object backing the batch */
+ struct brw_chipset chipset;
+
+ /**
+ * Values exported to speed up writing the batchbuffer,
+ * instead of having to go through an accessor function for
+ * each dword written.
+ */
+ /*{@*/
+ uint8_t *map; /* start of the mapping, NULL while unmapped */
+ uint8_t *ptr; /* next byte to be written */
+ size_t size; /* size of the buffer in bytes */
+ struct {
+ /* Expected value of ptr at ADVANCE_BATCH(); only maintained in
+ * DEBUG builds.
+ */
+ uint8_t *end_ptr;
+ } emit;
+
+
+ size_t relocs;
+ size_t max_relocs;
+ /*@}*/
+};
+
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws,
+ struct brw_chipset chipset );
+
+void brw_batchbuffer_free(struct brw_batchbuffer *batch);
+
+void _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
+ const char *file, int line);
+
+
+enum pipe_error
+brw_batchbuffer_reset(struct brw_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_buffer_dword() calls.
+ */
+int brw_batchbuffer_data(struct brw_batchbuffer *batch,
+ const void *data, GLuint bytes,
+ enum cliprect_mode cliprect_mode);
+
+
+int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+ struct brw_winsys_buffer *buffer,
+ enum brw_buffer_usage usage,
+ uint32_t offset);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined. Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+/* Number of bytes that may still be emitted, keeping BATCH_RESERVED bytes
+ * at the end of the buffer untouched.  The result is signed and becomes
+ * negative once the write pointer has moved into the reserved area.
+ */
+static INLINE GLint
+brw_batchbuffer_space(struct brw_batchbuffer *batch)
+{
+ return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+}
+
+
+/* Write one dword at the current position and advance the write pointer.
+ * The batch must be mapped and have room for it; both conditions are only
+ * checked by assertions.
+ */
+static INLINE void
+brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword)
+{
+ assert(batch->map);
+ assert(brw_batchbuffer_space(batch) >= 4);
+ *(GLuint *) (batch->ptr) = dword;
+ batch->ptr += 4;
+}
+
+/**
+ * Check that at least \p sz more bytes can be emitted into the batch.
+ *
+ * In DEBUG builds the expected end of the upcoming emission is recorded in
+ * batch->emit.end_ptr so that ADVANCE_BATCH() can verify it.
+ *
+ * \param batch  batchbuffer to check.
+ * \param sz     number of bytes about to be emitted.
+ * \return PIPE_OK if the space is available, PIPE_ERROR_OUT_OF_MEMORY
+ *         otherwise (the batch is not flushed here).
+ */
+static INLINE enum pipe_error
+brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
+                              GLuint sz)
+{
+   const GLint space = brw_batchbuffer_space(batch);
+
+   assert(sz < batch->size - 8);
+
+   /* brw_batchbuffer_space() is signed and goes negative once the write
+    * pointer is inside the reserved tail.  Test the sign explicitly:
+    * comparing it directly against the unsigned 'sz' would convert a
+    * negative value into a huge positive one and wrongly report success.
+    */
+   if (space < 0 || (GLuint) space < sz) {
+      assert(0);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+#ifdef DEBUG
+   batch->emit.end_ptr = batch->ptr + sz;
+#endif
+   return PIPE_OK;
+}
+
+/* Here are the crusty old macros, to be removed:
+ */
+/* BEGIN_BATCH(n, cliprect_mode): announce that n dwords are about to be
+ * emitted into brw->batch.  cliprect_mode is ignored and the result of
+ * brw_batchbuffer_require_space() is discarded.  A variable named 'brw'
+ * must be in scope at the point of use.
+ */
+#define BEGIN_BATCH(n, cliprect_mode) do { \
+ brw_batchbuffer_require_space(brw->batch, (n)*4); \
+ } while (0)
+
+/* OUT_BATCH(d): emit the dword d into brw->batch. */
+#define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d)
+
+/* OUT_RELOC(buf, usage, delta): emit a relocation to 'buf' into brw->batch.
+ * The arguments are parenthesised so that arbitrary expressions can be
+ * passed safely; note that 'buf' and 'delta' are evaluated twice when
+ * assertions are enabled.  The result of brw_batchbuffer_emit_reloc() is
+ * discarded.
+ */
+#define OUT_RELOC(buf, usage, delta) do { \
+ assert((unsigned) (delta) < (buf)->size); \
+ brw_batchbuffer_emit_reloc(brw->batch, (buf), \
+ (usage), (delta)); \
+ } while (0)
+
+/* ADVANCE_BATCH(): close an emission started with BEGIN_BATCH().
+ *
+ * In DEBUG builds this compares the write pointer with the end position
+ * recorded by brw_batchbuffer_require_space() and aborts on any mismatch.
+ * Because _n is unsigned, emitting too few bytes is reported through the
+ * same "too many bytes" message, with a wrapped-around count.  In
+ * non-DEBUG builds the macro expands to nothing.
+ */
+#ifdef DEBUG
+#define ADVANCE_BATCH() do { \
+ unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \
+ if (_n != 0) { \
+ debug_printf("%s: %d too many bytes emitted to batch\n", \
+ __FUNCTION__, _n); \
+ abort(); \
+ } \
+ brw->batch->emit.end_ptr = NULL; \
+ } while(0)
+#else
+#define ADVANCE_BATCH()
+#endif
+
+/* Emit a single MI_FLUSH command into the batch.  The result of the space
+ * check is ignored, so a full batch is only caught by the assertions in
+ * the helpers being called.
+ */
+static INLINE void
+brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch)
+{
+ brw_batchbuffer_require_space(batch, 4);
+ brw_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
new file mode 100644
index 0000000000..3e070f5591
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -0,0 +1,111 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+/* Pass the current CC viewport state (brw->curr.ccv) to the state cache
+ * under the BRW_CC_VP id; the resulting buffer is stored in the CC unit's
+ * CC_RELOC_VP relocation slot.  Returns the result of brw_cache_data().
+ */
+static enum pipe_error prepare_cc_vp( struct brw_context *brw )
+{
+ return brw_cache_data( &brw->cache,
+ BRW_CC_VP,
+ &brw->curr.ccv,
+ NULL, 0,
+ &brw->cc.reloc[CC_RELOC_VP].bo );
+}
+
+/* Tracked-state entry for the CC viewport: prepare_cc_vp() is registered
+ * for PIPE_NEW_VIEWPORT and BRW_NEW_CONTEXT.
+ */
+const struct brw_tracked_state brw_cc_vp = {
+ .dirty = {
+ .mesa = PIPE_NEW_VIEWPORT,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .prepare = prepare_cc_vp
+};
+
+
+/* A long-winded way to OR two unsigned integers together:
+ *
+ * Both brw_cc3 values are reinterpreted as an unsigned int through a
+ * union, ORed, and the result is returned as a brw_cc3 again.
+ * NOTE(review): this only covers the whole structure if struct brw_cc3 is
+ * no larger than an unsigned int - confirm against its definition.
+ */
+static INLINE struct brw_cc3
+combine_cc3( struct brw_cc3 a, struct brw_cc3 b )
+{
+ union { struct brw_cc3 cc3; unsigned i; } ca, cb;
+ ca.cc3 = a;
+ cb.cc3 = b;
+ ca.i |= cb.i;
+ return ca.cc3;
+}
+
+
+/**
+ * Assemble the CC unit state from the current depth/stencil/alpha and
+ * blend state objects and pass it to the state cache.
+ *
+ * cc0-cc2 and cc7 are taken from the zstencil state, cc5 and cc6 from the
+ * blend state, and cc3 is the bitwise OR of both objects' cc3 words.  One
+ * relocation (brw->cc.reloc) is handed to the cache together with the
+ * data.
+ *
+ * \return the result of brw_cache_data_sz().  The return type matches the
+ *         other tracked-state prepare callbacks in this driver.
+ */
+static enum pipe_error prepare_cc_unit( struct brw_context *brw )
+{
+   brw->cc.cc.cc0 = brw->curr.zstencil->cc0;
+   brw->cc.cc.cc1 = brw->curr.zstencil->cc1;
+   brw->cc.cc.cc2 = brw->curr.zstencil->cc2;
+   brw->cc.cc.cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 );
+
+   brw->cc.cc.cc5 = brw->curr.blend->cc5;
+   brw->cc.cc.cc6 = brw->curr.blend->cc6;
+   brw->cc.cc.cc7 = brw->curr.zstencil->cc7;
+
+   return brw_cache_data_sz(&brw->cache, BRW_CC_UNIT,
+                            &brw->cc.cc, sizeof(brw->cc.cc),
+                            brw->cc.reloc, 1,
+                            &brw->cc.state_bo);
+}
+
+/* Tracked-state entry for the CC unit: prepare_cc_unit() is registered for
+ * depth/stencil/alpha and blend changes and for a new CC viewport cache
+ * entry.
+ */
+const struct brw_tracked_state brw_cc_unit = {
+ .dirty = {
+ .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND,
+ .brw = 0,
+ .cache = CACHE_NEW_CC_VP
+ },
+ .prepare = prepare_cc_unit,
+};
+
+
+/* One-time setup of the CC unit's relocation slot 0: usage
+ * BRW_USAGE_STATE, delta 0, located at the cc4 field of
+ * struct brw_cc_unit_state, with no target buffer yet.
+ * NOTE(review): prepare_cc_vp() stores its buffer in
+ * reloc[CC_RELOC_VP]; this presumably is the same slot (CC_RELOC_VP == 0)
+ * - confirm.
+ */
+void brw_hw_cc_init( struct brw_context *brw )
+{
+ make_reloc(&brw->cc.reloc[0],
+ BRW_USAGE_STATE,
+ 0,
+ offsetof(struct brw_cc_unit_state, cc4),
+ NULL);
+}
+
+
+/* Drop the references held by the CC unit: its state buffer and the
+ * buffer attached to relocation slot 0.
+ */
+void brw_hw_cc_cleanup( struct brw_context *brw )
+{
+ bo_reference(&brw->cc.state_bo, NULL);
+ bo_reference(&brw->cc.reloc[0].bo, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
new file mode 100644
index 0000000000..d67a1a6263
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -0,0 +1,224 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "util/u_math.h"
+
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_pipe_rast.h"
+#include "brw_clip.h"
+
+
+#define FRONT_UNFILLED_BIT 0x1
+#define BACK_UNFILLED_BIT 0x2
+
+
+/* Generate the clip program described by 'key' and upload it to the
+ * program cache.
+ *
+ * brw     - context supplying the chipset description and the cache.
+ * key     - clip program key; copied into the compile state.
+ * bo_out  - passed to brw_upload_cache() to receive the program buffer.
+ *
+ * Returns PIPE_OK on success, PIPE_ERROR_BAD_INPUT for a primitive other
+ * than triangles, lines or points, or the error reported by
+ * brw_get_program() / brw_upload_cache().
+ */
+static enum pipe_error
+compile_clip_prog( struct brw_context *brw,
+ struct brw_clip_prog_key *key,
+ struct brw_winsys_buffer **bo_out )
+{
+ enum pipe_error ret;
+ struct brw_clip_compile c;
+ const GLuint *program;
+ GLuint program_size;
+ GLuint delta;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.func.single_program_flow = 1;
+
+ c.chipset = brw->chipset;
+ c.key = *key;
+ c.need_ff_sync = c.chipset.is_igdng;
+
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.header_position_offset = ATTR_SIZE;
+
+ /* Byte offset of the first attribute within a vertex: three registers
+ * on IGDNG, one register otherwise.
+ */
+ if (c.chipset.is_igdng)
+ delta = 3 * REG_SIZE;
+ else
+ delta = REG_SIZE;
+
+ c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE;
+
+ /* Offsets of the optional outputs stay zero (from the memset above)
+ * when the key marks the output as not present.
+ */
+ if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE;
+
+ if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE;
+
+ if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE;
+
+ if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE;
+
+ if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
+
+ /* Vertex size in registers: two attributes per register plus the same
+ * header size as used for 'delta' above.
+ * NOTE(review): this tests BRW_IS_IGDNG(brw) while the code above tests
+ * c.chipset.is_igdng - presumably equivalent, confirm.
+ */
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+ c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Would ideally have the option of producing a program which could
+ * do all three:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_TRIANGLES:
+ if (key->do_unfilled)
+ brw_emit_unfilled_clip( &c );
+ else
+ brw_emit_tri_clip( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ brw_emit_line_clip( &c );
+ break;
+ case PIPE_PRIM_POINTS:
+ brw_emit_point_clip( &c );
+ break;
+ default:
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+
+
+ /* get the program
+ */
+ ret = brw_get_program(&c.func, &program, &program_size);
+ if (ret)
+ return ret;
+
+ /* Upload
+ */
+ ret = brw_upload_cache( &brw->cache,
+ BRW_CLIP_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->clip.prog_data,
+ bo_out );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+/* Select the clip program for the current state.
+ *
+ * Builds a clip program key from the rasterizer state, the reduced
+ * primitive, the vertex shader outputs and the number of user clip planes,
+ * then looks it up in the program cache.  On a cache miss a new program is
+ * compiled and uploaded with compile_clip_prog().
+ *
+ * Returns PIPE_OK on success or the error from compile_clip_prog().
+ */
+static enum pipe_error
+upload_clip_prog(struct brw_context *brw)
+{
+ const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+ struct brw_clip_prog_key key;
+ enum pipe_error ret;
+
+ /* Populate the key, starting from the almost-complete version from
+ * the rast state.
+ */
+
+ /* PIPE_NEW_RAST */
+ key = brw->curr.rast->clip_key;
+
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ key.primitive = brw->reduced_primitive;
+
+ /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove
+ * dependency on CACHE_NEW_VS_PROG
+ */
+ /* CACHE_NEW_VS_PROG */
+ key.nr_attrs = brw->vs.prog_data->nr_outputs;
+
+ /* PIPE_NEW_VS */
+ key.output_hpos = vs->output_hpos;
+ key.output_color0 = vs->output_color0;
+ key.output_color1 = vs->output_color1;
+ key.output_bfc0 = vs->output_bfc0;
+ key.output_bfc1 = vs->output_bfc1;
+ key.output_edgeflag = vs->output_edgeflag;
+
+ /* PIPE_NEW_CLIP */
+ key.nr_userclip = brw->curr.ucp.nr;
+
+ /* Already cached?
+ */
+ if (brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->clip.prog_data,
+ &brw->clip.prog_bo))
+ return PIPE_OK;
+
+ /* Compile new program:
+ */
+ ret = compile_clip_prog( brw, &key, &brw->clip.prog_bo );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+/* Tracked-state entry for the clip program: upload_clip_prog() is
+ * registered for rasterizer and clip state changes, a new reduced
+ * primitive and a new vertex shader program in the cache.
+ */
+const struct brw_tracked_state brw_clip_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_RAST |
+ PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = upload_clip_prog
+};
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
new file mode 100644
index 0000000000..80e3a11a37
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -0,0 +1,199 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+#include "pipe/p_state.h"
+#include "brw_reg.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)
+
+/* Key identifying a clip program in the program cache.
+ *
+ * Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ *
+ * The key is handed to the cache as raw bytes (pointer plus sizeof), so
+ * the bitfields are laid out to fill two 32-bit words exactly, with pad1
+ * supplying the last two bits of the second word.
+ * NOTE(review): presumably every bit, padding included, must be
+ * initialised for cache lookups to match - confirm against the cache
+ * implementation.
+ */
+struct brw_clip_prog_key {
+ GLuint nr_attrs:6;
+ GLuint primitive:4;
+ GLuint nr_userclip:3;
+ GLuint do_flat_shading:1;
+ GLuint do_unfilled:1;
+ GLuint fill_cw:2; /* includes cull information */
+ GLuint fill_ccw:2; /* includes cull information */
+ GLuint offset_cw:1;
+ GLuint offset_ccw:1;
+ GLuint copy_bfc_cw:1;
+ GLuint copy_bfc_ccw:1;
+ GLuint clip_mode:3;
+ GLuint output_hpos:6; /* not always zero? */
+
+ /* Vertex shader output slots of the attributes the clipper needs. */
+ GLuint output_color0:6;
+ GLuint output_color1:6;
+ GLuint output_bfc0:6;
+ GLuint output_bfc1:6;
+ GLuint output_edgeflag:6;
+ GLuint pad1:2;
+
+ GLfloat offset_factor;
+ GLfloat offset_units;
+};
+
+/* Per-program data produced while generating a clip program and stored
+ * alongside it in the program cache.
+ */
+struct brw_clip_prog_data {
+ GLuint curb_read_length; /* user planes? */
+ GLuint clip_mode;
+ GLuint urb_read_length;
+ GLuint total_grf;
+};
+
+#define CLIP_LINE 0
+#define CLIP_POINT 1
+#define CLIP_FILL 2
+#define CLIP_CULL 3
+
+
+#define PRIM_MASK (0x1f)
+
+struct brw_clip_compile {
+ struct brw_compile func;
+ struct brw_clip_prog_key key;
+ struct brw_clip_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_VERTS];
+
+ struct brw_reg t;
+ struct brw_reg t0, t1;
+ struct brw_reg dp0, dp1;
+
+ struct brw_reg dpPrev;
+ struct brw_reg dp;
+ struct brw_reg loopcount;
+ struct brw_reg nr_verts;
+ struct brw_reg planemask;
+
+ struct brw_reg inlist;
+ struct brw_reg outlist;
+ struct brw_reg freelist;
+
+ struct brw_reg dir;
+ struct brw_reg tmp0, tmp1;
+ struct brw_reg offset;
+
+ struct brw_reg fixed_planes;
+ struct brw_reg plane_equation;
+
+ struct brw_reg ff_sync;
+ } reg;
+
+ /* 3 different ways of expressing vertex size, including
+ * key.nr_attrs.
+ */
+ GLuint nr_regs;
+ GLuint nr_bytes;
+
+ GLuint first_tmp;
+ GLuint last_tmp;
+
+ GLboolean need_direction;
+ struct brw_chipset chipset;
+
+ GLuint last_mrf;
+
+ GLuint header_position_offset;
+ GLboolean need_ff_sync;
+
+ GLuint nr_color_attrs;
+ GLuint offset_color0;
+ GLuint offset_color1;
+ GLuint offset_bfc0;
+ GLuint offset_bfc1;
+
+ GLuint offset_hpos;
+ GLuint offset_edgeflag;
+};
+
+#define ATTR_SIZE (4*4)
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ GLuint to, GLuint from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+ struct brw_reg pos );
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
+#endif
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
new file mode 100644
index 0000000000..54282d975e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -0,0 +1,271 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+/* Assign GRF registers for the line clipper.
+ *
+ * Registers are handed out sequentially through the running index 'i', so
+ * the order of the statements below defines the register layout.  Also
+ * fills in curb_read_length, urb_read_length and total_grf of the
+ * program data.
+ */
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+ GLuint i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ /* With user clip planes, the six fixed planes plus the user planes are
+ * placed here, two planes per register.
+ */
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < 4; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ /* Scalars t, t0, t1 and planemask share one register with the current
+ * plane equation in its upper half.
+ */
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.t0 = brw_vec1_grf(i, 1);
+ c->reg.t1 = brw_vec1_grf(i, 2);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp1 = brw_vec1_grf(i, 4);
+ i++;
+
+ /* Without user clip planes the fixed planes get a single register of
+ * their own here.
+ */
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, more or less following this algorithm:
+ *
+ * for (p=0;p<MAX_PLANES;p++) {
+ * if (clipmask & (1 << p)) {
+ * GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ * GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ * if (IS_NEGATIVE(dp1)) {
+ * GLfloat t = dp1 / (dp1 - dp0);
+ * if (t > t1) t1 = t;
+ * } else {
+ * GLfloat t = dp0 / (dp0 - dp1);
+ * if (t > t0) t0 = t;
+ * }
+ *
+ * if (t0 + t1 >= 1.0)
+ * return;
+ * }
+ * }
+ *
+ * interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ * interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ * Unlike the pseudo-code, the generated program tests t0 + t1 only once,
+ * after the loop over the planes, and emits the two interpolated vertices
+ * as a line strip when the sum is below 1.0.  The thread is killed at the
+ * end in either case.
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx0 = brw_indirect(0, 0);
+ struct brw_indirect vtx1 = brw_indirect(1, 0);
+ struct brw_indirect newvtx0 = brw_indirect(2, 0);
+ struct brw_indirect newvtx1 = brw_indirect(3, 0);
+ struct brw_indirect plane_ptr = brw_indirect(4, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *is_negative;
+ struct brw_instruction *is_neg2 = NULL;
+ struct brw_instruction *not_culled;
+ struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+ /* Point the address registers at the two input vertices, the two
+ * output vertices and the first clip plane.
+ */
+ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+ /* Note: init t0, t1 together:
+ */
+ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+ brw_clip_init_planes(c);
+ brw_clip_init_clipmask(c);
+
+ /* -ve rhw workaround */
+ /* NOTE(review): presumably the OR below is predicated on the AND's
+ * flag result, so the six fixed planes are only forced on when bit 20
+ * of R0.2 is set - confirm against the EU emitter.
+ */
+ if (c->chipset.is_965) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+ }
+
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ /* dp = DP4(vtx->position, plane)
+ */
+ brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation);
+
+ /* if (IS_NEGATIVE(dp1))
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation);
+ is_negative = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /*
+ * Both can be negative on GM965/G965 due to RHW workaround
+ * if so, this object should be rejected.
+ */
+ if (c->chipset.is_965) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_neg2);
+ }
+
+ /* t = dp1 / (dp1 - dp0) */
+ brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+ /* if (t > t1) t1 = t;  NOTE(review): presumably the MOV is
+ * predicated on the CMP result; predication is switched
+ * off again right after it.
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+ brw_MOV(p, c->reg.t1, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ is_negative = brw_ELSE(p, is_negative);
+ {
+ /* Coming back in. We know that both cannot be negative
+ * because the line would have been culled in that case.
+ */
+
+ /* If both are positive, do nothing */
+ /* Only on GM965/G965 */
+ if (c->chipset.is_965) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ }
+
+ {
+ /* t = dp0 / (dp0 - dp1) */
+ brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+ /* if (t > t0) t0 = t; */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+ brw_MOV(p, c->reg.t0, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ if (c->chipset.is_965) {
+ brw_ENDIF(p, is_neg2);
+ }
+ }
+ brw_ENDIF(p, is_negative);
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* while (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+
+ /* if (t0 + t1 < 1.0) the clipped line is non-empty: interpolate both
+ * end points and emit them as a two-vertex line strip.
+ */
+ brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+ not_culled = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE);
+ brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE);
+
+ brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, not_culled);
+ brw_clip_kill_thread(c);
+}
+
+
+
+/* Entry point for generating the line clip program: allocate registers,
+ * set up ff_sync, copy the colors between the two vertices when flat
+ * shading is requested, then emit the clipping code itself.
+ */
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+ brw_clip_line_alloc_regs(c);
+ brw_clip_init_ff_sync(c);
+
+ if (c->key.do_flat_shading)
+ brw_clip_copy_colors(c, 0, 1);
+
+ clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
new file mode 100644
index 0000000000..e0a5330556
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clip program: no clipping code is emitted for points; the program
+ * only sets up registers and ff_sync and then kills the thread. */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+ /* Reuse the triangle register layout with zero payload vertices.
+ * NOTE(review): brw_clip_tri_alloc_regs() still touches vertex[0..2] when key.nr_attrs is odd -- confirm that is harmless here. */
+ brw_clip_tri_alloc_regs(c, 0);
+ brw_clip_init_ff_sync(c);
+ /* Send an empty message to kill the thread: */
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
new file mode 100644
index 0000000000..5c3ccfd8d0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -0,0 +1,209 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_clip.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+/* Cache key for the CLIP unit state; zeroed and filled by clip_unit_populate_key(). */
+struct brw_clip_unit_key {
+ unsigned int total_grf; /* copied from clip.prog_data->total_grf */
+ unsigned int urb_entry_read_length; /* copied from clip.prog_data->urb_read_length */
+ unsigned int curb_entry_read_length; /* copied from clip.prog_data->curb_read_length */
+ unsigned int clip_mode; /* copied from clip.prog_data->clip_mode */
+
+ unsigned int curbe_offset; /* copied from brw->curbe.clip_start */
+
+ unsigned int nr_urb_entries, urb_size; /* copied from brw->urb.nr_clip_entries and brw->urb.vsize */
+
+ GLboolean depth_clamp; /* currently always 0; when 0, viewport Z clipping is enabled */
+};
+/* Fill *key from the current clip program data, CURBE offset and URB layout held in brw. */
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{
+ memset(key, 0, sizeof(*key)); /* zero the whole struct first; the key is later handed to the cache as raw bytes (sizeof(key)) */
+
+ /* CACHE_NEW_CLIP_PROG: values taken from the clip program data */
+ key->total_grf = brw->clip.prog_data->total_grf;
+ key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
+ key->clip_mode = brw->clip.prog_data->clip_mode;
+
+ /* BRW_NEW_CURBE_OFFSETS: start of the clip section in the CURBE */
+ key->curbe_offset = brw->curbe.clip_start;
+
+ /* BRW_NEW_URB_FENCE: URB entries for the clip stage */
+ key->nr_urb_entries = brw->urb.nr_clip_entries;
+ key->urb_size = brw->urb.vsize;
+
+ /* Depth clamp is not plumbed through yet, so it is hard-wired off: */
+ key->depth_clamp = 0; /* XXX: add this to gallium: ctx->Transform.DepthClamp; */
+}
+/* Build a brw_clip_unit_state from *key, upload it via brw_upload_cache() and return that call's result. */
+static enum pipe_error
+clip_unit_create_from_key(struct brw_context *brw,
+ struct brw_clip_unit_key *key,
+ struct brw_winsys_reloc *reloc,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_clip_unit_state clip;
+ enum pipe_error ret;
+
+ memset(&clip, 0, sizeof(clip));
+
+ clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* total_grf rounded up to blocks of 16, minus one */
+ /* Left as zero here; NOTE(review): presumably patched through the reloc the caller aims at thread0 -- confirm */
+ clip.thread0.kernel_start_pointer = 0;
+
+ clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip.thread1.single_program_flow = 1;
+
+ clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; /* NOTE(review): the factor of 2 presumably converts units -- confirm */
+ clip.thread3.dispatch_grf_start_reg = 1;
+ clip.thread3.urb_entry_read_offset = 0;
+
+ clip.thread4.nr_urb_entries = key->nr_urb_entries;
+ clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ /* If we have enough clip URB entries to run two threads, do so.
+ */
+ if (key->nr_urb_entries >= 10) {
+ /* Half of the URB entries go to each thread, and it has to be an
+ * even number.
+ */
+ assert(key->nr_urb_entries % 2 == 0);
+
+ /* Although up to 16 concurrent Clip threads are allowed on IGDNG,
+ * only 2 threads can output VUEs at a time.
+ */
+ if (BRW_IS_IGDNG(brw))
+ clip.thread4.max_threads = 16 - 1; /* written as (thread count - 1) */
+ else
+ clip.thread4.max_threads = 2 - 1;
+ } else {
+ assert(key->nr_urb_entries >= 5);
+ clip.thread4.max_threads = 1 - 1;
+ }
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ clip.thread4.max_threads = 0; /* debug override: same value as the single-thread case above */
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ clip.thread4.stats_enable = 1;
+
+ clip.clip5.userclip_enable_flags = 0x7f;
+ clip.clip5.userclip_must_clip = 1;
+ clip.clip5.guard_band_enable = 0;
+ if (!key->depth_clamp)
+ clip.clip5.viewport_z_clip_enable = 1; /* Z clipping stays off (memset zero) when depth clamp is requested */
+ clip.clip5.viewport_xy_clip_enable = 1;
+ clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip.clip5.api_mode = BRW_CLIP_API_OGL;
+ clip.clip5.clip_mode = key->clip_mode;
+
+ if (BRW_IS_G4X(brw))
+ clip.clip5.negative_w_clip_test = 1;
+
+ clip.clip6.clipper_viewport_state_ptr = 0;
+ clip.viewport_xmin = -1;
+ clip.viewport_xmax = 1;
+ clip.viewport_ymin = -1;
+ clip.viewport_ymax = 1;
+
+ ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+ key, sizeof(*key),
+ reloc, 1,
+ &clip, sizeof(clip),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+/* Find or create the CLIP unit state buffer for the current key and store it in brw->clip.state_bo. */
+static int upload_clip_unit( struct brw_context *brw ) /* NOTE(review): declared int but returns enum pipe_error values */
+{
+ struct brw_clip_unit_key key;
+ struct brw_winsys_reloc reloc[1];
+ unsigned grf_reg_count;
+ enum pipe_error ret;
+
+ clip_unit_populate_key(brw, &key);
+
+ grf_reg_count = align(key.total_grf, 16) / 16 - 1; /* same expression clip_unit_create_from_key() stores in thread0.grf_reg_count */
+
+ /* clip program relocation
+ *
+ * XXX: these reloc structs are long lived and only need to be
+ * updated when the bound BO changes. Hopefully the stuff mixed
+ * into the deltas is non-orthogonal.
+ */
+ assert(brw->clip.prog_bo);
+ make_reloc(&reloc[0],
+ BRW_USAGE_STATE,
+ grf_reg_count << 1, /* NOTE(review): presumably keeps grf_reg_count in the low bits of the relocated thread0 dword -- confirm */
+ offsetof(struct brw_clip_unit_state, thread0),
+ brw->clip.prog_bo);
+
+ /* A non-zero result from the cache search means state_bo is already set up: */
+ if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+ &key, sizeof(key),
+ reloc, 1,
+ NULL,
+ &brw->clip.state_bo))
+ return PIPE_OK;
+
+ /* Create new:
+ */
+ ret = clip_unit_create_from_key(brw, &key,
+ reloc,
+ &brw->clip.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+/* Tracked-state entry for the CLIP unit; lists the dirty flags whose data upload_clip_unit() consumes. */
+const struct brw_tracked_state brw_clip_unit = {
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_CLIP_PROG
+ },
+ .prepare = upload_clip_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
new file mode 100644
index 0000000000..4cde7294ea
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -0,0 +1,595 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+/* Reset the temporary-register cursor (last_tmp) back to first_tmp. */
+static void release_tmps( struct brw_clip_compile *c )
+{
+ c->last_tmp = c->first_tmp; /* NOTE(review): presumably frees every register handed out by get_tmp() -- confirm */
+}
+
+/* Assign the fixed GRF layout for the tri clipper (R0, planes, nr_verts vertices, scratch, lists) and record it in c->reg and c->prog_data. */
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts )
+{
+ GLuint i = 0,j; /* i is the next free GRF number */
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2; /* 6 fixed + user planes, two per register, rounded up */
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0; /* no CURBE read; fixed_planes gets its own register further down */
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs; /* each vertex occupies c->nr_regs registers */
+ }
+
+ if (c->key.nr_attrs & 1) { /* odd attribute count: write 0.0f at a fixed byte offset in each of the first three vertices */
+ for (j = 0; j < 3; j++) { /* NOTE(review): always touches vertex[0..2], even when nr_verts < 3 (brw_emit_point_clip passes 0) -- confirm */
+ GLuint delta = c->key.nr_attrs*16 + 32;
+
+ if (c->chipset.is_igdng)
+ delta = c->key.nr_attrs * 16 + 32 * 3;
+
+ brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+ }
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0); /* t, loopcount, nr_verts, planemask and plane_equation share one register */
+ c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+ c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp = brw_vec1_grf(i, 4);
+ i++;
+
+ c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->key.do_unfilled) {
+ c->reg.dir = brw_vec4_grf(i, 0);
+ c->reg.offset = brw_vec4_grf(i, 4);
+ i++;
+ c->reg.tmp0 = brw_vec4_grf(i, 0);
+ c->reg.tmp1 = brw_vec4_grf(i, 4);
+ i++;
+ }
+
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
+ c->first_tmp = i; /* temporaries start right after the fixed layout */
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+/* Emit code that seeds inlist with the addresses of vertex[0..2] (first two swapped
+ * for _3DPRIM_TRISTRIP_REVERSE), zeroes outlist and sets nr_verts to 3. */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+ struct brw_instruction *is_rev;
+
+ /* Initial list of indices for incoming vertexes:
+ */
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); /* primitive type bits from R0 element 2 */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+ /* XXX: Is there an easier way to do this? Need to reverse every
+ * second tristrip element: Can ignore sometimes?
+ */
+ is_rev = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(-1)); /* reversed strip element: dir = -1 */
+ }
+ is_rev = brw_ELSE(p, is_rev);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(1));
+ }
+ brw_ENDIF(p, is_rev);
+
+ brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); /* third entry is the same in both cases */
+ brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); /* clear the outlist register */
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+/* Emit flat-shading color copies: for _3DPRIM_POLYGON the (1,0) and (2,0) pairs,
+ * for every other primitive the (0,2) and (1,2) pairs. */
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); /* primitive type bits from R0 element 2 */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 1, 0); /* NOTE(review): presumably (to, from), i.e. vertex 0 supplies the color -- confirm */
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ is_poly = brw_ELSE(p, is_poly);
+ {
+ brw_clip_copy_colors(c, 0, 2); /* non-polygon: vertex 2 is the other index of each pair */
+ brw_clip_copy_colors(c, 1, 2);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly: for each plane whose
+ * planemask bit is set, walk inlist and build the clipped vertex list in outlist. */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx = brw_indirect(0, 0);
+ struct brw_indirect vtxPrev = brw_indirect(1, 0);
+ struct brw_indirect vtxOut = brw_indirect(2, 0);
+ struct brw_indirect plane_ptr = brw_indirect(3, 0);
+ struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+ struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+ struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *vertex_loop;
+ struct brw_instruction *next_test;
+ struct brw_instruction *prev_test;
+
+ brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); /* start with the last input vertex as "previous" */
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+ brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); /* generated vertices are taken from vertex[3] onwards */
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* vtxOut = freelist_ptr++
+ */
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
+ brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); /* planes read as 4 floats when user clip planes are in use */
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); /* otherwise read as 4 bytes */
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); /* iterate over the current list; nr_verts is rebuilt below */
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+ vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* vtx = *input_ptr;
+ */
+ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+ /* IS_NEGATIVE(prev) */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation);
+ prev_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* IS_POSITIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+
+ /* Coming back in.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); /* t = dpPrev / (dpPrev - dp), assuming brw_math_invert() writes the reciprocal */
+
+ /* If (vtxOut == 0) vtxOut = vtxPrev
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+
+ }
+ prev_test = brw_ELSE(p, prev_test);
+ {
+ /* *outlist_ptr++ = vtxPrev;
+ * nr_verts++;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+ /* IS_NEGATIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dpPrev from DIFFERENT_SIGNS, above.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); /* t = dp / (dp - dpPrev), assuming brw_math_invert() writes the reciprocal */
+
+ /* If (vtxOut == 0) vtxOut = vtx
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+ }
+ brw_ENDIF(p, prev_test);
+
+ /* vtxPrev = vtx;
+ * inlist_ptr++;
+ */
+ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+ brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+ /* while (--loopcount != 0)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, vertex_loop);
+
+ /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
+ * inlist = outlist
+ * inlist_ptr = &inlist[0]
+ * outlist_ptr = &outlist[0]
+ */
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); /* step back one 2-byte list entry */
+ brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+ brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* nr_verts >= 3
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ c->reg.nr_verts,
+ brw_imm_ud(3));
+
+ /* && (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+}
+
+/* Emit the vertices referenced by inlist as one _3DPRIM_TRIFAN: the first with
+ * R02_PRIM_START, the last with R02_PRIM_END; skipped unless nr_verts - 2 > 0. */
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop, *if_insn;
+
+ /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p,
+ c->reg.loopcount,
+ c->reg.nr_verts,
+ brw_imm_d(-2));
+
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect vptr = brw_indirect(1, 0);
+
+ brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); /* v0 = inlist[0] */
+
+ brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); /* advance one 2-byte list entry */
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); /* middle vertices carry neither START nor END */
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+
+ brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+ }
+ brw_ENDIF(p, if_insn);
+}
+/* Unconditional clip path: initialise the clip planes, then emit the clip loop. */
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+ brw_clip_init_planes(c);
+
+ brw_clip_tri(c);
+}
+
+/* Emit the clip code inside a run-time IF so that it only executes when planemask != 0. */
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ do_clip_tri(c);
+ }
+ brw_ENDIF(p, do_clip);
+}
+/* Emit a clip test on the three vertices' hpos: kill the thread on trivial reject, otherwise rebuild the low 6 planemask bits. */
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+ struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+ struct brw_reg v0 = get_tmp(c);
+ struct brw_reg v1 = get_tmp(c);
+ struct brw_reg v2 = get_tmp(c);
+
+ struct brw_indirect vt0 = brw_indirect(0, 0);
+ struct brw_indirect vt1 = brw_indirect(1, 0);
+ struct brw_indirect vt2 = brw_indirect(2, 0);
+
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_outside;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos)); /* v0..v2 = hpos of the three payload vertices */
+ brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos));
+ brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos));
+ brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* clear the low six planemask bits; they are recomputed below */
+
+ /* test nearz, xmin, ymin plane */
+ /* clip.xyz < -clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); /* t1..t3: per-component compare results for vertex 0..2 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); /* element 0 (x, per the comment above) -> planemask bit 5 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); /* element 1 -> planemask bit 3 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); /* element 2 -> planemask bit 1 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* test farz, xmax, ymax plane */
+ /* clip.xyz > clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); /* element 0 -> planemask bit 4 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); /* element 1 -> planemask bit 2 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); /* element 2 -> planemask bit 0 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ release_tmps(c);
+}
+
+/* Emit the complete triangle clip program: setup, optional clip test and flat shading, clipping, trifan emit, thread kill. */
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+ struct brw_instruction *neg_rhw;
+ struct brw_compile *p = &c->func;
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); /* 3 payload vertices plus one per user plane plus 6 more */
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_clipmask(c);
+ brw_clip_init_ff_sync(c);
+
+ /* if -ve rhw workaround bit is set,
+ do cliptest */
+ if (c->chipset.is_965) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20)); /* bit 20 of R0 element 2 is the workaround bit tested here */
+ neg_rhw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_test(c);
+ }
+ brw_ENDIF(p, neg_rhw);
+ }
+ /* Can't push into do_clip_tri because with polygon (or quad)
+ * flatshading, need to apply the flatshade here because we don't
+ * respect the PV when converting to trifan for emit:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+ (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
+ do_clip_tri(c); /* these modes always run the clipper */
+ else
+ maybe_do_clip_tri(c); /* other modes clip only when planemask is non-zero at run time */
+
+ brw_clip_tri_emit_polygon(c);
+
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
new file mode 100644
index 0000000000..aec835b8ce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -0,0 +1,497 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* Compute a facing-direction vector for the triangle held in
+ * c->reg.vertex[0..2] and multiply it into c->reg.dir.
+ *
+ * This is performed against the original triangles, so no indirection
+ * required:
+ * BZZZT!  (original author's marker.  NOTE(review): presumably it flags
+ * the claim above as wrong -- brw_emit_unfilled_clip() says the inlist
+ * indirection is needed at its call site.  Confirm.)
+ *
+ * Clobbers c->reg.tmp0 and c->reg.tmp1, and takes three temporaries via
+ * get_tmp() that are not released in this function.
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg e = c->reg.tmp0;
+ struct brw_reg f = c->reg.tmp1;
+ struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos);
+ struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos);
+ struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos);
+
+
+ struct brw_reg v0n = get_tmp(c);
+ struct brw_reg v1n = get_tmp(c);
+ struct brw_reg v2n = get_tmp(c);
+
+ /* Convert to NDC.
+ * NOTE: We can't modify the original vertex coordinates,
+ * as it may impact further operations.
+ * So, we have to keep normalized coordinates in temp registers.
+ *
+ * TBD-KC
+ * Try to optimize unnecessary MOV's.
+ */
+ brw_MOV(p, v0n, v0);
+ brw_MOV(p, v1n, v1);
+ brw_MOV(p, v2n, v2);
+
+ /* Divide x, y, z of each copy by its w (see brw_clip_project_position). */
+ brw_clip_project_position(c, v0n);
+ brw_clip_project_position(c, v1n);
+ brw_clip_project_position(c, v2n);
+
+ /* Calculate the vectors of two edges of the triangle:
+ * e = v0 - v2, f = v1 - v2
+ */
+ brw_ADD(p, e, v0n, negate(v2n));
+ brw_ADD(p, f, v1n, negate(v2n));
+
+ /* Take their crossproduct:
+ * the MUL forms e.yzxw * f.zxyw with a null destination
+ * (NOTE(review): presumably the product is left in the accumulator for
+ * the MAC -- confirm against the EU documentation); the MAC then adds
+ * -(e.zxyw * f.yzxw) and writes the result to e.
+ * Swizzles need ALIGN_16 access mode, which is restored to ALIGN_1 after.
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3));
+ brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* Scale the existing contents of c->reg.dir by the cross product.
+ * NOTE(review): the initial value of c->reg.dir is set outside this
+ * function -- confirm what it holds.
+ */
+ brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
+}
+
+
+/* Terminate the thread when the triangle faces the culled direction.
+ *
+ * At most one of fill_ccw / fill_cw may be CLIP_CULL (asserted).  The
+ * element 2 of c->reg.dir is compared against 0.0: the thread is killed
+ * on >= 0 when fill_ccw is CLIP_CULL, and on < 0 otherwise.
+ */
+static void cull_direction( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *cull_if;
+   GLuint cull_cond;
+
+   assert (!(c->key.fill_ccw == CLIP_CULL &&
+             c->key.fill_cw == CLIP_CULL));
+
+   cull_cond = (c->key.fill_ccw == CLIP_CULL) ? BRW_CONDITIONAL_GE
+                                              : BRW_CONDITIONAL_L;
+
+   brw_CMP(p, vec1(brw_null_reg()), cull_cond,
+           get_element(c->reg.dir, 2), brw_imm_f(0));
+
+   cull_if = brw_IF(p, BRW_EXECUTE_1);
+   brw_clip_kill_thread(c);
+   brw_ENDIF(p, cull_if);
+}
+
+
+
+/* Replace the front colors of all three vertices in c->reg.vertex[] with
+ * their back-face colors (bfc) when the triangle faces the selected
+ * direction.
+ *
+ * The direction test compares element 2 of c->reg.dir against 0.0: the
+ * copy happens on >= 0 when c->key.copy_bfc_ccw is set, and on < 0
+ * otherwise.  A color/bfc pair is only copied when both of its offsets
+ * are non-zero; if neither pair qualifies, nothing is emitted.
+ */
+static void copy_bfc( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+   GLuint conditional;
+
+   /* Do we have any colors to copy?
+    */
+   if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) &&
+       (c->offset_color1 == 0 || c->offset_bfc1 == 0))
+      return;
+
+   /* In some weird degenerate cases we can end up testing the
+    * direction twice, once for culling and once for bfc copying.  Oh
+    * well, that's what you get for setting weird GL state.
+    */
+   if (c->key.copy_bfc_ccw)
+      conditional = BRW_CONDITIONAL_GE;
+   else
+      conditional = BRW_CONDITIONAL_L;
+
+   brw_CMP(p,
+           vec1(brw_null_reg()),
+           conditional,
+           get_element(c->reg.dir, 2),
+           brw_imm_f(0));
+
+   ccw = brw_IF(p, BRW_EXECUTE_1);
+   {
+      GLuint i;
+
+      for (i = 0; i < 3; i++) {
+         if (c->offset_color0 && c->offset_bfc0)
+            brw_MOV(p,
+                    byte_offset(c->reg.vertex[i], c->offset_color0),
+                    byte_offset(c->reg.vertex[i], c->offset_bfc0));
+
+         /* The secondary pair must use the color1/bfc1 offsets; copying
+          * bfc0 into color0 a second time would leave color1 untouched.
+          */
+         if (c->offset_color1 && c->offset_bfc1)
+            brw_MOV(p,
+                    byte_offset(c->reg.vertex[i], c->offset_color1),
+                    byte_offset(c->reg.vertex[i], c->offset_bfc1));
+      }
+   }
+   brw_ENDIF(p, ccw);
+}
+
+
+
+
+/* Compute the polygon offset value into element 0 of c->reg.offset.
+ *
+ * Reference computation quoted by the original author:
+ *
+ GLfloat iz = 1.0 / dir.z;
+ GLfloat ac = dir.x * iz;
+ GLfloat bc = dir.y * iz;
+ offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+ offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ offset *= MRD;
+ *
+ * NOTE(review): neither DEPTH_SCALE nor MRD appears in the emitted code
+ * below; presumably they are folded into c->key.offset_units /
+ * c->key.offset_factor by whoever fills in the key -- confirm.
+*/
+static void compute_offset( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg off = c->reg.offset;
+ struct brw_reg dir = c->reg.dir;
+
+ /* off.z = 1 / dir.z, then off.xy = dir.xy * off.z (ac and bc above). */
+ brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
+ brw_MUL(p, vec2(off), dir, get_element(off, 2));
+
+ /* Test |off.x| >= |off.y| ... */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ brw_abs(get_element(off, 0)),
+ brw_abs(get_element(off, 1)));
+
+ /* ... and select between the two absolute values into off.x.
+ * NOTE(review): presumably the SEL is predicated on the CMP result so
+ * that the larger value is kept (the MAX2 above) -- confirm.
+ * Predication is switched off again afterwards.
+ */
+ brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* off.x = off.x * offset_factor + offset_units */
+ brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+ brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+
+/* For polygon primitives, clear the edge flags of vertex[0] and
+ * vertex[2] depending on bits 8 and 9 of R0 dword 2.
+ *
+ * Uses element 0 of c->reg.tmp0 (as UD) as scratch.  Nothing inside the
+ * IF is executed unless (R0.2 & PRIM_MASK) == _3DPRIM_POLYGON.
+ */
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+ /* Extract the primitive type bits and compare against POLYGON. */
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ /* Get away with using reg.vertex because we know that this is not
+ * a _3DPRIM_TRISTRIP_REVERSE:
+ */
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* Test bit 8 of R0.2 with conditional modifier EQ, then write 0.0 to
+ * vertex[0]'s edge flag.
+ * NOTE(review): presumably the MOV is predicated on the flag produced
+ * by the AND, i.e. it only executes when the bit is clear -- confirm.
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+ brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edgeflag), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* Same sequence for bit 9 of R0.2 and vertex[2]'s edge flag. */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+ brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edgeflag), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Add vec1(c->reg.offset) to the third float (z) of the position found
+ * at c->header_position_offset inside the vertex addressed by 'vert'.
+ */
+static void apply_one_offset( struct brw_clip_compile *c,
+                              struct brw_indirect vert )
+{
+   struct brw_compile *p = &c->func;
+
+   /* z lives two floats past the start of the header position. */
+   struct brw_reg depth =
+      deref_1f(vert,
+               c->header_position_offset + 2 * type_sz(BRW_REGISTER_TYPE_F));
+
+   brw_ADD(p, depth, depth, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ */
+
+/* Emit the polygon described by c->reg.inlist / c->reg.nr_verts as a
+ * series of two-vertex line strips, one per edge whose first vertex has
+ * a non-zero edge flag.  When do_offset is set, the polygon offset is
+ * first applied to every vertex in a separate pass.
+ *
+ * Address registers 0..3 are used for v0, v1, v0ptr and v1ptr.
+ */
+static void emit_lines(struct brw_clip_compile *c,
+ GLboolean do_offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_edge;
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v1 = brw_indirect(1, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+ struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+ /* Need a separate loop for offset:
+ */
+ if (do_offset) {
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* v0 = *v0ptr; advance v0ptr by one 2-byte list entry. */
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ apply_one_offset(c, v0);
+
+ /* Decrement the counter; note this loop uses conditional G whereas
+ * the loops below use NZ.
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+ }
+
+ /* v1ptr = &inlist[nr_verts]
+ * *v1ptr = v0
+ *
+ * nr_verts is added twice because each list entry is 2 bytes wide.
+ * Copying the first entry to the slot past the end lets the last edge
+ * wrap around to the first vertex.
+ */
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* v0 = current entry, v1 = next entry; then advance v0ptr. */
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw edge if edgeflag != 0 */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset_edgeflag),
+ brw_imm_f(0));
+ draw_edge = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* One line strip per edge: v0 starts it, v1 ends it. */
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_edge);
+
+ /* Decrement the counter and loop while it is non-zero. */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+/* Emit the polygon described by c->reg.inlist / c->reg.nr_verts as
+ * individual points, one per vertex whose edge flag is non-zero.  When
+ * do_offset is set, the polygon offset is applied to each emitted vertex
+ * just before it is written out.
+ *
+ * Address registers 0 and 2 are used for v0 and v0ptr.
+ */
+static void emit_points(struct brw_clip_compile *c,
+ GLboolean do_offset )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_point;
+
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* v0 = *v0ptr; advance v0ptr by one 2-byte list entry. */
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw if edgeflag != 0
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset_edgeflag),
+ brw_imm_f(0));
+ draw_point = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* do_offset is a compile-time choice: the offset code is only
+ * emitted when requested.
+ */
+ if (do_offset)
+ apply_one_offset(c, v0);
+
+ /* Each point is its own primitive: both START and END are set. */
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_point);
+
+ /* Decrement the counter and loop while it is non-zero. */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+
+/* Emit the clipped polygon using the requested fill mode:
+ * CLIP_FILL as a polygon, CLIP_LINE as edges, CLIP_POINT as points.
+ * CLIP_CULL must never reach this function (asserted); any other value
+ * emits nothing.  do_offset is forwarded to the line/point emitters.
+ */
+static void emit_primitives( struct brw_clip_compile *c,
+                             GLuint mode,
+                             GLboolean do_offset )
+{
+   if (mode == CLIP_FILL) {
+      brw_clip_tri_emit_polygon(c);
+   }
+   else if (mode == CLIP_LINE) {
+      emit_lines(c, do_offset);
+   }
+   else if (mode == CLIP_POINT) {
+      emit_points(c, do_offset);
+   }
+   else if (mode == CLIP_CULL) {
+      assert(0);
+   }
+}
+
+
+
+/* Emit the polygon with the fill mode that matches its facing.
+ *
+ * If both facings use the same mode, or one of them is culled, the
+ * choice is made at compile time (cw is preferred when it is not
+ * culled).  Otherwise a run-time test on element 2 of c->reg.dir picks
+ * the ccw mode for >= 0 and the cw mode for < 0.
+ */
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *facing_if;
+
+   /* Direction culling has already been done.
+    */
+   if (c->key.fill_ccw == c->key.fill_cw ||
+       c->key.fill_ccw == CLIP_CULL ||
+       c->key.fill_cw == CLIP_CULL)
+   {
+      /* Only one mode can apply: no run-time facing test required. */
+      if (c->key.fill_cw != CLIP_CULL)
+         emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+      else if (c->key.fill_ccw != CLIP_CULL)
+         emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+      return;
+   }
+
+   /* Two different, non-culled modes: branch on the facing. */
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_GE,
+           get_element(c->reg.dir, 2), brw_imm_f(0));
+
+   facing_if = brw_IF(p, BRW_EXECUTE_1);
+   emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+
+   facing_if = brw_ELSE(p, facing_if);
+   emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+
+   brw_ENDIF(p, facing_if);
+}
+
+
+
+
+/* Kill the thread if fewer than three vertices remain in
+ * c->reg.nr_verts.
+ */
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *too_few;
+
+   brw_CMP(p,
+           vec1(brw_null_reg()),
+           BRW_CONDITIONAL_L,
+           c->reg.nr_verts,
+           brw_imm_d(3));
+
+   too_few = brw_IF(p, BRW_EXECUTE_1);
+   brw_clip_kill_thread(c);
+   brw_ENDIF(p, too_few);
+}
+
+
+/* Emit the complete clip program for triangles drawn with unfilled
+ * (line / point) or per-facing fill modes.
+ *
+ * Sets c->need_direction, then emits in order: register/vertex setup,
+ * edge flag merging, optional direction computation, culling, offset
+ * computation, back-face color copy and flat shading, followed by the
+ * actual clip (only executed when the plane mask is non-zero), the
+ * primitive emission and the thread termination.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+
+ /* The facing direction is needed whenever anything below depends on
+ * it: offset, differing fill modes, culling or bfc copying.
+ */
+ c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+ (c->key.fill_ccw != c->key.fill_cw) ||
+ c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL ||
+ c->key.copy_bfc_cw ||
+ c->key.copy_bfc_ccw);
+
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_ff_sync(c);
+
+ /* The line/point emitters read the edge flag, so it must be present. */
+ assert(c->offset_edgeflag);
+
+ /* Both facings culled: nothing can be drawn, just end the thread. */
+ if (c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL) {
+ brw_clip_kill_thread(c);
+ return;
+ }
+
+ merge_edgeflags(c);
+
+ /* Need to use the inlist indirection here:
+ */
+ if (c->need_direction)
+ compute_tri_direction(c);
+
+ if (c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL)
+ cull_direction(c);
+
+ if (c->key.offset_ccw ||
+ c->key.offset_cw)
+ compute_offset(c);
+
+ if (c->key.copy_bfc_ccw ||
+ c->key.copy_bfc_cw)
+ copy_bfc(c);
+
+ /* Need to do this whether we clip or not:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ /* Only run the clipper when at least one plane bit is set. */
+ brw_clip_init_clipmask(c);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_init_planes(c);
+ brw_clip_tri(c);
+ check_nr_verts(c);
+ }
+ brw_ENDIF(p, do_clip);
+
+ emit_unfilled_primitives(c);
+ brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
new file mode 100644
index 0000000000..97a5710310
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -0,0 +1,388 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+/* Hand out the next temporary GRF as a vec4 register and bump
+ * c->last_tmp.  c->prog_data.total_grf is raised if the new
+ * c->last_tmp exceeds it.
+ */
+struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+   struct brw_reg reg = brw_vec4_grf(c->last_tmp, 0);
+
+   c->last_tmp++;
+
+   if (c->prog_data.total_grf < c->last_tmp)
+      c->prog_data.total_grf = c->last_tmp;
+
+   return reg;
+}
+
+/* Return a temporary obtained from get_tmp().  Only the most recently
+ * allocated register is actually reclaimed; releasing any other
+ * register is a no-op.
+ */
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+   if (tmp.nr != c->last_tmp-1)
+      return;
+
+   c->last_tmp--;
+}
+
+
+/* Pack four values into one unsigned dword immediate, x in bits 0..7,
+ * y shifted to bit 8, z to bit 16 and w to bit 24.
+ */
+static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
+{
+   GLuint packed = x;
+
+   packed |= y << 8;
+   packed |= z << 16;
+   packed |= w << 24;
+
+   return brw_imm_ud(packed);
+}
+
+
+/* Load the six fixed clip planes into c->reg.fixed_planes, one packed
+ * dword per plane.  Nothing is emitted when user clip planes are in use
+ * (c->key.nr_userclip != 0).
+ *
+ * NOTE(review): 0xff is presumably -1 as a signed byte -- confirm.
+ */
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+   /* x, y, z of each plane; w is always 1. */
+   static const GLuint fixed_xyz[6][3] = {
+      {    0,    0, 0xff },
+      {    0,    0,    1 },
+      {    0, 0xff,    0 },
+      {    0,    1,    0 },
+      { 0xff,    0,    0 },
+      {    1,    0,    0 },
+   };
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   if (c->key.nr_userclip)
+      return;
+
+   for (i = 0; i < 6; i++) {
+      brw_MOV(p,
+              get_element_ud(c->reg.fixed_planes, i),
+              make_plane_ud(fixed_xyz[i][0], fixed_xyz[i][1],
+                            fixed_xyz[i][2], 1));
+   }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwriting it with the
+ * result: w is replaced by its reciprocal and x, y, z are multiplied by
+ * that reciprocal.
+ */
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg rhw = get_element(pos, W);
+
+   /* pos.w = 1 / pos.w
+    */
+   brw_math_invert(p, rhw, rhw);
+
+   /* pos.xyz *= pos.w; the writemask and swizzle are applied in
+    * ALIGN_16 access mode, after which ALIGN_1 is restored.
+    */
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_MUL(p,
+           brw_writemask(pos, BRW_WRITEMASK_XYZ),
+           pos,
+           brw_swizzle1(pos, W));
+   brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+
+/* Rebuild the projected position in a vertex header: read the position
+ * at c->offset_hpos of the vertex addressed by vert_addr, project it,
+ * and store the result at c->header_position_offset of the same vertex.
+ */
+static void brw_clip_project_vertex( struct brw_clip_compile *c,
+                                     struct brw_indirect vert_addr )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg projected = get_tmp(c);
+
+   /* Work on a copy so the original position stays untouched. */
+   brw_MOV(p, projected, deref_4f(vert_addr, c->offset_hpos));
+   brw_clip_project_position(c, projected);
+   brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), projected);
+
+   release_tmp(c, projected);
+}
+
+
+
+
+/* Interpolate between two vertices and write the result to the vertex
+ * addressed by dest_ptr.
+ *
+ * (The original comment said the result is put "into a0.0" and that a0.0
+ * is incremented accordingly; no increment is visible in this function.)
+ *
+ * dest_ptr       - destination vertex
+ * v0_ptr         - "from" vertex; also the source of the copied header
+ * v1_ptr         - "to" vertex
+ * t0             - interpolation weight
+ * force_edgeflag - if set, the edge flag attribute is written as 1
+ *                  instead of being copied from v0
+ */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ GLboolean force_edgeflag)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+ GLuint i;
+
+ /* Just copy the vertex header:
+ */
+ /*
+ * After CLIP stage, only first 256 bits of the VUE are read
+ * back on IGDNG, so needn't change it
+ */
+ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+
+ /* Iterate over each attribute (could be done in pairs?)
+ */
+ for (i = 0; i < c->key.nr_attrs; i++) {
+ /* Byte offset of attribute i: 16 bytes per attribute, starting at
+ * 32, or at 32 * 3 on IGDNG.
+ */
+ GLuint delta = i*16 + 32;
+
+ if (c->chipset.is_igdng)
+ delta = i * 16 + 32 * 3;
+
+ /* The edge flag is never interpolated. */
+ if (delta == c->offset_edgeflag) {
+ if (force_edgeflag)
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+ else
+ brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+ }
+ else {
+ /* Interpolate:
+ *
+ * New = attr0 + t*attr1 - t*attr0
+ *
+ * The MUL forms t*attr1 with a null destination, the MAC adds
+ * -t*attr0 into tmp, and the ADD adds attr0.
+ * NOTE(review): presumably the MUL result is carried to the MAC
+ * through the accumulator -- confirm.
+ */
+ brw_MUL(p,
+ vec4(brw_null_reg()),
+ deref_4f(v1_ptr, delta),
+ t0);
+
+ brw_MAC(p,
+ tmp,
+ negate(deref_4f(v0_ptr, delta)),
+ t0);
+
+ brw_ADD(p,
+ deref_4f(dest_ptr, delta),
+ deref_4f(v0_ptr, delta),
+ tmp);
+ }
+ }
+
+ /* i == nr_attrs here: with an odd attribute count, zero the 16-byte
+ * slot following the last attribute.
+ */
+ if (i & 1) {
+ GLuint delta = i*16 + 32;
+
+ if (c->chipset.is_igdng)
+ delta = i * 16 + 32 * 3;
+
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+ }
+
+ release_tmp(c, tmp);
+
+ /* Recreate the projected (NDC) coordinate in the new vertex
+ * header:
+ */
+ brw_clip_project_vertex(c, dest_ptr );
+}
+
+
+
+
+#define MAX_MRF 16
+
+/* Write one vertex to the URB.
+ *
+ * vert     - vertex to copy into the message registers
+ * allocate - passed to the URB write as the allocate flag; when set, R0
+ *            is the destination and a response of length 1 is expected
+ * eot      - end-of-thread flag; must not be combined with allocate
+ * header   - value written to R0 dword 2 before the write (primitive
+ *            type and start/end bits, per the callers in this file)
+ */
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header)
+{
+ struct brw_compile *p = &c->func;
+ GLuint start = c->last_mrf;
+
+ brw_clip_ff_sync(c);
+
+ assert(!(allocate && eot));
+
+ /* Cycle through mrf regs - probably futile as we have to wait for
+ * the allocation response anyway. Also, the order this function
+ * is invoked doesn't correspond to the order the instructions will
+ * be executed, so it won't have any effect in many cases.
+ *
+ * (Currently compiled out, so 'start' stays at c->last_mrf.)
+ */
+#if 0
+ if (start + c->nr_regs + 1 >= MAX_MRF)
+ start = 0;
+
+ c->last_mrf = start + c->nr_regs + 1;
+#endif
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+ /* Send each vertex as a separate write to the urb. This
+ * is different to the concept in brw_sf_emit.c, where
+ * subsequent writes are used to build up a single urb
+ * entry. Each of these writes instantiates a separate
+ * urb entry - (I think... what about 'allocate'?)
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ start,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response_length */
+ eot, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+/* Terminate the clip thread: after the ff_sync handshake (if needed),
+ * send an empty URB write with end-of-thread set.  This also releases
+ * any allocated urb entry.
+ */
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg no_dest;
+
+   brw_clip_ff_sync(c);
+
+   /* Nothing is read back, so the destination is the null register. */
+   no_dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);
+
+   brw_urb_WRITE(p,
+                 no_dest,
+                 0,
+                 c->reg.R0,
+                 0,    /* allocate */
+                 0,    /* used */
+                 1,    /* msg len */
+                 0,    /* response len */
+                 1,    /* eot */
+                 1,    /* writes complete */
+                 0,
+                 BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+
+/* Return the address of c->reg.fixed_planes, i.e. of the first plane. */
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+   struct brw_reg first_plane = brw_address(c->reg.fixed_planes);
+
+   return first_plane;
+}
+
+
+/* Return the distance between consecutive clip planes as a UW
+ * immediate: 16 when user clip planes are in use, 4 otherwise (the
+ * fixed planes are one packed dword each, see brw_clip_init_planes()).
+ */
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+   return c->key.nr_userclip ? brw_imm_uw(16) : brw_imm_uw(4);
+}
+
+
+/* If flatshading, distribute color from provoking vertex prior to
+ * clipping.
+ *
+ * Copies color0, color1, bfc0 and bfc1 (in that order) from
+ * c->reg.vertex[from] to c->reg.vertex[to]; an attribute whose offset is
+ * zero is skipped.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+                           GLuint to, GLuint from )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg dst = c->reg.vertex[to];
+   struct brw_reg src = c->reg.vertex[from];
+
+   if (c->offset_color0) {
+      brw_MOV(p, byte_offset(dst, c->offset_color0),
+                 byte_offset(src, c->offset_color0));
+   }
+
+   if (c->offset_color1) {
+      brw_MOV(p, byte_offset(dst, c->offset_color1),
+                 byte_offset(src, c->offset_color1));
+   }
+
+   if (c->offset_bfc0) {
+      brw_MOV(p, byte_offset(dst, c->offset_bfc0),
+                 byte_offset(src, c->offset_bfc0));
+   }
+
+   if (c->offset_bfc1) {
+      brw_MOV(p, byte_offset(dst, c->offset_bfc1),
+                 byte_offset(src, c->offset_bfc1));
+   }
+}
+
+
+
+/* Build c->reg.planemask from the outcodes delivered in R0 dword 2.
+ *
+ * The top six bits (26..31) become bits 0..5.  When user clip planes
+ * are in use, bits 14..19 are additionally moved to bits 6..11 so they
+ * follow the fixed plane bits.
+ */
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+   struct brw_reg userclip;
+
+   /* Shift so that lowest outcode bit is rightmost:
+    */
+   brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
+
+   if (!c->key.nr_userclip)
+      return;
+
+   /* Rearrange userclip outcodes so that they come directly after
+    * the fixed plane bits.
+    */
+   userclip = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+   brw_AND(p, userclip, incoming, brw_imm_ud(0x3f<<14));
+   brw_SHR(p, userclip, userclip, brw_imm_ud(8));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, userclip);
+
+   release_tmp(c, userclip);
+}
+
+/* Emit a one-shot FF_SYNC message, if this program needs one.
+ *
+ * Bit 0 of c->reg.ff_sync records whether the message has already been
+ * sent: the emitted code tests it, and only when it is still clear sets
+ * it and issues the FF_SYNC.  Emits nothing when c->need_ff_sync is
+ * not set.
+ */
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *sync_if;
+
+   if (!c->need_ff_sync)
+      return;
+
+   /* Enter the IF only while the "already synced" bit is zero. */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+
+   sync_if = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+      brw_ff_sync(p,
+                  c->reg.R0,
+                  0,
+                  c->reg.R0,
+                  1,
+                  1,    /* used */
+                  1,    /* msg length */
+                  1,    /* response length */
+                  0,    /* eot */
+                  1,    /* write complete */
+                  0,    /* urb offset */
+                  BRW_URB_SWIZZLE_NONE);
+   }
+   brw_ENDIF(p, sync_if);
+
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+/* Clear c->reg.ff_sync so that the first brw_clip_ff_sync() sends the
+ * message.  Emits nothing when c->need_ff_sync is not set.
+ */
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+
+   if (!c->need_ff_sync)
+      return;
+
+   brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+}
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
new file mode 100644
index 0000000000..e67551882d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -0,0 +1,154 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_context.h"
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_winsys.h"
+#include "brw_screen.h"
+
+
+/* pipe_context::destroy implementation (installed by
+ * brw_create_context()).
+ *
+ * Flushes the context, frees the batch buffer, runs the per-module
+ * cleanup functions and drops every surface and buffer-object reference
+ * held in the context.
+ *
+ * NOTE(review): the brw_context structure itself is not freed anywhere
+ * in this function -- confirm whether that is done elsewhere or whether
+ * the allocation from brw_create_context() is leaked.
+ */
+static void brw_destroy_context( struct pipe_context *pipe )
+{
+ struct brw_context *brw = brw_context(pipe);
+ int i;
+
+ /* Flush before the batch buffer goes away. */
+ brw_context_flush( brw );
+ brw_batchbuffer_free( brw->batch );
+ brw_destroy_state(brw);
+
+ brw_draw_cleanup( brw );
+
+ /* Counterparts of the brw_pipe_*_init() calls in brw_create_context(). */
+ brw_pipe_blend_cleanup( brw );
+ brw_pipe_depth_stencil_cleanup( brw );
+ brw_pipe_framebuffer_cleanup( brw );
+ brw_pipe_flush_cleanup( brw );
+ brw_pipe_misc_cleanup( brw );
+ brw_pipe_query_cleanup( brw );
+ brw_pipe_rast_cleanup( brw );
+ brw_pipe_sampler_cleanup( brw );
+ brw_pipe_shader_cleanup( brw );
+ brw_pipe_vertex_cleanup( brw );
+ brw_pipe_clear_cleanup( brw );
+
+ brw_hw_cc_cleanup( brw );
+
+
+ FREE(brw->wm.compile_data);
+
+ /* Drop the references on the currently bound framebuffer surfaces. */
+ for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
+ pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL);
+ brw->curr.fb.nr_cbufs = 0;
+ pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
+
+ /* Drop the references on every buffer object held per pipeline stage. */
+ bo_reference(&brw->curbe.curbe_bo, NULL);
+ bo_reference(&brw->vs.prog_bo, NULL);
+ bo_reference(&brw->vs.state_bo, NULL);
+ bo_reference(&brw->vs.bind_bo, NULL);
+ bo_reference(&brw->gs.prog_bo, NULL);
+ bo_reference(&brw->gs.state_bo, NULL);
+ bo_reference(&brw->clip.prog_bo, NULL);
+ bo_reference(&brw->clip.state_bo, NULL);
+ bo_reference(&brw->clip.vp_bo, NULL);
+ bo_reference(&brw->sf.prog_bo, NULL);
+ bo_reference(&brw->sf.state_bo, NULL);
+ bo_reference(&brw->sf.vp_bo, NULL);
+
+ for (i = 0; i < Elements(brw->wm.sdc_bo); i++)
+ bo_reference(&brw->wm.sdc_bo[i], NULL);
+
+ bo_reference(&brw->wm.bind_bo, NULL);
+
+ for (i = 0; i < Elements(brw->wm.surf_bo); i++)
+ bo_reference(&brw->wm.surf_bo[i], NULL);
+
+ bo_reference(&brw->wm.sampler_bo, NULL);
+ bo_reference(&brw->wm.prog_bo, NULL);
+ bo_reference(&brw->wm.state_bo, NULL);
+}
+
+
+/* Create an i965 context for 'screen'.
+ *
+ * Allocates a zeroed brw_context, installs the destroy callback, runs
+ * the per-module init functions, marks all state dirty and finally
+ * allocates the batch buffer.
+ *
+ * Returns the embedded pipe_context, or NULL if either the context or
+ * the batch buffer could not be allocated.
+ */
+struct pipe_context *brw_create_context(struct pipe_screen *screen)
+{
+   struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+
+   if (!brw) {
+      debug_printf("%s: failed to alloc context\n", __FUNCTION__);
+      return NULL;
+   }
+
+   brw->base.screen = screen;
+   brw->base.destroy = brw_destroy_context;
+   brw->sws = brw_screen(screen)->sws;
+   brw->chipset = brw_screen(screen)->chipset;
+
+   brw_pipe_blend_init( brw );
+   brw_pipe_depth_stencil_init( brw );
+   brw_pipe_framebuffer_init( brw );
+   brw_pipe_flush_init( brw );
+   brw_pipe_misc_init( brw );
+   brw_pipe_query_init( brw );
+   brw_pipe_rast_init( brw );
+   brw_pipe_sampler_init( brw );
+   brw_pipe_shader_init( brw );
+   brw_pipe_vertex_init( brw );
+   brw_pipe_clear_init( brw );
+
+   brw_hw_cc_init( brw );
+
+   brw_init_state( brw );
+   brw_draw_init( brw );
+
+   /* Force every piece of state to be (re)emitted on first use. */
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+
+   brw->flags.always_emit_state = 0;
+
+   make_empty_list(&brw->query.active_head);
+
+   brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
+   if (brw->batch == NULL)
+      goto fail;
+
+   return &brw->base;
+
+fail:
+   /* The only way to get here is with brw->batch == NULL, so there is no
+    * batch buffer to release; free the context itself so that it is not
+    * leaked.
+    * NOTE(review): state created by the *_init() calls above is not
+    * unwound here -- confirm whether it needs explicit cleanup.
+    */
+   FREE(brw);
+   return NULL;
+}
+
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
new file mode 100644
index 0000000000..8c006bb95b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -0,0 +1,858 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+#include "brw_structs.h"
+#include "brw_winsys.h"
+#include "brw_reg.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "tgsi/tgsi_scan.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer. A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry. An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawning a thread.
+ *
+ * VUE - vertex URB entry. An urb entry holding a vertex and usually
+ * a vertex header. The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.
+ *
+ * PUE - primitive URB entry. An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file. One of several register files
+ * addressable by programmed threads. The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned. The registers are individually 8 dwords wide and suitable
+ * for general usage. Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file. Threads communicate (and terminate)
+ * by sending messages. Message parameters are placed in contiguous
+ * MRF registers. All program output is via these messages. URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0. Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware. Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer. Notional first unit, little software
+ * interaction. Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs. If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader. This corresponds to a new DX10 concept. If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units. The GS thread may perform arbitrary
+ * computation and emit whatever primitives with whatever vertices it
+ * chooses. This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more. Additionally, GS is used to translate away primitives not
+ * handled by later units, including Quads and Lineloops.
+ *
+ * CLIP - Clipper. Mesa's clipping algorithms are imported to run on
+ * this unit. The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes decisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization. Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower. Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here. SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator. No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_MAX_CURBE (32*16)
+
+
+/* Need a value to say a particular vertex shader output isn't
+ * present. Limits us to 63 outputs currently.
+ */
+#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1)
+
+
+struct brw_context;
+
+struct brw_depth_stencil_state {
+ /* Precalculated hardware state:
+ */
+ struct brw_cc0 cc0;
+ struct brw_cc1 cc1;
+ struct brw_cc2 cc2;
+ struct brw_cc3 cc3;
+ struct brw_cc7 cc7;
+
+ unsigned iz_lookup;
+};
+
+
+struct brw_blend_state {
+ /* Precalculated hardware state:
+ */
+ struct brw_cc2 cc2;
+ struct brw_cc3 cc3;
+ struct brw_cc5 cc5;
+ struct brw_cc6 cc6;
+
+ struct brw_surf_ss0 ss0;
+};
+
+
+struct brw_rasterizer_state;
+
+/* Immediate values belonging to a shader, stored as float[4] vectors. */
+struct brw_immediate_data {
+ unsigned nr; /* number of float[4] entries in data */
+ float (*data)[4];
+};
+
+struct brw_vertex_shader {
+ const struct tgsi_token *tokens;
+ struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
+
+ struct tgsi_shader_info info;
+ struct brw_immediate_data immediates;
+
+ GLuint has_flow_control:1;
+ GLuint use_const_buffer:1;
+
+ /* Offsets of special vertex shader outputs required for clipping.
+ */
+ GLuint output_hpos:6; /* not always zero? */
+ GLuint output_color0:6;
+ GLuint output_color1:6;
+ GLuint output_bfc0:6;
+ GLuint output_bfc1:6;
+ GLuint output_edgeflag:6;
+
+ unsigned id;
+};
+
+struct brw_fs_signature {
+ GLuint nr_inputs;
+ struct {
+ GLuint interp:3; /* TGSI_INTERPOLATE_x */
+ GLuint semantic:5; /* TGSI_SEMANTIC_x */
+ GLuint semantic_index:24;
+ } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+/* Size in bytes of the used part of a brw_fs_signature: everything before
+ * input[] plus the first nr_inputs entries of input[]. Note that (s) is
+ * evaluated more than once.
+ */
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+ ((s)->nr_inputs * sizeof (s)->input[0]))
+
+
+struct brw_fragment_shader {
+ const struct tgsi_token *tokens;
+ struct tgsi_shader_info info;
+
+ struct brw_fs_signature signature;
+ struct brw_immediate_data immediates;
+
+ unsigned iz_lookup;
+ /*unsigned wm_lookup;*/
+
+ unsigned uses_depth:1;
+ unsigned has_flow_control:1;
+
+ unsigned id;
+ struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
+};
+
+
+struct brw_sampler {
+ struct brw_ss0 ss0;
+ struct brw_ss1 ss1;
+ float border_color[4];
+ struct brw_ss3 ss3;
+};
+
+
+
+#define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1
+#define PIPE_NEW_RAST 0x2
+#define PIPE_NEW_BLEND 0x4
+#define PIPE_NEW_VIEWPORT 0x8
+#define PIPE_NEW_SAMPLERS 0x10
+#define PIPE_NEW_VERTEX_BUFFER 0x20
+#define PIPE_NEW_VERTEX_ELEMENT 0x40
+#define PIPE_NEW_FRAGMENT_SHADER 0x80
+#define PIPE_NEW_VERTEX_SHADER 0x100
+#define PIPE_NEW_FRAGMENT_CONSTANTS 0x200
+#define PIPE_NEW_VERTEX_CONSTANTS 0x400
+#define PIPE_NEW_CLIP 0x800
+#define PIPE_NEW_INDEX_BUFFER 0x1000
+#define PIPE_NEW_INDEX_RANGE 0x2000
+#define PIPE_NEW_BLEND_COLOR 0x4000
+#define PIPE_NEW_POLYGON_STIPPLE 0x8000
+#define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000
+#define PIPE_NEW_DEPTH_BUFFER 0x20000
+#define PIPE_NEW_COLOR_BUFFERS 0x40000
+#define PIPE_NEW_QUERY 0x80000
+#define PIPE_NEW_SCISSOR 0x100000
+#define PIPE_NEW_BOUND_TEXTURES 0x200000
+#define PIPE_NEW_NR_CBUFS 0x400000
+#define PIPE_NEW_FRAGMENT_SIGNATURE 0x800000
+
+
+
+#define BRW_NEW_URB_FENCE 0x1
+#define BRW_NEW_FRAGMENT_PROGRAM 0x2
+#define BRW_NEW_VERTEX_PROGRAM 0x4
+#define BRW_NEW_INPUT_DIMENSIONS 0x8
+#define BRW_NEW_CURBE_OFFSETS 0x10
+#define BRW_NEW_REDUCED_PRIMITIVE 0x20
+#define BRW_NEW_PRIMITIVE 0x40
+#define BRW_NEW_CONTEXT 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
+#define BRW_NEW_PSP 0x800
+#define BRW_NEW_WM_SURFACES 0x1000
+#define BRW_NEW_xxx 0x2000 /* was FENCE */
+#define BRW_NEW_INDICES 0x4000
+
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering. Need to re-emit these fresh in each
+ * batchbuffer as the referenced buffers may be relocated in the
+ * meantime.
+ */
+#define BRW_NEW_BATCH 0x10000
+#define BRW_NEW_NR_WM_SURFACES 0x40000
+#define BRW_NEW_NR_VS_SURFACES 0x80000
+#define BRW_NEW_INDEX_BUFFER 0x100000
+
+struct brw_state_flags {
+ /** State update flags signalled by mesa internals */
+ GLuint mesa;
+ /**
+ * State update flags signalled as the result of brw_tracked_state updates
+ */
+ GLuint brw;
+ /** State update flags signalled by brw_state_cache.c searches */
+ GLuint cache;
+};
+
+
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+
+ GLuint first_curbe_grf;
+ GLuint total_grf;
+ GLuint total_scratch;
+
+ GLuint nr_params; /**< number of float params/constants */
+ GLboolean error;
+
+ /* Pointer to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ */
+ const GLfloat *param[BRW_MAX_CURBE];
+};
+
+struct brw_sf_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ /* Each vertex may have up to 12 attributes, 4 components each,
+ * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
+ * rows.
+ *
+ * Actually we use 4 for each, so call it 12 rows.
+ */
+ GLuint urb_entry_size;
+};
+
+
+struct brw_clip_prog_data;
+
+struct brw_gs_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+};
+
+struct brw_vs_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ GLuint nr_outputs;
+ GLuint nr_inputs;
+
+ GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */
+
+ GLboolean writes_psiz;
+
+ /* Used for calculating urb partitions:
+ */
+ GLuint urb_entry_size;
+};
+
+
+/* Size == 0 if output either not written, or always [0,0,0,1]
+ */
+struct brw_vs_ouput_sizes {
+ GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
+};
+
+
+/** Number of texture sampler units */
+#define BRW_MAX_TEX_UNIT 16
+
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 4
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces, the fragment shader
+ * constant buffer (+1) and the current texture objects.
+ */
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ */
+#define BTI_COLOR_BUF(d) (d)
+#define BTI_FRAGMENT_CONSTANTS (BRW_MAX_DRAW_BUFFERS)
+#define BTI_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
+
+/* Bit of a hack to align these with the winsys buffer_data_type enum.
+ */
+enum brw_cache_id {
+ BRW_CC_VP = BRW_DATA_GS_CC_VP,
+ BRW_CC_UNIT = BRW_DATA_GS_CC_UNIT,
+ BRW_WM_PROG = BRW_DATA_GS_WM_PROG,
+ BRW_SAMPLER_DEFAULT_COLOR = BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+ BRW_SAMPLER = BRW_DATA_GS_SAMPLER,
+ BRW_WM_UNIT = BRW_DATA_GS_WM_UNIT,
+ BRW_SF_PROG = BRW_DATA_GS_SF_PROG,
+ BRW_SF_VP = BRW_DATA_GS_SF_VP,
+ BRW_SF_UNIT = BRW_DATA_GS_SF_UNIT,
+ BRW_VS_UNIT = BRW_DATA_GS_VS_UNIT,
+ BRW_VS_PROG = BRW_DATA_GS_VS_PROG,
+ BRW_GS_UNIT = BRW_DATA_GS_GS_UNIT,
+ BRW_GS_PROG = BRW_DATA_GS_GS_PROG,
+ BRW_CLIP_VP = BRW_DATA_GS_CLIP_VP,
+ BRW_CLIP_UNIT = BRW_DATA_GS_CLIP_UNIT,
+ BRW_CLIP_PROG = BRW_DATA_GS_CLIP_PROG,
+ BRW_SS_SURFACE = BRW_DATA_SS_SURFACE,
+ BRW_SS_SURF_BIND = BRW_DATA_SS_SURF_BIND,
+
+ BRW_MAX_CACHE
+};
+
+struct brw_cache_item {
+ /**
+ * Effectively part of the key, cache_id identifies what kind of state
+ * buffer is involved, and also which brw->state.dirty.cache flag should
+ * be set when this cache item is chosen.
+ */
+ enum brw_cache_id cache_id;
+ /** 32-bit hash of the key data */
+ GLuint hash;
+ GLuint key_size; /* for variable-sized keys */
+ const void *key;
+ struct brw_winsys_reloc *relocs;
+ GLuint nr_relocs;
+
+ struct brw_winsys_buffer *bo;
+ GLuint data_size;
+
+ struct brw_cache_item *next;
+};
+
+
+
+struct brw_cache {
+ struct brw_context *brw;
+ struct brw_winsys_screen *sws;
+
+ struct brw_cache_item **items;
+ GLuint size, n_items;
+
+ enum brw_buffer_type buffer_type;
+
+ GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */
+ GLuint aux_size[BRW_MAX_CACHE];
+ char *name[BRW_MAX_CACHE];
+
+
+ /* Record of the last BOs chosen for each cache_id. Used to set
+ * brw->state.dirty.cache when a new cache item is chosen.
+ */
+ struct brw_winsys_buffer *last_bo[BRW_MAX_CACHE];
+};
+
+
+struct brw_tracked_state {
+ struct brw_state_flags dirty;
+ int (*prepare)( struct brw_context *brw );
+ int (*emit)( struct brw_context *brw );
+};
+
+/* Flags for brw->state.dirty.cache.
+ */
+#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
+
+struct brw_cached_batch_item {
+ struct header *header;
+ GLuint sz;
+ struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define VS_INPUT_BITMASK_DWORDS ((PIPE_MAX_SHADER_INPUTS+31)/32)
+
+
+
+
+struct brw_vertex_info {
+ GLuint sizes[VS_INPUT_BITMASK_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
+};
+
+
+struct brw_query_object {
+ /** Doubly linked list of active query objects in the context. */
+ struct brw_query_object *prev, *next;
+
+ /** Last query BO associated with this query. */
+ struct brw_winsys_buffer *bo;
+ /** First index in bo with query data for this object. */
+ int first_index;
+ /** Last index in bo with query data for this object. */
+ int last_index;
+
+ /* Total count of pixels from previous BOs */
+ uint64_t result;
+};
+
+#define CC_RELOC_VP 0
+
+
+/**
+ * brw_context is derived from pipe_context
+ */
+struct brw_context
+{
+ struct pipe_context base;
+ struct brw_chipset chipset;
+
+ struct brw_winsys_screen *sws;
+
+ struct brw_batchbuffer *batch;
+
+ GLuint primitive;
+ GLuint reduced_primitive;
+
+ /* Active state from the state tracker:
+ */
+ struct {
+ struct brw_vertex_shader *vertex_shader;
+ struct brw_fragment_shader *fragment_shader;
+ const struct brw_blend_state *blend;
+ const struct brw_rasterizer_state *rast;
+ const struct brw_depth_stencil_state *zstencil;
+
+ const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
+ unsigned num_samplers;
+
+ struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned num_vertex_elements;
+ unsigned num_textures;
+ unsigned num_vertex_buffers;
+
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state fb;
+ struct pipe_clip_state ucp;
+ struct pipe_buffer *vertex_constants;
+ struct pipe_buffer *fragment_constants;
+
+ struct brw_blend_constant_color bcc;
+ struct brw_polygon_stipple bps;
+ struct brw_cc_viewport ccv;
+
+ /**
+ * Index buffer for this draw_prims call.
+ *
+ * Updates are signaled by PIPE_NEW_INDEX_BUFFER.
+ */
+ struct pipe_buffer *index_buffer;
+ unsigned index_size;
+
+ /* Updates are signalled by PIPE_NEW_INDEX_RANGE:
+ */
+ unsigned min_index;
+ unsigned max_index;
+
+ } curr;
+
+ struct {
+ struct brw_state_flags dirty;
+
+ /**
+ * List of buffers accumulated in brw_validate_state to receive
+ * dri_bo_check_aperture treatment before exec, so we can know if we
+ * should flush the batch and try again before emitting primitives.
+ *
+ * This can be a fixed number as we only have a limited number of
+ * objects referenced from the batchbuffer in a primitive emit,
+ * consisting of the vertex buffers, pipelined state pointers,
+ * the CURBE, the depth buffer, and a query BO.
+ */
+ struct brw_winsys_buffer *validated_bos[PIPE_MAX_SHADER_INPUTS + 16];
+ int validated_bo_count;
+ } state;
+
+ struct brw_cache cache; /** non-surface items */
+ struct brw_cache surface_cache; /* surface items */
+ struct brw_cached_batch_item *cached_batch_items;
+
+ struct {
+ struct u_upload_mgr *upload_vertex;
+ struct u_upload_mgr *upload_index;
+
+ /* Information on uploaded vertex buffers:
+ */
+ struct {
+ unsigned stride; /* in bytes between successive vertices */
+ unsigned offset; /* in bytes, of first vertex in bo */
+ unsigned vertex_count; /* count of valid vertices which may be accessed */
+ struct brw_winsys_buffer *bo;
+ } vb[PIPE_MAX_ATTRIBS];
+
+ unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */
+ } vb;
+
+ struct {
+ /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+ struct brw_winsys_buffer *bo;
+ unsigned int offset;
+ unsigned int size;
+ /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+ * avoid re-uploading the IB packet over and over if we're actually
+ * referencing the same index buffer.
+ */
+ unsigned int start_vertex_offset;
+ } ib;
+
+
+ /* BRW_NEW_URB_FENCE:
+ */
+ struct {
+ GLuint vsize; /* vertex size plus header in urb registers */
+ GLuint csize; /* constant buffer size in urb registers */
+ GLuint sfsize; /* setup data size in urb registers */
+
+ GLboolean constrained;
+
+ GLuint nr_vs_entries;
+ GLuint nr_gs_entries;
+ GLuint nr_clip_entries;
+ GLuint nr_sf_entries;
+ GLuint nr_cs_entries;
+
+ GLuint vs_start;
+ GLuint gs_start;
+ GLuint clip_start;
+ GLuint sf_start;
+ GLuint cs_start;
+ } urb;
+
+
+ /* BRW_NEW_CURBE_OFFSETS:
+ */
+ struct {
+ GLuint wm_start; /**< pos of first wm const in CURBE buffer */
+ GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
+ GLuint clip_start;
+ GLuint clip_size;
+ GLuint vs_start;
+ GLuint vs_size;
+ GLuint total_size;
+
+ struct brw_winsys_buffer *curbe_bo;
+ /** Offset within curbe_bo of space for current curbe entry */
+ GLuint curbe_offset;
+ /** Offset within curbe_bo of space for next curbe entry */
+ GLuint curbe_next_offset;
+
+ GLfloat *last_buf;
+ GLuint last_bufsz;
+ /**
+ * Whether we should create a new bo instead of reusing the old one
+ * (if we just dispatch the batch pointing at the old one).
+ */
+ GLboolean need_new_bo;
+ } curbe;
+
+ struct {
+ struct brw_vs_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ struct brw_winsys_buffer *bind_bo;
+ struct brw_winsys_buffer *surf_bo[BRW_VS_MAX_SURF];
+ GLuint nr_surfaces;
+ } vs;
+
+ struct {
+ struct brw_gs_prog_data *prog_data;
+
+ GLboolean prog_active;
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ } gs;
+
+ struct {
+ struct brw_clip_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ struct brw_winsys_buffer *vp_bo;
+ } clip;
+
+
+ struct {
+ struct brw_sf_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ struct brw_winsys_buffer *vp_bo;
+ } sf;
+
+ struct {
+ struct brw_wm_prog_data *prog_data;
+ struct brw_wm_compile *compile_data;
+
+ /** Input sizes, calculated from active vertex program.
+ * One bit per fragment program input attribute.
+ */
+ /*GLbitfield input_size_masks[4];*/
+
+ /** Array of surface default colors (texture border color) */
+ struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT];
+
+ GLuint render_surf;
+ GLuint nr_surfaces;
+
+ GLuint max_threads;
+ struct brw_winsys_buffer *scratch_bo;
+
+ GLuint sampler_count;
+ struct brw_winsys_buffer *sampler_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ struct brw_winsys_buffer *bind_bo;
+ struct brw_winsys_buffer *surf_bo[BRW_WM_MAX_SURF];
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ } wm;
+
+
+ struct {
+ struct brw_winsys_buffer *state_bo;
+
+ struct brw_cc_unit_state cc;
+ struct brw_winsys_reloc reloc[1];
+ } cc;
+
+ struct {
+ struct brw_query_object active_head;
+ struct brw_winsys_buffer *bo;
+ int index;
+ GLboolean active;
+ int stats_wm;
+ } query;
+
+ struct {
+ unsigned always_emit_state:1;
+ unsigned always_flush_batch:1;
+ unsigned force_swtnl:1;
+ unsigned no_swtnl:1;
+ } flags;
+
+ /* Used to give every program string a unique id
+ */
+ GLuint program_id;
+};
+
+
+
+/*======================================================================
+ * brw_queryobj.c
+ */
+void brw_init_query(struct brw_context *brw);
+enum pipe_error brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
+
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct brw_context *intel);
+
+
+/*======================================================================
+ * brw_pipe_*.c
+ */
+void brw_pipe_blend_init( struct brw_context *brw );
+void brw_pipe_depth_stencil_init( struct brw_context *brw );
+void brw_pipe_framebuffer_init( struct brw_context *brw );
+void brw_pipe_flush_init( struct brw_context *brw );
+void brw_pipe_misc_init( struct brw_context *brw );
+void brw_pipe_query_init( struct brw_context *brw );
+void brw_pipe_rast_init( struct brw_context *brw );
+void brw_pipe_sampler_init( struct brw_context *brw );
+void brw_pipe_shader_init( struct brw_context *brw );
+void brw_pipe_vertex_init( struct brw_context *brw );
+void brw_pipe_clear_init( struct brw_context *brw );
+
+
+void brw_pipe_blend_cleanup( struct brw_context *brw );
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw );
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw );
+void brw_pipe_flush_cleanup( struct brw_context *brw );
+void brw_pipe_misc_cleanup( struct brw_context *brw );
+void brw_pipe_query_cleanup( struct brw_context *brw );
+void brw_pipe_rast_cleanup( struct brw_context *brw );
+void brw_pipe_sampler_cleanup( struct brw_context *brw );
+void brw_pipe_shader_cleanup( struct brw_context *brw );
+void brw_pipe_vertex_cleanup( struct brw_context *brw );
+void brw_pipe_clear_cleanup( struct brw_context *brw );
+
+void brw_hw_cc_init( struct brw_context *brw );
+void brw_hw_cc_cleanup( struct brw_context *brw );
+
+
+
+void brw_context_flush( struct brw_context *brw );
+
+
+/* brw_urb.c
+ */
+int brw_upload_urb_fence(struct brw_context *brw);
+
+/* brw_curbe.c
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct brw_context *
+brw_context( struct pipe_context *ctx )
+{
+ /* Plain downcast: 'base' is the first member of struct brw_context. */
+ return (struct brw_context *)ctx;
+}
+
+
+#define BRW_IS_965(brw) ((brw)->chipset.is_965)
+#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng)
+#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x)
+
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
new file mode 100644
index 0000000000..3f031577d5
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -0,0 +1,390 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
+
+
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers. We no longer allocate a block of the GRF for
+ * constants. That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
+ */
+static int calculate_curbe_offsets( struct brw_context *brw )
+{
+   /* Register counts requested by each CURBE user.  One register here
+    * corresponds to 16 floats in the buffer built by
+    * prepare_curbe_buffer().
+    */
+
+   /* CACHE_NEW_WM_PROG */
+   const GLuint wm_regs = brw->wm.prog_data->curb_read_length;
+
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const GLuint vs_regs = brw->vs.prog_data->curb_read_length;
+
+   GLuint clip_regs = 0;
+   GLuint total_regs;
+
+   /* PIPE_NEW_CLIP: once any user clip plane is enabled, the six fixed
+    * planes go through the CURBE as well; four floats per plane, rounded
+    * up to whole registers.
+    */
+   if (brw->curr.ucp.nr) {
+      const GLuint nr_planes = 6 + brw->curr.ucp.nr;
+      clip_regs = (nr_planes * 4 + 15) / 16;
+   }
+
+   total_regs = wm_regs + vs_regs + clip_regs;
+
+   /* When this is > 32, want to use a true constant buffer to hold
+    * the extra constants.
+    */
+   assert(total_regs <= 32);
+
+   /* Lazy resize: keep the existing layout while every section still
+    * fits, the clip section is unchanged and the total has not shrunk to
+    * under a quarter of a layout larger than 16 registers.
+    */
+   if (wm_regs <= brw->curbe.wm_size &&
+       vs_regs <= brw->curbe.vs_size &&
+       clip_regs == brw->curbe.clip_size &&
+       (total_regs >= brw->curbe.total_size / 4 ||
+        brw->curbe.total_size <= 16))
+      return 0;
+
+   /* New layout, sections packed back to back: WM, CLIP, VS. */
+   brw->curbe.wm_start = 0;
+   brw->curbe.wm_size = wm_regs;
+   brw->curbe.clip_start = wm_regs;
+   brw->curbe.clip_size = clip_regs;
+   brw->curbe.vs_start = wm_regs + clip_regs;
+   brw->curbe.vs_size = vs_regs;
+   brw->curbe.total_size = wm_regs + clip_regs + vs_regs;
+
+   if (BRW_DEBUG & DEBUG_CURBE)
+      debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+                   brw->curbe.wm_start,
+                   brw->curbe.wm_size,
+                   brw->curbe.clip_start,
+                   brw->curbe.clip_size,
+                   brw->curbe.vs_start,
+                   brw->curbe.vs_size );
+
+   brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+
+   return 0;
+}
+
+
+/* Tracked-state entry for the CURBE layout. The dirty masks list the
+ * flags whose state calculate_curbe_offsets() reads; only a prepare hook
+ * is set, no emit hook.
+ */
+const struct brw_tracked_state brw_curbe_offsets = {
+ .dirty = {
+ .mesa = PIPE_NEW_CLIP,
+ .brw = BRW_NEW_VERTEX_PROGRAM,
+ .cache = CACHE_NEW_WM_PROG
+ },
+ .prepare = calculate_curbe_offsets
+};
+
+
+
+
+/* Define the number of curbes within CS's urb allocation. Multiple
+ * urb entries -> multiple curbes. These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw)
+{
+   struct brw_cs_urb_state state;
+
+   /* There must be at least one CS urb entry to describe. */
+   assert(brw->urb.nr_cs_entries);
+
+   /* Start from an all-zero packet so unset fields are deterministic. */
+   memset(&state, 0, sizeof(state));
+
+   /* It appears that this is the state packet for the CS unit, ie. the
+    * urb entries detailed here are housed in the CS range from the
+    * URB_FENCE command.
+    */
+   state.header.opcode = CMD_CS_URB_STATE;
+   state.header.length = sizeof(state)/4 - 2;
+
+   /* BRW_NEW_URB_FENCE */
+   state.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+   state.bits0.urb_entry_size = brw->urb.csize - 1;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &state);
+
+   return 0;
+}
+
+/* The six fixed clip planes, one float[4] per plane. When user clip
+ * planes are in use, prepare_curbe_buffer() copies these into the clip
+ * section of the CURBE ahead of the user planes.
+ */
+static GLfloat fixed_plane[6][4] = {
+ { 0, 0, -1, 1 },
+ { 0, 0, 1, 1 },
+ { 0, -1, 0, 1 },
+ { 0, 1, 0, 1 },
+ {-1, 0, 0, 1 },
+ { 1, 0, 0, 1 }
+};
+
+/* Upload a new set of constants. Too much variability to go into the
+ * cache mechanism, but maybe would benefit from a comparison against
+ * the current uploaded set of constants.
+ */
+/**
+ * Assemble the CURBE (constant URB entry) image for the current state and
+ * upload it to the curbe buffer object when it differs from the last upload.
+ *
+ * The staging image holds brw->curbe.total_size units of 16 floats.  Up to
+ * three sections are written, located by wm_start, clip_start and vs_start
+ * (all expressed in the same 16-float units):
+ *   - fragment shader immediates followed by the fragment constants,
+ *   - the six fixed clip planes followed by the user clip planes,
+ *   - vertex shader immediates followed by the vertex constants.
+ *
+ * The most recent image is kept in brw->curbe.last_buf so that an unchanged
+ * image is not copied into the buffer object again.
+ *
+ * \param brw  driver context; its curbe state is read and updated.
+ * \return 0 on success (including the total_size == 0 case),
+ *         PIPE_ERROR_OUT_OF_MEMORY if the staging image cannot be allocated,
+ *         or the error code returned by bo_alloc().
+ */
+static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
+{
+   struct pipe_screen *screen = brw->base.screen;
+   const GLuint sz = brw->curbe.total_size;
+   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
+   enum pipe_error ret;
+   GLfloat *buf;
+   GLuint i;
+
+   if (sz == 0) {
+      /* Nothing to upload: drop the cached image.  It was obtained from
+       * CALLOC(), so it must be released with the matching FREE() rather
+       * than libc free().
+       */
+      if (brw->curbe.last_buf) {
+         FREE(brw->curbe.last_buf);
+         brw->curbe.last_buf = NULL;
+         brw->curbe.last_bufsz = 0;
+      }
+      return 0;
+   }
+
+   /* Zero-filled staging image; sections that are not written stay zero. */
+   buf = (GLfloat *) CALLOC(bufsz, 1);
+   if (buf == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* fragment shader constants */
+   if (brw->curbe.wm_size) {
+      const struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+      GLuint offset = brw->curbe.wm_start * 16;
+      GLuint nr_immediate, nr_const;
+
+      /* Immediates come first, four floats each. */
+      nr_immediate = fs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset],
+                fs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
+
+         offset += nr_immediate * 4;
+      }
+
+      nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+/*    nr_const = brw->wm.prog_data->nr_params; */
+      if (nr_const) {
+         /* NOTE(review): the mapping result is used unchecked -- confirm
+          * that buffer_map() cannot fail for the bound constant buffer.
+          */
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.fragment_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+
+         memcpy(&buf[offset], value,
+                nr_const * 4 * sizeof(float));
+
+         screen->buffer_unmap( screen,
+                               brw->curr.fragment_constants );
+      }
+   }
+
+
+   /* The clipplanes are actually delivered to both CLIP and VS units.
+    * VS uses them to calculate the outcode bitmasks.
+    */
+   if (brw->curbe.clip_size) {
+      GLuint offset = brw->curbe.clip_start * 16;
+      GLuint j;
+
+      /* If any planes are going this way, send them all this way:
+       */
+      for (i = 0; i < 6; i++) {
+         buf[offset + i * 4 + 0] = fixed_plane[i][0];
+         buf[offset + i * 4 + 1] = fixed_plane[i][1];
+         buf[offset + i * 4 + 2] = fixed_plane[i][2];
+         buf[offset + i * 4 + 3] = fixed_plane[i][3];
+      }
+
+      /* Clip planes:
+       *
+       * 'i' deliberately carries on from 6 so that the user planes land
+       * directly after the fixed ones.
+       */
+      assert(brw->curr.ucp.nr <= 6);
+      for (j = 0; j < brw->curr.ucp.nr; j++) {
+         buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0];
+         buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1];
+         buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2];
+         buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3];
+         i++;
+      }
+   }
+
+   /* vertex shader constants */
+   if (brw->curbe.vs_size) {
+      GLuint offset = brw->curbe.vs_start * 16;
+      const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+      GLuint nr_immediate, nr_const;
+
+      nr_immediate = vs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset],
+                vs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
+
+         offset += nr_immediate * 4;
+      }
+
+      nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      if (nr_const) {
+         /* XXX: note that constant buffers are currently *already* in
+          * buffer objects.  If we want to keep on putting them into the
+          * curbe, makes sense to treat constbuf's specially with malloc.
+          */
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.vertex_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+
+         /* XXX: what if user's constant buffer is too small?
+          */
+         memcpy(&buf[offset], value, nr_const * 4 * sizeof(float));
+
+         screen->buffer_unmap( screen, brw->curr.vertex_constants );
+      }
+   }
+
+   if (BRW_DEBUG & DEBUG_CURBE) {
+      for (i = 0; i < sz*16; i+=4)
+         debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+                      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+      debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+                   (void *)brw->curbe.last_buf, (void *)buf,
+                   bufsz, brw->curbe.last_bufsz,
+                   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+   }
+
+   if (brw->curbe.curbe_bo != NULL &&
+       brw->curbe.last_buf &&
+       bufsz == brw->curbe.last_bufsz &&
+       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+      /* constants have not changed */
+      FREE(buf);
+   }
+   else {
+      /* constants have changed */
+      FREE(brw->curbe.last_buf);
+
+      /* Ownership of 'buf' moves to the cache here, so the early return
+       * on bo_alloc() failure below does not leak it.
+       */
+      brw->curbe.last_buf = buf;
+      brw->curbe.last_bufsz = bufsz;
+
+      /* Drop the current buffer object when a new one was requested or the
+       * new image would not fit behind the data already placed in it.
+       */
+      if (brw->curbe.curbe_bo != NULL &&
+          (brw->curbe.need_new_bo ||
+           brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+      {
+         bo_reference(&brw->curbe.curbe_bo, NULL);
+      }
+
+      if (brw->curbe.curbe_bo == NULL) {
+         /* Allocate a single page for CURBE entries for this
+          * batchbuffer.  They're generally around 64b.  We will
+          * discard the curbe buffer after the batch is flushed to
+          * avoid synchronous updates.
+          */
+         ret = brw->sws->bo_alloc(brw->sws,
+                                  BRW_BUFFER_TYPE_CURBE,
+                                  4096, 1 << 6,
+                                  &brw->curbe.curbe_bo);
+         if (ret)
+            return ret;
+
+         brw->curbe.curbe_next_offset = 0;
+      }
+
+      /* Place this image at the next free offset and advance that offset
+       * to the following 64-byte boundary.
+       */
+      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+      brw->curbe.curbe_next_offset += bufsz;
+      brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64);
+
+      /* Copy data to the buffer:
+       */
+      brw->sws->bo_subdata(brw->curbe.curbe_bo,
+                           BRW_DATA_CONSTANT_BUFFER,
+                           brw->curbe.curbe_offset,
+                           bufsz,
+                           buf,
+                           NULL, 0);
+   }
+
+   brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+   /* Because this provokes an action (ie copy the constants into the
+    * URB), it shouldn't be shortcircuited if identical to the
+    * previous time - because eg. the urb destination may have
+    * changed, or the urb contents different to last time.
+    *
+    * Note that the data referred to is actually copied internally,
+    * not just used in place according to passed pointer.
+    *
+    * It appears that the CS unit takes care of using each available
+    * URB entry (Const URB Entry == CURBE) in turn, and issuing
+    * flushes as necessary when doublebuffering of CURBEs isn't
+    * possible.
+    */
+
+   return 0;
+}
+
+/**
+ * Emit the CONST_BUFFER packet (two batch entries) for the current CURBE.
+ *
+ * When there is CURBE data (total_size != 0) the header has bit 8 set and
+ * is followed by a relocation against curbe_bo whose delta is
+ * (total_size - 1) + curbe_offset.  Otherwise bit 8 is clear and the second
+ * entry is zero.
+ *
+ * NOTE(review): bit 8 looks like a "buffer valid" flag and the low bits of
+ * the relocation delta like a length-minus-one field -- confirm against the
+ * hardware documentation.
+ *
+ * \return always 0.
+ */
+static enum pipe_error emit_curbe_buffer(struct brw_context *brw)
+{
+   const GLuint nr_units = brw->curbe.total_size;
+
+   BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+   if (nr_units != 0) {
+      /* Constants present: point the hardware at the uploaded image. */
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
+      OUT_RELOC(brw->curbe.curbe_bo,
+                BRW_USAGE_STATE,
+                (nr_units - 1) + brw->curbe.curbe_offset);
+   }
+   else {
+      /* No constants: same packet with bit 8 clear and no buffer. */
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
+      OUT_BATCH(0);
+   }
+   ADVANCE_BATCH();
+   return 0;
+}
+
+const struct brw_tracked_state brw_curbe_buffer = { /* binds the CURBE prepare/emit callbacks to the flags below */
+ .dirty = { /* NOTE(review): presumably the flags whose change causes this entry to be re-run -- confirm */
+ .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS |
+ PIPE_NEW_VERTEX_CONSTANTS |
+ PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_BATCH),
+ .cache = (CACHE_NEW_WM_PROG)
+ },
+ .prepare = prepare_curbe_buffer, /* builds the constant image and uploads it when changed */
+ .emit = emit_curbe_buffer, /* writes the CMD_CONST_BUFFER packet into the batch */
+};
+
diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
new file mode 100644
index 0000000000..ae8e9254a6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -0,0 +1,43 @@
+#ifndef BRW_DEBUG_H
+#define BRW_DEBUG_H
+
+/* ================================================================
+ * Debugging:
+ *
+ * Every DEBUG_* value below is a distinct single-bit mask.  A flag is
+ * tested by AND-ing it with BRW_DEBUG, e.g. "BRW_DEBUG & DEBUG_CURBE".
+ * NOTE(review): the DEBUG_unusedN names look like placeholders for bits
+ * that currently have no meaning -- confirm before reusing them.
+ */
+
+#define DEBUG_TEXTURE 0x1
+#define DEBUG_STATE 0x2
+#define DEBUG_IOCTL 0x4
+#define DEBUG_BLIT 0x8
+#define DEBUG_CURBE 0x10
+#define DEBUG_FALLBACKS 0x20
+#define DEBUG_VERBOSE 0x40
+#define DEBUG_BATCH 0x80
+#define DEBUG_PIXEL 0x100
+#define DEBUG_WINSYS 0x200
+#define DEBUG_MIN_URB 0x400
+#define DEBUG_DISASSEM 0x800
+#define DEBUG_unused3 0x1000
+#define DEBUG_SYNC 0x2000
+#define DEBUG_PRIMS 0x4000
+#define DEBUG_VERTS 0x8000
+#define DEBUG_unused4 0x10000
+#define DEBUG_DMA 0x20000
+#define DEBUG_SANITY 0x40000
+#define DEBUG_SLEEP 0x80000
+#define DEBUG_STATS 0x100000
+#define DEBUG_unused5 0x200000
+#define DEBUG_SINGLE_THREAD 0x400000
+#define DEBUG_WM 0x800000
+#define DEBUG_URB 0x1000000
+#define DEBUG_VS 0x2000000
+/* With DEBUG defined, BRW_DEBUG is an int variable defined elsewhere;
+ * otherwise it is the literal 0, so every "BRW_DEBUG & DEBUG_x" test is
+ * constant-false.
+ */
+#ifdef DEBUG
+extern int BRW_DEBUG;
+#else
+#define BRW_DEBUG 0
+#endif
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
new file mode 100644
index 0000000000..e201ce4d7c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -0,0 +1,847 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ *
+ * Note that the _3DSTATE_* codes below are not unique on their own: values
+ * such as 0x00, 0x01, 0x08 and 0x09 are each assigned to more than one name.
+ * NOTE(review): presumably a code is only meaningful together with one of
+ * the _3DOP_* classes -- confirm against the hardware documentation.
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+/* Two separate PIPE_CONTROL_* value sets follow, each numbered from 0. */
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+/* _3DPRIM_* primitive type codes: contiguous from 0x01 through 0x15. */
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define BRW_ANISORATIO_2 0 /* BRW_ANISORATIO_n is encoded as n/2 - 1 */
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+/* Each BRW_BLENDFACTOR_INV_x (and ZERO vs. ONE) is the plain code plus 0x10; SRC_ALPHA_SATURATE has no inverse. */
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+/* Comparison codes 0..7; BRW_PREFILTER_* later in this header uses the same order and values. */
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+/* Depth format codes; values 3 and 4 are not defined here. */
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+/* Logic op codes: all sixteen values 0..15 are assigned. */
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+/* Mip filter codes; value 2 is not defined here. */
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+/* Same order and values as the BRW_COMPAREFUNCTION_* codes earlier in this header. */
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT 2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 /* surface format codes: sparse, listed in ascending order; BRW_SURFACEFORMAT_INVALID (0xFFF) closes the list */
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define BRW_SURFACEFORMAT_INVALID 0xFFF
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+/* Surface type codes; values 5 and 6 are not defined here and NULL is 7. */
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+#define BRW_COMPRESSION_NONE 0
+#define BRW_COMPRESSION_2NDHALF 1
+#define BRW_COMPRESSION_COMPRESSED 2
+/* Condition codes; EQ and NEQ are aliases for Z and NZ (same values). */
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_R 7
+#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+/* BRW_EXECUTE_n is encoded as log2(n). */
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+/* Stride 0 is encoded as 0; a non-zero stride n as log2(n) + 1. */
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+/* Opcode numbers are sparse; the largest one defined here is BRW_OPCODE_NOP (126). */
+#define BRW_OPCODE_MOV 1
+#define BRW_OPCODE_SEL 2
+#define BRW_OPCODE_NOT 4
+#define BRW_OPCODE_AND 5
+#define BRW_OPCODE_OR 6
+#define BRW_OPCODE_XOR 7
+#define BRW_OPCODE_SHR 8
+#define BRW_OPCODE_SHL 9
+#define BRW_OPCODE_RSR 10
+#define BRW_OPCODE_RSL 11
+#define BRW_OPCODE_ASR 12
+#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_CMPN 17
+#define BRW_OPCODE_JMPI 32
+#define BRW_OPCODE_IF 34
+#define BRW_OPCODE_IFF 35
+#define BRW_OPCODE_ELSE 36
+#define BRW_OPCODE_ENDIF 37
+#define BRW_OPCODE_DO 38
+#define BRW_OPCODE_WHILE 39
+#define BRW_OPCODE_BREAK 40
+#define BRW_OPCODE_CONTINUE 41
+#define BRW_OPCODE_HALT 42
+#define BRW_OPCODE_MSAVE 44
+#define BRW_OPCODE_MRESTORE 45
+#define BRW_OPCODE_PUSH 46
+#define BRW_OPCODE_POP 47
+#define BRW_OPCODE_WAIT 48
+#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_ADD 64
+#define BRW_OPCODE_MUL 65
+#define BRW_OPCODE_AVG 66
+#define BRW_OPCODE_FRC 67
+#define BRW_OPCODE_RNDU 68
+#define BRW_OPCODE_RNDD 69
+#define BRW_OPCODE_RNDE 70
+#define BRW_OPCODE_RNDZ 71
+#define BRW_OPCODE_MAC 72
+#define BRW_OPCODE_MACH 73
+#define BRW_OPCODE_LZD 74
+#define BRW_OPCODE_SAD2 80
+#define BRW_OPCODE_SADA2 81
+#define BRW_OPCODE_DP4 84
+#define BRW_OPCODE_DPH 85
+#define BRW_OPCODE_DP3 86
+#define BRW_OPCODE_DP2 87
+#define BRW_OPCODE_DPA2 88
+#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_NOP 126
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 /* the ALIGN16 names reuse values 2..7 of the ALIGN1 names above */
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6
+#define BRW_PREDICATE_ALIGN16_ALL4H 7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+/* Note: B and VF share value 5, and HF and V share value 6. */
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+/* BRW_ARF_* values are spaced 0x10 apart (0x00 .. 0xA0). */
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+/* Stride 0 is encoded as 0, a non-zero stride n as log2(n) + 1; ONE_DIMENSIONAL is the separate value 0xF. */
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+/* BRW_WIDTH_n is encoded as log2(n). */
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+/* Encoded as log2(size / 1K): 1K -> 0 ... 2M -> 11. */
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+/* Same values as BRW_POLYGON_FRONT_FACING / BRW_POLYGON_BACK_FACING earlier in this header. */
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+#define BRW_MESSAGE_TARGET_NULL 0
+#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_SAMPLER 2
+#define BRW_MESSAGE_TARGET_GATEWAY 3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_URB 6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+/* Sampler message codes: the same value is reused by different operations depending on the SIMD mode
+ * (e.g. SIMD8_SAMPLE, SIMD16_SAMPLE and SIMD16_SAMPLE_BIAS are all 0), so a value alone does not
+ * identify the operation.  NOTE(review): how the hardware disambiguates them is not visible here.
+ */
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+/* _IGDNG variants: the value depends only on the operation (sample 0, bias 1, lod 2, compare 3), whatever the SIMD mode. */
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+/* Dataport write message codes; value 6 is not defined here. */
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define BRW_MATH_FUNCTION_INV 1 /* math function codes start at 1; 0 is not defined here */
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+/* Encoded as log2(size / 1K): 1K -> 0 ... 2M -> 11. */
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CS_URB_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+/* Where a command has both _965 and _GM45 variants below, the two opcode values differ. */
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT_965 0x6104
+#define CMD_PIPELINE_SELECT_GM45 0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+/* The indented BRW_VB0_* / BRW_VE0_* / BRW_VE1_* names are shifts, bit flags and field values grouped under the command they follow. */
+#define CMD_VERTEX_BUFFER 0x7808
+# define BRW_VB0_INDEX_SHIFT 27
+# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define BRW_VB0_PITCH_SHIFT 0
+
+#define CMD_VERTEX_ELEMENT 0x7809
+# define BRW_VE0_INDEX_SHIFT 27
+# define BRW_VE0_FORMAT_SHIFT 16
+# define BRW_VE0_VALID (1 << 26)
+# define BRW_VE0_SRC_OFFSET_SHIFT 0
+# define BRW_VE1_COMPONENT_NOSTORE 0
+# define BRW_VE1_COMPONENT_STORE_SRC 1
+# define BRW_VE1_COMPONENT_STORE_0 2
+# define BRW_VE1_COMPONENT_STORE_1_FLT 3
+# define BRW_VE1_COMPONENT_STORE_1_INT 4
+# define BRW_VE1_COMPONENT_STORE_VID 5
+# define BRW_VE1_COMPONENT_STORE_IID 6
+# define BRW_VE1_COMPONENT_STORE_PID 7
+# define BRW_VE1_COMPONENT_0_SHIFT 28
+# define BRW_VE1_COMPONENT_1_SHIFT 24
+# define BRW_VE1_COMPONENT_2_SHIFT 20
+# define BRW_VE1_COMPONENT_3_SHIFT 16
+# define BRW_VE1_DST_OFFSET_SHIFT 0
+
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS_965 0x780b
+#define CMD_VF_STATISTICS_GM45 0x680b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS 0x790a
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+/* URB_SIZES(brw) evaluates to 1024 when BRW_IS_IGDNG(brw), else 384 when BRW_IS_G4X(brw), else 256. */
+#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
+ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
new file mode 100644
index 0000000000..65db27248b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -0,0 +1,922 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "brw_disasm.h"
+#include "brw_structs.h"
+#include "brw_reg.h"
+#include "brw_defines.h"
+
+/* Per-opcode description, indexed by the instruction header's opcode value.
+ *   name - mnemonic printed by print_opcode(); a NULL name (any index not
+ *          listed here) is reported as an invalid opcode.
+ *   nsrc - brw_disasm_insn() prints src0 when > 0 and src1 when > 1.
+ *   ndst - brw_disasm_insn() prints the destination operand when > 0.
+ */
+struct {
+ char *name;
+ int nsrc;
+ int ndst;
+} opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ /* NOTE(review): "01" is an octal literal equal to 1, so iff is printed
+  * with a destination operand, unlike if/else/endif which use ndst = 0.
+  * Confirm whether 0 was intended. */
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+/* The tables below map an instruction field value to the text printed for
+ * it.  They are consumed through control(): a NULL slot (any index not listed)
+ * is reported as an invalid value, and an empty string prints nothing.
+ */
+
+/* Conditional modifier suffix; printed for every opcode except send. */
+char *conditional_modifier[16] = {
+ [BRW_CONDITIONAL_NONE] = "",
+ [BRW_CONDITIONAL_Z] = ".e",
+ [BRW_CONDITIONAL_NZ] = ".ne",
+ [BRW_CONDITIONAL_G] = ".g",
+ [BRW_CONDITIONAL_GE] = ".ge",
+ [BRW_CONDITIONAL_L] = ".l",
+ [BRW_CONDITIONAL_LE] = ".le",
+ [BRW_CONDITIONAL_R] = ".r",
+ [BRW_CONDITIONAL_O] = ".o",
+ [BRW_CONDITIONAL_U] = ".u",
+};
+
+/* Prefix printed before a source operand when its negate bit is set. */
+char *negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+/* Prefix printed before a source operand when its abs bit is set. */
+char *_abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+/* Vertical stride field of a source region. */
+char *vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+/* Width field of an align1 source region. */
+char *width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+/* Horizontal stride field of an align1 source region. */
+char *horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+/* Channel letter for one align16 swizzle selector. */
+char *chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+/* No slot is populated and nothing in this file reads this table. */
+char *dest_condmod[16] = {
+ [0] = NULL
+};
+
+/* Suffix printed after the opcode for the debug control bit. */
+char *debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+/* Suffix printed after the opcode for the saturate bit. */
+char *saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+/* Execution size, printed in parentheses after the opcode. */
+char *exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+/* Sign printed in front of the flag register inside a predicate. */
+char *pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+/* Predicate control suffix in align16 mode.  Slot 0 is left unset; the table
+ * is only consulted when predicate_control is nonzero. */
+char *pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+/* Predicate control suffix in align1 mode.  Slot 0 is left unset; the table
+ * is only consulted when predicate_control is nonzero. */
+char *pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+/* The next five tables are printed inside the trailing "{ ... }" group. */
+
+/* Thread control field. */
+char *thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+/* Compression control field. */
+char *compr_ctrl[4] = {
+ [0] = "",
+ [1] = "sechalf",
+ [2] = "compr",
+};
+
+/* Dependency control field. */
+char *dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+/* Mask control field. */
+char *mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+/* Access mode field. */
+char *access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+/* Type suffix printed after a register operand.  Slot 6 is unset, so that
+ * value is reported as invalid. */
+char *reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+/* Type suffix for immediate operands, indexed by the register type field.
+ * Slot 4 is unset.  "VF" and "V" occupy slots 5 and 6: the original listed
+ * both under [5], which silently dropped "VF" and left slot 6 NULL.
+ */
+char *imm_encoding[8] = {
+    [0] = "UD",
+    [1] = "D",
+    [2] = "UW",
+    [3] = "W",
+    [5] = "VF",
+    [6] = "V",
+    [7] = "F"
+};
+
+/* Register file prefix; reg() uses it for every file other than the
+ * architecture register file, which it spells out itself. */
+char *reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+/* Destination write mask suffix in align16 mode; the all-channels mask
+ * (0xf) prints nothing. */
+char *writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+/* End-of-thread marker; only printed for send instructions. */
+char *end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+/* Name of the message target of a send instruction. */
+char *target_function[16] = {
+ [BRW_MESSAGE_TARGET_NULL] = "null",
+ [BRW_MESSAGE_TARGET_MATH] = "math",
+ [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+ [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+ [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
+ [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+ [BRW_MESSAGE_TARGET_URB] = "urb",
+ [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+/* Name of the function requested from the math message target, indexed by
+ * the math function field of a send instruction.  The quotient-only function
+ * prints as "intdiv" and the remainder-only function as "intmod"; the
+ * original had these two names swapped.
+ */
+char *math_function[16] = {
+    [BRW_MATH_FUNCTION_INV] = "inv",
+    [BRW_MATH_FUNCTION_LOG] = "log",
+    [BRW_MATH_FUNCTION_EXP] = "exp",
+    [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+    [BRW_MATH_FUNCTION_RSQ] = "rsq",
+    [BRW_MATH_FUNCTION_SIN] = "sin",
+    [BRW_MATH_FUNCTION_COS] = "cos",
+    [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+    [BRW_MATH_FUNCTION_TAN] = "tan",
+    [BRW_MATH_FUNCTION_POW] = "pow",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
+};
+
+/* The math_* tables describe the fields of a send to the math target. */
+
+/* Math saturate bit. */
+char *math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+/* Math int_type field. */
+char *math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+/* Math data_type field. */
+char *math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+/* Math precision field. */
+char *math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+/* The urb_* tables describe the fields of a send to the URB target. */
+
+/* URB swizzle control field. */
+char *urb_swizzle[4] = {
+ [BRW_URB_SWIZZLE_NONE] = "",
+ [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+ [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+/* URB allocate bit. */
+char *urb_allocate[2] = {
+ [0] = "",
+ [1] = "allocate"
+};
+
+/* URB used bit. */
+char *urb_used[2] = {
+ [0] = "",
+ [1] = "used"
+};
+
+/* URB complete bit. */
+char *urb_complete[2] = {
+ [0] = "",
+ [1] = "complete"
+};
+
+/* Return format of a send to the sampler target.  Slot 1 is unset, so that
+ * value is reported as invalid. */
+char *sampler_target_format[4] = {
+ [0] = "F",
+ [2] = "UD",
+ [3] = "D"
+};
+
+
+/* Number of characters written on the current output line through string();
+ * reset by newline() and read by pad(). */
+static int column;
+
+/* Write text to file and advance the running column count by its length.
+ * Always returns 0.
+ */
+static int string (FILE *file, char *string)
+{
+    size_t length = strlen (string);
+
+    fputs (string, file);
+    column += length;
+    return 0;
+}
+
+/* printf-style wrapper around string(): formats into a fixed 1024-byte
+ * buffer (longer output is truncated) and writes the result to f so that the
+ * column count stays accurate.  Always returns 0.
+ */
+static int format (FILE *f, char *format, ...)
+{
+    char buf[1024];
+    va_list args;
+
+    va_start (args, format);
+    vsnprintf (buf, sizeof (buf) - 1, format, args);
+    /* Every va_start must be paired with va_end before returning. */
+    va_end (args);
+
+    string (f, buf);
+    return 0;
+}
+
+/* End the current output line on f and restart the column count.
+ * Always returns 0.
+ */
+static int newline (FILE *f)
+{
+    column = 0;
+    putc ('\n', f);
+    return 0;
+}
+
+/* Emit spaces on f until the column count reaches c.  At least one space is
+ * always written, even when the line is already past column c.
+ * Always returns 0.
+ */
+static int pad (FILE *f, int c)
+{
+    for (;;) {
+        string (f, " ");
+        if (column >= c)
+            break;
+    }
+    return 0;
+}
+
+/* Print the text for field value id taken from lookup table ctrl.
+ *
+ *   name  - field name used in the error message.
+ *   ctrl  - table of strings; the caller guarantees id is within its bounds.
+ *   id    - field value to look up.
+ *   space - optional separator state.  When non-NULL and already set, a
+ *           space is written before a non-empty entry; it is set to 1 once a
+ *           non-empty entry has been printed.
+ *
+ * Returns 1 when the table has no entry for id (a message is printed in its
+ * place), 0 otherwise.  An empty entry prints nothing and leaves *space
+ * untouched.
+ */
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+    if (!ctrl[id]) {
+        /* Go through format() like print_opcode() does, so the message is
+         * counted in the output column; id is unsigned, hence %u. */
+        format (file, "*** invalid %s value %u ",
+                name, id);
+        return 1;
+    }
+    if (ctrl[id][0])
+    {
+        if (space && *space)
+            string (file, " ");
+        string (file, ctrl[id]);
+        if (space)
+            *space = 1;
+    }
+    return 0;
+}
+
+/* Print the mnemonic for opcode value id.  Returns 0 on success, or 1 after
+ * printing an error message when the opcode table has no name for id.
+ */
+static int print_opcode (FILE *file, int id)
+{
+    char *mnemonic = opcode[id].name;
+
+    if (mnemonic) {
+        string (file, mnemonic);
+        return 0;
+    }
+
+    format (file, "*** invalid opcode value %d ", id);
+    return 1;
+}
+
+/* Print a register name.
+ *
+ * For the architecture register file the high nibble of _reg_nr selects the
+ * register kind and the low nibble its index.  Any other file is printed as
+ * the file prefix followed by the register number.
+ *
+ * Returns -1 for the "null" and "ip" registers (callers use this to skip
+ * the rest of the operand), otherwise the result of the register file
+ * lookup (0 on success).
+ */
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+    GLuint index = _reg_nr & 0x0f;
+
+    if (_reg_file != BRW_ARCHITECTURE_REGISTER_FILE) {
+        int err = control (file, "src reg file", reg_file, _reg_file, NULL);
+
+        format (file, "%d", _reg_nr);
+        return err;
+    }
+
+    switch (_reg_nr & 0xf0) {
+    case BRW_ARF_NULL:
+        string (file, "null");
+        return -1;
+    case BRW_ARF_IP:
+        string (file, "ip");
+        return -1;
+    case BRW_ARF_ADDRESS:
+        format (file, "a%d", index);
+        break;
+    case BRW_ARF_ACCUMULATOR:
+        format (file, "acc%d", index);
+        break;
+    case BRW_ARF_MASK:
+        format (file, "mask%d", index);
+        break;
+    case BRW_ARF_MASK_STACK:
+        format (file, "msd%d", index);
+        break;
+    case BRW_ARF_STATE:
+        format (file, "sr%d", index);
+        break;
+    case BRW_ARF_CONTROL:
+        format (file, "cr%d", index);
+        break;
+    case BRW_ARF_NOTIFICATION_COUNT:
+        format (file, "n%d", index);
+        break;
+    default:
+        format (file, "ARF%d", _reg_nr);
+        break;
+    }
+    return 0;
+}
+
+/* Print the destination operand of inst.
+ *
+ * Handles align1 direct and indirect addressing and align16 direct
+ * addressing; align16 indirect addressing is reported as unsupported.
+ *
+ * Returns 0 on success and nonzero when a field could not be decoded.  The
+ * original accumulated the error but always returned 0, so the caller never
+ * saw a bad destination.  A "null" or "ip" register ends the operand early
+ * and counts as success.
+ */
+static int dest (FILE *file, const struct brw_instruction *inst)
+{
+    int err = 0;
+
+    if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+        if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            if (inst->bits1.da1.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+            format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+        }
+        else
+        {
+            string (file, "g[a0");
+            if (inst->bits1.ia1.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+            if (inst->bits1.ia1.dest_indirect_offset)
+                format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+            string (file, "]");
+            format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+        }
+    }
+    else
+    {
+        if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            if (inst->bits1.da16.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+            string (file, "<1>");
+            err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+        }
+        else
+        {
+            err = 1;
+            string (file, "Indirect align16 address mode not supported");
+        }
+    }
+
+    return err;
+}
+
+/* Print an align1 source region as "<vert_stride,width,horiz_stride>".
+ * Returns nonzero when any of the three fields has no table entry.
+ */
+static int src_align1_region (FILE *file,
+                              GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+    int bad = 0;
+
+    string (file, "<");
+    bad |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+
+    string (file, ",");
+    bad |= control (file, "width", width, _width, NULL);
+
+    string (file, ",");
+    bad |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+
+    string (file, ">");
+    return bad;
+}
+
+/* Print a directly addressed align1 source operand: optional negate/abs
+ * prefixes, the register, an optional sub-register, the region and the type
+ * suffix.
+ *
+ * Returns 0 on success and nonzero when a field could not be decoded.  A
+ * "null" or "ip" register ends the operand early and counts as success.
+ */
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+                    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+                    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, reg_num);
+    if (err == -1)
+        return 0;
+    if (sub_reg_num)
+        format (file, ".%d", sub_reg_num);
+    /* The region decoder reports invalid stride/width values; keep its
+     * result instead of discarding it. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/* Print an indirectly addressed align1 source operand as
+ * "g[a0.<subreg> <offset>]" followed by the region and the type suffix.
+ * _reg_file and _addr_mode are accepted but not used.
+ *
+ * Returns 0 on success and nonzero when a field could not be decoded.
+ */
+static int src_ia1 (FILE *file,
+                    GLuint type,
+                    GLuint _reg_file,
+                    GLint _addr_imm,
+                    GLuint _addr_subreg_nr,
+                    GLuint _negate,
+                    GLuint __abs,
+                    GLuint _addr_mode,
+                    GLuint _horiz_stride,
+                    GLuint _width,
+                    GLuint _vert_stride)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    string (file, "g[a0");
+    if (_addr_subreg_nr)
+        format (file, ".%d", _addr_subreg_nr);
+    if (_addr_imm)
+        format (file, " %d", _addr_imm);
+    string (file, "]");
+    /* The region decoder reports invalid stride/width values; keep its
+     * result instead of discarding it. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/* Print a directly addressed align16 source operand: optional negate/abs
+ * prefixes, the register, an optional sub-register, the region
+ * "<vert_stride,1,1>", the type suffix and finally the swizzle.
+ *
+ * The swizzle is shown in one of three ways:
+ *   identity (xyzw)        - nothing is printed
+ *   one channel replicated - "." and that single channel
+ *   anything else          - "." and all four channels
+ *
+ * Returns 0 on success and nonzero when a field could not be decoded.  A
+ * "null" or "ip" register ends the operand early and counts as success.
+ */
+static int src_da16 (FILE *file,
+                     GLuint _reg_type,
+                     GLuint _reg_file,
+                     GLuint _vert_stride,
+                     GLuint _reg_nr,
+                     GLuint _subreg_nr,
+                     GLuint __abs,
+                     GLuint _negate,
+                     GLuint swz_x,
+                     GLuint swz_y,
+                     GLuint swz_z,
+                     GLuint swz_w)
+{
+    int err = 0;
+    int identity;
+    int replicated;
+
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, _reg_nr);
+    if (err == -1)
+        return 0;
+
+    if (_subreg_nr)
+        format (file, ".%d", _subreg_nr);
+
+    string (file, "<");
+    err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+
+    identity = (swz_x == BRW_CHANNEL_X &&
+                swz_y == BRW_CHANNEL_Y &&
+                swz_z == BRW_CHANNEL_Z &&
+                swz_w == BRW_CHANNEL_W);
+    if (identity)
+        return err;
+
+    replicated = (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w);
+
+    string (file, ".");
+    err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    if (!replicated) {
+        err |= control (file, "channel select", chan_sel, swz_y, NULL);
+        err |= control (file, "channel select", chan_sel, swz_z, NULL);
+        err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    return err;
+}
+
+
+/* Print the immediate operand held in inst->bits3, formatted according to
+ * the register type.  Types without a case here print nothing.
+ * Always returns 0.
+ */
+static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+        format (file, "0x%08xUD", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_D:
+        format (file, "%dD", inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UW:
+        format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_W:
+        format (file, "%dW", (int16_t) inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UB:
+        /* Unsigned byte: truncating through int8_t would sign-extend values
+         * of 0x80 and above and print them as 0xffffffXX. */
+        format (file, "0x%02xUB", (uint8_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_VF:
+        format (file, "Vector Float");
+        break;
+    case BRW_REGISTER_TYPE_V:
+        format (file, "0x%08xV", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_F:
+        format (file, "%-gF", inst->bits3.f);
+        break;
+    default:
+        break;
+    }
+    return 0;
+}
+
+/* Print the first source operand of inst.
+ *
+ * An immediate is printed by imm(); otherwise the operand is decoded as
+ * align1 direct, align1 indirect or align16 direct depending on the access
+ * mode and address mode.  align16 indirect addressing is reported as
+ * unsupported.  Returns 0 on success, nonzero on a decode error.
+ */
+static int src0 (FILE *file, const struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src0_reg_type, inst);
+
+    if (inst->header.access_mode == BRW_ALIGN_1) {
+        if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+            return src_da1 (file,
+                            inst->bits1.da1.src0_reg_type,
+                            inst->bits1.da1.src0_reg_file,
+                            inst->bits2.da1.src0_vert_stride,
+                            inst->bits2.da1.src0_width,
+                            inst->bits2.da1.src0_horiz_stride,
+                            inst->bits2.da1.src0_reg_nr,
+                            inst->bits2.da1.src0_subreg_nr,
+                            inst->bits2.da1.src0_abs,
+                            inst->bits2.da1.src0_negate);
+
+        return src_ia1 (file,
+                        inst->bits1.ia1.src0_reg_type,
+                        inst->bits1.ia1.src0_reg_file,
+                        inst->bits2.ia1.src0_indirect_offset,
+                        inst->bits2.ia1.src0_subreg_nr,
+                        inst->bits2.ia1.src0_negate,
+                        inst->bits2.ia1.src0_abs,
+                        inst->bits2.ia1.src0_address_mode,
+                        inst->bits2.ia1.src0_horiz_stride,
+                        inst->bits2.ia1.src0_width,
+                        inst->bits2.ia1.src0_vert_stride);
+    }
+
+    if (inst->bits2.da16.src0_address_mode != BRW_ADDRESS_DIRECT) {
+        string (file, "Indirect align16 address mode not supported");
+        return 1;
+    }
+
+    return src_da16 (file,
+                     inst->bits1.da16.src0_reg_type,
+                     inst->bits1.da16.src0_reg_file,
+                     inst->bits2.da16.src0_vert_stride,
+                     inst->bits2.da16.src0_reg_nr,
+                     inst->bits2.da16.src0_subreg_nr,
+                     inst->bits2.da16.src0_abs,
+                     inst->bits2.da16.src0_negate,
+                     inst->bits2.da16.src0_swz_x,
+                     inst->bits2.da16.src0_swz_y,
+                     inst->bits2.da16.src0_swz_z,
+                     inst->bits2.da16.src0_swz_w);
+}
+
+/* Print the second source operand of inst.
+ *
+ * An immediate is printed by imm(); otherwise the operand is decoded as
+ * align1 direct, align1 indirect or align16 direct depending on the access
+ * mode and address mode.  align16 indirect addressing is reported as
+ * unsupported.  Returns 0 on success, nonzero on a decode error.
+ */
+static int src1 (FILE *file, const struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src1_reg_type, inst);
+
+    if (inst->header.access_mode == BRW_ALIGN_1) {
+        if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+            return src_da1 (file,
+                            inst->bits1.da1.src1_reg_type,
+                            inst->bits1.da1.src1_reg_file,
+                            inst->bits3.da1.src1_vert_stride,
+                            inst->bits3.da1.src1_width,
+                            inst->bits3.da1.src1_horiz_stride,
+                            inst->bits3.da1.src1_reg_nr,
+                            inst->bits3.da1.src1_subreg_nr,
+                            inst->bits3.da1.src1_abs,
+                            inst->bits3.da1.src1_negate);
+
+        return src_ia1 (file,
+                        inst->bits1.ia1.src1_reg_type,
+                        inst->bits1.ia1.src1_reg_file,
+                        inst->bits3.ia1.src1_indirect_offset,
+                        inst->bits3.ia1.src1_subreg_nr,
+                        inst->bits3.ia1.src1_negate,
+                        inst->bits3.ia1.src1_abs,
+                        inst->bits3.ia1.src1_address_mode,
+                        inst->bits3.ia1.src1_horiz_stride,
+                        inst->bits3.ia1.src1_width,
+                        inst->bits3.ia1.src1_vert_stride);
+    }
+
+    if (inst->bits3.da16.src1_address_mode != BRW_ADDRESS_DIRECT) {
+        string (file, "Indirect align16 address mode not supported");
+        return 1;
+    }
+
+    return src_da16 (file,
+                     inst->bits1.da16.src1_reg_type,
+                     inst->bits1.da16.src1_reg_file,
+                     inst->bits3.da16.src1_vert_stride,
+                     inst->bits3.da16.src1_reg_nr,
+                     inst->bits3.da16.src1_subreg_nr,
+                     inst->bits3.da16.src1_abs,
+                     inst->bits3.da16.src1_negate,
+                     inst->bits3.da16.src1_swz_x,
+                     inst->bits3.da16.src1_swz_y,
+                     inst->bits3.da16.src1_swz_z,
+                     inst->bits3.da16.src1_swz_w);
+}
+
+/* Disassemble one instruction to file, terminated by ";" and a newline.
+ *
+ * Output order: optional predicate, opcode with its modifiers, execution
+ * size, destination, sources, the message descriptor (send only, on its own
+ * line) and finally the "{ ... }" instruction options.
+ *
+ * Returns the OR of the results of the helpers it calls, so the value is
+ * nonzero when any of them reported a decode error.
+ */
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
+{
+ int err = 0;
+ int space = 0;
+
+ /* Predicate, e.g. "(+f0.x) "; only printed when predication is enabled. */
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ string (file, "f0");
+ if (inst->bits2.da1.flag_reg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ /* For send, destreg__conditionalmod is printed as a plain number further
+  * down instead of being decoded as a conditional modifier. */
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+
+ /* Operands are padded out to columns 16, 32 and 48; how many are printed
+  * comes from the opcode table. */
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+
+ /* send: decode the message descriptor on a second line.  "space" tracks
+  * whether a separator is needed before the next item. */
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ newline (file);
+ pad (file, 16);
+ space = 0;
+ err |= control (file, "target function", target_function,
+ inst->bits3.generic.msg_target, &space);
+ switch (inst->bits3.generic.msg_target) {
+ case BRW_MESSAGE_TARGET_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_MESSAGE_TARGET_SAMPLER:
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
+ /* The scoreboard clear bit is folded into the printed message
+  * control value as bit 3. */
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ break;
+ case BRW_MESSAGE_TARGET_URB:
+ format (file, " %d", inst->bits3.urb.offset);
+ space = 1;
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
+ break;
+ default:
+ format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ break;
+ }
+ if (space)
+ string (file, " ");
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ /* Instruction options, padded out to column 64. */
+ pad (file, 64);
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+ err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
+
+
+/* Disassemble count instructions starting at inst to file, followed by a
+ * blank line.
+ *
+ * Stops at the first instruction that fails to decode and returns its error
+ * value (the blank line is then not written); returns 0 when every
+ * instruction decoded.  The original sent the instructions to stderr and
+ * only the trailing newline to file.
+ */
+int brw_disasm (FILE *file,
+                const struct brw_instruction *inst,
+                unsigned count)
+{
+    unsigned i;
+
+    for (i = 0; i < count; i++) {
+        int err = brw_disasm_insn (file, &inst[i]);
+
+        if (err)
+            return err;
+    }
+
+    fprintf (file, "\n");
+    return 0;
+}
+
diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
new file mode 100644
index 0000000000..ba5b109c48
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef BRW_DISASM_H
+#define BRW_DISASM_H
+
+#include <stdio.h>
+
+struct brw_instruction;
+
+/* Disassemble the single instruction at inst as text.  Returns 0 on success
+ * and nonzero when part of the instruction could not be decoded. */
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
+/* Disassemble count consecutive instructions starting at inst.  Stops at the
+ * first instruction that fails to decode and returns its error value;
+ * returns 0 otherwise. */
+int brw_disasm (FILE *file,
+ const struct brw_instruction *inst,
+ unsigned count);
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
new file mode 100644
index 0000000000..852fd22982
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -0,0 +1,291 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
+
+#include "brw_batchbuffer.h"
+
+
+/* Hardware topology for each primitive type; brw_set_prim() indexes it
+ * with the primitive mode passed to the draw call.  The table has
+ * PIPE_PRIM_POLYGON+1 slots and lists ten entries -- presumably one per
+ * PIPE_PRIM_* value in order; confirm against the PIPE_PRIM_* definitions.
+ */
+static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
+ _3DPRIM_POINTLIST,
+ _3DPRIM_LINELIST,
+ _3DPRIM_LINELOOP,
+ _3DPRIM_LINESTRIP,
+ _3DPRIM_TRILIST,
+ _3DPRIM_TRISTRIP,
+ _3DPRIM_TRIFAN,
+ _3DPRIM_QUADLIST,
+ _3DPRIM_QUADSTRIP,
+ _3DPRIM_POLYGON
+};
+
+
+
+/* Record the primitive for the coming draw and return its hardware
+ * topology.
+ *
+ * A change of primitive raises BRW_NEW_PRIMITIVE so state is re-validated,
+ * and a change of the reduced primitive additionally raises
+ * BRW_NEW_REDUCED_PRIMITIVE.  Not the nicest approach: ideally every program
+ * would cope with any active primitive, but that may not be realistic.
+ *
+ * prim is used unchecked as an index into prim_to_hw_prim.
+ */
+static int brw_set_prim(struct brw_context *brw, unsigned prim )
+{
+    unsigned reduced;
+
+    if (BRW_DEBUG & DEBUG_PRIMS)
+        debug_printf("PRIM: %s\n", u_prim_name(prim));
+
+    /* Nothing to update when the primitive is unchanged. */
+    if (prim == brw->primitive)
+        return prim_to_hw_prim[prim];
+
+    brw->primitive = prim;
+    brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+    reduced = u_reduced_prim(prim);
+    if (reduced != brw->reduced_primitive) {
+        brw->reduced_primitive = reduced;
+        brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+    }
+
+    return prim_to_hw_prim[prim];
+}
+
+
+
+/* Build a 3D primitive packet for the given vertex range and topology and
+ * copy it into the batch buffer.
+ *
+ * For indexed draws the start location is offset by
+ * brw->ib.start_vertex_offset.  Nothing is emitted when count is zero.
+ * Returns 0 on success or the error from brw_batchbuffer_data().
+ */
+static int brw_emit_prim(struct brw_context *brw,
+ unsigned start,
+ unsigned count,
+ boolean indexed,
+ uint32_t hw_prim)
+{
+ struct brw_3d_primitive prim_packet;
+ int ret;
+
+ if (BRW_DEBUG & DEBUG_PRIMS)
+ debug_printf("%s start %d count %d indexed %d hw_prim %d\n",
+ __FUNCTION__, start, count, indexed, hw_prim);
+
+ prim_packet.header.opcode = CMD_3D_PRIM;
+ /* Length is the packet size in dwords minus two. */
+ prim_packet.header.length = sizeof(prim_packet)/4 - 2;
+ prim_packet.header.pad = 0;
+ prim_packet.header.topology = hw_prim;
+ prim_packet.header.indexed = indexed;
+
+ prim_packet.verts_per_instance = count;
+ prim_packet.start_vert_location = start;
+ if (indexed)
+ prim_packet.start_vert_location += brw->ib.start_vertex_offset;
+ prim_packet.instance_count = 1;
+ prim_packet.start_instance_location = 0;
+ prim_packet.base_vert_location = 0; /* prim->basevertex; XXX: add this to gallium */
+
+
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt instruction/state cache
+ * and missed flushes of the render cache as it heads to other parts of
+ * the driver besides the draw code.
+ *
+ * Both flush blocks below are currently compiled out with "if (0)".
+ */
+ if (0) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+ ADVANCE_BATCH();
+ }
+ if (prim_packet.verts_per_instance) {
+ ret = brw_batchbuffer_data( brw->batch, &prim_packet,
+ sizeof(prim_packet), LOOP_CLIPRECTS);
+ if (ret)
+ return ret;
+ }
+ if (0) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+ ADVANCE_BATCH();
+ }
+
+ return 0;
+}
+
+
+/* One attempt at a draw: validate state, check that the validated buffers
+ * fit in the aperture, upload state and emit the primitive.
+ *
+ * May fail if out of video memory for texture or vbo upload, or on fallback
+ * conditions.  Returns 0 on success or the first nonzero error from one of
+ * the steps; the caller is expected to flush and retry.
+ */
+static int
+try_draw_range_elements(struct brw_context *brw,
+                        struct pipe_buffer *index_buffer,
+                        unsigned hw_prim,
+                        unsigned start, unsigned count)
+{
+    int ret = brw_validate_state(brw);
+    if (ret != 0)
+        return ret;
+
+    /* Check that we can fit our state in with our existing batchbuffer, or
+     * flush otherwise.
+     */
+    ret = brw->sws->check_aperture_space(brw->sws,
+                                         brw->state.validated_bos,
+                                         brw->state.validated_bo_count);
+    if (ret != 0)
+        return ret;
+
+    ret = brw_upload_state(brw);
+    if (ret != 0)
+        return ret;
+
+    ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim);
+    if (ret != 0)
+        return ret;
+
+    if (brw->flags.always_flush_batch)
+        brw_context_flush( brw );
+
+    return 0;
+}
+
+
+/* Draw entry point taking an optional index buffer and an index range.
+ *
+ * Records the primitive, index buffer and index range in the context
+ * (flagging the matching dirty bits when they change), then tries to draw.
+ * If the first attempt fails the context is flushed and the draw retried
+ * once; a second failure only trips an assert.  Always returns TRUE.
+ */
+static boolean
+brw_draw_range_elements(struct pipe_context *pipe,
+                        struct pipe_buffer *index_buffer,
+                        unsigned index_size,
+                        unsigned min_index,
+                        unsigned max_index,
+                        unsigned mode, unsigned start, unsigned count)
+{
+    struct brw_context *brw = brw_context(pipe);
+    uint32_t hw_prim = brw_set_prim(brw, mode);
+    int index_changed;
+    int range_changed;
+    int ret;
+
+    if (BRW_DEBUG & DEBUG_PRIMS)
+        debug_printf("PRIM: %s start %d count %d index_buffer %p\n",
+                     u_prim_name(mode), start, count, (void *)index_buffer);
+
+    /* Potentially trigger upload of new index buffer.
+     *
+     * XXX: do we need to go through state validation to achieve this?
+     * Could just call upload code directly.
+     */
+    index_changed = (brw->curr.index_buffer != index_buffer ||
+                     brw->curr.index_size != index_size);
+    if (index_changed) {
+        pipe_buffer_reference( &brw->curr.index_buffer, index_buffer );
+        brw->curr.index_size = index_size;
+        brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER;
+    }
+
+    /* XXX: do we really care?
+     */
+    range_changed = (brw->curr.min_index != min_index ||
+                     brw->curr.max_index != max_index);
+    if (range_changed) {
+        brw->curr.min_index = min_index;
+        brw->curr.max_index = max_index;
+        brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE;
+    }
+
+    /* First attempt; on failure flush and retry once. */
+    ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
+    if (ret != 0) {
+        brw_context_flush( brw );
+        ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
+        assert(ret == 0);
+    }
+
+    return TRUE;
+}
+
+/* Indexed draw without a caller-supplied index range: forwards to
+ * brw_draw_range_elements() with the widest range, 0 .. 0xffffffff.
+ */
+static boolean
+brw_draw_elements(struct pipe_context *pipe,
+                  struct pipe_buffer *index_buffer,
+                  unsigned index_size,
+                  unsigned mode,
+                  unsigned start, unsigned count)
+{
+    const unsigned min_index = 0;
+    const unsigned max_index = 0xffffffff;
+
+    return brw_draw_range_elements( pipe, index_buffer, index_size,
+                                    min_index, max_index,
+                                    mode, start, count );
+}
+
+/* Non-indexed draw: forwards to brw_draw_elements() with no index buffer
+ * and an index size of 0.
+ */
+static boolean
+brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
+                unsigned start, unsigned count)
+{
+    struct pipe_buffer *no_index_buffer = NULL;
+
+    return brw_draw_elements(pipe, no_index_buffer, 0, mode, start, count);
+}
+
+
+
+/* Install the draw entry points on the context and create the two upload
+ * helpers used for data in user buffers (vertex and index).
+ *
+ * Returns FALSE as soon as one of the upload helpers cannot be created,
+ * TRUE otherwise.
+ */
+boolean brw_draw_init( struct brw_context *brw )
+{
+    /* Register our drawing functions. */
+    brw->base.draw_arrays = brw_draw_arrays;
+    brw->base.draw_elements = brw_draw_elements;
+    brw->base.draw_range_elements = brw_draw_range_elements;
+
+    /* Upload helper for vertex data. */
+    brw->vb.upload_vertex = u_upload_create( brw->base.screen,
+                                             128 * 1024,
+                                             64,
+                                             PIPE_BUFFER_USAGE_VERTEX );
+    if (!brw->vb.upload_vertex)
+        return FALSE;
+
+    /* Upload helper for index data. */
+    brw->vb.upload_index = u_upload_create( brw->base.screen,
+                                            32 * 1024,
+                                            64,
+                                            PIPE_BUFFER_USAGE_INDEX );
+    if (!brw->vb.upload_index)
+        return FALSE;
+
+    return TRUE;
+}
+
+/* Destroy the two upload helpers created by brw_draw_init() and drop the
+ * context's reference to the index buffer object.
+ */
+void brw_draw_cleanup( struct brw_context *brw )
+{
+ u_upload_destroy( brw->vb.upload_vertex );
+ u_upload_destroy( brw->vb.upload_index );
+
+ bo_reference(&brw->ib.bo, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
new file mode 100644
index 0000000000..8dc5dbce62
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -0,0 +1,39 @@
+ /**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "brw_types.h"
+
+struct brw_context;
+
+/* Installs the draw callbacks and creates the vertex/index upload
+ * managers.  Returns FALSE if an upload manager cannot be created.
+ */
+boolean brw_draw_init( struct brw_context *brw );
+
+/* Destroys the upload managers and drops the index buffer reference.
+ */
+void brw_draw_cleanup( struct brw_context *brw );
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
new file mode 100644
index 0000000000..a27da5f1c1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -0,0 +1,542 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_context.h"
+
+#include "util/u_upload_mgr.h"
+#include "util/u_math.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+#include "brw_debug.h"
+
+
+
+
+static unsigned brw_translate_surface_format( unsigned id )
+{
+ switch (id) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return BRW_SURFACEFORMAT_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return BRW_SURFACEFORMAT_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return BRW_SURFACEFORMAT_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return BRW_SURFACEFORMAT_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return BRW_SURFACEFORMAT_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return BRW_SURFACEFORMAT_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return BRW_SURFACEFORMAT_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return BRW_SURFACEFORMAT_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return BRW_SURFACEFORMAT_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return BRW_SURFACEFORMAT_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return BRW_SURFACEFORMAT_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return BRW_SURFACEFORMAT_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return BRW_SURFACEFORMAT_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return BRW_SURFACEFORMAT_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+/**
+ * Translate the value 1, 2 or 4 into BRW_INDEX_BYTE, BRW_INDEX_WORD or
+ * BRW_INDEX_DWORD respectively.  Anything else trips assert(0) and
+ * yields 0.
+ */
+static unsigned get_index_type(int type)
+{
+   if (type == 1)
+      return BRW_INDEX_BYTE;
+
+   if (type == 2)
+      return BRW_INDEX_WORD;
+
+   if (type == 4)
+      return BRW_INDEX_DWORD;
+
+   assert(0);
+   return 0;
+}
+
+
+/**
+ * "prepare" hook of the brw_vertices state atom.
+ *
+ * For every current vertex buffer this resolves a winsys buffer (bo)
+ * and a byte offset, uploading user buffers through the vertex upload
+ * manager first, and records offset, stride and vertex count in
+ * brw->vb.vb[i] together with a reference on the bo.  Afterwards all
+ * of those bos are handed to brw_add_validated_bo().
+ *
+ * \return 0 on success, or the non-zero result of u_upload_buffer().
+ */
+static int brw_prepare_vertices(struct brw_context *brw)
+{
+ unsigned int min_index = brw->curr.min_index;
+ unsigned int max_index = brw->curr.max_index;
+ GLuint i;
+ int ret;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+
+
+ for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
+ struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i];
+ struct brw_winsys_buffer *bo;
+ struct pipe_buffer *upload_buf = NULL;
+ unsigned offset;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n",
+ __FUNCTION__, i,
+ brw_buffer_is_user_buffer(vb->buffer),
+ vb->buffer_offset,
+ vb->buffer->size,
+ vb->stride);
+
+ if (brw_buffer_is_user_buffer(vb->buffer)) {
+
+ /* XXX: simplify this. Stop the state trackers from generating
+ * zero-stride buffers & have them use additional constants (or
+ * add support for >1 constant buffer) instead.
+ */
+ /* Zero-stride buffers upload everything past buffer_offset;
+ * otherwise the larger of that and the span covered by
+ * min_index..max_index is used.
+ * NOTE(review): with max_index == 0xffffffff (as passed by
+ * brw_draw_elements) "max_index + 1" wraps to 0 -- confirm
+ * this is intended.
+ */
+ unsigned size = (vb->stride == 0 ?
+ vb->buffer->size - vb->buffer_offset :
+ MAX2(vb->buffer->size - vb->buffer_offset,
+ vb->stride * (max_index + 1 - min_index)));
+
+ /* The upload starts at the vertex addressed by min_index. */
+ ret = u_upload_buffer( brw->vb.upload_vertex,
+ vb->buffer_offset + min_index * vb->stride,
+ size,
+ vb->buffer,
+ &offset,
+ &upload_buf );
+ if (ret)
+ return ret;
+
+ bo = brw_buffer(upload_buf)->bo;
+
+ assert(offset + size <= bo->size);
+ }
+ else
+ {
+ /* Not a user buffer: use its bo directly. */
+ offset = vb->buffer_offset;
+ bo = brw_buffer(vb->buffer)->bo;
+ }
+
+ assert(offset < bo->size);
+
+ /* Set up post-upload info about this vertex buffer:
+ */
+ brw->vb.vb[i].offset = offset;
+ brw->vb.vb[i].stride = vb->stride;
+ /* A zero stride counts as one vertex; otherwise count how many
+ * whole strides fit between offset and the end of the bo.
+ */
+ brw->vb.vb[i].vertex_count = (vb->stride == 0 ?
+ 1 :
+ (bo->size - offset) / vb->stride);
+
+ bo_reference( &brw->vb.vb[i].bo, bo );
+
+ /* Don't need to retain this reference. We have a reference on
+ * the underlying winsys buffer:
+ */
+ pipe_buffer_reference( &upload_buf, NULL );
+ }
+
+ brw->vb.nr_vb = i;
+ brw_prepare_query_begin(brw);
+
+ for (i = 0; i < brw->vb.nr_vb; i++) {
+ brw_add_validated_bo(brw, brw->vb.vb[i].bo);
+ }
+
+ return 0;
+}
+
+/**
+ * Emit one CMD_VERTEX_BUFFER packet describing all brw->vb.nr_vb
+ * prepared vertex buffers: a header dword followed by four dwords per
+ * buffer.  Emits nothing when there are no vertex buffers.
+ *
+ * \return always 0.
+ */
+static int brw_emit_vertex_buffers( struct brw_context *brw )
+{
+ int i;
+
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), just bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (brw->vb.nr_vb == 0) {
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s: no active vertex buffers\n", __FUNCTION__);
+
+ return 0;
+ }
+
+ /* Emit VB state packets.
+ */
+ BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS);
+ /* The length field is the total dword count minus two. */
+ OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+ ((1 + brw->vb.nr_vb * 4) - 2));
+
+ for (i = 0; i < brw->vb.nr_vb; i++) {
+ /* dword 0: buffer index, access mode and pitch (stride) */
+ OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+ BRW_VB0_ACCESS_VERTEXDATA |
+ (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT));
+ /* dword 1: relocated start address */
+ OUT_RELOC(brw->vb.vb[i].bo,
+ BRW_USAGE_VERTEX,
+ brw->vb.vb[i].offset);
+ /* dword 2: on IGDNG a relocation to the last byte of the bo,
+ * otherwise the vertex count (0 for zero-stride buffers).
+ */
+ if (BRW_IS_IGDNG(brw)) {
+ OUT_RELOC(brw->vb.vb[i].bo,
+ BRW_USAGE_VERTEX,
+ brw->vb.vb[i].bo->size - 1);
+ } else
+ OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0);
+ OUT_BATCH(0); /* Instance data step rate */
+ }
+ ADVANCE_BATCH();
+ return 0;
+}
+
+
+
+
+/**
+ * Emit the CMD_VERTEX_ELEMENT packet for the current vertex elements:
+ * a header dword plus two dwords per element.  With no elements a
+ * single pad element producing (0, 0, 0, 1.0) is emitted instead.
+ * brw_emit_query_begin() is called first in either case.
+ *
+ * \return always 0.
+ */
+static int brw_emit_vertex_elements(struct brw_context *brw)
+{
+ GLuint nr = brw->curr.num_vertex_elements;
+ GLuint i;
+
+ brw_emit_query_begin(brw);
+
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), emit a single pad
+ * VERTEX_ELEMENT struct and bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (nr == 0) {
+ BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+ OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+ ADVANCE_BATCH();
+ return 0;
+ }
+
+ /* Now emit vertex element (VEP) state packets.
+ *
+ */
+ BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
+ for (i = 0; i < nr; i++) {
+ const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
+ uint32_t format = brw_translate_surface_format( input->src_format );
+ uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+ /* The cases fall through on purpose: components the source does
+ * not supply are filled with 0, and the fourth one with 1.0.
+ * With four source components nothing is overridden.
+ */
+ switch (input->nr_components) {
+ case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
+ case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
+ case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
+ case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+ break;
+ }
+
+ /* dword 0: source vertex buffer, format and byte offset */
+ OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (format << BRW_VE0_FORMAT_SHIFT) |
+ (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
+
+ /* dword 1: per-component controls; only the non-IGDNG variant
+ * also carries a destination offset of i * 4.
+ */
+ if (BRW_IS_IGDNG(brw))
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+ ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
+ }
+ ADVANCE_BATCH();
+ return 0;
+}
+
+
+/**
+ * "emit" hook of the brw_vertices atom: vertex buffer state first,
+ * then vertex element state.  Stops at, and returns, the first
+ * non-zero result; returns 0 when both steps succeed.
+ */
+static int brw_emit_vertices( struct brw_context *brw )
+{
+   int ret = brw_emit_vertex_buffers( brw );
+
+   if (ret == 0)
+      ret = brw_emit_vertex_elements( brw );
+
+   return ret;
+}
+
+
+/* State atom for vertex buffers and vertex elements.  Flagged by a
+ * change of index range or vertex buffers, and by BRW_NEW_BATCH.
+ */
+const struct brw_tracked_state brw_vertices = {
+ .dirty = {
+ .mesa = (PIPE_NEW_INDEX_RANGE |
+ PIPE_NEW_VERTEX_BUFFER),
+ .brw = BRW_NEW_BATCH,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_vertices,
+ .emit = brw_emit_vertices,
+};
+
+
+/**
+ * "prepare" hook of the brw_indices atom.
+ *
+ * Resolves the current index buffer to a winsys buffer (uploading a
+ * user buffer through the index upload manager if necessary), stores
+ * the resulting start index in brw->ib.start_vertex_offset, and raises
+ * BRW_NEW_INDEX_BUFFER when the bo or its recorded size changed.
+ * Does nothing when no index buffer is bound.
+ *
+ * \return 0 on success, or the non-zero result of u_upload_buffer().
+ */
+static int brw_prepare_indices(struct brw_context *brw)
+{
+ struct pipe_buffer *index_buffer = brw->curr.index_buffer;
+ struct pipe_buffer *upload_buf = NULL;
+ struct brw_winsys_buffer *bo = NULL;
+ GLuint offset;
+ GLuint index_size;
+ GLuint ib_size;
+ int ret;
+
+ if (index_buffer == NULL)
+ return 0;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s: index_size:%d index_buffer->size:%d\n",
+ __FUNCTION__,
+ brw->curr.index_size,
+ brw->curr.index_buffer->size);
+
+ ib_size = index_buffer->size;
+ index_size = brw->curr.index_size;
+
+ /* Turn userbuffer into a proper hardware buffer?
+ */
+ if (brw_buffer_is_user_buffer(index_buffer)) {
+
+ /* Upload the whole user buffer; "offset" receives its position
+ * inside the upload manager's buffer.
+ */
+ ret = u_upload_buffer( brw->vb.upload_index,
+ 0,
+ ib_size,
+ index_buffer,
+ &offset,
+ &upload_buf );
+ if (ret)
+ return ret;
+
+ bo = brw_buffer(upload_buf)->bo;
+
+ /* XXX: annotate the userbuffer with the upload information so
+ * that successive calls don't get re-uploaded.
+ */
+ }
+ else {
+ /* Not a user buffer: use its bo from the start, and take the size
+ * from the bo rather than from the pipe buffer.
+ */
+ bo = brw_buffer(index_buffer)->bo;
+ ib_size = bo->size;
+ offset = 0;
+ }
+
+ /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the
+ * index buffer state when we're just moving the start index of our
+ * drawing.
+ *
+ * In gallium this will happen in the case where successive draw
+ * calls are made with (distinct?) userbuffers, but the upload_mgr
+ * places the data into a single winsys buffer.
+ *
+ * This statechange doesn't raise any state flags and is always
+ * just merged into the final draw packet:
+ */
+ if (1) {
+ /* The byte offset must be a multiple of the index size so it can
+ * be expressed as a whole number of indices.
+ */
+ assert((offset & (index_size - 1)) == 0);
+ brw->ib.start_vertex_offset = offset / index_size;
+ }
+
+ /* These statechanges trigger a new CMD_INDEX_BUFFER packet:
+ */
+ if (brw->ib.bo != bo ||
+ brw->ib.size != ib_size)
+ {
+ bo_reference(&brw->ib.bo, bo);
+ brw->ib.size = ib_size;
+ brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+ }
+
+ /* brw->ib.bo holds its own reference; the upload buffer reference
+ * (NULL when nothing was uploaded) is no longer needed.
+ */
+ pipe_buffer_reference( &upload_buf, NULL );
+ brw_add_validated_bo(brw, brw->ib.bo);
+ return 0;
+}
+
+/* State atom that prepares the index buffer.  Flagged only by
+ * PIPE_NEW_INDEX_BUFFER; it has a prepare hook but no emit hook.
+ */
+const struct brw_tracked_state brw_indices = {
+ .dirty = {
+ .mesa = PIPE_NEW_INDEX_BUFFER,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_indices,
+};
+
+/**
+ * "emit" hook of the brw_index_buffer atom: writes a CMD_INDEX_BUFFER
+ * packet (header, start relocation, end relocation, one zero dword)
+ * for brw->ib.bo.  Emits nothing when no index bo is bound.
+ *
+ * \return always 0.
+ */
+static int brw_emit_index_buffer(struct brw_context *brw)
+{
+   /* Emit the indexbuffer packet:
+    */
+   if (brw->ib.bo)
+   {
+      struct brw_indexbuffer ib;
+
+      memset(&ib, 0, sizeof(ib));
+
+      ib.header.bits.opcode = CMD_INDEX_BUFFER;
+      ib.header.bits.length = sizeof(ib)/4 - 2;
+
+      /* The index format follows from the size of a single index
+       * (1, 2 or 4).  brw->ib.size is the byte size of the whole
+       * buffer and would make get_index_type() assert.
+       *
+       * NOTE(review): the brw_index_buffer atom is flagged by
+       * BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER only, so a change of
+       * index_size alone does not re-emit this packet -- confirm
+       * whether that needs its own dirty flag.
+       */
+      ib.header.bits.index_format = get_index_type(brw->curr.index_size);
+      ib.header.bits.cut_index_enable = 0;
+
+      BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+      OUT_BATCH( ib.header.dword );
+      OUT_RELOC(brw->ib.bo,
+                BRW_USAGE_VERTEX,
+                brw->ib.offset);
+      OUT_RELOC(brw->ib.bo,
+                BRW_USAGE_VERTEX,
+                brw->ib.offset + brw->ib.size - 1);
+      OUT_BATCH( 0 );
+      ADVANCE_BATCH();
+   }
+
+   return 0;
+}
+
+/* State atom that emits the index buffer packet.  Flagged by
+ * BRW_NEW_BATCH or BRW_NEW_INDEX_BUFFER (the latter is raised by
+ * brw_prepare_indices); it has an emit hook but no prepare hook.
+ */
+const struct brw_tracked_state brw_index_buffer = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
+ .cache = 0,
+ },
+ .emit = brw_emit_index_buffer,
+};
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
new file mode 100644
index 0000000000..a8fcb5f97e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -0,0 +1,262 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/**
+ * Select predication for following instructions from a flag value.
+ *
+ * 0xff turns predication off.  Any other value turns normal
+ * predication on, first loading the flag register with it (through an
+ * unpredicated MOV emitted in a pushed copy of the state) unless
+ * p->flag_value already holds that value.
+ *
+ * Open question kept from the original author: how does predicate
+ * control work when execution_size != 8?  Is a test/set for 0xffff
+ * needed when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+   /* Cleared first so that the MOV below is not itself predicated. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   if (value == 0xff)
+      return;
+
+   if (p->flag_value != value) {
+      brw_push_insn_state(p);
+      brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+      p->flag_value = value;
+      brw_pop_insn_state(p);
+   }
+
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+}
+
+/** Store \p pc as the predicate control of the current instruction state. */
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.predicate_control = pc;
+}
+
+/** Store \p conditional in the destreg__conditionalmod field of the current instruction state. */
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.destreg__conditionalmod = conditional;
+}
+
+/** Store \p access_mode as the access mode of the current instruction state. */
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.access_mode = access_mode;
+}
+
+/** Store \p compression_control in the current instruction state. */
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.compression_control = compression_control;
+}
+
+/** Store \p value as the mask control of the current instruction state. */
+void brw_set_mask_control( struct brw_compile *p, GLuint value )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.mask_control = value;
+}
+
+/** Store \p value as the saturate setting of the current instruction state. */
+void brw_set_saturate( struct brw_compile *p, GLuint value )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.saturate = value;
+}
+
+/**
+ * Push the instruction state: duplicate the current entry into the
+ * next stack slot and make that copy current.  Asserts that the stack
+ * is not already at its last slot.
+ */
+void brw_push_insn_state( struct brw_compile *p )
+{
+   struct brw_instruction *top = p->current;
+
+   assert(top != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+   memcpy(top + 1, top, sizeof *top);
+   p->current = top + 1;
+}
+
+/**
+ * Pop the instruction state, making the previous stack entry current
+ * again.  Asserts that the stack is not already at its base.
+ */
+void brw_pop_insn_state( struct brw_compile *p )
+{
+   assert(p->current != p->stack);
+   p->current -= 1;
+}
+
+
+/***********************************************************************
+ * Reset a brw_compile for a fresh program: no instructions emitted, an
+ * all-zero instruction state at the base of the state stack, no error,
+ * no pending labels or calls, and a set of default state values.
+ *
+ * \param brw  context the compile belongs to
+ * \param p    compile state to initialise
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+   p->brw = brw;
+   p->nr_insn = 0;
+   p->current = p->stack;
+   memset(p->current, 0, sizeof(p->current[0]));
+
+   /* These are read later by brw_get_program(), brw_save_label() and
+    * brw_save_call(); don't rely on the caller having zeroed them.
+    */
+   p->error = FALSE;
+   p->first_label = NULL;
+   p->first_call = NULL;
+
+   /* Some defaults?
+    */
+   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+   brw_set_saturate(p, 0);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   /* 0xff means "no predication"; flag_value is not consulted here. */
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+
+/**
+ * Finish a program and hand back its instruction store.
+ *
+ * Eight NOPs are appended first, in every case.  If the compile has
+ * its error flag set, PIPE_ERROR_BAD_INPUT is returned and the outputs
+ * are left untouched.
+ *
+ * \param p     compile state
+ * \param data  receives a pointer to the instruction store
+ * \param sz    receives the program size in bytes
+ * \return PIPE_OK or PIPE_ERROR_BAD_INPUT
+ */
+enum pipe_error brw_get_program( struct brw_compile *p,
+                                 const GLuint **data,
+                                 GLuint *sz )
+{
+   GLuint pad;
+
+   for (pad = 8; pad > 0; pad--)
+      brw_NOP(p);
+
+   /* Is the generated program malformed for some reason?
+    */
+   if (p->error)
+      return PIPE_ERROR_BAD_INPUT;
+
+   *data = (const GLuint *)p->store;
+   *sz = p->nr_insn * sizeof(struct brw_instruction);
+   return PIPE_OK;
+}
+
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however. Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and function here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these.
+ *
+ * Nodes are prepended to brw_compile::first_label by brw_save_label()
+ * and freed by brw_resolve_cals().
+ */
+struct brw_eu_label
+{
+ GLuint label; /**< the label number */
+ GLuint position; /**< the position of the brw instruction for this label */
+ struct brw_eu_label *next; /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these.
+ *
+ * Nodes are prepended to brw_compile::first_call by brw_save_call()
+ * and consumed, then freed, by brw_resolve_cals().
+ */
+struct brw_eu_call
+{
+ GLuint call_inst_pos; /**< location of the CAL instruction */
+ GLuint label; /**< number of the label this call targets */
+ struct brw_eu_call *next; /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB.
+ *
+ * Records that label number \p l lives at instruction index
+ * \p position by prepending a node to c->first_label.
+ *
+ * If the node cannot be allocated the compile is flagged with
+ * c->error (so brw_get_program() fails) and nothing is recorded.
+ */
+void
+brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
+{
+   struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label);
+
+   if (label == NULL) {
+      /* Out of memory: don't dereference the failed allocation. */
+      c->error = TRUE;
+      return;
+   }
+
+   label->label = l;
+   label->position = position;
+   label->next = c->first_label;
+   c->first_label = label;
+}
+
+
+/**
+ * Called for each OPCODE_CAL.
+ *
+ * Records that the instruction at index \p call_pos calls label number
+ * \p label by prepending a node to c->first_call.
+ *
+ * If the node cannot be allocated the compile is flagged with
+ * c->error (so brw_get_program() fails) and nothing is recorded.
+ */
+void
+brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
+{
+   struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call);
+
+   if (call == NULL) {
+      /* Out of memory: don't dereference the failed allocation. */
+      c->error = TRUE;
+      return;
+   }
+
+   call->call_inst_pos = call_pos;
+   call->label = label;
+   call->next = c->first_call;
+   c->first_call = call;
+}
+
+
+/**
+ * Find label number \p l in the compile's label list and return the
+ * instruction position recorded for it.  Calls abort() when no such
+ * label was saved.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, unsigned l)
+{
+   const struct brw_eu_label *it = c->first_label;
+
+   while (it != NULL) {
+      if (it->label == l)
+         return it->position;
+      it = it->next;
+   }
+
+   abort(); /* should never happen */
+   return ~0;
+}
+
+
+/**
+ * Resolve subroutine calls once code generation is finished.
+ *
+ * Every saved call has src1 of its CAL instruction set to an immediate
+ * holding the distance, in instructions, to the called label times 16
+ * (presumably the size of one instruction in bytes).  Both the call
+ * list and the label list are then freed and reset to empty.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+   const struct brw_eu_call *cal;
+   struct brw_eu_call *dead_call;
+   struct brw_eu_label *dead_label;
+
+   /* Patch each CAL so that it jumps to its subroutine. */
+   for (cal = c->first_call; cal != NULL; cal = cal->next) {
+      const GLuint target = brw_lookup_label(c, cal->label);
+      struct brw_instruction *cal_insn = c->store + cal->call_inst_pos;
+      struct brw_instruction *sub_insn = c->store + target;
+      const GLint distance = sub_insn - cal_insn;
+
+      brw_set_src1(cal_insn, brw_imm_d(distance * 16));
+   }
+
+   /* Tear down the call list ... */
+   while ((dead_call = c->first_call) != NULL) {
+      c->first_call = dead_call->next;
+      FREE(dead_call);
+   }
+
+   /* ... and the label list. */
+   while ((dead_label = c->first_label) != NULL) {
+      c->first_label = dead_label->next;
+      FREE(dead_label);
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
new file mode 100644
index 0000000000..af509b2e5f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -0,0 +1,992 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+#define BRW_WRITEMASK_NONE 0x00
+#define BRW_WRITEMASK_X 0x01
+#define BRW_WRITEMASK_Y 0x02
+#define BRW_WRITEMASK_XY 0x03
+#define BRW_WRITEMASK_Z 0x04
+#define BRW_WRITEMASK_XZ 0x05
+#define BRW_WRITEMASK_YZ 0x06
+#define BRW_WRITEMASK_XYZ 0x07
+#define BRW_WRITEMASK_W 0x08
+#define BRW_WRITEMASK_XW 0x09
+#define BRW_WRITEMASK_YW 0x0A
+#define BRW_WRITEMASK_XYW 0x0B
+#define BRW_WRITEMASK_ZW 0x0C
+#define BRW_WRITEMASK_XZW 0x0D
+#define BRW_WRITEMASK_YZW 0x0E
+#define BRW_WRITEMASK_XYZW 0x0F
+
+
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ *
+ * The leading bitfields add up to 32 bits and dw1 is a second 32-bit
+ * word, so a register description is two dwords.  dw1 is either the
+ * swizzle/writemask/indirect-offset bits or a float, signed or
+ * unsigned value.
+ */
+struct brw_reg
+{
+ GLuint type:4;
+ GLuint file:2;
+ GLuint nr:8;
+ GLuint subnr:5; /* :1 in align16 */
+ GLuint negate:1; /* source only */
+ GLuint abs:1; /* source only */
+ GLuint vstride:4; /* source only */
+ GLuint width:3; /* src only, align1 only */
+ GLuint hstride:2; /* align1 only */
+ GLuint address_mode:1; /* relative addressing, hopefully! */
+ GLuint pad0:1;
+
+ union {
+ struct {
+ GLuint swizzle:8; /* src only, align16 only */
+ GLuint writemask:4; /* dest only, align16 only */
+ GLint indirect_offset:10; /* relative addressing offset */
+ GLuint pad1:10; /* two dwords total */
+ } bits;
+
+ GLfloat f;
+ GLint d;
+ GLuint ud;
+ } dw1;
+};
+
+
+/* Address sub-register number plus a signed 10-bit offset, padded to
+ * 32 bits.
+ */
+struct brw_indirect {
+ GLuint addr_subnr:4;
+ GLint addr_offset:10;
+ GLuint pad:18;
+};
+
+
+struct brw_eu_label;
+struct brw_eu_call;
+
+
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 10000
+
+/* State for assembling one EU program. */
+struct brw_compile {
+ /* Emitted instructions; nr_insn of them are valid. */
+ struct brw_instruction store[BRW_EU_MAX_INSN];
+ GLuint nr_insn;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current; /* points into stack[] */
+
+ /* Value last loaded into the flag register by
+ * brw_set_predicate_control_flag_value().
+ */
+ GLuint flag_value;
+ GLboolean single_program_flow;
+ struct brw_context *brw;
+
+ struct brw_eu_label *first_label; /**< linked list of labels */
+ struct brw_eu_call *first_call; /**< linked list of CALs */
+
+ /* When set, brw_get_program() returns PIPE_ERROR_BAD_INPUT. */
+ boolean error;
+};
+
+
+void
+brw_save_label(struct brw_compile *c, unsigned label, GLuint position);
+
+void
+brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos);
+
+void
+brw_resolve_cals(struct brw_compile *c);
+
+
+
+/**
+ * Size in bytes of one element of a BRW_REGISTER_TYPE_x: 4 for UD, D
+ * and F; 2 for HF, UW and W; 1 for UB and B; 0 for any other type.
+ */
+static INLINE int type_sz( GLuint type )
+{
+   int bytes = 0;
+
+   switch (type) {
+   case BRW_REGISTER_TYPE_UD:
+   case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_F:
+      bytes = 4;
+      break;
+
+   case BRW_REGISTER_TYPE_HF:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_W:
+      bytes = 2;
+      break;
+
+   case BRW_REGISTER_TYPE_UB:
+   case BRW_REGISTER_TYPE_B:
+      bytes = 1;
+      break;
+
+   default:
+      break;
+   }
+
+   return bytes;
+}
+
+/**
+ * Construct a brw_reg.
+ *
+ * The register number is range-checked (by assertion) against the
+ * limit of the register file it lives in.  \p subnr is given in
+ * elements of \p type and stored in bytes.  negate/abs are cleared,
+ * addressing is direct and the indirect offset is zero.
+ *
+ * \param file  one of the BRW_x_REGISTER_FILE values
+ * \param nr  register number/index
+ * \param subnr  register sub number
+ * \param type  one of BRW_REGISTER_TYPE_x
+ * \param vstride  one of BRW_VERTICAL_STRIDE_x
+ * \param width  one of BRW_WIDTH_x
+ * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle  one of BRW_SWIZZLE_x
+ * \param writemask  BRW_WRITEMASK_X/Y/Z/W bitfield
+ */
+static INLINE struct brw_reg brw_reg( GLuint file,
+                                      GLuint nr,
+                                      GLuint subnr,
+                                      GLuint type,
+                                      GLuint vstride,
+                                      GLuint width,
+                                      GLuint hstride,
+                                      GLuint swizzle,
+                                      GLuint writemask )
+{
+   struct brw_reg reg;
+
+   /* The limit on nr depends on the register FILE, not on the data
+    * type, so the file is what must be tested here.
+    */
+   if (file == BRW_GENERAL_REGISTER_FILE)
+      assert(nr < BRW_MAX_GRF);
+   else if (file == BRW_MESSAGE_REGISTER_FILE)
+      assert(nr < BRW_MAX_MRF);
+   else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(nr <= BRW_ARF_IP);
+
+   reg.type = type;
+   reg.file = file;
+   reg.nr = nr;
+   reg.subnr = subnr * type_sz(type);
+   reg.negate = 0;
+   reg.abs = 0;
+   reg.vstride = vstride;
+   reg.width = width;
+   reg.hstride = hstride;
+   reg.address_mode = BRW_ADDRESS_DIRECT;
+   reg.pad0 = 0;
+
+   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+    * set swizzle and writemask to W, as the lower bits of subnr will
+    * be lost when converted to align16.  This is probably too much to
+    * keep track of as you'd want it adjusted by suboffset(), etc.
+    * Perhaps fix up when converting to align16?
+    */
+   reg.dw1.bits.swizzle = swizzle;
+   reg.dw1.bits.writemask = writemask;
+   reg.dw1.bits.indirect_offset = 0;
+   reg.dw1.bits.pad1 = 0;
+   return reg;
+}
+
+/**
+ * Build a float[16] register: type F, vertical stride 16, width 16,
+ * horizontal stride 1, XYZW swizzle and writemask.
+ */
+static INLINE struct brw_reg brw_vec16_reg( GLuint file,
+                                            GLuint nr,
+                                            GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_16;
+   const GLuint width = BRW_WIDTH_16;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+}
+
+/**
+ * Build a float[8] register: type F, vertical stride 8, width 8,
+ * horizontal stride 1, XYZW swizzle and writemask.
+ */
+static INLINE struct brw_reg brw_vec8_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_8;
+   const GLuint width = BRW_WIDTH_8;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+}
+
+/**
+ * Build a float[4] register: type F, vertical stride 4, width 4,
+ * horizontal stride 1, XYZW swizzle and writemask.
+ */
+static INLINE struct brw_reg brw_vec4_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_4;
+   const GLuint width = BRW_WIDTH_4;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+}
+
+/**
+ * Build a float[2] register: type F, vertical stride 2, width 2,
+ * horizontal stride 1, XYXY swizzle and XY writemask.
+ */
+static INLINE struct brw_reg brw_vec2_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_2;
+   const GLuint width = BRW_WIDTH_2;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYXY, BRW_WRITEMASK_XY);
+}
+
+/** Build a scalar float register (stride 0, width 1, hstride 0) using the
+ * XXXX swizzle and an X-only writemask.
+ */
+static INLINE struct brw_reg brw_vec1_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   const struct brw_reg reg = brw_reg(file, nr, subnr,
+                                      BRW_REGISTER_TYPE_F,
+                                      BRW_VERTICAL_STRIDE_0,
+                                      BRW_WIDTH_1,
+                                      BRW_HORIZONTAL_STRIDE_0,
+                                      BRW_SWIZZLE_XXXX,
+                                      BRW_WRITEMASK_X);
+   return reg;
+}
+
+
+/** Return a copy of reg whose type field is replaced by type; no other
+ * field (subnr included) is touched.
+ */
+static INLINE struct brw_reg retype( struct brw_reg reg, GLuint type )
+{
+   struct brw_reg retyped = reg;
+
+   retyped.type = type;
+   return retyped;
+}
+
+/** Advance reg by delta elements of its current type: subnr grows by
+ * delta * type_sz(reg.type).
+ */
+static INLINE struct brw_reg suboffset( struct brw_reg reg, GLuint delta )
+{
+   reg.subnr = reg.subnr + delta * type_sz(reg.type);
+   return reg;
+}
+
+
+/** Advance reg by delta whole registers (adds delta to reg.nr). */
+static INLINE struct brw_reg offset( struct brw_reg reg, GLuint delta )
+{
+   reg.nr = reg.nr + delta;
+   return reg;
+}
+
+
+/** Advance reg by a byte count, re-normalising the result into a register
+ * number and a sub-register byte offset (REG_SIZE bytes per register).
+ */
+static INLINE struct brw_reg byte_offset( struct brw_reg reg, GLuint bytes )
+{
+   /* Flatten nr/subnr to a linear byte address, add, then split again. */
+   const GLuint linear = reg.nr * REG_SIZE + reg.subnr + bytes;
+
+   reg.subnr = linear % REG_SIZE;
+   reg.nr = linear / REG_SIZE;
+   return reg;
+}
+
+
+/** Build a 16-wide unsigned-word register; subnr counts UW elements. */
+static INLINE struct brw_reg brw_uw16_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   /* Start from the float template at sub-register 0, switch it to UW,
+    * then step forward so the offset is scaled by the UW element size.
+    */
+   const struct brw_reg base = brw_vec16_reg(file, nr, 0);
+   const struct brw_reg as_uw = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(as_uw, subnr);
+}
+
+/** Build an 8-wide unsigned-word register; subnr counts UW elements. */
+static INLINE struct brw_reg brw_uw8_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   /* Start from the float template at sub-register 0, switch it to UW,
+    * then step forward so the offset is scaled by the UW element size.
+    */
+   const struct brw_reg base = brw_vec8_reg(file, nr, 0);
+   const struct brw_reg as_uw = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(as_uw, subnr);
+}
+
+/** Build a scalar unsigned-word register; subnr counts UW elements. */
+static INLINE struct brw_reg brw_uw1_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   /* Start from the float template at sub-register 0, switch it to UW,
+    * then step forward so the offset is scaled by the UW element size.
+    */
+   const struct brw_reg base = brw_vec1_reg(file, nr, 0);
+   const struct brw_reg as_uw = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(as_uw, subnr);
+}
+
+/** Build the common template for an immediate operand of the given type:
+ * file BRW_IMMEDIATE_VALUE, scalar region, swizzle and writemask zero.
+ * The caller stores the actual value in dw1 afterwards.
+ */
+static INLINE struct brw_reg brw_imm_reg( GLuint type )
+{
+   const struct brw_reg imm = brw_reg(BRW_IMMEDIATE_VALUE,
+                                      0,
+                                      0,
+                                      type,
+                                      BRW_VERTICAL_STRIDE_0,
+                                      BRW_WIDTH_1,
+                                      BRW_HORIZONTAL_STRIDE_0,
+                                      0,
+                                      0);
+   return imm;
+}
+
+/** Build a float (type F) immediate operand holding f. */
+static INLINE struct brw_reg brw_imm_f( GLfloat f )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_F);
+
+   result.dw1.f = f;
+   return result;
+}
+
+/** Build a signed dword (type D) immediate operand holding d. */
+static INLINE struct brw_reg brw_imm_d( GLint d )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_D);
+
+   result.dw1.d = d;
+   return result;
+}
+
+/** Build an unsigned dword (type UD) immediate operand holding ud. */
+static INLINE struct brw_reg brw_imm_ud( GLuint ud )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+
+   result.dw1.ud = ud;
+   return result;
+}
+
+/** Construct ushort immediate register.
+ *
+ * The 16-bit value is replicated into both halves of the immediate dword.
+ */
+static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+   /* Widen before shifting: a GLushort promotes to signed int, and shifting
+    * a value >= 0x8000 left by 16 would overflow it (undefined behaviour).
+    */
+   const GLuint half = uw;
+
+   imm.dw1.ud = half | (half << 16);
+   return imm;
+}
+
+/** Construct short immediate register.
+ *
+ * The 16-bit two's-complement pattern of w is replicated into both halves
+ * of the immediate dword, mirroring brw_imm_uw().
+ */
+static INLINE struct brw_reg brw_imm_w( GLshort w )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+   /* Work on the raw 16-bit pattern.  Operating on the sign-extended int
+    * (w | (w << 16)) left-shifts a negative value and fills the upper half
+    * with sign bits, so e.g. -2 came out as 0xFFFFFFFE, not 0xFFFEFFFE.
+    */
+   const GLuint half = (GLushort) w;
+
+   imm.dw1.ud = half | (half << 16);
+   return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Build a type-V immediate (eight packed signed half-byte values) from the
+ * raw 32-bit pattern v; the region is widened to 8 elements, hstride 1.
+ */
+static INLINE struct brw_reg brw_imm_v( GLuint v )
+{
+   struct brw_reg packed = brw_imm_reg(BRW_REGISTER_TYPE_V);
+
+   packed.dw1.ud = v;
+   packed.hstride = BRW_HORIZONTAL_STRIDE_1;
+   packed.width = BRW_WIDTH_8;
+   packed.vstride = BRW_VERTICAL_STRIDE_0;
+   return packed;
+}
+
+/** Build a type-VF immediate (four packed 8-bit float values) from the raw
+ * 32-bit pattern v; the region is widened to 4 elements, hstride 1.
+ */
+static INLINE struct brw_reg brw_imm_vf( GLuint v )
+{
+   struct brw_reg packed = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+
+   packed.dw1.ud = v;
+   packed.hstride = BRW_HORIZONTAL_STRIDE_1;
+   packed.width = BRW_WIDTH_4;
+   packed.vstride = BRW_VERTICAL_STRIDE_0;
+   return packed;
+}
+
+/* Byte-sized building blocks for the packed 8-bit float (VF) immediates
+ * assembled by brw_imm_vf4().  NOTE(review): presumably VF_ONE is the
+ * encoding of 1.0 and VF_NEG the sign bit to OR in -- confirm against the
+ * hardware documentation.
+ */
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+/** Build a type-VF immediate from four separate byte values: v0 lands in
+ * bits 0-7, v1 in bits 8-15, v2 in bits 16-23 and v3 in bits 24-31.
+ */
+static INLINE struct brw_reg brw_imm_vf4( GLuint v0,
+                                          GLuint v1,
+                                          GLuint v2,
+                                          GLuint v3)
+{
+   struct brw_reg packed = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   GLuint bits = v0;
+
+   bits |= v1 << 8;
+   bits |= v2 << 16;
+   bits |= v3 << 24;
+
+   packed.vstride = BRW_VERTICAL_STRIDE_0;
+   packed.width = BRW_WIDTH_4;
+   packed.hstride = BRW_HORIZONTAL_STRIDE_1;
+   packed.dw1.ud = bits;
+   return packed;
+}
+
+
+/** Return the byte address of reg (nr * REG_SIZE + subnr) as a UW
+ * immediate operand.
+ */
+static INLINE struct brw_reg brw_address( struct brw_reg reg )
+{
+   const struct brw_reg addr = brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
+
+   return addr;
+}
+
+/** Scalar float register in the general register file. */
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+{
+   const struct brw_reg grf = brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+
+   return grf;
+}
+
+/** 2-wide float register in the general register file. */
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+{
+   const struct brw_reg grf = brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+
+   return grf;
+}
+
+/** 4-wide float register in the general register file. */
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+{
+   const struct brw_reg grf = brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+
+   return grf;
+}
+
+/** 8-wide float register in the general register file. */
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+{
+   const struct brw_reg grf = brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+
+   return grf;
+}
+
+
+/** 8-wide unsigned-word register in the general register file. */
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+{
+   const struct brw_reg grf = brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+
+   return grf;
+}
+
+/** 16-wide unsigned-word register in the general register file. */
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+{
+   const struct brw_reg grf = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+
+   return grf;
+}
+
+
+/** The architecture null register, as an 8-wide float operand (usually
+ * used as a destination when only the condition codes are wanted).
+ */
+static INLINE struct brw_reg brw_null_reg( void )
+{
+   const struct brw_reg null_reg =
+      brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
+
+   return null_reg;
+}
+
+/** Scalar UW view of the architecture address register; subnr selects the
+ * UW element within it.
+ */
+static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+{
+   const struct brw_reg addr =
+      brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
+
+   return addr;
+}
+
+/* The instruction-pointer register as a UD operand.
+ *
+ * Unlike the other scalar registers this one deliberately carries the full
+ * XYZW swizzle and writemask: if/else instructions break in align16 mode
+ * when they are anything else.
+ */
+static INLINE struct brw_reg brw_ip_reg( void )
+{
+   const struct brw_reg ip = brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                     BRW_ARF_IP,
+                                     0,
+                                     BRW_REGISTER_TYPE_UD,
+                                     BRW_VERTICAL_STRIDE_4,  /* ? */
+                                     BRW_WIDTH_1,
+                                     BRW_HORIZONTAL_STRIDE_0,
+                                     BRW_SWIZZLE_XYZW,       /* NOTE! */
+                                     BRW_WRITEMASK_XYZW);    /* NOTE! */
+   return ip;
+}
+
+/** The architecture accumulator register as an 8-wide float operand. */
+static INLINE struct brw_reg brw_acc_reg( void )
+{
+   const struct brw_reg acc =
+      brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
+
+   return acc;
+}
+
+
+/** The architecture flag register as a scalar UW operand (element 0). */
+static INLINE struct brw_reg brw_flag_reg( void )
+{
+   const struct brw_reg flag =
+      brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_FLAG, 0);
+
+   return flag;
+}
+
+
+/** Scalar UW view of the architecture mask register; subnr selects the UW
+ * element within it.
+ */
+static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+{
+   const struct brw_reg mask =
+      brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
+
+   return mask;
+}
+
+/** Message register number nr as an 8-wide float operand; nr must be below
+ * BRW_MAX_MRF (asserted).
+ */
+static INLINE struct brw_reg brw_message_reg( GLuint nr )
+{
+   struct brw_reg mrf;
+
+   assert(nr < BRW_MAX_MRF);
+   mrf = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
+   return mrf;
+}
+
+
+
+
+/* Map an element count / stride (1, 2, 4, 8, 16, 32) to log2(val) + 1.
+ * Zero and any other value map to 0.  Callers nearly always pass a literal,
+ * so the comparisons are kept trivially foldable at compile time.
+ */
+static INLINE GLuint cvt( GLuint val )
+{
+   if (val == 1)
+      return 1;
+   if (val == 2)
+      return 2;
+   if (val == 4)
+      return 3;
+   if (val == 8)
+      return 4;
+   if (val == 16)
+      return 5;
+   if (val == 32)
+      return 6;
+
+   /* 0 and every unlisted value */
+   return 0;
+}
+
+/** Return reg with its region replaced by <vstride;width,hstride>, given as
+ * plain element counts and converted to field encodings through cvt().
+ * The width field is stored as cvt(width) - 1.
+ */
+static INLINE struct brw_reg stride( struct brw_reg reg,
+                                     GLuint vstride,
+                                     GLuint width,
+                                     GLuint hstride )
+{
+   const GLuint vs_enc = cvt(vstride);
+   const GLuint width_enc = cvt(width) - 1;
+   const GLuint hs_enc = cvt(hstride);
+
+   reg.vstride = vs_enc;
+   reg.width = width_enc;
+   reg.hstride = hs_enc;
+   return reg;
+}
+
+
+/** Return reg with its region set to <16;16,1>. */
+static INLINE struct brw_reg vec16( struct brw_reg reg )
+{
+   const struct brw_reg wide = stride(reg, 16, 16, 1);
+
+   return wide;
+}
+
+/** Return reg with its region set to <8;8,1>. */
+static INLINE struct brw_reg vec8( struct brw_reg reg )
+{
+   const struct brw_reg wide = stride(reg, 8, 8, 1);
+
+   return wide;
+}
+
+/** Return reg with its region set to <4;4,1>. */
+static INLINE struct brw_reg vec4( struct brw_reg reg )
+{
+   const struct brw_reg wide = stride(reg, 4, 4, 1);
+
+   return wide;
+}
+
+/** Return reg with its region set to <2;2,1>. */
+static INLINE struct brw_reg vec2( struct brw_reg reg )
+{
+   const struct brw_reg pair = stride(reg, 2, 2, 1);
+
+   return pair;
+}
+
+/** Return reg with its region set to the scalar form <0;1,0>. */
+static INLINE struct brw_reg vec1( struct brw_reg reg )
+{
+   const struct brw_reg scalar = stride(reg, 0, 1, 0);
+
+   return scalar;
+}
+
+
+/** Scalar operand addressing element elt of reg, counted in units of
+ * reg's current type.
+ */
+static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+{
+   const struct brw_reg shifted = suboffset(reg, elt);
+
+   return vec1(shifted);
+}
+
+/** Scalar UD operand addressing element elt of reg; the register is
+ * retyped to UD first, so elt counts UD-sized elements.
+ */
+static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+{
+   const struct brw_reg as_ud = retype(reg, BRW_REGISTER_TYPE_UD);
+   const struct brw_reg shifted = suboffset(as_ud, elt);
+
+   return vec1(shifted);
+}
+
+
+/** Compose a new swizzle on top of reg's existing one: each output channel
+ * takes whatever the current swizzle selects for channel x / y / z / w.
+ */
+static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
+                                          GLuint x,
+                                          GLuint y,
+                                          GLuint z,
+                                          GLuint w)
+{
+   /* Snapshot the old swizzle so all four lookups see the same value. */
+   const GLuint old_swz = reg.dw1.bits.swizzle;
+   const GLuint new_x = BRW_GET_SWZ(old_swz, x);
+   const GLuint new_y = BRW_GET_SWZ(old_swz, y);
+   const GLuint new_z = BRW_GET_SWZ(old_swz, z);
+   const GLuint new_w = BRW_GET_SWZ(old_swz, w);
+
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(new_x, new_y, new_z, new_w);
+   return reg;
+}
+
+
+/** Broadcast a single channel: every output channel selects channel x
+ * through reg's existing swizzle.
+ */
+static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, GLuint x )
+{
+   const struct brw_reg splat = brw_swizzle(reg, x, x, x, x);
+
+   return splat;
+}
+
+/** Narrow reg's writemask: the result is the AND of the existing mask and
+ * mask, so channels can only be removed, never added.
+ */
+static INLINE struct brw_reg brw_writemask( struct brw_reg reg, GLuint mask )
+{
+   reg.dw1.bits.writemask = reg.dw1.bits.writemask & mask;
+   return reg;
+}
+
+/** Replace reg's writemask outright with mask (contrast brw_writemask(),
+ * which intersects).
+ */
+static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, GLuint mask )
+{
+   struct brw_reg masked = reg;
+
+   masked.dw1.bits.writemask = mask;
+   return masked;
+}
+
+/** Toggle the negate source modifier, so negating twice gives back the
+ * original operand.
+ */
+static INLINE struct brw_reg negate( struct brw_reg reg )
+{
+   reg.negate = reg.negate ^ 1;
+   return reg;
+}
+
+/** Set the absolute-value source modifier; the negate flag is left as it
+ * was.
+ */
+static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+{
+   struct brw_reg with_abs = reg;
+
+   with_abs.abs = 1;
+   return with_abs;
+}
+
+/***********************************************************************
+ */
+/** Build a 4-wide float operand that is addressed register-indirectly.
+ * In this mode subnr is stored raw (not scaled by the element size) and
+ * offset goes into the indirect_offset field.
+ */
+static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, GLint offset )
+{
+   struct brw_reg ind = brw_vec4_grf(0, 0);
+
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.subnr = subnr;
+   ind.dw1.bits.indirect_offset = offset;
+   return ind;
+}
+
+/** Build a scalar float operand that is addressed register-indirectly.
+ * In this mode subnr is stored raw (not scaled by the element size) and
+ * offset goes into the indirect_offset field.
+ */
+static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, GLint offset )
+{
+   struct brw_reg ind = brw_vec1_grf(0, 0);
+
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.subnr = subnr;
+   ind.dw1.bits.indirect_offset = offset;
+   return ind;
+}
+
+/** Indirect 4-wide float operand at ptr, displaced by a further offset
+ * (added to ptr.addr_offset).
+ */
+static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+{
+   const GLint total = ptr.addr_offset + offset;
+
+   return brw_vec4_indirect(ptr.addr_subnr, total);
+}
+
+/** Indirect scalar float operand at ptr, displaced by a further offset
+ * (added to ptr.addr_offset).
+ */
+static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+{
+   const GLint total = ptr.addr_offset + offset;
+
+   return brw_vec1_indirect(ptr.addr_subnr, total);
+}
+
+/** Same as deref_4f(), but with the operand retyped to B (byte). */
+static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_4f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_B);
+}
+
+/** Same as deref_1f(), but with the operand retyped to UW. */
+static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_1f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_UW);
+}
+
+/** Same as deref_1f(), but with the operand retyped to D. */
+static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_1f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_D);
+}
+
+/** Same as deref_1f(), but with the operand retyped to UD. */
+static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_1f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_UD);
+}
+
+/** The address-register element (ptr.addr_subnr) that backs ptr. */
+static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+   const struct brw_reg addr = brw_address_reg(ptr.addr_subnr);
+
+   return addr;
+}
+
+/** Return ptr displaced by offset (added to its addr_offset). */
+static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+{
+   ptr.addr_offset = ptr.addr_offset + offset;
+   return ptr;
+}
+
+/** Build an indirect pointer from an address-register element and an
+ * initial offset; the padding field is cleared.
+ */
+static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+{
+   struct brw_indirect result;
+
+   result.pad = 0;
+   result.addr_offset = offset;
+   result.addr_subnr = addr_subnr;
+   return result;
+}
+
+/** True when r1 and r2 name the same register, i.e. their file and register
+ * number both match.  Sub-register, type and region are not compared.
+ */
+static INLINE GLboolean
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+   return !(r1.file != r2.file || r1.nr != r2.nr);
+}
+
+/** Pointer to the slot at index p->nr_insn in p's instruction store. */
+static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+{
+   return p->store + p->nr_insn;
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, GLuint value );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control( struct brw_compile *p, GLboolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+
+void brw_init_compile( struct brw_context *, struct brw_compile *p );
+
+enum pipe_error brw_get_program( struct brw_compile *p,
+ const GLuint **program,
+ GLuint *sz );
+
+
+/* Helpers for regular instructions:
+ *
+ * ALU1/ALU2 are local prototype generators.  Each use below expands to the
+ * declaration of one emitter function named brw_<OP>; the macros are
+ * #undef'd again once the list is complete, so they do not leak out of
+ * this header.
+ */
+/* One-source form: brw_<OP>(p, dest, src0). */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+/* Two-source form: brw_<OP>(p, dest, src0, src1). */
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode);
+
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision );
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint scratch_offset );
+
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_READ_4_vs( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ GLuint scratch_offset );
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/***********************************************************************
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg );
+#endif
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
new file mode 100644
index 0000000000..5989f5a04e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -0,0 +1,94 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_eu.h"
+
+/* Debug helper: print a short human-readable description of hwreg through
+ * debug_printf().  Modifiers come first ("abs/", "-"), followed by one of:
+ *   vecN       - even-numbered, whole, 8-wide float GRF
+ *   sclN.M     - scalar float GRF, M being the dword index
+ *   imm <f>    - any immediate (always printed as a float)
+ *   file+region+type for everything else.
+ */
+void brw_print_reg( struct brw_reg hwreg )
+{
+   static const char *file[] = { "arf", "grf", "msg", "imm" };
+   static const char *type[] = { "ud", "d", "uw", "w", "ub", "vf", "hf", "f" };
+
+   const int float_grf = (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+                          hwreg.type == BRW_REGISTER_TYPE_F);
+   const int region_8_8_1 = (hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+                             hwreg.width == BRW_WIDTH_8 &&
+                             hwreg.hstride == BRW_HORIZONTAL_STRIDE_1);
+   const int region_0_1_0 = (hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+                             hwreg.width == BRW_WIDTH_1 &&
+                             hwreg.hstride == BRW_HORIZONTAL_STRIDE_0);
+
+   debug_printf("%s%s",
+                hwreg.abs ? "abs/" : "",
+                hwreg.negate ? "-" : "");
+
+   if (float_grf && region_8_8_1 &&
+       hwreg.nr % 2 == 0 &&
+       hwreg.subnr == 0) {
+      /* vector register */
+      debug_printf("vec%d", hwreg.nr);
+      return;
+   }
+
+   if (float_grf && region_0_1_0) {
+      /* "scalar" register */
+      debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      return;
+   }
+
+   if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+      debug_printf("imm %f", hwreg.dw1.f);
+      return;
+   }
+
+   /* Generic form: decode the stride/width encodings back to counts. */
+   debug_printf("%s%d.%d<%d;%d,%d>:%s",
+                file[hwreg.file],
+                hwreg.nr,
+                hwreg.subnr / type_sz(hwreg.type),
+                hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+                1<<hwreg.width,
+                hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+                type[hwreg.type]);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
new file mode 100644
index 0000000000..00d8eaccbc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -0,0 +1,1433 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+/* Derive the instruction's execution size from an operand's width.  The
+ * width and execution-size encodings are compatible, so the width is used
+ * directly, except that an 8-wide operand in a compressed instruction
+ * executes 16-wide.
+ */
+static void guess_execution_size( struct brw_instruction *insn,
+                                  struct brw_reg reg )
+{
+   const int compressed =
+      (insn->header.compression_control == BRW_COMPRESSION_COMPRESSED);
+
+   if (compressed && reg.width == BRW_WIDTH_8) {
+      insn->header.execution_size = BRW_EXECUTE_16;
+      return;
+   }
+
+   insn->header.execution_size = reg.width;
+}
+
+
+/**
+ * Encode the destination operand dest into insn.
+ *
+ * Writes the destination register file, type and address mode, then either
+ * the register / sub-register number (direct addressing) or the address
+ * sub-register and indirect offset (indirect addressing), using the align1
+ * or align16 field layout selected by insn->header.access_mode.  Finally the
+ * instruction's execution size is derived from dest.width.
+ */
+static void brw_set_dest( struct brw_instruction *insn,
+                          struct brw_reg dest )
+{
+   /* Range-check the register number for everything except the
+    * architecture register file.  This has to test dest.file: dest.type
+    * holds a BRW_REGISTER_TYPE_* value, so comparing it with a register
+    * file constant skipped or applied the check based on the data type.
+    */
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(dest.nr < 128);
+
+   insn->bits1.da1.dest_reg_file = dest.file;
+   insn->bits1.da1.dest_reg_type = dest.type;
+   insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+      insn->bits1.da1.dest_reg_nr = dest.nr;
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         insn->bits1.da1.dest_subreg_nr = dest.subnr;
+         /* A zero horizontal stride is never emitted for a destination;
+          * promote it to 1.
+          */
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+         insn->bits1.da1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         /* align16 stores the sub-register in 16-byte units. */
+         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+      }
+   }
+   else {
+      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+      /* These are different sizes in align1 vs align16:
+       */
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+      }
+   }
+
+   /* NEW: Set the execution size based on dest.width and
+    * insn->compression_control:
+    */
+   guess_execution_size(insn, dest);
+}
+
+/**
+ * Encode source operand 0 of insn from reg.
+ *
+ * Message registers are rejected (asserted).  For an immediate the 32-bit
+ * value is stored in bits3 and the src1 file/type fields are filled in as
+ * well.  Otherwise the register is encoded as direct or indirect, followed
+ * by either the align1 region (vstride/width/hstride) or the align16
+ * swizzle and vertical stride, depending on insn->header.access_mode.
+ */
+static void brw_set_src0( struct brw_instruction *insn,
+                          struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+   /* Range-check the register number for everything except the
+    * architecture register file.  This has to test reg.file: reg.type
+    * holds a BRW_REGISTER_TYPE_* value, not a register file.
+    */
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(reg.nr < 128);
+
+   insn->bits1.da1.src0_reg_file = reg.file;
+   insn->bits1.da1.src0_reg_type = reg.type;
+   insn->bits2.da1.src0_abs = reg.abs;
+   insn->bits2.da1.src0_negate = reg.negate;
+   insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+
+      /* Required to set some fields in src1 as well:
+       */
+      insn->bits1.da1.src1_reg_file = 0; /* arf */
+      insn->bits1.da1.src1_reg_type = reg.type;
+   }
+   else
+   {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.da1.src0_subreg_nr = reg.subnr;
+            insn->bits2.da1.src0_reg_nr = reg.nr;
+         }
+         else {
+            /* align16 stores the sub-register in 16-byte units. */
+            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+            insn->bits2.da16.src0_reg_nr = reg.nr;
+         }
+      }
+      else {
+         insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         }
+         else {
+            /* NOTE(review): this stores the indirect offset into the ia16
+             * sub-register field, whereas brw_set_dest() writes
+             * ia16.dest_indirect_offset in the matching case.  It looks
+             * like it should target an ia16 indirect-offset field --
+             * confirm against the brw_instruction layout before changing.
+             */
+            insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+         }
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         /* A scalar source in a single-channel instruction is always
+          * emitted with the canonical <0;1,0> region.
+          */
+         if (reg.width == BRW_WIDTH_1 &&
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits2.da1.src0_width = BRW_WIDTH_1;
+            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else {
+            insn->bits2.da1.src0_horiz_stride = reg.hstride;
+            insn->bits2.da1.src0_width = reg.width;
+            insn->bits2.da1.src0_vert_stride = reg.vstride;
+         }
+      }
+      else {
+         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits2.da16.src0_vert_stride = reg.vstride;
+      }
+   }
+}
+
+
+/* Encode source operand 1 of insn from reg.
+ *
+ * Message registers are rejected and the register number must be below 128
+ * (both asserted).  src0 must not already be an immediate.  An immediate
+ * src1 is stored as a raw 32-bit value in bits3; anything else must use
+ * direct addressing and is encoded with either the align1 region or the
+ * align16 swizzle/vertical stride, depending on insn->header.access_mode.
+ */
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ assert(reg.nr < 128);
+
+ insn->bits1.da1.src1_reg_file = reg.file;
+ insn->bits1.da1.src1_reg_type = reg.type;
+ insn->bits3.da1.src1_abs = reg.abs;
+ insn->bits3.da1.src1_negate = reg.negate;
+
+ /* Only src1 can be immediate in two-argument instructions.
+ */
+ assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ /* Whole-dword store through the same bits3 member that holds the
+ * abs/negate fields written just above.
+ */
+ insn->bits3.ud = reg.dw1.ud;
+ }
+ else {
+ /* This is a hardware restriction, which may or may not be lifted
+ * in the future:
+ */
+ assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+ /*assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
+ }
+ else {
+ /* align16 stores the sub-register in 16-byte units. */
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ /* A scalar source in a single-channel instruction is always
+ * emitted with the <0;1,0> region.
+ */
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = BRW_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits3.da16.src1_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+
+/* Fill in the message descriptor of insn for a math message.
+ *
+ * src1 is first set to the immediate 0, which stores zero into bits3; the
+ * individual descriptor bitfields are then written on top, so this call
+ * must stay first.  BRW_IS_IGDNG(brw) selects between two descriptor
+ * layouts: on IGDNG the target goes into bits2.send_igdng.sfid, otherwise
+ * into the msg_target field of bits3.  end_of_thread is always cleared.
+ */
+static void brw_set_math_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint msg_length,
+ GLuint response_length,
+ GLuint function,
+ GLuint integer_type,
+ GLboolean low_precision,
+ GLboolean saturate,
+ GLuint dataType )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.math_igdng.function = function;
+ insn->bits3.math_igdng.int_type = integer_type;
+ insn->bits3.math_igdng.precision = low_precision;
+ insn->bits3.math_igdng.saturate = saturate;
+ insn->bits3.math_igdng.data_type = dataType;
+ insn->bits3.math_igdng.snapshot = 0;
+ insn->bits3.math_igdng.header_present = 0;
+ insn->bits3.math_igdng.response_length = response_length;
+ insn->bits3.math_igdng.msg_length = msg_length;
+ insn->bits3.math_igdng.end_of_thread = 0;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_igdng.end_of_thread = 0;
+ } else {
+ insn->bits3.math.function = function;
+ insn->bits3.math.int_type = integer_type;
+ insn->bits3.math.precision = low_precision;
+ insn->bits3.math.saturate = saturate;
+ insn->bits3.math.data_type = dataType;
+ insn->bits3.math.response_length = response_length;
+ insn->bits3.math.msg_length = msg_length;
+ insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+ insn->bits3.math.end_of_thread = 0;
+ }
+}
+
+
+/* Fill in the message descriptor of insn for an FF_SYNC message to the URB.
+ *
+ * Unlike brw_set_urb_message() this always uses the IGDNG descriptor layout
+ * (the brw argument is not consulted) and sets the URB opcode field to 1.
+ * src1 is first set to the immediate 0, which stores zero into bits3 before
+ * the individual fields are written.
+ */
+static void brw_set_ff_sync_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.urb_igdng.opcode = 1;
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used;
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+}
+
+/* Fill in the message descriptor of insn for a URB message (opcode 0).
+ *
+ * src1 is first set to the immediate 0, which stores zero into bits3 before
+ * the individual fields are written.  BRW_IS_IGDNG(brw) selects between two
+ * descriptor layouts: the IGDNG one additionally sets header_present and
+ * carries the target / end-of-thread in bits2.send_igdng, while the other
+ * keeps the target in the msg_target field of bits3.
+ */
+static void brw_set_urb_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.urb_igdng.opcode = 0; /* ? */
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used; /* ? */
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.urb.opcode = 0; /* ? */
+ insn->bits3.urb.offset = offset;
+ insn->bits3.urb.swizzle_control = swizzle_control;
+ insn->bits3.urb.allocate = allocate;
+ insn->bits3.urb.used = used; /* ? */
+ insn->bits3.urb.complete = complete;
+ insn->bits3.urb.response_length = response_length;
+ insn->bits3.urb.msg_length = msg_length;
+ insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+ }
+}
+
+/* Fill in the message descriptor of insn for a dataport write message.
+ *
+ * src1 is first set to the immediate 0, which stores zero into bits3 before
+ * the individual fields are written.  send_commit_msg is always cleared.
+ * BRW_IS_IGDNG(brw) selects between two descriptor layouts: the IGDNG one
+ * additionally sets header_present and carries the target / end-of-thread
+ * in bits2.send_igdng, while the other keeps the target in the msg_target
+ * field of bits3.
+ */
+static void brw_set_dp_write_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_igdng.msg_control = msg_control;
+ insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_igdng.msg_type = msg_type;
+ insn->bits3.dp_write_igdng.send_commit_msg = 0;
+ insn->bits3.dp_write_igdng.header_present = 1;
+ insn->bits3.dp_write_igdng.response_length = response_length;
+ insn->bits3.dp_write_igdng.msg_length = msg_length;
+ insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_write.binding_table_index = binding_table_index;
+ insn->bits3.dp_write.msg_control = msg_control;
+ insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write.msg_type = msg_type;
+ insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.response_length = response_length;
+ insn->bits3.dp_write.msg_length = msg_length;
+ insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits3.dp_write.end_of_thread = end_of_thread;
+ }
+}
+
+/**
+ * Program the message descriptor of a SEND aimed at the data port read
+ * unit.
+ *
+ * src1 is set to an immediate zero first; the descriptor fields are then
+ * stored in insn->bits3, either in the IGDNG layout (which also sets
+ * header_present and fills bits2.send_igdng) or in the original layout
+ * (which keeps the target in bits3.dp_read.msg_target).  The pad1 field
+ * is zeroed in both layouts.
+ */
+static void brw_set_dp_read_message( struct brw_context *brw,
+                                     struct brw_instruction *insn,
+                                     GLuint binding_table_index,
+                                     GLuint msg_control,
+                                     GLuint msg_type,
+                                     GLuint target_cache,
+                                     GLuint msg_length,
+                                     GLuint response_length,
+                                     GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (!BRW_IS_IGDNG(brw)) {
+      /* Original layout; the bit range of each field is noted alongside. */
+      insn->bits3.dp_read.end_of_thread = end_of_thread;                    /*31*/
+      insn->bits3.dp_read.pad1 = 0;                                         /*28:30*/
+      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;    /*24:27*/
+      insn->bits3.dp_read.msg_length = msg_length;                          /*20:23*/
+      insn->bits3.dp_read.response_length = response_length;                /*16:19*/
+      insn->bits3.dp_read.target_cache = target_cache;                      /*14:15*/
+      insn->bits3.dp_read.msg_type = msg_type;                              /*12:13*/
+      insn->bits3.dp_read.msg_control = msg_control;                        /*8:11*/
+      insn->bits3.dp_read.binding_table_index = binding_table_index;        /*0:7*/
+      return;
+   }
+
+   /* IGDNG layout. */
+   insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+   insn->bits2.send_igdng.end_of_thread = end_of_thread;
+
+   insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
+   insn->bits3.dp_read_igdng.pad1 = 0;
+   insn->bits3.dp_read_igdng.msg_length = msg_length;
+   insn->bits3.dp_read_igdng.response_length = response_length;
+   insn->bits3.dp_read_igdng.header_present = 1;
+   insn->bits3.dp_read_igdng.target_cache = target_cache;
+   insn->bits3.dp_read_igdng.msg_type = msg_type;
+   insn->bits3.dp_read_igdng.msg_control = msg_control;
+   insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
+}
+
+/**
+ * Program the message descriptor of a SEND aimed at the sampler.
+ *
+ * eot must be zero (asserted).  src1 is set to an immediate zero, then
+ * one of three descriptor layouts is filled in:
+ *  - IGDNG: bits3.sampler_igdng, the only layout that stores simd_mode
+ *    and header_present; target and eot also go to bits2.send_igdng.
+ *  - G4X: bits3.sampler_g4x.
+ *  - otherwise: bits3.sampler, which additionally gets a return_format of
+ *    BRW_SAMPLER_RETURN_FORMAT_FLOAT32.
+ */
+static void brw_set_sampler_message(struct brw_context *brw,
+                                    struct brw_instruction *insn,
+                                    GLuint binding_table_index,
+                                    GLuint sampler,
+                                    GLuint msg_type,
+                                    GLuint response_length,
+                                    GLuint msg_length,
+                                    GLboolean eot,
+                                    GLuint header_present,
+                                    GLuint simd_mode)
+{
+   assert(eot == 0);
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (BRW_IS_IGDNG(brw)) {
+      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+      insn->bits2.send_igdng.end_of_thread = eot;
+
+      insn->bits3.sampler_igdng.end_of_thread = eot;
+      insn->bits3.sampler_igdng.msg_length = msg_length;
+      insn->bits3.sampler_igdng.response_length = response_length;
+      insn->bits3.sampler_igdng.header_present = header_present;
+      insn->bits3.sampler_igdng.simd_mode = simd_mode;
+      insn->bits3.sampler_igdng.msg_type = msg_type;
+      insn->bits3.sampler_igdng.sampler = sampler;
+      insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
+      return;
+   }
+
+   if (BRW_IS_G4X(brw)) {
+      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+      insn->bits3.sampler_g4x.end_of_thread = eot;
+      insn->bits3.sampler_g4x.msg_length = msg_length;
+      insn->bits3.sampler_g4x.response_length = response_length;
+      insn->bits3.sampler_g4x.msg_type = msg_type;
+      insn->bits3.sampler_g4x.sampler = sampler;
+      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+      return;
+   }
+
+   insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+   insn->bits3.sampler.end_of_thread = eot;
+   insn->bits3.sampler.msg_length = msg_length;
+   insn->bits3.sampler.response_length = response_length;
+   insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+   insn->bits3.sampler.msg_type = msg_type;
+   insn->bits3.sampler.sampler = sampler;
+   insn->bits3.sampler.binding_table_index = binding_table_index;
+}
+
+
+
+/**
+ * Allocate the next slot in p->store, initialise it as a copy of the
+ * current default instruction (p->current) and stamp it with 'opcode'.
+ *
+ * If the default instruction carries a non-zero conditional modifier,
+ * that modifier is treated as one-shot: it is cleared in p->current and
+ * the default predicate control becomes BRW_PREDICATE_NORMAL.
+ *
+ * Asserts that the store still has room.
+ */
+static struct brw_instruction *next_insn( struct brw_compile *p,
+                                          GLuint opcode )
+{
+   struct brw_instruction *slot;
+
+   /* Disassembly of the previous instruction; disabled by the constant 0. */
+   if (0 && (BRW_DEBUG & DEBUG_DISASSEM) && p->nr_insn)
+      brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
+
+   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+   slot = &p->store[p->nr_insn];
+   p->nr_insn++;
+
+   memcpy(slot, p->current, sizeof(*slot));
+   slot->header.opcode = opcode;
+
+   /* Reset the one-shot conditional modifier in the defaults. */
+   if (p->current->header.destreg__conditionalmod) {
+      p->current->header.destreg__conditionalmod = 0;
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+
+   return slot;
+}
+
+
+/**
+ * Emit a one-source ALU instruction: take the next instruction slot with
+ * the given opcode, then set its destination and src0.
+ * Returns the new instruction so the caller can adjust it further.
+ */
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+                                         GLuint opcode,
+                                         struct brw_reg dest,
+                                         struct brw_reg src )
+{
+   struct brw_instruction *inst;
+
+   inst = next_insn(p, opcode);
+   brw_set_dest(inst, dest);
+   brw_set_src0(inst, src);
+
+   return inst;
+}
+
+/**
+ * Emit a two-source ALU instruction: take the next instruction slot with
+ * the given opcode, then set its destination, src0 and src1 (in that
+ * order).  Returns the new instruction.
+ */
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                        GLuint opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1 )
+{
+   struct brw_instruction *inst;
+
+   inst = next_insn(p, opcode);
+   brw_set_dest(inst, dest);
+   brw_set_src0(inst, src0);
+   brw_set_src1(inst, src1);
+
+   return inst;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ *
+ * ALU1(OP) defines the public function brw_<OP>(p, dest, src0), which
+ * forwards to brw_alu1() with opcode BRW_OPCODE_<OP>.
+ * ALU2(OP) does the same for two sources via brw_alu2(), defining
+ * brw_<OP>(p, dest, src0, src1).
+ * Both generated functions return the emitted instruction.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+/* Two-source counterpart of ALU1. */
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+/* Instantiate the emitters, e.g. ALU1(MOV) defines brw_MOV(). */
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+/**
+ * Emit a NOP.  Destination and src0 are both set to brw_vec4_grf(0,0)
+ * retyped as UD, and src1 to an unsigned immediate zero.
+ */
+void brw_NOP(struct brw_compile *p)
+{
+   struct brw_instruction *nop = next_insn(p, BRW_OPCODE_NOP);
+   const struct brw_reg r0_ud = retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD);
+
+   brw_set_dest(nop, r0_ud);
+   brw_set_src0(nop, r0_ud);
+   brw_set_src1(nop, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+/**
+ * Emit a JMPI instruction with the given operands.
+ *
+ * The instruction's compression is turned off and its mask control is
+ * set to BRW_MASK_DISABLE.  After emission the default predicate control
+ * (p->current) is reset to BRW_PREDICATE_NONE.
+ *
+ * NOTE(review): execution_size is written as the literal 1 rather than a
+ * BRW_EXECUTE_* name -- confirm this is the intended encoding.
+ */
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+                                 struct brw_reg dest,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+   struct brw_instruction *jmp = next_insn(p, BRW_OPCODE_JMPI);
+
+   brw_set_dest(jmp, dest);
+   brw_set_src0(jmp, src0);
+   brw_set_src1(jmp, src1);
+
+   jmp->header.mask_control = BRW_MASK_DISABLE;
+   jmp->header.compression_control = BRW_COMPRESSION_NONE;
+   jmp->header.execution_size = 1;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return jmp;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ */
+/**
+ * Open an if-block and return the emitted instruction so that brw_ELSE()
+ * or brw_ENDIF() can patch its jump target later.
+ *
+ * Normally a BRW_OPCODE_IF with thread switch is emitted.  In single
+ * program flow mode the IF is instead an ADD on the IP register with
+ * inverted predicate, and execute_size must be BRW_EXECUTE_1 (asserted).
+ *
+ * The instruction is predicated (BRW_PREDICATE_NORMAL) with masking
+ * enabled; afterwards the default predicate control is reset to
+ * BRW_PREDICATE_NONE.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *if_insn;
+
+   if (!p->single_program_flow) {
+      if_insn = next_insn(p, BRW_OPCODE_IF);
+   } else {
+      assert(execute_size == BRW_EXECUTE_1);
+
+      if_insn = next_insn(p, BRW_OPCODE_ADD);
+      if_insn->header.predicate_inverse = 1;
+   }
+
+   /* Operands: ip = ip + 0; the immediate is filled in when patched. */
+   brw_set_dest(if_insn, brw_ip_reg());
+   brw_set_src0(if_insn, brw_ip_reg());
+   brw_set_src1(if_insn, brw_imm_d(0x0));
+
+   /* Per-instruction overrides of the defaults. */
+   if_insn->header.mask_control = BRW_MASK_ENABLE;
+   if_insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+   if_insn->header.compression_control = BRW_COMPRESSION_NONE;
+   if_insn->header.execution_size = execute_size;
+   if (!p->single_program_flow)
+      if_insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return if_insn;
+}
+
+
+/**
+ * Emit the ELSE of an if-block and patch the matching IF (if_insn) so
+ * that it jumps here.  Returns the ELSE instruction, to be patched in
+ * turn by brw_ENDIF().
+ *
+ * Normal mode: emits BRW_OPCODE_ELSE with thread switch and stores the
+ * instruction distance, scaled by 2 on IGDNG, in the IF's jump_count.
+ * Single program flow mode: emits an ADD on the IP register and stores
+ * (distance + 1) * 16 in the IF's immediate.
+ */
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+                                 struct brw_instruction *if_insn)
+{
+   const GLuint br = BRW_IS_IGDNG(p->brw) ? 2 : 1;
+   struct brw_instruction *else_insn;
+
+   else_insn = next_insn(p, p->single_program_flow ? BRW_OPCODE_ADD
+                                                   : BRW_OPCODE_ELSE);
+
+   brw_set_dest(else_insn, brw_ip_reg());
+   brw_set_src0(else_insn, brw_ip_reg());
+   brw_set_src1(else_insn, brw_imm_d(0x0));
+
+   else_insn->header.mask_control = BRW_MASK_ENABLE;
+   else_insn->header.execution_size = if_insn->header.execution_size;
+   else_insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   if (p->single_program_flow) {
+      /* The IF was emitted as an ADD; patch its immediate operand. */
+      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+      if_insn->bits3.ud = (else_insn - if_insn + 1) * 16;
+      return else_insn;
+   }
+
+   else_insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   /* Point the IF at this instruction. */
+   assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+   if_insn->bits3.if_else.jump_count = br * (else_insn - if_insn);
+   if_insn->bits3.if_else.pop_count = 0;
+   if_insn->bits3.if_else.pad0 = 0;
+
+   return else_insn;
+}
+
+/**
+ * Close an if-block.  patch_insn is the still-unpatched IF or ELSE
+ * returned by brw_IF()/brw_ELSE().
+ *
+ * Single program flow mode: no ENDIF is emitted; the ADD in patch_insn
+ * simply gets its immediate set to the distance to the next instruction
+ * slot, times 16.
+ *
+ * Normal mode: an ENDIF (pop_count 1) is emitted and patch_insn is
+ * pointed one past it (distance scaled by 2 on IGDNG).  An IF with no
+ * ELSE is rewritten into an IFF with pop_count 0; an ELSE gets
+ * pop_count 1.  Any other opcode in patch_insn trips an assert.
+ */
+void brw_ENDIF(struct brw_compile *p,
+               struct brw_instruction *patch_insn)
+{
+   const GLuint br = BRW_IS_IGDNG(p->brw) ? 2 : 1;
+   struct brw_instruction *end_insn;
+
+   if (p->single_program_flow) {
+      /* No stack operations are needed in this mode, so there is nothing
+       * to emit: execution just continues at the next instruction.
+       */
+      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+
+      patch_insn->bits3.ud = (&p->store[p->nr_insn] - patch_insn) * 16;
+      return;
+   }
+
+   end_insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+   brw_set_dest(end_insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(end_insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(end_insn, brw_imm_d(0x0));
+
+   end_insn->header.thread_control = BRW_THREAD_SWITCH;
+   end_insn->header.mask_control = BRW_MASK_ENABLE;
+   end_insn->header.execution_size = patch_insn->header.execution_size;
+   end_insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   /* The instruction being patched must not have been patched before. */
+   assert(patch_insn->bits3.if_else.jump_count == 0);
+
+   switch (patch_insn->header.opcode) {
+   case BRW_OPCODE_IF:
+      /* IF without ELSE: turn it into an IFF. */
+      patch_insn->header.opcode = BRW_OPCODE_IFF;
+      patch_insn->bits3.if_else.jump_count = br * (end_insn - patch_insn + 1);
+      patch_insn->bits3.if_else.pop_count = 0;
+      patch_insn->bits3.if_else.pad0 = 0;
+      break;
+   case BRW_OPCODE_ELSE:
+      patch_insn->bits3.if_else.jump_count = br * (end_insn - patch_insn + 1);
+      patch_insn->bits3.if_else.pop_count = 1;
+      patch_insn->bits3.if_else.pad0 = 0;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* The ENDIF itself pops one entry off the stack. */
+   end_insn->bits3.if_else.jump_count = 0;
+   end_insn->bits3.if_else.pop_count = 1;
+   end_insn->bits3.if_else.pad0 = 0;
+}
+
+/**
+ * Emit a BREAK instruction operating on the IP register (ip = ip + 0
+ * until patched by the caller), uncompressed, with execution size
+ * BRW_EXECUTE_8.  Only the pad0 field of the jump descriptor is cleared
+ * here; mask_control is not overridden.  Returns the instruction.
+ */
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+   struct brw_instruction *brk = next_insn(p, BRW_OPCODE_BREAK);
+
+   brw_set_dest(brk, brw_ip_reg());
+   brw_set_src0(brk, brw_ip_reg());
+   brw_set_src1(brk, brw_imm_d(0x0));
+
+   brk->header.execution_size = BRW_EXECUTE_8;
+   brk->header.compression_control = BRW_COMPRESSION_NONE;
+   brk->bits3.if_else.pad0 = 0;
+
+   return brk;
+}
+
+/**
+ * Emit a CONTINUE instruction operating on the IP register (ip = ip + 0
+ * until patched by the caller), uncompressed, with execution size
+ * BRW_EXECUTE_8.  Only the pad0 field of the jump descriptor is cleared
+ * here; mask_control is not overridden.  Returns the instruction.
+ */
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+   struct brw_instruction *cont = next_insn(p, BRW_OPCODE_CONTINUE);
+
+   brw_set_dest(cont, brw_ip_reg());
+   brw_set_src0(cont, brw_ip_reg());
+   brw_set_src1(cont, brw_imm_d(0x0));
+
+   cont->header.execution_size = BRW_EXECUTE_8;
+   cont->header.compression_control = BRW_COMPRESSION_NONE;
+   cont->bits3.if_else.pad0 = 0;
+
+   return cont;
+}
+
+/* DO/WHILE loop:
+ */
+/**
+ * Mark the top of a DO/WHILE loop and return the instruction that
+ * brw_WHILE() will jump back to.
+ *
+ * In single program flow mode nothing is emitted: the returned pointer
+ * is the slot the next emitted instruction will occupy.  Otherwise a DO
+ * instruction with null operands, no compression, no predication and
+ * the requested execution size is emitted and returned.
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *do_insn;
+
+   if (p->single_program_flow)
+      return &p->store[p->nr_insn];
+
+   do_insn = next_insn(p, BRW_OPCODE_DO);
+
+   brw_set_dest(do_insn, brw_null_reg());
+   brw_set_src0(do_insn, brw_null_reg());
+   brw_set_src1(do_insn, brw_null_reg());
+
+   /* mask_control is left at the current default. */
+   do_insn->header.predicate_control = BRW_PREDICATE_NONE;
+   do_insn->header.execution_size = execute_size;
+   do_insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   return do_insn;
+}
+
+
+
+/**
+ * Close a DO/WHILE loop by emitting the backward jump to do_insn (the
+ * value returned by brw_DO()).
+ *
+ * Single program flow mode: an ADD on the IP register with execution
+ * size 1 whose immediate is the instruction distance to do_insn times
+ * 16.
+ * Normal mode: a WHILE with do_insn's execution size whose jump_count is
+ * the distance to do_insn plus one, scaled by 2 on IGDNG; do_insn must
+ * be a DO (asserted).
+ *
+ * The default predicate control is reset to BRW_PREDICATE_NONE
+ * afterwards.  Returns the emitted instruction.
+ */
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+                                  struct brw_instruction *do_insn)
+{
+   const GLuint br = BRW_IS_IGDNG(p->brw) ? 2 : 1;
+   struct brw_instruction *loop_end;
+
+   loop_end = next_insn(p, p->single_program_flow ? BRW_OPCODE_ADD
+                                                  : BRW_OPCODE_WHILE);
+
+   brw_set_dest(loop_end, brw_ip_reg());
+   brw_set_src0(loop_end, brw_ip_reg());
+   brw_set_src1(loop_end, brw_imm_d(0x0));
+
+   loop_end->header.compression_control = BRW_COMPRESSION_NONE;
+
+   if (!p->single_program_flow) {
+      loop_end->header.execution_size = do_insn->header.execution_size;
+
+      assert(do_insn->header.opcode == BRW_OPCODE_DO);
+      loop_end->bits3.if_else.jump_count = br * (do_insn - loop_end + 1);
+      loop_end->bits3.if_else.pop_count = 0;
+      loop_end->bits3.if_else.pad0 = 0;
+   } else {
+      loop_end->header.execution_size = BRW_EXECUTE_1;
+
+      loop_end->bits3.d = (do_insn - loop_end) * 16;
+   }
+
+   /* mask_control is left at the current default. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return loop_end;
+}
+
+
+/* FORWARD JUMPS:
+ */
+/**
+ * Resolve a forward JMPI: make jmp_insn jump to the next instruction
+ * that will be emitted.
+ *
+ * jmp_insn must be a JMPI whose src1 is an immediate (both asserted).
+ * The stored immediate is the instruction distance from the jump to the
+ * landing slot minus one, doubled on IGDNG.
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+                       struct brw_instruction *jmp_insn)
+{
+   const GLuint jmpi = BRW_IS_IGDNG(p->brw) ? 2 : 1;
+   struct brw_instruction *target = &p->store[p->nr_insn];
+
+   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+   jmp_insn->bits3.ud = jmpi * (target - jmp_insn - 1);
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+/**
+ * Emit a CMP of src0 against src1 using the given conditional modifier,
+ * writing to dest.
+ *
+ * When dest is architecture register 0, later instructions are made to
+ * use the computed flag: the default predicate control becomes
+ * BRW_PREDICATE_NORMAL and p->flag_value is set to 0xff.  This lasts
+ * until brw_set_predicate_control_flag_value() is called again.
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             GLuint conditional,
+             struct brw_reg src0,
+             struct brw_reg src1)
+{
+   struct brw_instruction *cmp = next_insn(p, BRW_OPCODE_CMP);
+
+   cmp->header.destreg__conditionalmod = conditional;
+   brw_set_dest(cmp, dest);
+   brw_set_src0(cmp, src0);
+   brw_set_src1(cmp, src1);
+
+   /* Only a destination of ARF register 0 changes the defaults. */
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      return;
+   if (dest.nr != 0)
+      return;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   p->flag_value = 0xff;
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/**
+ * Extended math function, float[8].
+ *
+ * Emits one unpredicated SEND whose message register number is
+ * msg_reg_nr.  The message length is 2 for BRW_MATH_FUNCTION_POW and 1
+ * otherwise; the response length is 2 for BRW_MATH_FUNCTION_SINCOS and 1
+ * otherwise.  The integer type passed to the message setup is always
+ * BRW_MATH_INTEGER_UNSIGNED.
+ */
+void brw_math( struct brw_compile *p,
+               struct brw_reg dest,
+               GLuint function,
+               GLuint saturate,
+               GLuint msg_reg_nr,
+               struct brw_reg src,
+               GLuint data_type,
+               GLuint precision )
+{
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+   GLuint msg_length = 1;
+   GLuint response_length = 1;
+
+   if (function == BRW_MATH_FUNCTION_POW)
+      msg_length = 2;
+   if (function == BRW_MATH_FUNCTION_SINCOS)
+      response_length = 2;
+
+   /* Example code doesn't set predicate_control for send
+    * instructions.
+    */
+   send->header.predicate_control = 0;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src);
+   brw_set_math_message(p->brw, send,
+                        msg_length, response_length,
+                        function,
+                        BRW_MATH_INTEGER_UNSIGNED,
+                        precision,
+                        saturate,
+                        data_type);
+}
+
+/**
+ * Extended math function, float[16].
+ * Use 2 send instructions.
+ *
+ * The first SEND uses message register msg_reg_nr and writes 'dest';
+ * the second is marked BRW_COMPRESSION_2NDHALF, uses msg_reg_nr+1 and
+ * writes offset(dest, 1).  Both pass the same 'src' and always use
+ * BRW_MATH_INTEGER_UNSIGNED and BRW_MATH_DATA_VECTOR.
+ *
+ * The message length is 2 for BRW_MATH_FUNCTION_POW, otherwise 1; the
+ * response length is 2 for BRW_MATH_FUNCTION_SINCOS, otherwise 1.
+ *
+ * The instruction defaults are saved on entry and restored on exit, so
+ * the overrides made here do not leak to the caller.
+ */
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision )
+{
+ struct brw_instruction *insn;
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* First instruction:
+ * emitted with the pushed defaults changed to flag value 0xff and no
+ * compression.
+ */
+ brw_push_insn_state(p);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ /* Second instruction:
+ * same message, but flagged as the second half and shifted to the
+ * next message register and to offset(dest,1).
+ */
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+ insn->header.destreg__conditionalmod = msg_reg_nr+1;
+
+ brw_set_dest(insn, offset(dest,1));
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ brw_pop_insn_state(p);
+}
+
+
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch
+ * buffer.  Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ *
+ * Two instructions are emitted: a MOV (mask disabled, no compression)
+ * that stores scratch_offset in element 2 of GRF 0, the global offset
+ * field of the message header, followed by an unpredicated SEND using
+ * message register 1, message length 3, a null destination, and a
+ * stateless (binding table index 255) 4-OWord block write.
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+                      struct brw_reg src,
+                      GLuint scratch_offset )
+{
+   const GLuint msg_reg_nr = 1;
+   const GLuint msg_length = 3;
+   struct brw_reg null_uw;
+   struct brw_instruction *send;
+
+   /* set message header global offset field (reg 0, element 2) */
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p,
+           retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+           brw_imm_d(scratch_offset));
+   brw_pop_insn_state(p);
+
+   null_uw = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(send, null_uw);
+   brw_set_src0(send, src);
+
+   brw_set_dp_write_message(p->brw,
+                            send,
+                            255, /* binding table index (255=stateless) */
+                            BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+                            BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+                            msg_length,
+                            0, /* pixel scoreboard */
+                            0, /* response_length */
+                            0); /* eot */
+}
+
+
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch
+ * buffer.  Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ *
+ * Two instructions are emitted: a MOV (no compression, mask disabled)
+ * that stores scratch_offset in element 2 of GRF 0, the global offset
+ * field of the message header, followed by an unpredicated SEND using
+ * message register 1 with GRF 0 (as UW) for src0.  The read is stateless
+ * (binding table index 255), 4 OWords, message length 1 and response
+ * length 2.
+ */
+void brw_dp_READ_16( struct brw_compile *p,
+                     struct brw_reg dest,
+                     GLuint scratch_offset )
+{
+   const GLuint msg_reg_nr = 1;
+   struct brw_instruction *send;
+
+   /* set message header global offset field (reg 0, element 2) */
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p,
+           retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+           brw_imm_d(scratch_offset));
+   brw_pop_insn_state(p);
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest); /* UW? */
+   brw_set_src0(send, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+   brw_set_dp_read_message(p->brw,
+                           send,
+                           255, /* binding table index (255=stateless) */
+                           3, /* msg_control (3 means 4 Owords) */
+                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                           1, /* target cache (render/scratch) */
+                           1, /* msg_length */
+                           2, /* response_length */
+                           0); /* eot */
+}
+
+
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ *
+ * XXX: relAddr is accepted but not implemented; the fetch always uses
+ * 'location' directly.
+ *
+ * A MOV first loads 'location' into message register 1 (as UD), then an
+ * unpredicated, unmasked SEND reads one OWord through bind_table_index
+ * into 'dest', which is reinterpreted as a uword[8] vector.
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+                    struct brw_reg dest,
+                    GLboolean relAddr,
+                    GLuint location,
+                    GLuint bind_table_index )
+{
+   const GLuint msg_reg_nr = 1;
+   struct brw_reg payload;
+   struct brw_instruction *send;
+
+   brw_push_insn_state(p);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+   /* Setup MRF[1] with location/offset into const buffer.
+    * XXX I think this sets all the dwords of MRF[1] to 'location',
+    * while the docs say only dword[2] should be set.  Hmmm.  But it works.
+    */
+   payload = retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD);
+   brw_MOV(p, payload, brw_imm_ud(location));
+   brw_pop_insn_state(p);
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = BRW_PREDICATE_NONE;
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+   send->header.mask_control = BRW_MASK_DISABLE;
+
+   /* cast dest to a uword[8] vector */
+   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, brw_null_reg());
+
+   brw_set_dp_read_message(p->brw,
+                           send,
+                           bind_table_index,
+                           0, /* msg_control (0 means 1 Oword) */
+                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                           0, /* source cache = data cache */
+                           1, /* msg_length */
+                           1, /* response_length (1 Oword) */
+                           0); /* eot */
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ *
+ * 'oword' selects the lower (0) or upper (1) OWord and must be below 2
+ * (asserted).  Message register 1 is loaded with 'location', or with
+ * addrReg + location when relAddr is set, before the unpredicated,
+ * unmasked SEND is emitted.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      GLuint oword,
+                      GLboolean relAddr,
+                      struct brw_reg addrReg,
+                      GLuint location,
+                      GLuint bind_table_index)
+{
+   const GLuint msg_reg_nr = 1;
+   struct brw_reg payload;
+   struct brw_instruction *send;
+
+   assert(oword < 2);
+
+   /* Setup MRF[1] with location/offset into const buffer */
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   /* XXX I think this sets all the dwords of MRF[1] to 'location',
+    * while the docs say only dword[2] should be set.  Hmmm.  But it works.
+    */
+   payload = retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD);
+   if (!relAddr)
+      brw_MOV(p, payload, brw_imm_ud(location));
+   else
+      brw_ADD(p, payload, addrReg, brw_imm_ud(location));
+
+   brw_pop_insn_state(p);
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = BRW_PREDICATE_NONE;
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+   send->header.mask_control = BRW_MASK_DISABLE;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, brw_null_reg());
+
+   brw_set_dp_read_message(p->brw,
+                           send,
+                           bind_table_index,
+                           oword, /* 0 = lower Oword, 1 = upper Oword */
+                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                           0, /* source cache = data cache */
+                           1, /* msg_length */
+                           1, /* response_length (1 Oword) */
+                           0); /* eot */
+}
+
+
+
+/**
+ * Emit the SEND that writes to a render target.
+ *
+ * The instruction is unpredicated and uncompressed and uses message
+ * register msg_reg_nr.  The data port write message is always a
+ * SIMD16 single-source render target write with the pixel scoreboard
+ * clear bit set; binding table index, lengths and eot come from the
+ * caller.
+ */
+void brw_fb_WRITE(struct brw_compile *p,
+                  struct brw_reg dest,
+                  GLuint msg_reg_nr,
+                  struct brw_reg src0,
+                  GLuint binding_table_index,
+                  GLuint msg_length,
+                  GLuint response_length,
+                  GLboolean eot)
+{
+   struct brw_instruction *send;
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+
+   brw_set_dp_write_message(p->brw, send,
+                            binding_table_index,
+                            BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+                            BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+                            msg_length,
+                            1, /* pixel scoreboard */
+                            response_length,
+                            eot);
+}
+
+
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed. See volume 4, page 161 of docs.
+ *
+ * Nothing is emitted when writemask is 0.
+ *
+ * When writemask is not BRW_WRITEMASK_XYZW one of two workarounds for
+ * the send destination-dependency problem (described in the body) is
+ * applied:
+ *  - the enabled channels form one contiguous run: a message header is
+ *    built in message register msg_reg_nr (copy of GRF 0 with the
+ *    inverted mask, shifted left by 12, in dword 2), src0 is replaced by
+ *    the null register, dest is moved past the skipped leading channels
+ *    and response_length becomes twice the number of enabled channels;
+ *  - otherwise the send is emitted unchanged and followed by a MOV of
+ *    the last response register onto itself.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
+{
+ GLboolean need_stall = 0; /* set when the trailing self-MOV is required */
+
+ if (writemask == 0) {
+ /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */
+ return;
+ }
+
+ /* Hardware doesn't do destination dependency checking on send
+ * instructions properly. Add a workaround which generates the
+ * dependency by other means. In practice it seems like this bug
+ * only crops up for texture samples, and only where registers are
+ * written by the send and then written again later without being
+ * read in between. Luckily for us, we already track that
+ * information and use it to modify the writemask for the
+ * instruction, so that is a guide for whether a workaround is
+ * needed.
+ */
+ if (writemask != BRW_WRITEMASK_XYZW) {
+ GLuint dst_offset = 0;
+ GLuint i, newmask = 0, len = 0;
+
+ for (i = 0; i < 4; i++) { /* skip the leading disabled channels */
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2; /* dest advances by 2 per skipped channel (see offset() below) */
+ }
+ for (; i < 4; i++) { /* collect the first contiguous run of enabled channels */
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
+ }
+
+ if (newmask != writemask) { /* enabled channels are not one contiguous run */
+ need_stall = 1;
+ /* debug_printf("need stall %x %x\n", newmask , writemask); */
+ }
+ else {
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+ newmask = ~newmask & BRW_WRITEMASK_XYZW; /* now the channels that are NOT written */
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, m1, brw_vec8_grf(0,0)); /* header starts as a copy of GRF 0 */
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); /* inverted mask into dword 2 */
+
+ brw_pop_insn_state(p);
+
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
+ response_length = len * 2;
+ }
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_sampler_message(p->brw, insn,
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ eot,
+ header_present,
+ simd_mode);
+ }
+
+ if (need_stall) {
+ struct brw_reg reg = vec8(offset(dest, response_length-1)); /* NOTE(review): response_length is unsigned; a value of 0 would wrap here -- confirm callers never pass 0 */
+
+ /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, reg, reg);
+ brw_pop_insn_state(p);
+ }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style. Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+/**
+ * Emit a SEND carrying a URB write message.
+ *
+ * msg_length must be below BRW_MAX_MRF (asserted).  The operands are
+ * dest, src0 and an immediate zero; msg_reg_nr selects the message
+ * register.  All remaining arguments are passed straight through to
+ * brw_set_urb_message().
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+                   struct brw_reg dest,
+                   GLuint msg_reg_nr,
+                   struct brw_reg src0,
+                   GLboolean allocate,
+                   GLboolean used,
+                   GLuint msg_length,
+                   GLuint response_length,
+                   GLboolean eot,
+                   GLboolean writes_complete,
+                   GLuint offset,
+                   GLuint swizzle)
+{
+   struct brw_instruction *send;
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   assert(msg_length < BRW_MAX_MRF);
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_src1(send, brw_imm_d(0));
+
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_urb_message(p->brw, send,
+                       allocate, used,
+                       msg_length, response_length,
+                       eot, writes_complete,
+                       offset, swizzle);
+}
+
+/**
+ * Emit a SEND carrying an FF_SYNC message.
+ *
+ * msg_length must be below 16 (asserted).  The operands are dest, src0
+ * and an immediate zero; msg_reg_nr selects the message register.  All
+ * remaining arguments are passed straight through to
+ * brw_set_ff_sync_message().
+ */
+void brw_ff_sync(struct brw_compile *p,
+                 struct brw_reg dest,
+                 GLuint msg_reg_nr,
+                 struct brw_reg src0,
+                 GLboolean allocate,
+                 GLboolean used,
+                 GLuint msg_length,
+                 GLuint response_length,
+                 GLboolean eot,
+                 GLboolean writes_complete,
+                 GLuint offset,
+                 GLuint swizzle)
+{
+   struct brw_instruction *send;
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   assert(msg_length < 16);
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_src1(send, brw_imm_d(0));
+
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_ff_sync_message(p->brw, send,
+                           allocate, used,
+                           msg_length, response_length,
+                           eot, writes_complete,
+                           offset, swizzle);
+}
diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c
new file mode 100644
index 0000000000..5405cf17a4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+/**
+ * Emit the extended-math INV function on 'src', writing the result to
+ * 'dst'.
+ *
+ * Thin wrapper around brw_math(): no saturation, message register 0,
+ * vector data type, full precision.
+ *
+ * brw_math() takes data_type before precision, so the two trailing
+ * arguments are passed in that order.
+ */
+void brw_math_invert( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src)
+{
+   brw_math( p,
+             dst,
+             BRW_MATH_FUNCTION_INV,
+             BRW_MATH_SATURATE_NONE,
+             0,                          /* msg_reg_nr */
+             src,
+             BRW_MATH_DATA_VECTOR,       /* data_type */
+             BRW_MATH_PRECISION_FULL );  /* precision */
+}
+
+
+
+/**
+ * Copy 'count' 32-byte chunks from src to dst.
+ *
+ * Both registers are converted with vec4(), and each chunk is moved by
+ * two MOVs at byte offsets 0 and 16 within the chunk.
+ */
+void brw_copy4(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   GLuint n;
+   GLuint byte_pos = 0;
+
+   dst = vec4(dst);
+   src = vec4(src);
+
+   for (n = 0; n < count; n++, byte_pos += 32) {
+      brw_MOV(p, byte_offset(dst, byte_pos), byte_offset(src, byte_pos));
+      brw_MOV(p, byte_offset(dst, byte_pos+16), byte_offset(src, byte_pos+16));
+   }
+}
+
+
+/**
+ * Copy 'count' 32-byte chunks from src to dst.
+ *
+ * Both registers are converted with vec8(), and each chunk is moved by
+ * a single MOV.
+ */
+void brw_copy8(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   GLuint n;
+   GLuint byte_pos = 0;
+
+   dst = vec8(dst);
+   src = vec8(src);
+
+   for (n = 0; n < count; n++, byte_pos += 32)
+      brw_MOV(p, byte_offset(dst, byte_pos), byte_offset(src, byte_pos));
+}
+
+
+/**
+ * Copy 'count' 32-byte chunks between two indirectly addressed
+ * locations.
+ *
+ * Each chunk is moved by two MOVs whose operands are formed with
+ * deref_4f() at byte offsets 0 and 16 within the chunk.
+ */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+                                   struct brw_indirect dst_ptr,
+                                   struct brw_indirect src_ptr,
+                                   GLuint count)
+{
+   GLuint n;
+   GLuint byte_pos = 0;
+
+   for (n = 0; n < count; n++, byte_pos += 32) {
+      brw_MOV(p, deref_4f(dst_ptr, byte_pos), deref_4f(src_ptr, byte_pos));
+      brw_MOV(p, deref_4f(dst_ptr, byte_pos+16), deref_4f(src_ptr, byte_pos+16));
+   }
+}
+
+
+/**
+ * Copy 'count' 32-byte chunks from an indirectly addressed location
+ * into dst.
+ *
+ * dst is converted with vec4(); each chunk is moved by two MOVs at byte
+ * offsets 0 and 16 within the chunk, reading the source through
+ * deref_4f().
+ */
+void brw_copy_from_indirect(struct brw_compile *p,
+                            struct brw_reg dst,
+                            struct brw_indirect ptr,
+                            GLuint count)
+{
+   GLuint n;
+   GLuint byte_pos = 0;
+
+   dst = vec4(dst);
+
+   for (n = 0; n < count; n++, byte_pos += 32) {
+      brw_MOV(p, byte_offset(dst, byte_pos), deref_4f(ptr, byte_pos));
+      brw_MOV(p, byte_offset(dst, byte_pos+16), deref_4f(ptr, byte_pos+16));
+   }
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
new file mode 100644
index 0000000000..921b201bae
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -0,0 +1,216 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+/**
+ * Build the GS program described by `key` and upload it to the program
+ * cache.
+ *
+ * \param brw     context the program is compiled for
+ * \param key     program key; a copy is stored in the compile state
+ * \param bo_out  handed to brw_upload_cache() to receive the buffer
+ *
+ * \return PIPE_OK on success.  PIPE_OK is also returned, without
+ *         uploading anything, for lines, triangles and points when
+ *         hint_gs_always is clear.  PIPE_ERROR_BAD_INPUT is returned
+ *         for any other unhandled primitive; otherwise the error of
+ *         brw_get_program() or brw_upload_cache() is passed on.
+ */
+static enum pipe_error compile_gs_prog( struct brw_context *brw,
+                                        struct brw_gs_prog_key *key,
+                                        struct brw_winsys_buffer **bo_out )
+{
+   struct brw_gs_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint extra_regs;
+   enum pipe_error ret;
+
+   memset(&c, 0, sizeof(c));
+
+   c.key = *key;
+   c.need_ff_sync = BRW_IS_IGDNG(brw);
+   c.nr_attrs = c.key.nr_attrs;
+
+   /* Vertex size in registers: half the attribute count, rounded up,
+    * plus a chipset dependent number of extra registers.
+    * (Are vertices packed, or reg-aligned?)
+    */
+   extra_regs = BRW_IS_IGDNG(brw) ? 3 : 1;
+   c.nr_regs = (c.nr_attrs + 1) / 2 + extra_regs;
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+   c.func.single_program_flow = 1;
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+   /* Note that primitives which don't require a GS program have
+    * already been weeded out by this stage:
+    */
+   switch (key->primitive) {
+   case PIPE_PRIM_QUADS:
+      brw_gs_quads( &c );
+      break;
+
+   case PIPE_PRIM_QUAD_STRIP:
+      brw_gs_quad_strip( &c );
+      break;
+
+   case PIPE_PRIM_LINE_LOOP:
+      brw_gs_lines( &c );
+      break;
+
+   case PIPE_PRIM_LINES:
+      if (!key->hint_gs_always)
+         return PIPE_OK;
+      brw_gs_lines( &c );
+      break;
+
+   case PIPE_PRIM_TRIANGLES:
+      if (!key->hint_gs_always)
+         return PIPE_OK;
+      brw_gs_tris( &c );
+      break;
+
+   case PIPE_PRIM_POINTS:
+      if (!key->hint_gs_always)
+         return PIPE_OK;
+      brw_gs_points( &c );
+      break;
+
+   default:
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* Fetch the generated instructions...
+    */
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
+
+   /* ...and upload them together with the program data:
+    */
+   ret = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->gs.prog_data,
+                           bo_out );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/* Maps each pipe primitive onto the primitive the GS program is keyed
+ * on: line strips map to lines; triangle strips, triangle fans and
+ * polygons map to triangles; every other primitive maps to itself.
+ */
+static const unsigned gs_prim[PIPE_PRIM_MAX] = {
+   [PIPE_PRIM_POINTS]         = PIPE_PRIM_POINTS,
+   [PIPE_PRIM_LINES]          = PIPE_PRIM_LINES,
+   [PIPE_PRIM_LINE_LOOP]      = PIPE_PRIM_LINE_LOOP,
+   [PIPE_PRIM_LINE_STRIP]     = PIPE_PRIM_LINES,
+   [PIPE_PRIM_TRIANGLES]      = PIPE_PRIM_TRIANGLES,
+   [PIPE_PRIM_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLES,
+   [PIPE_PRIM_TRIANGLE_FAN]   = PIPE_PRIM_TRIANGLES,
+   [PIPE_PRIM_QUADS]          = PIPE_PRIM_QUADS,
+   [PIPE_PRIM_QUAD_STRIP]     = PIPE_PRIM_QUAD_STRIP,
+   [PIPE_PRIM_POLYGON]        = PIPE_PRIM_TRIANGLES
+};
+
+/**
+ * Fill in the GS program key from the current context state.
+ *
+ * The key is cleared first, so the pad bits are always zero.  A GS
+ * program is requested for quads, quad strips and line loops, or
+ * whenever hint_gs_always is set (it is currently hardwired to 0).
+ */
+static void populate_key( struct brw_context *brw,
+                          struct brw_gs_prog_key *key )
+{
+   const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+
+   memset(key, 0, sizeof(*key));
+
+   /* PIPE_NEW_FRAGMENT_SIGNATURE */
+   key->nr_attrs = sig->nr_inputs + 1;
+
+   /* BRW_NEW_PRIMITIVE */
+   key->primitive = gs_prim[brw->primitive];
+
+   key->hint_gs_always = 0;	/* debug code? */
+
+   if (key->hint_gs_always)
+      key->need_gs_prog = 1;
+   else
+      key->need_gs_prog = (brw->primitive == PIPE_PRIM_QUADS ||
+                           brw->primitive == PIPE_PRIM_QUAD_STRIP ||
+                           brw->primitive == PIPE_PRIM_LINE_LOOP);
+}
+
+/* Make sure a GS program matching the current state is available.
+ *
+ * Tracks whether a GS program is needed at all (raising
+ * CACHE_NEW_GS_PROG when that changes) and, if one is needed, looks it
+ * up in the cache or compiles it.
+ */
+static int prepare_gs_prog(struct brw_context *brw)
+{
+   struct brw_gs_prog_key key;
+   enum pipe_error ret;
+
+   populate_key(brw, &key);
+
+   /* Switching the GS program on or off counts as a program change:
+    */
+   if (brw->gs.prog_active != key.need_gs_prog) {
+      brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+      brw->gs.prog_active = key.need_gs_prog;
+   }
+
+   if (!brw->gs.prog_active)
+      return PIPE_OK;
+
+   /* Cache hit: nothing to compile.
+    */
+   if (brw_search_cache(&brw->cache, BRW_GS_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->gs.prog_data,
+                        &brw->gs.prog_bo))
+      return PIPE_OK;
+
+   ret = compile_gs_prog( brw, &key, &brw->gs.prog_bo );
+
+   return ret ? ret : PIPE_OK;
+}
+
+
+const struct brw_tracked_state brw_gs_prog = { /* state atom for the GS program; only a prepare hook is set */
+ .dirty = {
+ .mesa = PIPE_NEW_FRAGMENT_SIGNATURE, /* populate_key() reads the fragment shader signature */
+ .brw = BRW_NEW_PRIMITIVE, /* populate_key() reads brw->primitive */
+ .cache = 0,
+ },
+ .prepare = prepare_gs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h
new file mode 100644
index 0000000000..6e616dcb87
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)
+
+struct brw_gs_prog_key { /* cache key of a GS program; the bitfields add up to 32 bits */
+ GLuint nr_attrs:8; /* attribute count used to size a vertex */
+ GLuint primitive:4; /* PIPE_PRIM_* value selecting the program variant */
+ GLuint hint_gs_always:1; /* request a program even for points, lines and triangles */
+ GLuint need_gs_prog:1; /* non-zero when a GS program is required */
+ GLuint pad:18; /* unused bits */
+};
+
+struct brw_gs_compile { /* working state used while generating one GS program */
+ struct brw_compile func; /* instruction emission state */
+ struct brw_gs_prog_key key; /* key the program is generated for */
+ struct brw_gs_prog_data prog_data; /* data stored alongside the generated program */
+
+ struct {
+ struct brw_reg R0; /* header register */
+ struct brw_reg vertex[MAX_GS_VERTS]; /* one entry per vertex handled by the program */
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ GLuint nr_attrs; /* in attributes */
+ GLuint nr_regs; /* in registers */
+ GLuint nr_bytes; /* in bytes */
+ GLboolean need_ff_sync; /* emit an ff_sync message before the vertices */
+};
+
+#define ATTR_SIZE (4*4)
+
+void brw_gs_quads( struct brw_gs_compile *c ); /* generate the GS program for quads */
+void brw_gs_quad_strip( struct brw_gs_compile *c ); /* generate the GS program for quad strips */
+void brw_gs_tris( struct brw_gs_compile *c ); /* generate the GS program for triangles */
+void brw_gs_lines( struct brw_gs_compile *c ); /* generate the GS program for lines */
+void brw_gs_points( struct brw_gs_compile *c ); /* generate the GS program for points */
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
new file mode 100644
index 0000000000..fd8e2acced
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -0,0 +1,181 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+/**
+ * Lay out the registers used by a GS program.
+ *
+ * GRF 0 becomes R0 (typed UD).  The nr_verts vertices follow, each one
+ * taking c->nr_regs registers.  The resulting URB read length and total
+ * GRF count are stored in c->prog_data.
+ */
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+                               GLuint nr_verts )
+{
+   GLuint next_grf = 0;
+   GLuint v;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(next_grf, 0), BRW_REGISTER_TYPE_UD);
+   next_grf++;
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (v = 0; v < nr_verts; v++, next_grf += c->nr_regs)
+      c->reg.vertex[v] = brw_vec4_grf(next_grf, 0);
+
+   c->prog_data.urb_read_length = c->nr_regs;
+   c->prog_data.total_grf = next_grf;
+}
+
+
+/**
+ * Emit the instructions that write one vertex to the URB.
+ *
+ * \param c       compile state
+ * \param vert    first register of the vertex to write
+ * \param last    non-zero for the final vertex: no new URB entry is
+ *                allocated and the message carries EOT
+ * \param header  value stored in element 2 of R0 (PrimType, PrimStart
+ *                and PrimEnd bits)
+ */
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+                            struct brw_reg vert,
+                            GLboolean last,
+                            GLuint header)
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean allocate = !last;
+   struct brw_reg dest;
+   int response_len;
+   int eot;
+
+   if (allocate) {
+      dest = c->reg.R0;
+      response_len = 1;
+      eot = 0;
+   }
+   else {
+      dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);
+      response_len = 0;
+      eot = 1;
+   }
+
+   /* Overwrite PrimType and PrimStart in the message header, for
+    * each vertex in turn:
+    */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+   /* Copy the vertex from vertn into m1..mN+1:
+    */
+   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+   /* Send each vertex as a separate write to the urb.  This is
+    * different to the concept in brw_sf_emit.c, where subsequent
+    * writes are used to build up a single urb entry.  Each of these
+    * writes instantiates a separate urb entry, and a new one must be
+    * allocated each time.
+    */
+   brw_urb_WRITE(p,
+                 dest,
+                 0,
+                 c->reg.R0,
+                 allocate,
+                 1,               /* used */
+                 c->nr_regs + 1,  /* msg length */
+                 response_len,    /* response length */
+                 eot,             /* eot */
+                 1,               /* writes_complete */
+                 0,               /* urb offset */
+                 BRW_URB_SWIZZLE_NONE);
+}
+
+/**
+ * Emit an ff_sync message.
+ *
+ * num_prim is first stored in element 1 of R0; R0 then serves as both
+ * the destination and the source of the message.
+ */
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg r0 = c->reg.R0;
+
+   brw_MOV(p, get_element_ud(r0, 1), brw_imm_ud(num_prim));
+
+   brw_ff_sync(p,
+               r0,   /* dest */
+               0,
+               r0,   /* src */
+               1,
+               1,    /* used */
+               1,    /* msg length */
+               1,    /* response length */
+               0,    /* eot */
+               1,    /* write complete */
+               0,    /* urb offset */
+               BRW_URB_SWIZZLE_NONE);
+}
+
+
+/**
+ * Generate the GS program for quads: the four payload vertices are
+ * written out as a single polygon.
+ *
+ * Use polygons for correct edgeflag behaviour.  Note that vertex 3 is
+ * the PV for quads, but vertex 0 for polygons, which is why vertex 3
+ * is emitted first.
+ */
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+   const GLuint prim = (_3DPRIM_POLYGON << 2);
+
+   brw_gs_alloc_regs(c, 4);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, (prim | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, prim);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, prim);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, (prim | R02_PRIM_END));
+}
+
+/**
+ * Generate the GS program for quad strips: the four payload vertices
+ * are written out as a single polygon in the order 2, 3, 0, 1.
+ */
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+   const GLuint prim = (_3DPRIM_POLYGON << 2);
+
+   brw_gs_alloc_regs(c, 4);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[2], 0, (prim | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, prim);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, prim);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, (prim | R02_PRIM_END));
+}
+
+/**
+ * Generate the GS program for triangles: the three payload vertices
+ * are written out unchanged as a _3DPRIM_TRILIST primitive.
+ */
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+   const GLuint prim = (_3DPRIM_TRILIST << 2);
+
+   brw_gs_alloc_regs(c, 3);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, (prim | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, prim);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, (prim | R02_PRIM_END));
+}
+
+/**
+ * Generate the GS program for lines: the two payload vertices are
+ * written out unchanged as a _3DPRIM_LINESTRIP primitive.
+ */
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+   const GLuint prim = (_3DPRIM_LINESTRIP << 2);
+
+   brw_gs_alloc_regs(c, 2);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, (prim | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, (prim | R02_PRIM_END));
+}
+
+/**
+ * Generate the GS program for points: the single payload vertex is
+ * written out as a _3DPRIM_POINTLIST primitive that both starts and
+ * ends with that vertex.
+ */
+void brw_gs_points( struct brw_gs_compile *c )
+{
+   const GLuint prim = (_3DPRIM_POINTLIST << 2);
+
+   brw_gs_alloc_regs(c, 1);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[0], 1,
+                   (prim | R02_PRIM_START | R02_PRIM_END));
+}
+
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
new file mode 100644
index 0000000000..b64ec286ce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -0,0 +1,169 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+struct brw_gs_unit_key { /* cache key for the GS unit state */
+ unsigned int total_grf; /* GRF count of the GS program, 1 when no program is active */
+ unsigned int urb_entry_read_length; /* URB read length of the GS program, 1 when no program is active */
+
+ unsigned int curbe_offset; /* taken from brw->curbe.clip_start */
+
+ unsigned int nr_urb_entries, urb_size; /* taken from brw->urb.nr_gs_entries and brw->urb.vsize */
+ GLboolean prog_active; /* copy of brw->gs.prog_active */
+};
+
+/**
+ * Fill in the GS unit key from the current context state.
+ *
+ * The key is cleared first.  When no GS program is active, total_grf
+ * and urb_entry_read_length both default to 1.
+ */
+static void
+gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_GS_PROG */
+   key->prog_active = brw->gs.prog_active;
+   key->total_grf = key->prog_active ? brw->gs.prog_data->total_grf : 1;
+   key->urb_entry_read_length =
+      key->prog_active ? brw->gs.prog_data->urb_read_length : 1;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_gs_entries;
+   key->urb_size = brw->urb.vsize;
+}
+
+/**
+ * Build a GS unit state structure from `key` and upload it to the
+ * state cache.
+ *
+ * \param brw       context
+ * \param key       unit key, also used as the cache key
+ * \param reloc     relocations passed on to brw_upload_cache()
+ * \param nr_reloc  number of entries in reloc
+ * \param bo_out    receives the buffer from brw_upload_cache()
+ *
+ * \return PIPE_OK, or the error reported by brw_upload_cache().
+ */
+static enum pipe_error
+gs_unit_create_from_key(struct brw_context *brw,
+                        struct brw_gs_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
+                        unsigned nr_reloc,
+                        struct brw_winsys_buffer **bo_out)
+{
+   struct brw_gs_unit_state gs;
+   enum pipe_error ret;
+
+   memset(&gs, 0, sizeof(gs));
+
+   /* thread0: the kernel pointer is left at zero here.
+    * NOTE(review): presumably filled in through the relocation on
+    * thread0 -- confirm.
+    */
+   gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   gs.thread0.kernel_start_pointer = 0;
+
+   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   gs.thread1.single_program_flow = 1;
+
+   gs.thread3.dispatch_grf_start_reg = 1;
+   gs.thread3.const_urb_entry_read_offset = 0;
+   gs.thread3.const_urb_entry_read_length = 0;
+   gs.thread3.urb_entry_read_offset = 0;
+   gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   gs.thread4.nr_urb_entries = key->nr_urb_entries;
+   gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   gs.thread4.max_threads = (key->nr_urb_entries >= 8) ? 1 : 0;
+
+   if (BRW_IS_IGDNG(brw))
+      gs.thread4.rendering_enable = 1;
+
+   if (BRW_DEBUG & DEBUG_STATS)
+      gs.thread4.stats_enable = 1;
+
+   ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+                          key, sizeof(*key),
+                          reloc, nr_reloc,
+                          &gs, sizeof(gs),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/**
+ * Make sure GS unit state matching the current context is available in
+ * brw->gs.state_bo, taking it from the cache when possible.
+ *
+ * When a GS program is active, a relocation on thread0 pointing at the
+ * program buffer is part of both the lookup and the upload.
+ *
+ * \return PIPE_OK, or the error from gs_unit_create_from_key().
+ */
+static enum pipe_error prepare_gs_unit(struct brw_context *brw)
+{
+   struct brw_gs_unit_key key;
+   struct brw_winsys_reloc reloc[1];
+   unsigned nr_reloc = 0;
+   unsigned grf_reg_count;
+   enum pipe_error ret;
+
+   gs_unit_populate_key(brw, &key);
+
+   grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+
+   /* GS program relocation.
+    * NOTE(review): the delta presumably keeps the grf_reg_count bits
+    * that share thread0 with the kernel pointer -- confirm against
+    * struct brw_gs_unit_state.
+    */
+   if (key.prog_active) {
+      make_reloc(&reloc[nr_reloc],
+                 BRW_USAGE_STATE,
+                 grf_reg_count << 1,
+                 offsetof(struct brw_gs_unit_state, thread0),
+                 brw->gs.prog_bo);
+      nr_reloc++;
+   }
+
+   /* Cache hit: nothing to upload.
+    */
+   if (brw_search_cache(&brw->cache, BRW_GS_UNIT,
+                        &key, sizeof(key),
+                        reloc, nr_reloc,
+                        NULL,
+                        &brw->gs.state_bo))
+      return PIPE_OK;
+
+   ret = gs_unit_create_from_key(brw, &key,
+                                 reloc, nr_reloc,
+                                 &brw->gs.state_bo);
+
+   return ret ? ret : PIPE_OK;
+}
+
+const struct brw_tracked_state brw_gs_unit = { /* state atom for the GS unit state; only a prepare hook is set */
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_CURBE_OFFSETS | /* key reads brw->curbe.clip_start */
+ BRW_NEW_URB_FENCE), /* key reads brw->urb.nr_gs_entries and brw->urb.vsize */
+ .cache = CACHE_NEW_GS_PROG /* key reads brw->gs.prog_active and brw->gs.prog_data */
+ },
+ .prepare = prepare_gs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
new file mode 100644
index 0000000000..e4b24229db
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -0,0 +1,513 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_debug.h"
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_screen.h"
+#include "brw_pipe_rast.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+/**
+ * Emit the blend constant color packet stored in brw->curr.bcc through
+ * BRW_CACHED_BATCH_STRUCT.
+ *
+ * \return always 0.
+ */
+static int
+upload_blend_constant_color(struct brw_context *brw)
+{
+   BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc);
+
+   return 0;
+}
+
+
+const struct brw_tracked_state brw_blend_constant_color = { /* state atom for the blend constant color; only an emit hook is set */
+ .dirty = {
+ .mesa = PIPE_NEW_BLEND_COLOR, /* the only dirty flag this atom reacts to */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_blend_constant_color
+};
+
+/***********************************************************************
+ * Drawing rectangle - framebuffer dimensions
+ */
+/**
+ * Emit the drawing rectangle packet covering the whole framebuffer.
+ *
+ * The third dword carries (width - 1) in its low 16 bits and
+ * (height - 1) shifted up by 16; the second and fourth dwords are
+ * zero.
+ *
+ * \return always 0.
+ */
+static int upload_drawing_rect(struct brw_context *brw)
+{
+   BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+   OUT_BATCH(0);
+   OUT_BATCH(((brw->curr.fb.width - 1) & 0xffff) |
+             ((brw->curr.fb.height - 1) << 16));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_drawing_rect = { /* state atom for the drawing rectangle; only an emit hook is set */
+ .dirty = {
+ .mesa = PIPE_NEW_FRAMEBUFFER_DIMENSIONS, /* the emit reads brw->curr.fb.width and height */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_drawing_rect
+};
+
+
+/***********************************************************************
+ * Binding table pointers
+ */
+
+/**
+ * Register the VS and WM binding table buffers with
+ * brw_add_validated_bo() before the pointers are emitted.
+ *
+ * \return always 0.
+ */
+static int
+prepare_binding_table_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->vs.bind_bo);
+   brw_add_validated_bo(brw, brw->wm.bind_bo);
+
+   return 0;
+}
+
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ *
+ * Only the VS and WM stages get a relocation; the GS, clip and SF
+ * entries are written as 0, and so is the VS entry when there is no
+ * VS binding table buffer.
+ */
+static int upload_binding_table_pointers(struct brw_context *brw)
+{
+   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+
+   /* vs */
+   if (brw->vs.bind_bo != NULL) {
+      OUT_RELOC(brw->vs.bind_bo,
+                BRW_USAGE_SAMPLER,
+                0);
+   }
+   else {
+      OUT_BATCH(0);
+   }
+
+   OUT_BATCH(0); /* gs */
+   OUT_BATCH(0); /* clip */
+   OUT_BATCH(0); /* sf */
+
+   /* wm/ps */
+   OUT_RELOC(brw->wm.bind_bo,
+             BRW_USAGE_SAMPLER,
+             0);
+   ADVANCE_BATCH();
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = { /* state atom for the binding table pointers packet */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = CACHE_NEW_SURF_BIND,
+ },
+ .prepare = prepare_binding_table_pointers, /* registers the VS and WM binding table buffers */
+ .emit = upload_binding_table_pointers, /* writes the packet into the batch */
+};
+
+
+/**********************************************************************
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
+ *
+ * The GS pointer is only emitted while a GS program is active and is
+ * written as 0 otherwise.  BRW_NEW_PSP is raised once the packet has
+ * been emitted.
+ */
+static int upload_pipelined_state_pointers(struct brw_context *brw )
+{
+   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+
+   OUT_RELOC(brw->vs.state_bo, BRW_USAGE_STATE, 0);
+
+   /* NOTE(review): the GS and clip pointers use a delta of 1,
+    * presumably an enable bit in the pointer dword -- confirm.
+    */
+   if (brw->gs.prog_active) {
+      OUT_RELOC(brw->gs.state_bo, BRW_USAGE_STATE, 1);
+   }
+   else {
+      OUT_BATCH(0);
+   }
+
+   OUT_RELOC(brw->clip.state_bo, BRW_USAGE_STATE, 1);
+   OUT_RELOC(brw->sf.state_bo, BRW_USAGE_STATE, 0);
+   OUT_RELOC(brw->wm.state_bo, BRW_USAGE_STATE, 0);
+   OUT_RELOC(brw->cc.state_bo, BRW_USAGE_STATE, 0);
+   ADVANCE_BATCH();
+
+   brw->state.dirty.brw |= BRW_NEW_PSP;
+
+   return 0;
+}
+
+
+/**
+ * Register the unit state buffers of all six stages (VS, GS, clip, SF,
+ * WM, CC) with brw_add_validated_bo().
+ *
+ * \return always 0.
+ */
+static int
+prepare_psp_urb_cbs(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->vs.state_bo);
+   brw_add_validated_bo(brw, brw->gs.state_bo);
+   brw_add_validated_bo(brw, brw->clip.state_bo);
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+   brw_add_validated_bo(brw, brw->wm.state_bo);
+   brw_add_validated_bo(brw, brw->cc.state_bo);
+
+   return 0;
+}
+
+/**
+ * Emit, in this order, the pipelined state pointers, the URB fence and
+ * the CS URB state.
+ *
+ * \return 0 on success; the first non-zero result of the three steps
+ *         stops the sequence and is returned.
+ */
+static int upload_psp_urb_cbs(struct brw_context *brw )
+{
+   int ret = upload_pipelined_state_pointers(brw);
+
+   if (ret == 0)
+      ret = brw_upload_urb_fence(brw);
+
+   if (ret == 0)
+      ret = brw_upload_cs_urb_state(brw);
+
+   return ret;
+}
+
+const struct brw_tracked_state brw_psp_urb_cbs = { /* state atom emitting pipelined state pointers, URB fence and CS URB state together */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+ .cache = (CACHE_NEW_VS_UNIT | /* one flag per unit state buffer referenced by the packet */
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG | /* the GS pointer depends on brw->gs.prog_active */
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT)
+ },
+ .prepare = prepare_psp_urb_cbs, /* registers the unit state buffers */
+ .emit = upload_psp_urb_cbs, /* writes the three packets */
+};
+
+
+/***********************************************************************
+ * Depth buffer
+ */
+
+/**
+ * Register the buffer of the bound depth/stencil surface, if there is
+ * one, with brw_add_validated_bo().
+ *
+ * \return always 0.
+ */
+static int prepare_depthbuffer(struct brw_context *brw)
+{
+   struct pipe_surface *zsbuf = brw->curr.fb.zsbuf;
+
+   if (zsbuf != NULL)
+      brw_add_validated_bo(brw, brw_surface(zsbuf)->bo);
+
+   return 0;
+}
+
+/**
+ * Emit the CMD_DEPTH_BUFFER packet for the bound depth/stencil surface.
+ *
+ * The packet is 6 dwords long on G4X and IGDNG and 5 dwords long
+ * otherwise.  Without a bound surface a BRW_SURFACE_NULL packet is
+ * emitted instead.
+ *
+ * \return 0 on success, PIPE_ERROR_BAD_INPUT (before anything is
+ *         emitted) when the surface format is not a handled depth
+ *         format.
+ */
+static int emit_depthbuffer(struct brw_context *brw)
+{
+   struct pipe_surface *surface = brw->curr.fb.zsbuf;
+   const int extra_dword = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw));
+   unsigned int len = extra_dword ? 6 : 5;
+   struct brw_winsys_buffer *bo;
+   unsigned int format;
+   unsigned int pitch;
+   unsigned int cpp;
+
+   /* No depth buffer bound: emit a null surface.
+    */
+   if (surface == NULL) {
+      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+                (BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+
+      if (extra_dword)
+         OUT_BATCH(0);
+
+      ADVANCE_BATCH();
+      return 0;
+   }
+
+   /* Translate the pipe format into a hardware depth format and its
+    * size in bytes per pixel.
+    */
+   switch (surface->format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      format = BRW_DEPTHFORMAT_D16_UNORM;
+      cpp = 2;
+      break;
+
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+      format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+      cpp = 4;
+      break;
+
+   case PIPE_FORMAT_Z32_FLOAT:
+      format = BRW_DEPTHFORMAT_D32_FLOAT;
+      cpp = 4;
+      break;
+
+   default:
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   bo = brw_surface(surface)->bo;
+   pitch = brw_surface(surface)->pitch;
+
+   BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+   OUT_BATCH(((pitch * cpp) - 1) |
+             (format << 18) |
+             (BRW_TILEWALK_YMAJOR << 26) |
+             ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
+             (BRW_SURFACE_2D << 29));
+   OUT_RELOC(bo,
+             BRW_USAGE_DEPTH_BUFFER,
+             surface->offset);
+   OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+             ((pitch - 1) << 6) |
+             ((surface->height - 1) << 19));
+   OUT_BATCH(0);
+
+   if (extra_dword)
+      OUT_BATCH(0);
+
+   ADVANCE_BATCH();
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_depthbuffer = { /* state atom for the depth buffer packet */
+ .dirty = {
+ .mesa = PIPE_NEW_DEPTH_BUFFER, /* both hooks read brw->curr.fb.zsbuf */
+ .brw = BRW_NEW_BATCH,
+ .cache = 0,
+ },
+ .prepare = prepare_depthbuffer, /* registers the depth surface buffer */
+ .emit = emit_depthbuffer, /* writes the packet into the batch */
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+/**
+ * Emit the polygon stipple packet stored in brw->curr.bps through
+ * BRW_CACHED_BATCH_STRUCT.
+ *
+ * \return always 0.
+ */
+static int
+upload_polygon_stipple(struct brw_context *brw)
+{
+   BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps);
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_polygon_stipple = { /* state atom for the polygon stipple packet; only an emit hook is set */
+ .dirty = {
+ .mesa = PIPE_NEW_POLYGON_STIPPLE, /* the only dirty flag this atom reacts to */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+/**
+ * Emit the line stipple packet held in the current rasterizer state.
+ *
+ * Nothing is emitted while the packet's header opcode is zero.
+ *
+ * \return always 0.
+ */
+static int upload_line_stipple(struct brw_context *brw)
+{
+   const struct brw_line_stipple *bls = &brw->curr.rast->bls;
+
+   if (!bls->header.opcode)
+      return 0;
+
+   BRW_CACHED_BATCH_STRUCT(brw, bls);
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_line_stipple = { /* state atom for the line stipple packet; only an emit hook is set */
+ .dirty = {
+ .mesa = PIPE_NEW_RAST, /* the packet lives in brw->curr.rast */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_line_stipple
+};
+
+
+/***********************************************************************
+ * Misc invariant state packets
+ */
+
+/**
+ * Emit the packets that are only sent once per context: pipeline
+ * select, global depth offset clamp, system instruction pointer, VF
+ * statistics, AA line parameters (skipped when BRW_IS_965() is true)
+ * and the polygon stipple offset.
+ *
+ * Each packet is built in a zeroed local structure and handed to
+ * BRW_BATCH_STRUCT.
+ *
+ * \return always 0.
+ */
+static int upload_invarient_state( struct brw_context *brw )
+{
+   {
+      /* 0x61040000  Pipeline Select */
+      /*     PipelineSelect            : 0 */
+      struct brw_pipeline_select select;
+
+      memset(&select, 0, sizeof(select));
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+         select.header.opcode = CMD_PIPELINE_SELECT_GM45;
+      else
+         select.header.opcode = CMD_PIPELINE_SELECT_965;
+      select.header.pipeline_select = 0;
+      BRW_BATCH_STRUCT(brw, &select);
+   }
+
+   {
+      struct brw_global_depth_offset_clamp clamp;
+
+      /* Disable depth offset clamping.
+       */
+      memset(&clamp, 0, sizeof(clamp));
+      clamp.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+      clamp.header.length = sizeof(clamp)/4 - 2;
+      clamp.depth_offset_clamp = 0.0;
+
+      BRW_BATCH_STRUCT(brw, &clamp);
+   }
+
+   /* 0x61020000  State Instruction Pointer */
+   {
+      struct brw_system_instruction_pointer insn_ptr;
+
+      memset(&insn_ptr, 0, sizeof(insn_ptr));
+      insn_ptr.header.opcode = CMD_STATE_INSN_POINTER;
+      insn_ptr.header.length = 0;
+      insn_ptr.bits0.pad = 0;
+      insn_ptr.bits0.system_instruction_pointer = 0;
+      BRW_BATCH_STRUCT(brw, &insn_ptr);
+   }
+
+   /* VF Statistics */
+   {
+      struct brw_vf_statistics vf_stats;
+
+      memset(&vf_stats, 0, sizeof(vf_stats));
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+         vf_stats.opcode = CMD_VF_STATISTICS_GM45;
+      else
+         vf_stats.opcode = CMD_VF_STATISTICS_965;
+
+      if (BRW_DEBUG & DEBUG_STATS)
+         vf_stats.statistics_enable = 1;
+
+      BRW_BATCH_STRUCT(brw, &vf_stats);
+   }
+
+   if (!BRW_IS_965(brw)) {
+      struct brw_aa_line_parameters aa_line;
+
+      /* use legacy aa line coverage computation */
+      memset(&aa_line, 0, sizeof(aa_line));
+      aa_line.header.opcode = CMD_AA_LINE_PARAMETERS;
+      aa_line.header.length = sizeof(aa_line) / 4 - 2;
+
+      BRW_BATCH_STRUCT(brw, &aa_line);
+   }
+
+   {
+      struct brw_polygon_stipple_offset stipple_offset;
+
+      /* This is invariant state in gallium:
+       */
+      memset(&stipple_offset, 0, sizeof(stipple_offset));
+      stipple_offset.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+      stipple_offset.header.length = sizeof(stipple_offset)/4-2;
+      stipple_offset.bits0.y_offset = 0;
+      stipple_offset.bits0.x_offset = 0;
+
+      BRW_BATCH_STRUCT(brw, &stipple_offset);
+   }
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_invarient_state = { /* state atom for the once-per-context packets; only an emit hook is set */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT, /* the only dirty flag this atom reacts to */
+ .cache = 0
+ },
+ .emit = upload_invarient_state
+};
+
+
+/***********************************************************************
+ * State base address
+ */
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools.  This comes at the expense of memory, and more expensive cache
+ * misses.
+ *
+ * The packet has 8 dwords on IGDNG, which adds an instruction base
+ * address and an instruction access upper bound, and 6 dwords
+ * otherwise.  Every address and bound is written as the constant 1.
+ * NOTE(review): presumably a zero address with the modify-enable bit
+ * set -- confirm against the hardware documentation.
+ */
+static int upload_state_base_address( struct brw_context *brw )
+{
+   /* The packet is written dword by dword straight into the
+    * batchbuffer rather than through struct brw_state_base_address.
+    */
+   if (!BRW_IS_IGDNG(brw)) {
+      BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+      OUT_BATCH(1); /* General state base address */
+      OUT_BATCH(1); /* Surface state base address */
+      OUT_BATCH(1); /* Indirect object base address */
+      OUT_BATCH(1); /* General state upper bound */
+      OUT_BATCH(1); /* Indirect object upper bound */
+      ADVANCE_BATCH();
+   }
+   else {
+      BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+      OUT_BATCH(1); /* General state base address */
+      OUT_BATCH(1); /* Surface state base address */
+      OUT_BATCH(1); /* Indirect object base address */
+      OUT_BATCH(1); /* Instruction base address */
+      OUT_BATCH(1); /* General state upper bound */
+      OUT_BATCH(1); /* Indirect object upper bound */
+      OUT_BATCH(1); /* Instruction access upper bound */
+      ADVANCE_BATCH();
+   }
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_state_base_address = { /* state atom for the state base address packet; only an emit hook is set */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT, /* the only dirty flag this atom reacts to */
+ .cache = 0,
+ },
+ .emit = upload_state_base_address
+};
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
new file mode 100644
index 0000000000..b759a910b6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -0,0 +1,208 @@
+
+#include "util/u_memory.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+/**
+ * Translate a PIPE_LOGICOP_* value into the matching
+ * BRW_LOGICOPFUNCTION_* code.
+ *
+ * Unrecognised values trip an assert and fall back to
+ * BRW_LOGICOPFUNCTION_SET.
+ */
+static int translate_logicop(unsigned logicop)
+{
+   int func = BRW_LOGICOPFUNCTION_SET;
+
+   switch (logicop) {
+   case PIPE_LOGICOP_CLEAR:         func = BRW_LOGICOPFUNCTION_CLEAR;         break;
+   case PIPE_LOGICOP_AND:           func = BRW_LOGICOPFUNCTION_AND;           break;
+   case PIPE_LOGICOP_AND_REVERSE:   func = BRW_LOGICOPFUNCTION_AND_REVERSE;   break;
+   case PIPE_LOGICOP_COPY:          func = BRW_LOGICOPFUNCTION_COPY;          break;
+   case PIPE_LOGICOP_COPY_INVERTED: func = BRW_LOGICOPFUNCTION_COPY_INVERTED; break;
+   case PIPE_LOGICOP_AND_INVERTED:  func = BRW_LOGICOPFUNCTION_AND_INVERTED;  break;
+   case PIPE_LOGICOP_NOOP:          func = BRW_LOGICOPFUNCTION_NOOP;          break;
+   case PIPE_LOGICOP_XOR:           func = BRW_LOGICOPFUNCTION_XOR;           break;
+   case PIPE_LOGICOP_OR:            func = BRW_LOGICOPFUNCTION_OR;            break;
+   case PIPE_LOGICOP_OR_INVERTED:   func = BRW_LOGICOPFUNCTION_OR_INVERTED;   break;
+   case PIPE_LOGICOP_NOR:           func = BRW_LOGICOPFUNCTION_NOR;           break;
+   case PIPE_LOGICOP_EQUIV:         func = BRW_LOGICOPFUNCTION_EQUIV;         break;
+   case PIPE_LOGICOP_INVERT:        func = BRW_LOGICOPFUNCTION_INVERT;        break;
+   case PIPE_LOGICOP_OR_REVERSE:    func = BRW_LOGICOPFUNCTION_OR_REVERSE;    break;
+   case PIPE_LOGICOP_NAND:          func = BRW_LOGICOPFUNCTION_NAND;          break;
+   case PIPE_LOGICOP_SET:           func = BRW_LOGICOPFUNCTION_SET;           break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return func;
+}
+
+
+/**
+ * Translate a PIPE_BLEND_* equation into the matching BRW_BLENDFUNCTION_*
+ * code.  Unrecognised values assert and fall back to BRW_BLENDFUNCTION_ADD.
+ */
+static unsigned translate_blend_equation( unsigned mode )
+{
+   switch (mode) {
+   case PIPE_BLEND_ADD:              return BRW_BLENDFUNCTION_ADD;
+   case PIPE_BLEND_MIN:              return BRW_BLENDFUNCTION_MIN;
+   case PIPE_BLEND_MAX:              return BRW_BLENDFUNCTION_MAX;
+   case PIPE_BLEND_SUBTRACT:         return BRW_BLENDFUNCTION_SUBTRACT;
+   case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+   default:
+      break;
+   }
+
+   assert(0);
+   return BRW_BLENDFUNCTION_ADD;
+}
+
+/**
+ * Translate a PIPE_BLENDFACTOR_* value into the matching BRW_BLENDFACTOR_*
+ * code.  Unrecognised values assert and fall back to BRW_BLENDFACTOR_ZERO.
+ */
+static unsigned translate_blend_factor( unsigned factor )
+{
+   unsigned hw = BRW_BLENDFACTOR_ZERO;
+
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:               hw = BRW_BLENDFACTOR_ZERO;               break;
+   case PIPE_BLENDFACTOR_ONE:                hw = BRW_BLENDFACTOR_ONE;                break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:          hw = BRW_BLENDFACTOR_SRC_COLOR;          break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      hw = BRW_BLENDFACTOR_INV_SRC_COLOR;      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:          hw = BRW_BLENDFACTOR_SRC_ALPHA;          break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      hw = BRW_BLENDFACTOR_INV_SRC_ALPHA;      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: hw = BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; break;
+   case PIPE_BLENDFACTOR_DST_COLOR:          hw = BRW_BLENDFACTOR_DST_COLOR;          break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:      hw = BRW_BLENDFACTOR_INV_DST_COLOR;      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:          hw = BRW_BLENDFACTOR_DST_ALPHA;          break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      hw = BRW_BLENDFACTOR_INV_DST_ALPHA;      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:        hw = BRW_BLENDFACTOR_CONST_COLOR;        break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    hw = BRW_BLENDFACTOR_INV_CONST_COLOR;    break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:        hw = BRW_BLENDFACTOR_CONST_ALPHA;        break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    hw = BRW_BLENDFACTOR_INV_CONST_ALPHA;    break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return hw;
+}
+
+/**
+ * Build a brw_blend_state from a gallium blend template.
+ *
+ * Logic-op wins over blending: the blend factors and functions are only
+ * translated when logicop_enable is clear and blend_enable is set.
+ * Dithering, the statistics bit (when DEBUG_STATS is set in BRW_DEBUG) and
+ * the per-channel write disables are filled in for every template.
+ *
+ * \return the new state object, or NULL if CALLOC_STRUCT fails.
+ */
+static void *brw_create_blend_state( struct pipe_context *pipe,
+                                     const struct pipe_blend_state *templ )
+{
+   struct brw_blend_state *bs = CALLOC_STRUCT(brw_blend_state);
+
+   if (!bs)
+      return NULL;
+
+   if (templ->logicop_enable) {
+      bs->cc5.logicop_func = translate_logicop(templ->logicop_func);
+      bs->cc2.logicop_enable = 1;
+   }
+   else if (templ->blend_enable) {
+      /* RGB equation and factors. */
+      bs->cc6.blend_function = translate_blend_equation(templ->rgb_func);
+      bs->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor);
+      bs->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor);
+
+      /* Alpha equation and factors go into the ia_* fields. */
+      bs->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func);
+      bs->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor);
+      bs->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor);
+
+      bs->cc3.blend_enable = 1;
+
+      /* ia_blend_enable is set only when the alpha setup differs from
+       * the RGB setup in at least one of the three fields.
+       */
+      bs->cc3.ia_blend_enable =
+         (bs->cc6.blend_function != bs->cc5.ia_blend_function ||
+          bs->cc6.src_blend_factor != bs->cc5.ia_src_blend_factor ||
+          bs->cc6.dest_blend_factor != bs->cc5.ia_dest_blend_factor);
+
+      /* The per-surface blend enable simply follows the global state. */
+      bs->ss0.color_blend = 1;
+   }
+
+   bs->cc5.dither_enable = templ->dither;
+
+   if (BRW_DEBUG & DEBUG_STATS)
+      bs->cc5.statistics_enable = 1;
+
+   /* Per-surface color mask follows the global state: a channel missing
+    * from colormask gets its write-disable bit set.
+    */
+   bs->ss0.writedisable_red   = !(templ->colormask & PIPE_MASK_R);
+   bs->ss0.writedisable_green = !(templ->colormask & PIPE_MASK_G);
+   bs->ss0.writedisable_blue  = !(templ->colormask & PIPE_MASK_B);
+   bs->ss0.writedisable_alpha = !(templ->colormask & PIPE_MASK_A);
+
+   return bs;
+}
+
+/* Make 'cso' the current blend state and flag PIPE_NEW_BLEND. */
+static void brw_bind_blend_state(struct pipe_context *pipe,
+                                 void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   const struct brw_blend_state *blend = cso;
+
+   brw->curr.blend = blend;
+   brw->state.dirty.mesa |= PIPE_NEW_BLEND;
+}
+
+/* Free a blend CSO.  Asserts that it is not the currently bound one. */
+static void brw_delete_blend_state(struct pipe_context *pipe,
+                                   void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   assert((const void *)brw->curr.blend != (const void *)cso);
+   FREE(cso);
+}
+
+
+/* Copy the four blend-constant components into the context's
+ * blend-constant-color packet and flag PIPE_NEW_BLEND_COLOR.
+ */
+static void brw_set_blend_color(struct pipe_context *pipe,
+                                const struct pipe_blend_color *blend_color)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_blend_constant_color *bcc = &brw->curr.bcc;
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++)
+      bcc->blend_constant_color[chan] = blend_color->color[chan];
+
+   brw->state.dirty.mesa |= PIPE_NEW_BLEND_COLOR;
+}
+
+
+/* Install the blend-related pipe_context hooks and initialise the
+ * blend-constant-color packet kept in brw->curr.bcc.
+ */
+void brw_pipe_blend_init( struct brw_context *brw )
+{
+   struct brw_blend_constant_color *bcc = &brw->curr.bcc;
+
+   /* Start from an all-zero packet with only the header filled in. */
+   memset(bcc, 0, sizeof(*bcc));
+   bcc->header.length = sizeof(*bcc)/4-2;
+   bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR;
+
+   brw->base.create_blend_state = brw_create_blend_state;
+   brw->base.bind_blend_state = brw_bind_blend_state;
+   brw->base.delete_blend_state = brw_delete_blend_state;
+   brw->base.set_blend_color = brw_set_blend_color;
+}
+/* Cleanup hook paired by name with brw_pipe_blend_init(); the body is empty. */
+void brw_pipe_blend_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
new file mode 100644
index 0000000000..452e1e89f9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -0,0 +1,218 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_pack_color.h"
+
+#include "pipe/p_state.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_screen.h"
+#include "brw_context.h"
+
+#define MASK16 0xffff
+#define MASK24 0xffffff
+
+
+/**
+ * Emit an XY_COLOR_BLT that fills the whole of 'surface' with 'value'.
+ * Only surfaces with cpp 4 or 2 are handled; any other cpp asserts.
+ * Every path returns 0, so the flush-and-retry code in the callers is never
+ * taken as the function is written.
+ * \param value fill value, passed through unchanged as the last batch dword
+ */
+static enum pipe_error
+try_clear( struct brw_context *brw,
+ struct brw_surface *surface,
+ unsigned value )
+{
+ uint32_t BR13, CMD;
+ int x1 = 0;
+ int y1 = 0;
+ int x2 = surface->base.width;
+ int y2 = surface->base.height;
+ int pitch = surface->pitch;
+ int cpp = surface->cpp;
+
+ if (x2 == 0 || y2 == 0) /* empty surface: nothing to emit */
+ return 0;
+
+ debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+ __FUNCTION__,
+ (void *)surface->bo, pitch * cpp,
+ surface->base.offset,
+ x1, y1, x2 - x1, y2 - y1);
+
+ BR13 = 0xf0 << 16; /* NOTE(review): 0xf0 is presumably the blitter raster-op - confirm */
+ CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA;
+
+ /* Setup the blit command */
+ if (cpp == 4) {
+ BR13 |= BR13_8888;
+ CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; /* already set above; this OR is redundant */
+ }
+ else {
+ assert(cpp == 2);
+ BR13 |= BR13_565;
+ }
+
+ /* XXX: nasty hack for clearing depth buffers
+ */
+ if (surface->tiling == BRW_TILING_Y) {
+ x2 = pitch;
+ }
+
+ if (surface->tiling == BRW_TILING_X) {
+ CMD |= XY_DST_TILED;
+ pitch /= 4;
+ }
+
+ BR13 |= (pitch * cpp); /* assumes surface->pitch is in pixels - TODO confirm */
+
+ BEGIN_BATCH(6, 0);
+ OUT_BATCH(CMD);
+ OUT_BATCH(BR13);
+ OUT_BATCH((y1 << 16) | x1);
+ OUT_BATCH((y2 << 16) | x2);
+ OUT_RELOC(surface->bo,
+ BRW_USAGE_BLIT_DEST,
+ surface->base.offset);
+ OUT_BATCH(value);
+ ADVANCE_BATCH();
+
+ return 0;
+}
+
+
+
+
+/* Pack 'rgba' for the surface's format and blit-clear the whole surface.
+ * If try_clear() reports failure, flush the context and try exactly once
+ * more.
+ */
+static void color_clear(struct brw_context *brw,
+                        struct brw_surface *bsurface,
+                        const float *rgba )
+{
+   union util_color packed;
+
+   util_pack_color( rgba, bsurface->base.format, &packed );
+
+   /* cpp == 2: replicate the pixel into both halves of the 32-bit value. */
+   if (bsurface->cpp == 2)
+      packed.ui = packed.ui | (packed.ui << 16);
+
+   if (try_clear( brw, bsurface, packed.ui ) != 0) {
+      enum pipe_error ret;
+
+      brw_context_flush( brw );
+      ret = try_clear( brw, bsurface, packed.ui );
+      assert( ret == 0 );
+   }
+}
+
+/* Build the 32-bit fill value for a depth/stencil surface and blit-clear it.
+ *
+ *  - X8Z24 / S8Z24: depth scaled to 24 bits, stencil shifted into bits 24+.
+ *  - Z16: depth scaled to 16 bits and replicated into both halves.
+ *
+ * Any other format asserts and returns without clearing.  If try_clear()
+ * reports failure, the context is flushed and the clear retried once.
+ */
+static void zstencil_clear(struct brw_context *brw,
+                           struct brw_surface *bsurface,
+                           double depth,
+                           unsigned stencil )
+{
+   unsigned packed;
+
+   switch (bsurface->base.format) {
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+      packed = ((unsigned)(depth * MASK24) & MASK24);
+      packed = packed | (stencil << 24);
+      break;
+
+   case PIPE_FORMAT_Z16_UNORM:
+      packed = ((unsigned)(depth * MASK16) & MASK16);
+      packed = packed | (packed << 16);
+      break;
+
+   default:
+      assert(0);
+      return;
+   }
+
+   if (try_clear( brw, bsurface, packed ) != 0) {
+      enum pipe_error ret;
+
+      brw_context_flush( brw );
+      ret = try_clear( brw, bsurface, packed );
+      assert( ret == 0 );
+   }
+}
+
+
+
+/**
+ * pipe_context clear hook: clear the currently bound framebuffer surfaces.
+ *
+ * PIPE_CLEAR_COLOR clears each of the nr_cbufs color buffers to 'rgba';
+ * PIPE_CLEAR_DEPTHSTENCIL clears the zsbuf, if one is bound, to
+ * 'depth'/'stencil'.  No masking, no scissor (entire buffer is cleared).
+ */
+static void brw_clear(struct pipe_context *pipe,
+                      unsigned buffers,
+                      const float *rgba,
+                      double depth,
+                      unsigned stencil)
+{
+   struct brw_context *brw = brw_context( pipe );
+
+   if (buffers & PIPE_CLEAR_COLOR) {
+      int i;
+
+      for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
+         color_clear( brw, brw_surface(brw->curr.fb.cbufs[i]), rgba );
+   }
+
+   if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && brw->curr.fb.zsbuf)
+      zstencil_clear( brw, brw_surface(brw->curr.fb.zsbuf), depth, stencil );
+}
+
+/* Install brw_clear as the context's clear hook. */
+void brw_pipe_clear_init( struct brw_context *brw )
+{
+ brw->base.clear = brw_clear;
+}
+
+/* Cleanup hook paired by name with brw_pipe_clear_init(); the body is empty. */
+void brw_pipe_clear_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c
new file mode 100644
index 0000000000..e010d76e0d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_depth.c
@@ -0,0 +1,172 @@
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+/* XXX: Fixme - include this to get IZ_ defines
+ */
+#include "brw_wm.h"
+
+/**
+ * Translate a PIPE_FUNC_* comparison into the matching
+ * BRW_COMPAREFUNCTION_* code.  Unrecognised values assert and fall back to
+ * BRW_COMPAREFUNCTION_ALWAYS.
+ */
+static unsigned brw_translate_compare_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:    return BRW_COMPAREFUNCTION_NEVER;
+   case PIPE_FUNC_LESS:     return BRW_COMPAREFUNCTION_LESS;
+   case PIPE_FUNC_EQUAL:    return BRW_COMPAREFUNCTION_EQUAL;
+   case PIPE_FUNC_LEQUAL:   return BRW_COMPAREFUNCTION_LEQUAL;
+   case PIPE_FUNC_GREATER:  return BRW_COMPAREFUNCTION_GREATER;
+   case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
+   case PIPE_FUNC_GEQUAL:   return BRW_COMPAREFUNCTION_GEQUAL;
+   case PIPE_FUNC_ALWAYS:   return BRW_COMPAREFUNCTION_ALWAYS;
+   default:
+      break;
+   }
+
+   assert(0);
+   return BRW_COMPAREFUNCTION_ALWAYS;
+}
+
+/**
+ * Translate a PIPE_STENCIL_OP_* value into the matching BRW_STENCILOP_*
+ * code.
+ *
+ * Note the naming skew: PIPE INCR/DECR map to the hardware INCRSAT/DECRSAT
+ * codes, while PIPE INCR_WRAP/DECR_WRAP map to the plain INCR/DECR codes.
+ * Unrecognised values assert and fall back to BRW_STENCILOP_ZERO.
+ */
+static unsigned translate_stencil_op(unsigned op)
+{
+   unsigned hw = BRW_STENCILOP_ZERO;
+
+   switch (op) {
+   case PIPE_STENCIL_OP_KEEP:      hw = BRW_STENCILOP_KEEP;    break;
+   case PIPE_STENCIL_OP_ZERO:      hw = BRW_STENCILOP_ZERO;    break;
+   case PIPE_STENCIL_OP_REPLACE:   hw = BRW_STENCILOP_REPLACE; break;
+   case PIPE_STENCIL_OP_INCR:      hw = BRW_STENCILOP_INCRSAT; break;
+   case PIPE_STENCIL_OP_DECR:      hw = BRW_STENCILOP_DECRSAT; break;
+   case PIPE_STENCIL_OP_INCR_WRAP: hw = BRW_STENCILOP_INCR;    break;
+   case PIPE_STENCIL_OP_DECR_WRAP: hw = BRW_STENCILOP_DECR;    break;
+   case PIPE_STENCIL_OP_INVERT:    hw = BRW_STENCILOP_INVERT;  break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return hw;
+}
+/* Fill the stencil, alpha-test and depth-test fields of 'zstencil' from 'templ'. */
+static void create_bcc_state( struct brw_depth_stencil_state *zstencil,
+ const struct pipe_depth_stencil_alpha_state *templ )
+{
+ if (templ->stencil[0].enabled) { /* stencil[1] is only looked at inside this branch */
+ zstencil->cc0.stencil_enable = 1;
+ zstencil->cc0.stencil_func =
+ brw_translate_compare_func(templ->stencil[0].func);
+ zstencil->cc0.stencil_fail_op =
+ translate_stencil_op(templ->stencil[0].fail_op);
+ zstencil->cc0.stencil_pass_depth_fail_op =
+ translate_stencil_op(templ->stencil[0].zfail_op);
+ zstencil->cc0.stencil_pass_depth_pass_op =
+ translate_stencil_op(templ->stencil[0].zpass_op);
+ zstencil->cc1.stencil_ref = templ->stencil[0].ref_value;
+ zstencil->cc1.stencil_write_mask = templ->stencil[0].writemask;
+ zstencil->cc1.stencil_test_mask = templ->stencil[0].valuemask;
+
+ if (templ->stencil[1].enabled) { /* stencil[1] fills the bf_* fields */
+ zstencil->cc0.bf_stencil_enable = 1;
+ zstencil->cc0.bf_stencil_func =
+ brw_translate_compare_func(templ->stencil[1].func);
+ zstencil->cc0.bf_stencil_fail_op =
+ translate_stencil_op(templ->stencil[1].fail_op);
+ zstencil->cc0.bf_stencil_pass_depth_fail_op =
+ translate_stencil_op(templ->stencil[1].zfail_op);
+ zstencil->cc0.bf_stencil_pass_depth_pass_op =
+ translate_stencil_op(templ->stencil[1].zpass_op);
+ zstencil->cc1.bf_stencil_ref = templ->stencil[1].ref_value;
+ zstencil->cc2.bf_stencil_write_mask = templ->stencil[1].writemask;
+ zstencil->cc2.bf_stencil_test_mask = templ->stencil[1].valuemask;
+ }
+ /* Stencil writes are enabled when either write mask is non-zero. */
+ zstencil->cc0.stencil_write_enable = (zstencil->cc1.stencil_write_mask ||
+ zstencil->cc2.bf_stencil_write_mask);
+ }
+
+
+ if (templ->alpha.enabled) {
+ zstencil->cc3.alpha_test = 1;
+ zstencil->cc3.alpha_test_func = brw_translate_compare_func(templ->alpha.func);
+ zstencil->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+ zstencil->cc7.alpha_ref.ub[0] = float_to_ubyte(templ->alpha.ref_value); /* reference stored as one byte, matching the UNORM8 format above */
+ }
+
+ if (templ->depth.enabled) {
+ zstencil->cc2.depth_test = 1;
+ zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func);
+ zstencil->cc2.depth_write_enable = templ->depth.writemask;
+ }
+}
+
+/* OR the IZ_* lookup bits implied by the already-filled cc0/cc2/cc3 fields
+ * (alpha test, depth test/write, stencil test/write) into
+ * zstencil->iz_lookup.
+ */
+static void create_wm_iz_state( struct brw_depth_stencil_state *zstencil )
+{
+   unsigned bits = 0;
+
+   if (zstencil->cc3.alpha_test)
+      bits |= IZ_PS_KILL_ALPHATEST_BIT;
+   if (zstencil->cc2.depth_test)
+      bits |= IZ_DEPTH_TEST_ENABLE_BIT;
+   if (zstencil->cc2.depth_write_enable)
+      bits |= IZ_DEPTH_WRITE_ENABLE_BIT;
+   if (zstencil->cc0.stencil_enable)
+      bits |= IZ_STENCIL_TEST_ENABLE_BIT;
+   if (zstencil->cc0.stencil_write_enable)
+      bits |= IZ_STENCIL_WRITE_ENABLE_BIT;
+
+   zstencil->iz_lookup |= bits;
+}
+
+
+/**
+ * Build a brw_depth_stencil_state from a gallium depth/stencil/alpha
+ * template.
+ *
+ * The hardware fields are filled by create_bcc_state() and the IZ lookup
+ * bits are then derived from them by create_wm_iz_state().
+ *
+ * \return the new state object, or NULL if the allocation fails.
+ */
+static void *
+brw_create_depth_stencil_state( struct pipe_context *pipe,
+                                const struct pipe_depth_stencil_alpha_state *templ )
+{
+   struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
+
+   /* Both helpers dereference 'zstencil', so bail out on allocation
+    * failure the same way brw_create_blend_state() does.
+    */
+   if (zstencil == NULL)
+      return NULL;
+
+   create_bcc_state( zstencil, templ );
+   create_wm_iz_state( zstencil );
+
+   return (void *)zstencil;
+}
+
+
+/* Make 'cso' the current depth/stencil/alpha state and flag
+ * PIPE_NEW_DEPTH_STENCIL_ALPHA.
+ */
+static void brw_bind_depth_stencil_state(struct pipe_context *pipe,
+                                         void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   const struct brw_depth_stencil_state *zstencil = cso;
+
+   brw->curr.zstencil = zstencil;
+   brw->state.dirty.mesa |= PIPE_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+/* Free a depth/stencil/alpha CSO.  Asserts that it is not the currently
+ * bound one.
+ */
+static void brw_delete_depth_stencil_state(struct pipe_context *pipe,
+                                           void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   assert((const void *)brw->curr.zstencil != (const void *)cso);
+   FREE(cso);
+}
+
+/* Install the create/bind/delete hooks for depth-stencil-alpha state objects. */
+void brw_pipe_depth_stencil_init( struct brw_context *brw )
+{
+ brw->base.create_depth_stencil_alpha_state = brw_create_depth_stencil_state;
+ brw->base.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state;
+ brw->base.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state;
+}
+/* Cleanup hook paired by name with brw_pipe_depth_stencil_init(); the body is empty. */
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
new file mode 100644
index 0000000000..5d4e5025f9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -0,0 +1,84 @@
+#include "util/u_math.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_debug.h"
+
+/**
+ * pipe_context set_framebuffer_state hook.
+ *
+ * Copies the new framebuffer description into brw->curr.fb one part at a
+ * time, taking references on changed surfaces and raising the matching
+ * PIPE_NEW_* dirty bit only for the parts that actually changed.  The
+ * stored nr_cbufs is clamped to BRW_MAX_DRAW_BUFFERS.
+ */
+static void brw_set_framebuffer_state( struct pipe_context *pipe,
+                                       const struct pipe_framebuffer_state *fb )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct pipe_framebuffer_state *cur = &brw->curr.fb;
+   unsigned i;
+
+   /* Dimensions. */
+   if (cur->width != fb->width || cur->height != fb->height) {
+      cur->width = fb->width;
+      cur->height = fb->height;
+      brw->state.dirty.mesa |= PIPE_NEW_FRAMEBUFFER_DIMENSIONS;
+   }
+
+   /* Depth/stencil buffer. */
+   if (cur->zsbuf != fb->zsbuf) {
+      pipe_surface_reference(&cur->zsbuf, fb->zsbuf);
+      brw->state.dirty.mesa |= PIPE_NEW_DEPTH_BUFFER;
+   }
+
+   /* Color buffers: every slot is compared, not just the first nr_cbufs. */
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      if (cur->cbufs[i] == fb->cbufs[i])
+         continue;
+
+      brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS;
+      pipe_surface_reference(&cur->cbufs[i], fb->cbufs[i]);
+   }
+
+   /* Color buffer count. */
+   if (cur->nr_cbufs != fb->nr_cbufs) {
+      cur->nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs);
+      brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS;
+   }
+}
+
+/* Store the viewport and derive min/max depth as translate[2] -/+ scale[2]. */
+static void brw_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->curr.viewport = *viewport;
+ brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2];
+ brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2];
+
+ if (0) /* debug print, compiled but disabled */
+ debug_printf("%s depth range %f .. %f\n",
+ __FUNCTION__,
+ brw->curr.ccv.min_depth,
+ brw->curr.ccv.max_depth);
+
+ brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
+}
+
+/* Install the set_framebuffer_state and set_viewport_state hooks. */
+void brw_pipe_framebuffer_init( struct brw_context *brw )
+{
+ brw->base.set_framebuffer_state = brw_set_framebuffer_state;
+ brw->base.set_viewport_state = brw_set_viewport_state;
+}
+
+/* Drop the surface references held in brw->curr.fb: every color buffer
+ * slot first, then the depth/stencil buffer.
+ */
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw )
+{
+   int slot;
+
+   for (slot = 0; slot < PIPE_MAX_COLOR_BUFS; slot++)
+      pipe_surface_reference(&brw->curr.fb.cbufs[slot], NULL);
+
+   pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
new file mode 100644
index 0000000000..fdc4814b22
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -0,0 +1,83 @@
+
+#include "util/u_upload_mgr.h"
+
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+
+
+
+/* All batchbuffer flushes must go through this function.
+ */
+void brw_context_flush( struct brw_context *brw )
+{
+ /*
+ * Emit the ending query snapshot first (a no-op unless a query interval is open).
+ */
+ brw_emit_query_end(brw);
+
+ /* Move to the end of the current upload buffer so that we'll force choosing
+ * a new buffer next time.
+ */
+ u_upload_flush( brw->vb.upload_vertex );
+ u_upload_flush( brw->vb.upload_index );
+
+ _brw_batchbuffer_flush( brw->batch, __FILE__, __LINE__ );
+
+ /* Mark all context state as needing to be re-emitted.
+ * This is probably not as severe as on 915, since almost all of our state
+ * is just in referenced buffers.
+ */
+ brw->state.dirty.brw |= BRW_NEW_CONTEXT; /* subsumed by the |= ~0 on dirty.brw below */
+ brw->state.dirty.mesa |= ~0;
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+
+ brw->curbe.need_new_bo = GL_TRUE;
+}
+
+/* pipe_context flush hook: flush the context's batchbuffer.  'flags' is
+ * ignored, and a requested fence is always returned as NULL.
+ */
+static void
+brw_flush( struct pipe_context *pipe,
+           unsigned flags,
+           struct pipe_fence_handle **fence )
+{
+   struct brw_context *brw = brw_context( pipe );
+
+   brw_context_flush( brw );
+
+   if (fence != NULL)
+      *fence = NULL;
+}
+
+/* pipe_context is_buffer_referenced hook: forwards to
+ * brw_is_buffer_referenced_by_bo() with the current batch's buffer.
+ */
+static unsigned brw_is_buffer_referenced(struct pipe_context *pipe,
+                                         struct pipe_buffer *buffer)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   return brw_is_buffer_referenced_by_bo( brw_screen(brw->base.screen),
+                                          buffer,
+                                          brw->batch->buf );
+}
+
+/* pipe_context is_texture_referenced hook: forwards to
+ * brw_is_texture_referenced_by_bo() with the current batch's buffer.
+ */
+static unsigned brw_is_texture_referenced(struct pipe_context *pipe,
+                                          struct pipe_texture *texture,
+                                          unsigned face,
+                                          unsigned level)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   return brw_is_texture_referenced_by_bo( brw_screen(brw->base.screen),
+                                           texture, face, level,
+                                           brw->batch->buf );
+}
+/* Install the flush and is_buffer/is_texture_referenced hooks. */
+void brw_pipe_flush_init( struct brw_context *brw )
+{
+ brw->base.flush = brw_flush;
+ brw->base.is_buffer_referenced = brw_is_buffer_referenced;
+ brw->base.is_texture_referenced = brw_is_texture_referenced;
+}
+
+/* Cleanup hook paired by name with brw_pipe_flush_init(); the body is empty. */
+void brw_pipe_flush_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c
new file mode 100644
index 0000000000..3035907807
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_misc.c
@@ -0,0 +1,54 @@
+
+#include "brw_context.h"
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+/* Rebuild the polygon-stipple packet in brw->curr.bps from 'stip' (all 32
+ * rows copied as-is, not inverted) and flag PIPE_NEW_POLYGON_STIPPLE.
+ */
+static void brw_set_polygon_stipple( struct pipe_context *pipe,
+                                     const struct pipe_poly_stipple *stip )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_polygon_stipple *bps = &brw->curr.bps;
+   GLuint row = 0;
+
+   /* Zero the packet, then fill in its header. */
+   memset(bps, 0, sizeof *bps);
+   bps->header.length = sizeof *bps/4-2;
+   bps->header.opcode = CMD_POLY_STIPPLE_PATTERN;
+
+   while (row < 32) {
+      bps->stipple[row] = stip->stipple[row]; /* don't invert */
+      row++;
+   }
+
+   brw->state.dirty.mesa |= PIPE_NEW_POLYGON_STIPPLE;
+}
+
+/* Copy the scissor state into the context and flag PIPE_NEW_SCISSOR. */
+static void brw_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->curr.scissor = *scissor;
+ brw->state.dirty.mesa |= PIPE_NEW_SCISSOR;
+}
+
+/* Copy the clip state into brw->curr.ucp and flag PIPE_NEW_CLIP. */
+static void brw_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->curr.ucp = *clip;
+ brw->state.dirty.mesa |= PIPE_NEW_CLIP;
+}
+
+/* Install the polygon-stipple, scissor and clip state hooks. */
+void brw_pipe_misc_init( struct brw_context *brw )
+{
+ brw->base.set_polygon_stipple = brw_set_polygon_stipple;
+ brw->base.set_scissor_state = brw_set_scissor_state;
+ brw->base.set_clip_state = brw_set_clip_state;
+}
+
+/* Cleanup hook paired by name with brw_pipe_misc_init(); the body is empty. */
+void brw_pipe_misc_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
new file mode 100644
index 0000000000..2eb862635c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file support for occlusion queries (PIPE_QUERY_OCCLUSION_COUNTER)
+ *
+ * Occlusion queries are implemented by using the PIPE_CONTROL command to
+ * stall execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours. To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once. This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_reg.h"
+
+/**
+ * Fold any counts still held in the query's buffer into query->result and
+ * report the running total.
+ *
+ * Each index in [first_index, last_index] names a pair of 64-bit counters
+ * in the buffer; the difference end - begin of every pair is added to the
+ * total.  Once harvested, the query drops its buffer reference.
+ *
+ * \param wait    if FALSE and the buffer is still busy, give up immediately
+ * \param result  receives the accumulated total on success
+ * \return TRUE on success; FALSE if the buffer is busy (and !wait) or
+ *         could not be mapped.
+ */
+static boolean
+brw_query_get_result(struct pipe_context *pipe,
+                     struct pipe_query *q,
+                     boolean wait,
+                     uint64_t *result)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   if (query->bo != NULL) {
+      uint64_t *counts;
+      int slot;
+
+      if (brw->sws->bo_is_busy(query->bo) && !wait)
+         return FALSE;
+
+      counts = bo_map_read(brw->sws, query->bo);
+      if (!counts)
+         return FALSE;
+
+      for (slot = query->first_index; slot <= query->last_index; slot++) {
+         /* counts[2n] is the begin value, counts[2n + 1] the end value. */
+         query->result += counts[slot * 2 + 1] - counts[slot * 2];
+      }
+
+      brw->sws->bo_unmap(query->bo);
+      bo_reference(&query->bo, NULL);
+   }
+
+   *result = query->result;
+   return TRUE;
+}
+
+/* Allocate a query object.  Only PIPE_QUERY_OCCLUSION_COUNTER is supported;
+ * any other type, or an allocation failure, yields NULL.
+ */
+static struct pipe_query *
+brw_query_create(struct pipe_context *pipe, unsigned type )
+{
+   struct brw_query_object *query;
+
+   if (type != PIPE_QUERY_OCCLUSION_COUNTER)
+      return NULL;
+
+   query = CALLOC_STRUCT( brw_query_object );
+   if (!query)
+      return NULL;
+
+   return (struct pipe_query *)query;
+}
+/* Drop the query's buffer reference and free the query object. */
+static void
+brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ bo_reference(&query->bo, NULL);
+ FREE(query);
+}
+/* Reset the query's result and indices, then put it on the context's active list. */
+static void
+brw_query_begin(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Reset our driver's tracking of query state. */
+ bo_reference(&query->bo, NULL);
+ query->result = 0;
+ query->first_index = -1;
+ query->last_index = -1;
+
+ insert_at_head(&brw->query.active_head, query);
+ brw->query.stats_wm++; /* undone by the matching decrement in brw_query_end() */
+ brw->state.dirty.mesa |= PIPE_NEW_QUERY;
+}
+/* End a query: flush if it has a buffer, then take it off the active list. */
+static void
+brw_query_end(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Flush the batchbuffer in case it has writes to our query BO.
+ * Have later queries write to a new query BO so that further rendering
+ * doesn't delay the collection of our results.
+ */
+ if (query->bo) {
+ brw_emit_query_end(brw); /* NOTE(review): brw_context_flush() below calls this too */
+ brw_context_flush( brw );
+
+ bo_reference(&brw->query.bo, NULL);
+ }
+
+ remove_from_list(query);
+ brw->query.stats_wm--;
+ brw->state.dirty.mesa |= PIPE_NEW_QUERY;
+}
+
+/***********************************************************************
+ * Internal functions and callbacks to implement queries
+ */
+
+/**
+ * Called to set up the query BO and account for its aperture space.
+ *
+ * Does nothing when no query is active.  Otherwise makes sure
+ * brw->query.bo exists and still has room for one more begin/end pair of
+ * 64-bit counters (the BO is 4096 bytes), allocating a fresh BO and
+ * resetting brw->query.index when it does not, and then adds the BO to
+ * the validation list.
+ *
+ * \return PIPE_OK, or the error returned by the winsys allocator.
+ */
+enum pipe_error
+brw_prepare_query_begin(struct brw_context *brw)
+{
+   enum pipe_error ret;
+
+   /* Skip if we're not doing any queries. */
+   if (is_empty_list(&brw->query.active_head))
+      return PIPE_OK;
+
+   /* Get a new query BO if we're going to need it. */
+   if (brw->query.bo == NULL ||
+       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+
+      /* Drop the context's reference on the exhausted BO before asking
+       * for a replacement: bo_alloc() is handed the same pointer slot and
+       * is not assumed to release what it previously pointed at.  Active
+       * queries keep their own references, so their data is unaffected.
+       * This also means a failed allocation leaves query.bo NULL rather
+       * than pointing at a BO with no free slots.
+       */
+      bo_reference(&brw->query.bo, NULL);
+
+      ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1,
+                               &brw->query.bo);
+      if (ret)
+         return ret;
+
+      brw->query.index = 0;
+   }
+
+   brw_add_validated_bo(brw, brw->query.bo);
+
+   return PIPE_OK;
+}
+
+/**
+ * Called just before primitive drawing to get a beginning PS_DEPTH_COUNT.
+ *
+ * Does nothing if the begin snapshot for this interval has already been
+ * emitted or no query is active.  Otherwise emits a PIPE_CONTROL that
+ * writes the depth count into the even slot of the current index pair,
+ * and makes every active query reference the current query BO and extend
+ * its index range to the current index.
+ */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+   struct brw_query_object *query;
+
+   /* Skip if we're not doing any queries, or we've emitted the start. */
+   if (brw->query.active || is_empty_list(&brw->query.active_head))
+      return;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+             PIPE_CONTROL_DEPTH_STALL |
+             PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* This object could be mapped cacheable, but we don't have an exposed
+    * mechanism to support that.  Since it's going uncached, tell GEM that
+    * we're writing to it.  The usual clflush should be all that's required
+    * to pick up the results.
+    */
+   OUT_RELOC(brw->query.bo,
+             BRW_USAGE_QUERY_RESULT,
+             PIPE_CONTROL_GLOBAL_GTT_WRITE |
+             ((brw->query.index * 2) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   foreach(query, &brw->query.active_head) {
+      if (query->bo != brw->query.bo) {
+         uint64_t tmp;
+
+         /* Propagate the results from the query's old buffer into its
+          * running total, as that bo is about to be dropped.  This must
+          * wait for the buffer: with wait == FALSE a still-busy buffer
+          * makes brw_query_get_result() return without accumulating
+          * anything, and the counts would be lost when query->bo is
+          * replaced just below.
+          */
+         if (query->bo != NULL)
+            (void) brw_query_get_result( &brw->base,
+                                         (struct pipe_query *)query,
+                                         TRUE,
+                                         &tmp );
+
+         bo_reference( &query->bo, brw->query.bo );
+         query->first_index = brw->query.index;
+      }
+      query->last_index = brw->query.index;
+   }
+   brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+ if (!brw->query.active) /* no begin snapshot outstanding: nothing to close */
+ return;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT);
+ OUT_RELOC(brw->query.bo,
+ BRW_USAGE_QUERY_RESULT,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE |
+ ((brw->query.index * 2 + 1) * sizeof(uint64_t))); /* odd slot of the pair holds the end count */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ brw->query.active = GL_FALSE;
+ brw->query.index++; /* the next begin/end pair uses the following slot pair */
+}
+/* Install the create/destroy/begin/end/get_result query hooks. */
+void brw_pipe_query_init( struct brw_context *brw )
+{
+ brw->base.create_query = brw_query_create;
+ brw->base.destroy_query = brw_query_destroy;
+ brw->base.begin_query = brw_query_begin;
+ brw->base.end_query = brw_query_end;
+ brw->base.get_query_result = brw_query_get_result;
+}
+
+
+/* Release the query resources still owned by the context. */
+void brw_pipe_query_cleanup( struct brw_context *brw )
+{
+   /* brw->query.bo is the context's own reference on the shared query
+    * buffer (taken in brw_prepare_query_begin()).  brw_query_end() only
+    * drops it when the ending query has a buffer, so it can still be held
+    * at teardown; release it here.  Passing NULL simply clears the slot,
+    * the same way this file releases per-query buffers.
+    */
+   bo_reference(&brw->query.bo, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
new file mode 100644
index 0000000000..2117e91a9e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -0,0 +1,161 @@
+
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_pipe_rast.h"
+#include "brw_wm.h"
+
+
+/* Map a PIPE_POLYGON_MODE_* value onto the matching CLIP_* fill mode.
+ * Unknown values assert and fall back to CLIP_FILL.
+ */
+static unsigned translate_fill( unsigned fill )
+{
+   if (fill == PIPE_POLYGON_MODE_FILL)
+      return CLIP_FILL;
+
+   if (fill == PIPE_POLYGON_MODE_LINE)
+      return CLIP_LINE;
+
+   if (fill == PIPE_POLYGON_MODE_POINT)
+      return CLIP_POINT;
+
+   /* Not a polygon mode we know about. */
+   assert(0);
+   return CLIP_FILL;
+}
+
+
+/* Build the clip program key used for triangle-mode clipping from a
+ * rasterizer template.  Keys for non-triangle clipping need far less
+ * information and are computed on the fly elsewhere.
+ *
+ * The key is zeroed first.  When both windings are culled only
+ * clip_mode and do_flat_shading are filled in.
+ */
+static void
+calculate_clip_key_rast( const struct brw_context *brw,
+                         const struct pipe_rasterizer_state *templ,
+                         const struct brw_rasterizer_state *rast,
+                         struct brw_clip_prog_key *key)
+{
+   memset(key, 0, sizeof *key);
+
+   key->clip_mode = brw->chipset.is_igdng ? BRW_CLIPMODE_KERNEL_CLIP
+                                          : BRW_CLIPMODE_NORMAL;
+
+   key->do_flat_shading = templ->flatshade;
+
+   /* Everything is culled: reject all primitives and stop here. */
+   if (templ->cull_mode == PIPE_WINDING_BOTH) {
+      key->clip_mode = BRW_CLIPMODE_REJECT_ALL;
+      return;
+   }
+
+   /* A culled winding gets CLIP_CULL, otherwise its translated fill mode. */
+   if (templ->cull_mode & PIPE_WINDING_CCW)
+      key->fill_ccw = CLIP_CULL;
+   else
+      key->fill_ccw = translate_fill(templ->fill_ccw);
+
+   if (templ->cull_mode & PIPE_WINDING_CW)
+      key->fill_cw = CLIP_CULL;
+   else
+      key->fill_cw = translate_fill(templ->fill_cw);
+
+   /* Any line or point fill mode selects the unfilled path. */
+   switch (key->fill_cw) {
+   case CLIP_LINE:
+   case CLIP_POINT:
+      key->do_unfilled = 1;
+      break;
+   default:
+      break;
+   }
+
+   switch (key->fill_ccw) {
+   case CLIP_LINE:
+   case CLIP_POINT:
+      key->do_unfilled = 1;
+      break;
+   default:
+      break;
+   }
+
+   if (key->do_unfilled)
+      key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+
+   key->offset_cw = templ->offset_cw;
+   key->offset_ccw = templ->offset_ccw;
+
+   /* Two-sided lighting: copy back-face colours for windings that draw. */
+   if (templ->light_twoside) {
+      if (key->fill_cw != CLIP_CULL)
+         key->copy_bfc_cw = 1;
+
+      if (key->fill_ccw != CLIP_CULL)
+         key->copy_bfc_ccw = 1;
+   }
+}
+
+
+/* Fill in the CMD_LINE_STIPPLE_PATTERN packet from the template's
+ * stipple pattern and factor.  The inverse repeat count is stored as
+ * 1 / (factor + 1) scaled by 1<<13.
+ */
+static void
+calculate_line_stipple_rast( const struct pipe_rasterizer_state *templ,
+                             struct brw_line_stipple *bls )
+{
+   GLfloat inverse = 1.0f / (templ->line_stipple_factor + 1);
+   GLint inverse_fixed = inverse * (1<<13);
+
+   /* Packet header */
+   bls->header.length = sizeof(*bls)/4 - 2;
+   bls->header.opcode = CMD_LINE_STIPPLE_PATTERN;
+
+   /* Payload */
+   bls->bits0.pattern = templ->line_stipple_pattern;
+   bls->bits1.inverse_repeat_count = inverse_fixed;
+   bls->bits1.repeat_count = templ->line_stipple_factor + 1;
+}
+
+/* Create a rasterizer CSO: keep a copy of the template and precompute
+ * the clip key, the line stipple packet (only when stippling is
+ * enabled) and the AA-line lookup value for the WM IZ table.
+ * Returns NULL if allocation fails.
+ */
+static void *brw_create_rasterizer_state( struct pipe_context *pipe,
+                                          const struct pipe_rasterizer_state *templ )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_rasterizer_state *rast = CALLOC_STRUCT(brw_rasterizer_state);
+
+   if (rast == NULL)
+      return NULL;
+
+   rast->templ = *templ;
+
+   calculate_clip_key_rast( brw, templ, rast, &rast->clip_key );
+
+   if (templ->line_stipple_enable)
+      calculate_line_stipple_rast( templ, &rast->bls );
+
+   /* Calculate lookup value for WM IZ table: start from "never" and
+    * upgrade depending on how many windings are drawn as lines.
+    */
+   rast->unfilled_aa_line = AA_NEVER;
+
+   if (templ->line_smooth) {
+      const int cw_lines = (templ->fill_cw == PIPE_POLYGON_MODE_LINE);
+      const int ccw_lines = (templ->fill_ccw == PIPE_POLYGON_MODE_LINE);
+
+      if (cw_lines && ccw_lines)
+         rast->unfilled_aa_line = AA_ALWAYS;
+      else if (cw_lines || ccw_lines)
+         rast->unfilled_aa_line = AA_SOMETIMES;
+   }
+
+   return (void *)rast;
+}
+
+
+/* Make 'cso' the current rasterizer state and flag PIPE_NEW_RAST dirty. */
+static void brw_bind_rasterizer_state(struct pipe_context *pipe,
+                                      void *cso)
+{
+   const struct brw_rasterizer_state *rast =
+      (const struct brw_rasterizer_state *)cso;
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.rast = rast;
+   brw->state.dirty.mesa |= PIPE_NEW_RAST;
+}
+
+/* Free a rasterizer CSO.  Asserts that it is not the one currently bound. */
+static void brw_delete_rasterizer_state(struct pipe_context *pipe,
+                                        void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   assert((const void *)brw->curr.rast != (const void *)cso);
+
+   FREE(cso);
+}
+
+
+
+/* Install the rasterizer CSO entry points on the context's pipe vtable. */
+void brw_pipe_rast_init( struct brw_context *brw )
+{
+   brw->base.bind_rasterizer_state = brw_bind_rasterizer_state;
+   brw->base.create_rasterizer_state = brw_create_rasterizer_state;
+   brw->base.delete_rasterizer_state = brw_delete_rasterizer_state;
+}
+
+/* Tear-down counterpart of brw_pipe_rast_init().  Currently does nothing. */
+void brw_pipe_rast_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h
new file mode 100644
index 0000000000..9354f01e18
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.h
@@ -0,0 +1,16 @@
+#ifndef BRW_PIPE_RAST_H
+#define BRW_PIPE_RAST_H
+
+#include "brw_clip.h"
+
+/* Driver-side rasterizer CSO: a copy of the gallium template plus
+ * hardware state derived from it.
+ */
+struct brw_rasterizer_state {
+ struct pipe_rasterizer_state templ; /* for draw module */
+
+ /* Precalculated hardware state:
+ */
+ struct brw_clip_prog_key clip_key; /* triangle-mode clip program key */
+ struct brw_line_stipple bls; /* line stipple pattern packet */
+ unsigned unfilled_aa_line; /* AA_* lookup value for the WM IZ table */
+};
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
new file mode 100644
index 0000000000..5ddc63f57e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -0,0 +1,233 @@
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+
+
+/* Translate a PIPE_TEX_WRAP_* mode into a BRW_TEXCOORDMODE_* value.
+ *
+ * The brw (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.  Unrecognised modes are
+ * treated as plain wrap.
+ */
+static GLuint translate_wrap_mode( unsigned wrap )
+{
+   /* REPEAT, and the fallback for anything unrecognised. */
+   GLuint mode = BRW_TEXCOORDMODE_WRAP;
+
+   switch( wrap ) {
+   case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      mode = BRW_TEXCOORDMODE_CLAMP;
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      mode = BRW_TEXCOORDMODE_MIRROR;
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      mode = BRW_TEXCOORDMODE_MIRROR_ONCE;
+      break;
+
+   case PIPE_TEX_WRAP_REPEAT:
+   default:
+      break;
+   }
+
+   return mode;
+}
+
+/* Translate a PIPE_TEX_FILTER_* value into a BRW_MAPFILTER_* value.
+ * Unknown values assert and fall back to nearest filtering.
+ */
+static GLuint translate_img_filter( unsigned filter )
+{
+   GLuint hw_filter = BRW_MAPFILTER_NEAREST;
+
+   switch (filter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+      hw_filter = BRW_MAPFILTER_LINEAR;
+      break;
+   case PIPE_TEX_FILTER_ANISO:
+      hw_filter = BRW_MAPFILTER_ANISOTROPIC;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return hw_filter;
+}
+
+/* Translate a PIPE_TEX_MIPFILTER_* value into a BRW_MIPFILTER_* value.
+ * Unknown values assert and fall back to no mip filtering.
+ */
+static GLuint translate_mip_filter( unsigned filter )
+{
+   GLuint hw_filter = BRW_MIPFILTER_NONE;
+
+   switch (filter) {
+   case PIPE_TEX_MIPFILTER_NONE:
+      break;
+   case PIPE_TEX_MIPFILTER_NEAREST:
+      hw_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case PIPE_TEX_MIPFILTER_LINEAR:
+      hw_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return hw_filter;
+}
+
+/* Translate a PIPE_FUNC_* shadow comparison into the
+ * BRW_COMPAREFUNCTION_* value programmed into the sampler.
+ *
+ * XXX: not sure why there are special translations for the shadow tex
+ * compare functions. In particular ALWAYS is translated to NEVER.
+ * Is this a hardware issue? Does i965 really suffer from this?
+ *
+ * As written, the table is not an identity mapping: NEVER/ALWAYS and
+ * EQUAL/NOTEQUAL are swapped with each other, and each ordering test
+ * is swapped with its strict/non-strict sibling (LESS <-> LEQUAL,
+ * GREATER <-> GEQUAL).  Unknown values assert and yield NEVER.
+ */
+static GLuint translate_shadow_compare_func( unsigned func )
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ return BRW_COMPAREFUNCTION_ALWAYS;
+ case PIPE_FUNC_LESS:
+ return BRW_COMPAREFUNCTION_LEQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return BRW_COMPAREFUNCTION_LESS;
+ case PIPE_FUNC_GREATER:
+ return BRW_COMPAREFUNCTION_GEQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return BRW_COMPAREFUNCTION_GREATER;
+ case PIPE_FUNC_NOTEQUAL:
+ return BRW_COMPAREFUNCTION_EQUAL;
+ case PIPE_FUNC_EQUAL:
+ return BRW_COMPAREFUNCTION_NOTEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return BRW_COMPAREFUNCTION_NEVER;
+ default:
+ assert(0);
+ return BRW_COMPAREFUNCTION_NEVER;
+ }
+}
+
+
+
+
+/**
+ * Create a sampler CSO by translating a gallium sampler template into
+ * the hardware sampler words.
+ *
+ * \param pipe      owning context (not used by the translation)
+ * \param template  gallium sampler description
+ * \return the new struct brw_sampler as an opaque pointer, or NULL if
+ *         the allocation fails.
+ */
+static void *
+brw_create_sampler_state( struct pipe_context *pipe,
+                          const struct pipe_sampler_state *template )
+{
+   struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler);
+
+   /* Allocation can fail; report it instead of dereferencing NULL. */
+   if (sampler == NULL)
+      return NULL;
+
+   sampler->ss0.min_filter = translate_img_filter( template->min_img_filter );
+   sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter );
+   sampler->ss0.mip_filter = translate_mip_filter( template->min_mip_filter );
+
+
+   /* XXX: anisotropy logic slightly changed:
+    * any max_anisotropy above 1 forces anisotropic min/mag filtering.
+    */
+   if (template->max_anisotropy > 1.0) {
+      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+      sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+      if (template->max_anisotropy > 2.0) {
+         sampler->ss3.max_aniso = MIN2((template->max_anisotropy - 2) / 2,
+                                       BRW_ANISORATIO_16);
+      }
+   }
+
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(template->wrap_r);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(template->wrap_s);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(template->wrap_t);
+
+   /* Set LOD bias: clamped to [-16, 15], signed fixed point with
+    * 6 fractional bits.
+    */
+   sampler->ss0.lod_bias =
+      util_signed_fixed(CLAMP(template->lod_bias, -16, 15), 6);
+
+
+   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+   /* Set shadow function:
+    */
+   if (template->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+
+      /* Shadowing is "enabled" by emitting a particular sampler
+       * message (sample_c). So need to recompile WM program when
+       * shadow comparison is enabled on each/any texture unit.
+       */
+      sampler->ss0.shadow_function =
+         translate_shadow_compare_func(template->compare_func);
+   }
+
+   /* Set BaseMipLevel, MaxLOD, MinLOD:
+    */
+   sampler->ss0.base_level =
+      util_unsigned_fixed(0, 1);
+
+   sampler->ss1.max_lod =
+      util_unsigned_fixed(CLAMP(template->max_lod, 0, 13), 6);
+
+   sampler->ss1.min_lod =
+      util_unsigned_fixed(CLAMP(template->min_lod, 0, 13), 6);
+
+   return (void *)sampler;
+}
+
+/* Bind 'num' fragment sampler CSOs.  Slots beyond 'num' that were
+ * previously bound are cleared, then PIPE_NEW_SAMPLERS is flagged.
+ */
+static void brw_bind_sampler_state(struct pipe_context *pipe,
+                                   unsigned num, void **sampler)
+{
+   struct brw_context *brw = brw_context(pipe);
+   int slot = 0;
+
+   /* Take over the new bindings... */
+   while (slot < num) {
+      brw->curr.sampler[slot] = sampler[slot];
+      slot++;
+   }
+
+   /* ...and drop any stale ones left from a larger previous set. */
+   while (slot < brw->curr.num_samplers) {
+      brw->curr.sampler[slot] = NULL;
+      slot++;
+   }
+
+   brw->curr.num_samplers = num;
+   brw->state.dirty.mesa |= PIPE_NEW_SAMPLERS;
+}
+
+/* Free a sampler CSO.  No check is made that the sampler is not still
+ * referenced from brw->curr.sampler[].
+ */
+static void brw_delete_sampler_state(struct pipe_context *pipe,
+ void *cso)
+{
+ FREE(cso);
+}
+
+/* Bind 'num' fragment textures, taking a reference on each.  Slots
+ * beyond 'num' that were previously bound are unreferenced, then
+ * PIPE_NEW_BOUND_TEXTURES is flagged.
+ */
+static void brw_set_sampler_textures(struct pipe_context *pipe,
+                                     unsigned num,
+                                     struct pipe_texture **texture)
+{
+   struct brw_context *brw = brw_context(pipe);
+   int unit = 0;
+
+   /* Reference the incoming textures. */
+   while (unit < num) {
+      pipe_texture_reference(&brw->curr.texture[unit], texture[unit]);
+      unit++;
+   }
+
+   /* Release whatever was bound above the new count. */
+   while (unit < brw->curr.num_textures) {
+      pipe_texture_reference(&brw->curr.texture[unit], NULL);
+      unit++;
+   }
+
+   brw->curr.num_textures = num;
+   brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES;
+}
+
+/* Stub: vertex-stage textures are accepted and ignored. */
+static void brw_set_vertex_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+}
+
+/* Stub: vertex-stage sampler states are accepted and ignored. */
+static void brw_bind_vertex_sampler_state(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+}
+
+
+/* Install the sampler and texture-binding entry points on the
+ * context's pipe vtable.
+ */
+void brw_pipe_sampler_init( struct brw_context *brw )
+{
+   /* Sampler CSO lifetime */
+   brw->base.create_sampler_state = brw_create_sampler_state;
+   brw->base.delete_sampler_state = brw_delete_sampler_state;
+
+   /* Fragment stage bindings */
+   brw->base.bind_fragment_sampler_states = brw_bind_sampler_state;
+   brw->base.set_fragment_sampler_textures = brw_set_sampler_textures;
+
+   /* Vertex stage bindings (stubs) */
+   brw->base.bind_vertex_sampler_states = brw_bind_vertex_sampler_state;
+   brw->base.set_vertex_sampler_textures = brw_set_vertex_sampler_textures;
+}
+/* Tear-down counterpart of brw_pipe_sampler_init().  Currently does
+ * nothing; in particular the references held in brw->curr.texture[]
+ * are not dropped here.
+ */
+void brw_pipe_sampler_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
new file mode 100644
index 0000000000..bb32d90e33
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -0,0 +1,303 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+
+
+/**
+ * Determine if the given shader uses complex features such as flow
+ * conditionals, loops, subroutines.
+ *
+ * The answer comes from the per-opcode counts gathered by the TGSI
+ * scan, so 'info' must already have been filled in.  Note that ARL is
+ * included in the set of opcodes checked.
+ */
+static GLboolean has_flow_control(const struct tgsi_shader_info *info)
+{
+   static const unsigned complex_opcodes[] = {
+      TGSI_OPCODE_ARL,
+      TGSI_OPCODE_IF,
+      TGSI_OPCODE_ENDIF,    /* redundant - IF */
+      TGSI_OPCODE_CAL,
+      TGSI_OPCODE_BRK,      /* redundant - BGNLOOP */
+      TGSI_OPCODE_RET,      /* redundant - CAL */
+      TGSI_OPCODE_BGNLOOP
+   };
+   unsigned k;
+
+   for (k = 0; k < sizeof complex_opcodes / sizeof complex_opcodes[0]; k++) {
+      if (info->opcode_count[complex_opcodes[k]] > 0)
+         return 1;
+   }
+
+   return 0;
+}
+
+
+/**
+ * Collect the immediates declared at the head of a token stream.
+ *
+ * Storage for info->immediate_count vec4s is allocated into imm->data
+ * and each immediate is copied in; components the immediate does not
+ * supply are filled from (0,0,0,1).  Scanning stops at the first
+ * instruction token.
+ *
+ * \param tokens  TGSI token stream to walk
+ * \param info    scan results for the same tokens (gives immediate_count)
+ * \param imm     receives the array (imm->data) and entry count (imm->nr)
+ *
+ * On allocation failure imm->data is NULL and imm->nr is 0.  The caller
+ * owns imm->data and must FREE it.
+ */
+static void scan_immediates(const struct tgsi_token *tokens,
+                            const struct tgsi_shader_info *info,
+                            struct brw_immediate_data *imm)
+{
+   struct tgsi_parse_context parse;
+   boolean done = FALSE;
+
+   imm->nr = 0;
+   imm->data = MALLOC(info->immediate_count * 4 * sizeof(float));
+
+   /* Nothing can be stored without a buffer. */
+   if (imm->data == NULL)
+      return;
+
+   tgsi_parse_init( &parse, tokens );
+   while (!tgsi_parse_end_of_tokens( &parse ) && !done) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE: {
+         static const float id[4] = {0,0,0,1};
+         const float *value = &parse.FullToken.FullImmediate.u[0].Float;
+         unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+         unsigned i;
+
+         /* Never write past the space sized from immediate_count. */
+         if (imm->nr >= info->immediate_count)
+            break;
+
+         /* Each slot holds four components at most. */
+         if (size > 4)
+            size = 4;
+
+         for (i = 0; i < size; i++)
+            imm->data[imm->nr][i] = value[i];
+
+         for (; i < 4; i++)
+            imm->data[imm->nr][i] = id[i];
+
+         imm->nr++;
+         break;
+      }
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         /* Stop scanning at the first instruction. */
+         done = TRUE;
+         break;
+      }
+   }
+
+   tgsi_parse_free( &parse );
+}
+
+
+/* Bind a fragment shader.  Rebinding the current shader is a no-op.
+ * PIPE_NEW_FRAGMENT_SIGNATURE is additionally flagged when either the
+ * old or new shader is NULL, or when their signatures differ.
+ */
+static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
+   int signature_changed;
+
+   if (fs == brw->curr.fragment_shader)
+      return;
+
+   if (brw->curr.fragment_shader == NULL || fs == NULL)
+      signature_changed = 1;
+   else
+      signature_changed =
+         memcmp(&brw->curr.fragment_shader->signature, &fs->signature,
+                brw_fs_signature_size(&fs->signature)) != 0;
+
+   if (signature_changed)
+      brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SIGNATURE;
+
+   brw->curr.fragment_shader = fs;
+   brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER;
+}
+
+/* Bind a vertex shader and flag PIPE_NEW_VERTEX_SHADER.  Unlike the
+ * fragment path, no check is made for rebinding the same shader.
+ */
+static void brw_bind_vs_state( struct pipe_context *pipe, void *prog )
+{
+   struct brw_vertex_shader *vs = (struct brw_vertex_shader *)prog;
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.vertex_shader = vs;
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_SHADER;
+}
+
+
+
+/**
+ * Create a fragment shader CSO.
+ *
+ * The token stream is duplicated, scanned, and its immediates
+ * collected; from the scan results the function derives the flow
+ * control flag, the input signature, whether the position input is
+ * used, and the IZ lookup bits.
+ *
+ * \param pipe    owning context (supplies the program id counter)
+ * \param shader  gallium shader description; tokens are copied
+ * \return the new struct brw_fragment_shader as an opaque pointer, or
+ *         NULL on allocation failure.
+ */
+static void *brw_create_fs_state( struct pipe_context *pipe,
+                                  const struct pipe_shader_state *shader )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_fragment_shader *fs;
+   int i;
+
+   fs = CALLOC_STRUCT(brw_fragment_shader);
+   if (fs == NULL)
+      return NULL;
+
+   /* Duplicate tokens, scan shader
+    */
+   fs->tokens = tgsi_dup_tokens(shader->tokens);
+   if (fs->tokens == NULL)
+      goto fail;
+
+   tgsi_scan_shader(fs->tokens, &fs->info);
+   scan_immediates(fs->tokens, &fs->info, &fs->immediates);
+
+   fs->id = brw->program_id++;
+
+   /* Must come after tgsi_scan_shader(): the opcode counts are all
+    * zero until the scan has filled in fs->info.
+    */
+   fs->has_flow_control = has_flow_control(&fs->info);
+
+   fs->signature.nr_inputs = fs->info.num_inputs;
+   for (i = 0; i < fs->info.num_inputs; i++) {
+      fs->signature.input[i].interp = fs->info.input_interpolate[i];
+      fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
+      fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
+   }
+
+   /* Reading the position input is what marks the shader as using depth. */
+   for (i = 0; i < fs->info.num_inputs; i++)
+      if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
+         fs->uses_depth = 1;
+
+   if (fs->info.uses_kill)
+      fs->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (fs->info.writes_z)
+      fs->iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+   return (void *)fs;
+
+fail:
+   FREE(fs);
+   return NULL;
+}
+
+
+/* Create a vertex shader CSO: duplicate and scan the tokens, collect
+ * immediates, then record which output slot (if any) carries position,
+ * front/back colours and the edge flag.  Slots not written by the
+ * shader stay BRW_OUTPUT_NOT_PRESENT.  Returns NULL on allocation
+ * failure.
+ */
+static void *brw_create_vs_state( struct pipe_context *pipe,
+                                  const struct pipe_shader_state *shader )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader);
+   unsigned slot;
+
+   if (vs == NULL)
+      return NULL;
+
+   /* Duplicate tokens, scan shader
+    */
+   vs->tokens = tgsi_dup_tokens(shader->tokens);
+   if (vs->tokens == NULL) {
+      FREE(vs);
+      return NULL;
+   }
+
+   tgsi_scan_shader(vs->tokens, &vs->info);
+   scan_immediates(vs->tokens, &vs->info, &vs->immediates);
+
+   vs->id = brw->program_id++;
+   vs->has_flow_control = has_flow_control(&vs->info);
+
+   /* Until proven otherwise, none of the special outputs exist. */
+   vs->output_hpos = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_color0 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_color1 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT;
+
+   for (slot = 0; slot < vs->info.num_outputs; slot++) {
+      const int index = vs->info.output_semantic_index[slot];
+      const int first = (index == 0);
+
+      switch (vs->info.output_semantic_name[slot]) {
+      case TGSI_SEMANTIC_POSITION:
+         vs->output_hpos = slot;
+         break;
+      case TGSI_SEMANTIC_COLOR:
+         /* Index 0 is the primary colour, anything else the secondary. */
+         if (first)
+            vs->output_color0 = slot;
+         else
+            vs->output_color1 = slot;
+         break;
+      case TGSI_SEMANTIC_BCOLOR:
+         if (first)
+            vs->output_bfc0 = slot;
+         else
+            vs->output_bfc1 = slot;
+         break;
+      case TGSI_SEMANTIC_EDGEFLAG:
+         vs->output_edgeflag = slot;
+         break;
+      }
+   }
+
+   return (void *)vs;
+}
+
+
+/**
+ * Destroy a fragment shader CSO created by brw_create_fs_state().
+ *
+ * Drops the constant buffer reference and frees everything the shader
+ * owns: the immediates array allocated by scan_immediates(), the
+ * duplicated token stream, and the shader struct itself.
+ */
+static void brw_delete_fs_state( struct pipe_context *pipe, void *prog )
+{
+   struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
+
+   bo_reference(&fs->const_buffer, NULL);
+
+   /* Allocated in scan_immediates(); nothing else releases it. */
+   FREE( fs->immediates.data );
+   FREE( (void *)fs->tokens );
+   FREE( fs );
+}
+
+
+/**
+ * Destroy a vertex shader CSO created by brw_create_vs_state().
+ *
+ * The object is a struct brw_vertex_shader, so it must be accessed
+ * through that type rather than the fragment shader struct.  Frees the
+ * immediates array allocated by scan_immediates(), the duplicated
+ * token stream, and the shader struct itself.
+ */
+static void brw_delete_vs_state( struct pipe_context *pipe, void *prog )
+{
+   struct brw_vertex_shader *vs = (struct brw_vertex_shader *)prog;
+
+   /* Delete draw shader
+    */
+
+   /* Allocated in scan_immediates(); nothing else releases it. */
+   FREE( vs->immediates.data );
+   FREE( (void *)vs->tokens );
+   FREE( vs );
+}
+
+
+/**
+ * Bind a constant buffer for the fragment or vertex stage.
+ *
+ * \param pipe    owning context
+ * \param shader  PIPE_SHADER_FRAGMENT selects the fragment constants;
+ *                any other value is treated as the vertex stage
+ * \param index   constant buffer slot; only slot 0 is supported
+ * \param buf     buffer to bind, or NULL to unbind the current one
+ *
+ * A reference is taken on the new buffer (and dropped on the old one)
+ * and the matching PIPE_NEW_*_CONSTANTS dirty bit is set.
+ */
+static void brw_set_constant_buffer(struct pipe_context *pipe,
+                                    uint shader, uint index,
+                                    const struct pipe_constant_buffer *buf)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   assert(index == 0);
+
+   if (shader == PIPE_SHADER_FRAGMENT) {
+      /* A NULL 'buf' unbinds rather than being dereferenced. */
+      pipe_buffer_reference( &brw->curr.fragment_constants,
+                             buf ? buf->buffer : NULL );
+
+      brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS;
+   }
+   else {
+      pipe_buffer_reference( &brw->curr.vertex_constants,
+                             buf ? buf->buffer : NULL );
+
+      brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS;
+   }
+}
+
+
+/* Install the shader and constant-buffer entry points on the
+ * context's pipe vtable.
+ */
+void brw_pipe_shader_init( struct brw_context *brw )
+{
+   /* Fragment shader CSO */
+   brw->base.create_fs_state = brw_create_fs_state;
+   brw->base.bind_fs_state = brw_bind_fs_state;
+   brw->base.delete_fs_state = brw_delete_fs_state;
+
+   /* Vertex shader CSO */
+   brw->base.create_vs_state = brw_create_vs_state;
+   brw->base.bind_vs_state = brw_bind_vs_state;
+   brw->base.delete_vs_state = brw_delete_vs_state;
+
+   /* Constants for both stages */
+   brw->base.set_constant_buffer = brw_set_constant_buffer;
+}
+
+/* Drop the references held on the currently bound constant buffers. */
+void brw_pipe_shader_cleanup( struct brw_context *brw )
+{
+   pipe_buffer_reference( &brw->curr.vertex_constants, NULL );
+   pipe_buffer_reference( &brw->curr.fragment_constants, NULL );
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
new file mode 100644
index 0000000000..e3c48e3149
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -0,0 +1,71 @@
+#include "brw_context.h"
+
+
+/* Copy 'count' vertex element descriptions into the current state and
+ * flag PIPE_NEW_VERTEX_ELEMENT.  No bound is applied to 'count' here.
+ */
+static void brw_set_vertex_elements( struct pipe_context *pipe,
+                                     unsigned count,
+                                     const struct pipe_vertex_element *elements )
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.num_vertex_elements = count;
+   memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0]));
+
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
+}
+
+
+/* Bind 'count' vertex buffers.
+ *
+ * Returns early, without touching state or dirty bits, when the count
+ * and the raw bytes of the descriptions match what is already bound.
+ * Otherwise references are moved onto the new buffers, references on
+ * slots beyond 'count' are dropped, the descriptions are copied and
+ * PIPE_NEW_VERTEX_BUFFER is flagged.
+ */
+static void brw_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct brw_context *brw = brw_context(pipe);
+ unsigned i;
+
+ /* Check for no change */
+ if (count == brw->curr.num_vertex_buffers &&
+ memcmp(brw->curr.vertex_buffer,
+ buffers,
+ count * sizeof buffers[0]) == 0)
+ return;
+
+ /* Adjust refcounts */
+ for (i = 0; i < count; i++)
+ pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer,
+ buffers[i].buffer);
+
+ /* i == count here: release the slots that are no longer in use. */
+ for ( ; i < brw->curr.num_vertex_buffers; i++)
+ pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer,
+ NULL);
+
+ /* Copy remaining data */
+ /* The .buffer pointers written by this copy equal the ones just
+  * referenced above, so the refcounts stay consistent.
+  */
+ memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]);
+ brw->curr.num_vertex_buffers = count;
+
+ brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
+}
+
+
+/* Install the vertex buffer/element entry points on the context's
+ * pipe vtable.
+ */
+void
+brw_pipe_vertex_init( struct brw_context *brw )
+{
+   brw->base.set_vertex_elements = brw_set_vertex_elements;
+   brw->base.set_vertex_buffers = brw_set_vertex_buffers;
+}
+
+
+/* Tear-down counterpart of brw_pipe_vertex_init().
+ *
+ * Currently a no-op: the two comments below are placeholders, and the
+ * only release code is compiled out with #if 0.  In particular the
+ * references taken in brw_set_vertex_buffers() are not dropped here.
+ */
+void
+brw_pipe_vertex_cleanup( struct brw_context *brw )
+{
+
+ /* Release bound pipe vertex_buffers
+ */
+
+ /* Release some other stuff
+ */
+#if 0
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ bo_reference(&brw->vb.inputs[i].bo, NULL);
+ brw->vb.inputs[i].bo = NULL;
+ }
+#endif
+}
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
new file mode 100644
index 0000000000..a63403b6af
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -0,0 +1,115 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_REG_H
+#define BRW_REG_H
+
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+#define CMD_3D (0x3 << 29)
+
+#define MI_NOOP (CMD_MI | 0)
+#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
+#define MI_FLUSH (CMD_MI | (4 << 23))
+
+#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2)
+
+/** @{
+ *
+ * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
+ * additional flushing control.
+ */
+#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2)
+#define PIPE_CONTROL_NO_WRITE (0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define PIPE_CONTROL_WRITE_FLUSH (1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11)
+#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
+#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+
+/** @} */
+
+#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6)
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4)
+#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA (1 << 21)
+#define XY_BLT_WRITE_RGB (1 << 20)
+#define XY_SRC_TILED (1 << 15)
+#define XY_DST_TILED (1 << 11)
+
+/* BR13 */
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
+
+
+
+/* PCI IDs
+ */
+#define PCI_CHIP_I965_G 0x29A2
+#define PCI_CHIP_I965_Q 0x2992
+#define PCI_CHIP_I965_G_1 0x2982
+#define PCI_CHIP_I946_GZ 0x2972
+#define PCI_CHIP_I965_GM 0x2A02
+#define PCI_CHIP_I965_GME 0x2A12
+
+#define PCI_CHIP_GM45_GM 0x2A42
+
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+#define PCI_CHIP_B43_G 0x2E42
+
+#define PCI_CHIP_ILD_G 0x0042
+#define PCI_CHIP_ILM_G 0x0046
+
+/* Chipset identification: the PCI device id plus one flag per
+ * hardware family.  The bitfields add up to 32 bits.
+ */
+struct brw_chipset {
+ unsigned pci_id:16; /* one of the PCI_CHIP_* ids */
+ unsigned is_965:1;
+ unsigned is_igdng:1;
+ unsigned is_g4x:1;
+ unsigned pad:13; /* fills the remaining bits of the 32 */
+};
+
+
+/* XXX: hacks
+ */
+#define VERT_RESULT_HPOS 0 /* not always true */
+#define VERT_RESULT_PSIZ 10000 /* disabled */
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
new file mode 100644
index 0000000000..0ecacac9a3
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -0,0 +1,403 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+#include "brw_reg.h"
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_winsys.h"
+#include "brw_debug.h"
+
+#ifdef DEBUG
+/* Names accepted in the BRW_DEBUG / INTEL_DEBUG environment options,
+ * each mapped to its DEBUG_* flag.  Terminated by a NULL name.
+ */
+static const struct debug_named_value debug_names[] = {
+ { "tex", DEBUG_TEXTURE},
+ { "state", DEBUG_STATE},
+ { "ioctl", DEBUG_IOCTL},
+ { "blit", DEBUG_BLIT},
+ { "curbe", DEBUG_CURBE},
+ { "fall", DEBUG_FALLBACKS},
+ { "verb", DEBUG_VERBOSE},
+ { "bat", DEBUG_BATCH},
+ { "pix", DEBUG_PIXEL},
+ { "wins", DEBUG_WINSYS},
+ { "min", DEBUG_MIN_URB},
+ { "dis", DEBUG_DISASSEM},
+ { "sync", DEBUG_SYNC},
+ { "prim", DEBUG_PRIMS },
+ { "vert", DEBUG_VERTS },
+ { "dma", DEBUG_DMA },
+ { "san", DEBUG_SANITY },
+ { "sleep", DEBUG_SLEEP },
+ { "stats", DEBUG_STATS },
+ { "sing", DEBUG_SINGLE_THREAD },
+ /* NOTE(review): "thre" maps to the same flag as "sing" -- confirm
+  * whether a separate threads flag was intended.
+  */
+ { "thre", DEBUG_SINGLE_THREAD },
+ { "wm", DEBUG_WM },
+ { "urb", DEBUG_URB },
+ { "vs", DEBUG_VS },
+ { NULL, 0 }
+};
+
+/* Names accepted in the BRW_DUMP environment option, each mapped to
+ * its DUMP_* flag.  Terminated by a NULL name.
+ */
+static const struct debug_named_value dump_names[] = {
+ { "asm", DUMP_ASM},
+ { "state", DUMP_STATE},
+ { "batch", DUMP_BATCH},
+ { NULL, 0 }
+};
+
+int BRW_DEBUG = 0;
+int BRW_DUMP = 0;
+
+#endif
+
+
+/*
+ * Probe functions
+ */
+
+
+/* Return the vendor string reported for this driver (a constant). */
+static const char *
+brw_get_vendor(struct pipe_screen *screen)
+{
+ return "VMware, Inc.";
+}
+
+/**
+ * Return a human-readable renderer name of the form
+ * "i965 (chipset: NAME)".
+ *
+ * NAME is chosen from the screen's PCI id; an id that is not in the
+ * table yields "unknown" instead of reading an uninitialised pointer.
+ *
+ * The string lives in a static buffer, so it is overwritten by the
+ * next call and must not be freed by the caller.
+ */
+static const char *
+brw_get_name(struct pipe_screen *screen)
+{
+   static char buffer[128];
+   const char *chipset;
+
+   switch (brw_screen(screen)->chipset.pci_id) {
+   case PCI_CHIP_I965_G:
+      chipset = "I965_G";
+      break;
+   case PCI_CHIP_I965_Q:
+      chipset = "I965_Q";
+      break;
+   case PCI_CHIP_I965_G_1:
+      chipset = "I965_G_1";
+      break;
+   case PCI_CHIP_I946_GZ:
+      chipset = "I946_GZ";
+      break;
+   case PCI_CHIP_I965_GM:
+      chipset = "I965_GM";
+      break;
+   case PCI_CHIP_I965_GME:
+      chipset = "I965_GME";
+      break;
+   case PCI_CHIP_GM45_GM:
+      chipset = "GM45_GM";
+      break;
+   case PCI_CHIP_IGD_E_G:
+      chipset = "IGD_E_G";
+      break;
+   case PCI_CHIP_Q45_G:
+      chipset = "Q45_G";
+      break;
+   case PCI_CHIP_G45_G:
+      chipset = "G45_G";
+      break;
+   case PCI_CHIP_G41_G:
+      chipset = "G41_G";
+      break;
+   case PCI_CHIP_B43_G:
+      chipset = "B43_G";
+      break;
+   case PCI_CHIP_ILD_G:
+      chipset = "ILD_G";
+      break;
+   case PCI_CHIP_ILM_G:
+      chipset = "ILM_G";
+      break;
+   default:
+      /* Keep 'chipset' defined for ids missing from the table. */
+      chipset = "unknown";
+      break;
+   }
+
+   util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset);
+   return buffer;
+}
+
+/* Report integer capabilities.  Anything not listed is reported as 0. */
+static int
+brw_get_param(struct pipe_screen *screen, int param)
+{
+   int value = 0;
+
+   switch (param) {
+   /* Supported boolean features, and the single render target. */
+   case PIPE_CAP_NPOT_TEXTURES:
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      value = 1;
+      break;
+
+   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+      value = 8;
+      break;
+
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      value = 8; /* max 128x128x128 */
+      break;
+
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      value = 11; /* max 1024x1024 */
+      break;
+
+   /* Explicitly reported as unsupported. */
+   case PIPE_CAP_GLSL:
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+   case PIPE_CAP_POINT_SPRITE:
+   case PIPE_CAP_OCCLUSION_QUERY:
+   default:
+      break;
+   }
+
+   return value;
+}
+
+/* Report floating-point capabilities.  Anything not listed is 0. */
+static float
+brw_get_paramf(struct pipe_screen *screen, int param)
+{
+   float value = 0;
+
+   switch (param) {
+   /* Same limit for plain and antialiased lines. */
+   case PIPE_CAP_MAX_LINE_WIDTH:
+   case PIPE_CAP_MAX_LINE_WIDTH_AA:
+      value = 7.5;
+      break;
+
+   /* Same limit for plain and antialiased points. */
+   case PIPE_CAP_MAX_POINT_WIDTH:
+   case PIPE_CAP_MAX_POINT_WIDTH_AA:
+      value = 255.0;
+      break;
+
+   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+      value = 4.0;
+      break;
+
+   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+      value = 16.0;
+      break;
+
+   default:
+      break;
+   }
+
+   return value;
+}
+
+/* Report whether 'format' is supported for the requested usage.
+ *
+ * One of three format lists is consulted: depth/stencil usage takes
+ * priority, then render-target usage, otherwise the general texture
+ * list.  'target' and 'geom_flags' do not influence the answer.
+ */
+static boolean
+brw_is_format_supported(struct pipe_screen *screen,
+                        enum pipe_format format,
+                        enum pipe_texture_target target,
+                        unsigned tex_usage,
+                        unsigned geom_flags)
+{
+   /* Each list ends with PIPE_FORMAT_NONE. */
+   static const enum pipe_format tex_supported[] = {
+      PIPE_FORMAT_L8_UNORM,
+      PIPE_FORMAT_I8_UNORM,
+      PIPE_FORMAT_A8_UNORM,
+      PIPE_FORMAT_L16_UNORM,
+      /*PIPE_FORMAT_I16_UNORM,*/
+      /*PIPE_FORMAT_A16_UNORM,*/
+      PIPE_FORMAT_A8L8_UNORM,
+      PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_A1R5G5B5_UNORM,
+      PIPE_FORMAT_A4R4G4B4_UNORM,
+      PIPE_FORMAT_X8R8G8B8_UNORM,
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      /* video */
+      PIPE_FORMAT_YCBCR,
+      PIPE_FORMAT_YCBCR_REV,
+      /* compressed */
+      /*PIPE_FORMAT_FXT1_RGBA,*/
+      PIPE_FORMAT_DXT1_RGB,
+      PIPE_FORMAT_DXT1_RGBA,
+      PIPE_FORMAT_DXT3_RGBA,
+      PIPE_FORMAT_DXT5_RGBA,
+      /* sRGB */
+      PIPE_FORMAT_R8G8B8A8_SRGB,
+      PIPE_FORMAT_A8L8_SRGB,
+      PIPE_FORMAT_L8_SRGB,
+      PIPE_FORMAT_DXT1_SRGB,
+      /* depth */
+      PIPE_FORMAT_Z32_FLOAT,
+      PIPE_FORMAT_X8Z24_UNORM,
+      PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_Z16_UNORM,
+      /* signed */
+      PIPE_FORMAT_R8G8_SNORM,
+      PIPE_FORMAT_R8G8B8A8_SNORM,
+      PIPE_FORMAT_NONE /* list terminator */
+   };
+   static const enum pipe_format render_supported[] = {
+      PIPE_FORMAT_X8R8G8B8_UNORM,
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_NONE /* list terminator */
+   };
+   static const enum pipe_format depth_supported[] = {
+      PIPE_FORMAT_Z32_FLOAT,
+      PIPE_FORMAT_X8Z24_UNORM,
+      PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_Z16_UNORM,
+      PIPE_FORMAT_NONE /* list terminator */
+   };
+   const enum pipe_format *candidate = tex_supported;
+
+   /* Render-target first, then depth/stencil so that it wins if both
+    * usage bits are set.
+    */
+   if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+      candidate = render_supported;
+   if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+      candidate = depth_supported;
+
+   while (*candidate != PIPE_FORMAT_NONE) {
+      if (*candidate == format)
+         return TRUE;
+      candidate++;
+   }
+
+   return FALSE;
+}
+
+
+/*
+ * Fence functions
+ */
+
+
+/* Stub: does nothing.  In particular *ptr is not updated to 'fence'. */
+static void
+brw_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+}
+
+/* Stub: always returns 0 without looking at 'fence' or 'flags'. */
+static int
+brw_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return 0; /* XXX shouldn't this be a boolean? */
+}
+
+/* Stub: always returns 0 immediately; no waiting is performed. */
+static int
+brw_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return 0;
+}
+
+
+/*
+ * Generic functions
+ */
+
+
+/* Destroy the screen: tear down the winsys screen, if one is attached,
+ * and then free the brw_screen itself.
+ */
+static void
+brw_destroy_screen(struct pipe_screen *screen)
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_winsys_screen *sws = bscreen->sws;
+
+   if (sws != NULL)
+      sws->destroy(sws);
+
+   FREE(bscreen);
+}
+
+/**
+ * Create a new brw_screen object
+ *
+ * \param sws     winsys screen to attach; stored in the new screen and
+ *                destroyed along with it
+ * \param pci_id  PCI device id, used to classify the chipset family
+ * \return the embedded pipe_screen, or NULL when the PCI id is not
+ *         recognised or allocation fails.
+ *
+ * In DEBUG builds this also (re)loads the BRW_DEBUG and BRW_DUMP flag
+ * words from the environment.
+ */
+struct pipe_screen *
+brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
+{
+   struct brw_chipset chip;
+   struct brw_screen *bscreen;
+
+#ifdef DEBUG
+   BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
+   BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
+
+   /* These three are forced on regardless of the environment. */
+   BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM;
+
+   BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0);
+#endif
+
+   /* Classify the chipset before allocating anything. */
+   memset(&chip, 0, sizeof chip);
+   chip.pci_id = pci_id;
+
+   switch (pci_id) {
+   case PCI_CHIP_I965_G:
+   case PCI_CHIP_I965_Q:
+   case PCI_CHIP_I965_G_1:
+   case PCI_CHIP_I946_GZ:
+   case PCI_CHIP_I965_GM:
+   case PCI_CHIP_I965_GME:
+      chip.is_965 = TRUE;
+      break;
+
+   case PCI_CHIP_GM45_GM:
+   case PCI_CHIP_IGD_E_G:
+   case PCI_CHIP_Q45_G:
+   case PCI_CHIP_G45_G:
+   case PCI_CHIP_G41_G:
+   case PCI_CHIP_B43_G:
+      chip.is_g4x = TRUE;
+      break;
+
+   case PCI_CHIP_ILD_G:
+   case PCI_CHIP_ILM_G:
+      chip.is_igdng = TRUE;
+      break;
+
+   default:
+      debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
+                   __FUNCTION__, pci_id);
+      return NULL;
+   }
+
+   bscreen = CALLOC_STRUCT(brw_screen);
+   if (bscreen == NULL)
+      return NULL;
+
+   bscreen->sws = sws;
+   bscreen->chipset = chip;
+
+   /* pipe_screen vtable */
+   bscreen->base.winsys = NULL;
+   bscreen->base.destroy = brw_destroy_screen;
+   bscreen->base.get_vendor = brw_get_vendor;
+   bscreen->base.get_name = brw_get_name;
+   bscreen->base.get_paramf = brw_get_paramf;
+   bscreen->base.get_param = brw_get_param;
+   bscreen->base.is_format_supported = brw_is_format_supported;
+   bscreen->base.fence_finish = brw_fence_finish;
+   bscreen->base.fence_signalled = brw_fence_signalled;
+   bscreen->base.fence_reference = brw_fence_reference;
+
+   /* Sub-module entry points */
+   brw_screen_tex_init(bscreen);
+   brw_screen_tex_surface_init(bscreen);
+   brw_screen_buffer_init(bscreen);
+
+   /* Set whenever BRW_NO_TILING yields a non-NULL option string. */
+   bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL;
+
+   return &bscreen->base;
+}
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
new file mode 100644
index 0000000000..7226d9228b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -0,0 +1,199 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_SCREEN_H
+#define BRW_SCREEN_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_screen.h"
+
+#include "brw_reg.h"
+#include "brw_structs.h"
+
+struct brw_winsys_screen;
+
+
+/**
+ * Subclass of pipe_screen.
+ *
+ * 'base' has to remain the first member: the brw_screen() cast wrapper
+ * converts a pipe_screen pointer to this type with a plain cast.
+ */
+struct brw_screen
+{
+ struct pipe_screen base;
+ /* Chipset description, recorded when the screen is created. */
+ struct brw_chipset chipset;
+ /* Winsys interface; supplies the bo_* buffer-object callbacks. */
+ struct brw_winsys_screen *sws;
+ /* TRUE when the BRW_NO_TILING debug option is present. */
+ boolean no_tiling;
+};
+
+/**
+ * Subclass of pipe_transfer.
+ *
+ * 'base' is the first member so that brw_transfer() can cast a
+ * pipe_transfer pointer directly.
+ */
+struct brw_transfer
+{
+ struct pipe_transfer base;
+
+ /* NOTE(review): presumably the byte offset of the transferred image
+ * inside the texture's buffer -- no user of this field is visible
+ * here, confirm against the transfer code.
+ */
+ unsigned offset;
+};
+
+/**
+ * Subclass of pipe_buffer, used for both hardware and user buffers.
+ */
+struct brw_buffer
+{
+ struct pipe_buffer base;
+
+ /* One of either bo or user_buffer will be non-null, depending on
+ * whether this is a hardware or user buffer.
+ */
+ struct brw_winsys_buffer *bo;
+ void *user_buffer;
+
+ /* Mapped pointer??
+ *
+ * NOTE(review): none of the buffer functions in brw_screen_buffers.c
+ * read or write this field; confirm whether it is still needed.
+ */
+ void *ptr;
+};
+
+
+/**
+ * Identifies one image of a texture by face, z-slice and mip level.
+ *
+ * The three bitfields add up to 32 bits and share storage with 'value',
+ * which lets brw_get_tex_surface() compare two ids with a single
+ * integer comparison.
+ * NOTE(review): this relies on 'unsigned' being 32 bits wide.
+ */
+union brw_surface_id {
+ struct {
+ unsigned face:3;
+ unsigned zslice:13;
+ unsigned level:16;
+ } bits;
+ unsigned value;
+};
+
+
+/**
+ * Subclass of pipe_surface: a view onto one image of a brw_texture.
+ */
+struct brw_surface
+{
+ struct pipe_surface base;
+
+ /* face/zslice/level this view was created for */
+ union brw_surface_id id;
+ /* cpp, pitch and tiling are copied from the parent texture when the
+ * view is created in create_in_place_view(); draw_offset is not
+ * written there.
+ */
+ unsigned cpp;
+ unsigned pitch;
+ unsigned draw_offset;
+ unsigned tiling;
+
+ /* hardware surface state, filled in when the view is created */
+ struct brw_surface_state ss;
+ /* reference to the parent texture's buffer object */
+ struct brw_winsys_buffer *bo;
+ /* links for the per-texture list of views (brw_texture::views[]) */
+ struct brw_surface *next, *prev;
+};
+
+
+
+/**
+ * Subclass of pipe_texture.
+ */
+struct brw_texture
+{
+ struct pipe_texture base;
+ struct brw_winsys_buffer *bo;
+ struct brw_surface_state ss;
+
+ /* Per mip level layout, filled in by brw_texture_layout():
+ * image_offset[l] - MALLOC'ed table with nr_images[l] entries
+ * nr_images[l] - number of images (faces / slices) in level l
+ * level_offset[l] - byte offset of the level, (x + y * pitch) * cpp
+ */
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ boolean compressed;
+ unsigned brw_target;
+ /* row pitch, expressed in texels rather than bytes */
+ unsigned pitch;
+ unsigned tiling;
+ /* factor that turns a texel offset into a byte offset */
+ unsigned cpp;
+ /* number of rows covered by the whole layout */
+ unsigned total_height;
+
+ /* List heads for the surfaces handed out by brw_get_tex_surface(),
+ * indexed by BRW_VIEW_LINEAR / BRW_VIEW_IN_PLACE.
+ */
+ struct brw_surface views[2];
+};
+
+
+
+/*
+ * Cast wrappers.
+ *
+ * Each driver struct embeds its pipe_* base as the first member, so a
+ * plain pointer cast is all that is needed to get at the subclass.
+ */
+static INLINE struct brw_screen *
+brw_screen(struct pipe_screen *pscreen)
+{
+   struct brw_screen *bscreen = (struct brw_screen *) pscreen;
+
+   return bscreen;
+}
+
+/* Downcast a pipe_transfer to the driver's brw_transfer. */
+static INLINE struct brw_transfer *
+brw_transfer(struct pipe_transfer *transfer)
+{
+   struct brw_transfer *btransfer = (struct brw_transfer *)transfer;
+
+   return btransfer;
+}
+
+/* Downcast a pipe_surface to the driver's brw_surface. */
+static INLINE struct brw_surface *
+brw_surface(struct pipe_surface *surface)
+{
+   struct brw_surface *bsurface = (struct brw_surface *)surface;
+
+   return bsurface;
+}
+
+/* Downcast a pipe_buffer to the driver's brw_buffer. */
+static INLINE struct brw_buffer *
+brw_buffer(struct pipe_buffer *buffer)
+{
+   struct brw_buffer *bbuffer = (struct brw_buffer *)buffer;
+
+   return bbuffer;
+}
+
+/* Downcast a pipe_texture to the driver's brw_texture. */
+static INLINE struct brw_texture *
+brw_texture(struct pipe_texture *texture)
+{
+   struct brw_texture *btexture = (struct brw_texture *)texture;
+
+   return btexture;
+}
+
+
+/* Pipe buffer helpers
+ */
+
+/* TRUE when the buffer wraps caller-owned memory (user_buffer is set)
+ * rather than a winsys buffer object.
+ */
+static INLINE boolean
+brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
+{
+   const struct brw_buffer *bbuf = (const struct brw_buffer *)buf;
+
+   return bbuf->user_buffer != NULL;
+}
+
+unsigned
+brw_surface_pitch( const struct pipe_surface *surface );
+
+/***********************************************************************
+ * Internal functions
+ */
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+ struct brw_texture *tex );
+
+void brw_update_texture( struct brw_screen *brw_screen,
+ struct brw_texture *tex );
+
+
+void brw_screen_tex_init( struct brw_screen *brw_screen );
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
+
+void brw_screen_buffer_init(struct brw_screen *brw_screen);
+
+
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_texture *texture,
+ unsigned face,
+ unsigned level,
+ struct brw_winsys_buffer *bo );
+
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_buffer *buffer,
+ struct brw_winsys_buffer *bo );
+
+
+
+#endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
new file mode 100644
index 0000000000..d8141a3f5b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -0,0 +1,202 @@
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "brw_screen.h"
+#include "brw_winsys.h"
+
+
+
+/* Map part of a buffer for CPU access.
+ *
+ * A user buffer is returned as its user pointer (offset is not
+ * applied).  Anything else is mapped through the winsys, with the
+ * CPU_WRITE, DISCARD and FLUSH_EXPLICIT bits of 'usage' passed on as
+ * booleans.
+ */
+static void *
+brw_buffer_map_range( struct pipe_screen *screen,
+                      struct pipe_buffer *buffer,
+                      unsigned offset,
+                      unsigned length,
+                      unsigned usage )
+{
+   struct brw_winsys_screen *winsys = brw_screen(screen)->sws;
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+   boolean write, discard, explicit_flush;
+
+   if (bbuf->user_buffer)
+      return bbuf->user_buffer;
+
+   write = (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE;
+   discard = (usage & PIPE_BUFFER_USAGE_DISCARD) ? TRUE : FALSE;
+   explicit_flush = (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) ? TRUE : FALSE;
+
+   return winsys->bo_map( bbuf->bo,
+                          BRW_DATA_OTHER,
+                          offset,
+                          length,
+                          write,
+                          discard,
+                          explicit_flush );
+}
+
+/* Map a whole buffer for CPU access.
+ *
+ * A user buffer is returned as its user pointer.  Anything else is
+ * mapped through the winsys over [0, size), never with discard or
+ * explicit-flush semantics.
+ */
+static void *
+brw_buffer_map( struct pipe_screen *screen,
+                struct pipe_buffer *buffer,
+                unsigned usage )
+{
+   struct brw_winsys_screen *winsys = brw_screen(screen)->sws;
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+   boolean write;
+
+   if (bbuf->user_buffer)
+      return bbuf->user_buffer;
+
+   write = (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE;
+
+   return winsys->bo_map( bbuf->bo,
+                          BRW_DATA_OTHER,
+                          0,
+                          bbuf->base.size,
+                          write,
+                          FALSE,
+                          FALSE );
+}
+
+
+/* Flush a written range of a mapped buffer through the winsys.
+ * User buffers have nothing to flush and are ignored.
+ */
+static void
+brw_buffer_flush_mapped_range( struct pipe_screen *screen,
+                               struct pipe_buffer *buffer,
+                               unsigned offset,
+                               unsigned length )
+{
+   struct brw_winsys_screen *winsys = brw_screen(screen)->sws;
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+
+   if (!bbuf->user_buffer)
+      winsys->bo_flush_range( bbuf->bo, offset, length );
+}
+
+
+/* Undo a map.  Only buffers backed by a winsys buffer object need any
+ * work; buffers without one are left alone.
+ */
+static void
+brw_buffer_unmap( struct pipe_screen *screen,
+                  struct pipe_buffer *buffer )
+{
+   struct brw_winsys_screen *winsys = brw_screen(screen)->sws;
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+
+   if (!bbuf->bo)
+      return;
+
+   winsys->bo_unmap(bbuf->bo);
+}
+
+/* Final destruction of a buffer whose reference count has reached
+ * zero: drop the buffer-object reference and free the wrapper.
+ */
+static void
+brw_buffer_destroy( struct pipe_buffer *buffer )
+{
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+
+   assert(!p_atomic_read(&buffer->reference.count));
+
+   bo_reference(&bbuf->bo, NULL);
+   FREE(bbuf);
+}
+
+
+/* Allocate a hardware buffer of 'size' bytes.
+ *
+ * The VERTEX / INDEX / PIXEL / CONSTANT bits of 'usage' select the
+ * winsys buffer type; any other combination of those bits falls back to
+ * the generic type.
+ *
+ * Returns the new buffer holding one reference, or NULL when either the
+ * wrapper struct or the winsys buffer object cannot be allocated.
+ */
+static struct pipe_buffer *
+brw_buffer_create(struct pipe_screen *screen,
+                  unsigned alignment,
+                  unsigned usage,
+                  unsigned size)
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_winsys_screen *sws = bscreen->sws;
+   struct brw_buffer *buf;
+   unsigned buffer_type;
+   enum pipe_error ret;
+
+   buf = CALLOC_STRUCT(brw_buffer);
+   if (!buf)
+      return NULL;
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.screen = screen;
+   buf->base.alignment = alignment;
+   buf->base.usage = usage;
+   buf->base.size = size;
+
+   switch (usage & (PIPE_BUFFER_USAGE_VERTEX |
+                    PIPE_BUFFER_USAGE_INDEX |
+                    PIPE_BUFFER_USAGE_PIXEL |
+                    PIPE_BUFFER_USAGE_CONSTANT))
+   {
+   case PIPE_BUFFER_USAGE_VERTEX:
+   case PIPE_BUFFER_USAGE_INDEX:
+   case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX):
+      buffer_type = BRW_BUFFER_TYPE_VERTEX;
+      break;
+
+   case PIPE_BUFFER_USAGE_PIXEL:
+      buffer_type = BRW_BUFFER_TYPE_PIXEL;
+      break;
+
+   case PIPE_BUFFER_USAGE_CONSTANT:
+      buffer_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS;
+      break;
+
+   default:
+      buffer_type = BRW_BUFFER_TYPE_GENERIC;
+      break;
+   }
+
+   ret = sws->bo_alloc( sws, buffer_type,
+                        size, alignment,
+                        &buf->bo );
+   if (ret != PIPE_OK) {
+      /* The wrapper is not yet visible to anyone else; release it here
+       * so a failed winsys allocation does not leak it.
+       */
+      FREE(buf);
+      return NULL;
+   }
+
+   return &buf->base;
+}
+
+
+/* Wrap caller-owned memory in a pipe_buffer.
+ *
+ * No buffer object is allocated: the wrapper just records 'ptr' and
+ * 'bytes', with alignment 1 and no usage flags.  Returns NULL if the
+ * wrapper cannot be allocated.
+ */
+static struct pipe_buffer *
+brw_user_buffer_create(struct pipe_screen *screen,
+                       void *ptr,
+                       unsigned bytes)
+{
+   struct brw_buffer *ubuf = CALLOC_STRUCT(brw_buffer);
+
+   if (ubuf == NULL)
+      return NULL;
+
+   pipe_reference_init(&ubuf->base.reference, 1);
+   ubuf->base.screen = screen;
+   ubuf->base.alignment = 1;
+   ubuf->base.usage = 0;
+   ubuf->base.size = bytes;
+
+   ubuf->user_buffer = ptr;
+
+   return &ubuf->base;
+}
+
+
+/* Ask the winsys whether 'bo' references the buffer object behind
+ * 'buffer'.  Buffers without a buffer object are never referenced.
+ */
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+                                        struct pipe_buffer *buffer,
+                                        struct brw_winsys_buffer *bo )
+{
+   struct brw_winsys_buffer *target = brw_buffer(buffer)->bo;
+
+   if (target != NULL)
+      return brw_screen->sws->bo_references( bo, target );
+
+   return FALSE;
+}
+
+
+/* Install the buffer entry points of this file in the screen vtable. */
+void brw_screen_buffer_init(struct brw_screen *brw_screen)
+{
+   struct pipe_screen *pscreen = &brw_screen->base;
+
+   /* creation / destruction */
+   pscreen->buffer_create = brw_buffer_create;
+   pscreen->user_buffer_create = brw_user_buffer_create;
+   pscreen->buffer_destroy = brw_buffer_destroy;
+
+   /* CPU access */
+   pscreen->buffer_map = brw_buffer_map;
+   pscreen->buffer_map_range = brw_buffer_map_range;
+   pscreen->buffer_flush_mapped_range = brw_buffer_flush_mapped_range;
+   pscreen->buffer_unmap = brw_buffer_unmap;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
new file mode 100644
index 0000000000..e2b9954e59
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -0,0 +1,262 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "util/u_math.h"
+
+#include "pipe/p_screen.h"
+#include "brw_screen.h"
+#include "brw_defines.h"
+#include "brw_winsys.h"
+
+/* Kinds of surface view.  The values are also used as the index into
+ * brw_texture::views[] by brw_get_tex_surface().
+ */
+enum {
+ BRW_VIEW_LINEAR,
+ BRW_VIEW_IN_PLACE
+};
+
+
+/* Decide whether the given image of a texture needs a separate linear
+ * view instead of being rendered to in place.
+ *
+ * The only check written so far is compiled out (#if 0) and still
+ * refers to names that do not exist in this function, so the answer is
+ * currently always "no" and none of the arguments are used.
+ */
+static boolean need_linear_view( struct brw_screen *brw_screen,
+ struct brw_texture *brw_texture,
+ union brw_surface_id id,
+ unsigned usage )
+{
+#if 0
+ /* XXX: what about IDGNG?
+ */
+ if (!BRW_IS_G4X(brw->brw_screen->pci_id))
+ {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+ /* The original gen4 hardware couldn't set up WM surfaces pointing
+ * at an offset within a tile, which can happen when rendering to
+ * anything but the base level of a texture or the +X face/0 depth.
+ * This was fixed with the 4 Series hardware.
+ *
+ * For these original chips, you would have to make the depth and
+ * color destination surfaces include information on the texture
+ * type, LOD, face, and various limits to use them as a destination.
+ *
+ * This is easy in Gallium as surfaces are all backed by
+ * textures, but there's also a nasty requirement that the depth
+ * and the color surfaces all be of the same LOD, which is
+ * harder to get around as we can't look at a surface in
+ * isolation and decide if it's legal.
+ *
+ * Instead, end up being pessimistic and say that for i965,
+ * ... ??
+ */
+ if (brw_tex->tiling != I915_TILING_NONE &&
+ (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) {
+ if (BRW_DEBUG & DEBUG_VIEW)
+ debug_printf("%s: need surface view for non-aligned tex image\n",
+ __FUNCTION__);
+ return GL_TRUE;
+ }
+ }
+#endif
+
+ /* Tiled 3d textures don't have subsets that look like 2d surfaces:
+ */
+
+ /* Everything else should be fine to render to in-place:
+ */
+ return GL_FALSE;
+}
+
+/* Look at all texture views and figure out if any of them need to be
+ * back-copied into the texture for sampling.
+ *
+ * This is a placeholder: the body is empty and both arguments are
+ * unused.
+ */
+void brw_update_texture( struct brw_screen *brw_screen,
+ struct brw_texture *tex )
+{
+ /* currently nothing to do */
+}
+
+
+/* Create a new surface with linear layout to serve as a render-target
+ * where it would be illegal (perhaps due to tiling constraints) to do
+ * this in-place.
+ *
+ * Currently not implemented, not sure if it's needed: the function
+ * ignores its arguments and always returns NULL.
+ */
+static struct brw_surface *create_linear_view( struct brw_screen *brw_screen,
+ struct brw_texture *tex,
+ union brw_surface_id id,
+ unsigned usage )
+{
+ return NULL;
+}
+
+
+/* Create a pipe_surface that just points directly into the existing
+ * texture's storage.
+ *
+ * The new surface starts with one reference and takes its own
+ * references on the texture and on the texture's buffer object.
+ * Returns NULL if the surface struct cannot be allocated.
+ */
+static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
+ struct brw_texture *tex,
+ union brw_surface_id id,
+ unsigned usage )
+{
+ struct brw_surface *surface;
+
+ surface = CALLOC_STRUCT(brw_surface);
+ if (surface == NULL)
+ return NULL;
+
+ pipe_reference_init(&surface->base.reference, 1);
+
+ /* XXX: ignoring render-to-slice-of-3d-texture
+ */
+ assert(id.bits.zslice == 0);
+
+ /* Generic pipe_surface fields: size of the selected mip level and the
+ * offset of the selected image as recorded by the layout code.
+ */
+ surface->base.format = tex->base.format;
+ surface->base.width = u_minify(tex->base.width0, id.bits.level);
+ surface->base.height = u_minify(tex->base.height0, id.bits.level);
+ surface->base.offset = tex->image_offset[id.bits.level][id.bits.face];
+ surface->base.usage = usage;
+ surface->base.zslice = id.bits.zslice;
+ surface->base.face = id.bits.face;
+ surface->base.level = id.bits.level;
+ surface->id = id;
+ surface->cpp = tex->cpp;
+ surface->pitch = tex->pitch;
+ surface->tiling = tex->tiling;
+
+ bo_reference( &surface->bo, tex->bo );
+ pipe_texture_reference( &surface->base.texture, &tex->base );
+
+ /* The view is always described to the hardware as a 2D surface,
+ * whatever the type of the parent texture.
+ */
+ surface->ss.ss0.surface_format = tex->ss.ss0.surface_format;
+ surface->ss.ss0.surface_type = BRW_SURFACE_2D;
+
+ if (tex->tiling == BRW_TILING_NONE) {
+ surface->ss.ss1.base_addr = surface->base.offset;
+ } else {
+ /* Tiled: the base address is rounded down to a 4096-byte boundary
+ * and the remainder, if any, has to be expressed separately.
+ */
+ uint32_t tile_offset = surface->base.offset % 4096;
+
+ surface->ss.ss1.base_addr = surface->base.offset - tile_offset;
+
+ if (brw_screen->chipset.is_g4x) {
+ /* The remainder is split into an x/y offset using a row length of
+ * 512 bytes for X tiling and 128 bytes otherwise.
+ */
+ if (tex->tiling == BRW_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
+ surface->ss.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
+ surface->ss.ss5.y_offset = tile_offset / 128 / 2;
+ }
+ }
+ else {
+ /* No x/y offset is programmed on other chipsets, so the image
+ * must already start on a 4096-byte boundary.
+ */
+ assert(tile_offset == 0);
+ }
+ }
+
+#if 0
+ if (region_bo != NULL)
+ surface->ss.ss1.base_addr += region_bo->offset; /* reloc */
+#endif
+
+ /* Width and height are stored minus one; tiling and pitch state are
+ * copied from the parent texture's surface state.
+ */
+ surface->ss.ss2.width = surface->base.width - 1;
+ surface->ss.ss2.height = surface->base.height - 1;
+ surface->ss.ss3.tiled_surface = tex->ss.ss3.tiled_surface;
+ surface->ss.ss3.tile_walk = tex->ss.ss3.tile_walk;
+ surface->ss.ss3.pitch = tex->ss.ss3.pitch;
+
+ return surface;
+}
+
+/* Get a surface which is a view into a texture.
+ *
+ * Views are cached on the texture: if a surface for the same
+ * face/level/zslice is already on the matching view list it is
+ * returned as-is, otherwise a new one is created and put at the head
+ * of that list.
+ *
+ * Returns NULL when a new view is needed but cannot be created.
+ */
+static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
+                                                struct pipe_texture *pt,
+                                                unsigned face, unsigned level,
+                                                unsigned zslice,
+                                                unsigned usage )
+{
+   struct brw_texture *tex = brw_texture(pt);
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_surface *surface;
+   union brw_surface_id id;
+   int type;
+
+   /* The three bitfields cover all of id.value, so setting them is
+    * enough to make the value comparison below well defined.
+    */
+   id.bits.face = face;
+   id.bits.level = level;
+   id.bits.zslice = zslice;
+
+   if (need_linear_view(bscreen, tex, id, usage))
+      type = BRW_VIEW_LINEAR;
+   else
+      type = BRW_VIEW_IN_PLACE;
+
+   /* Reuse an existing view of the same image if there is one. */
+   foreach (surface, &tex->views[type]) {
+      if (id.value == surface->id.value)
+         return &surface->base;
+   }
+
+   switch (type) {
+   case BRW_VIEW_LINEAR:
+      surface = create_linear_view( bscreen, tex, id, usage );
+      break;
+   case BRW_VIEW_IN_PLACE:
+      surface = create_in_place_view( bscreen, tex, id, usage );
+      break;
+   default:
+      return NULL;
+   }
+
+   /* Both create functions can return NULL; a NULL element must never
+    * be linked into the view list.
+    */
+   if (surface == NULL)
+      return NULL;
+
+   insert_at_head( &tex->views[type], surface );
+   return &surface->base;
+}
+
+
+/* Destroy a texture view: take it off its texture's view list, drop
+ * the references it holds on the buffer object and the texture, then
+ * free it.
+ */
+static void brw_tex_surface_destroy( struct pipe_surface *surf )
+{
+   struct brw_surface *view = brw_surface(surf);
+
+   remove_from_list(view);
+
+   /* Unreference shared buffer, texture:
+    */
+   bo_reference(&view->bo, NULL);
+   pipe_texture_reference( &view->base.texture, NULL );
+
+   FREE(view);
+}
+
+
+/* Install the texture-surface entry points in the screen vtable. */
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen )
+{
+   struct pipe_screen *pscreen = &brw_screen->base;
+
+   pscreen->get_tex_surface = brw_get_tex_surface;
+   pscreen->tex_surface_destroy = brw_tex_surface_destroy;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
new file mode 100644
index 0000000000..894f4bea40
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -0,0 +1,414 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+#include "pipe/p_format.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_screen.h"
+#include "brw_debug.h"
+#include "brw_winsys.h"
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+/* Round a pitch, given in texels, up so that the pitch in bytes is a
+ * multiple of the alignment chosen for the texture's tiling mode.
+ * Compressed textures are returned unchanged.
+ */
+static int
+brw_tex_pitch_align (struct brw_texture *tex,
+                     int pitch)
+{
+   int byte_align;
+
+   if (tex->compressed)
+      return pitch;
+
+   if (tex->tiling == BRW_TILING_X) {
+      byte_align = 512;
+   }
+   else if (tex->tiling == BRW_TILING_Y) {
+      byte_align = 128;
+   }
+   else {
+      /* XXX: Untiled pitch alignment of 64 bytes for now to allow
+       * render-to-texture to work in all cases. This should
+       * probably be replaced at some point by some scheme to only
+       * do this when really necessary, for example standalone
+       * render target views.
+       */
+      byte_align = 64;
+   }
+
+   /* Align in bytes, then convert back to texels. */
+   return align(pitch * tex->cpp, byte_align) / tex->cpp;
+}
+
+
+/* Report the horizontal (*w) and vertical (*h) alignment unit used when
+ * placing mip images of the given format: 4x4 for the DXT formats,
+ * 4x2 for everything else.
+ */
+static void
+brw_tex_alignment_unit(enum pipe_format pf,
+                       GLuint *w, GLuint *h)
+{
+   /* The horizontal unit does not depend on the format. */
+   *w = 4;
+
+   switch (pf) {
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT5_RGBA:
+   case PIPE_FORMAT_DXT1_SRGB:
+   case PIPE_FORMAT_DXT1_SRGBA:
+   case PIPE_FORMAT_DXT3_SRGBA:
+   case PIPE_FORMAT_DXT5_SRGBA:
+      *h = 4;
+      break;
+
+   default:
+      *h = 2;
+      break;
+   }
+}
+
+
+/* Record the placement of one mip level and allocate its table of
+ * per-image offsets.
+ *
+ * level_offset[level] becomes the byte offset of texel (x, y).  The
+ * image_offset table gets nr_images entries, of which only entry 0 is
+ * initialised here (to 0); the others are expected to be filled in with
+ * brw_tex_set_image_offset().  w, h and d are only used for the debug
+ * message.
+ *
+ * If the table cannot be allocated the level is left with no images
+ * (nr_images[level] == 0, image_offset[level] == NULL); this function
+ * has no way of reporting the failure to its caller.
+ */
+static void
+brw_tex_set_level_info(struct brw_texture *tex,
+                       GLuint level,
+                       GLuint nr_images,
+                       GLuint x, GLuint y,
+                       GLuint w, GLuint h, GLuint d)
+{
+   assert(tex->image_offset[level] == NULL);
+   assert(nr_images >= 1);
+
+   tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp;
+   tex->nr_images[level] = nr_images;
+
+   /* Report after level_offset has been computed, so the message shows
+    * the offset of this level rather than whatever was stored before.
+    */
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+                   level, w, h, d, x, y, tex->level_offset[level]);
+
+   tex->image_offset[level] = MALLOC(nr_images * sizeof *tex->image_offset[level]);
+   if (tex->image_offset[level] == NULL) {
+      /* Don't write through a NULL table; keep the count consistent
+       * with the (missing) storage.
+       */
+      tex->nr_images[level] = 0;
+      return;
+   }
+
+   tex->image_offset[level][0] = 0;
+}
+
+
+/* Record where image 'img' of mip level 'level' lives.
+ *
+ * The stored value is the byte offset of texel (x, y) plus the extra
+ * byte 'offset'.  The level must already have been set up with
+ * brw_tex_set_level_info(), which allocates the table written here.
+ */
+static void
+brw_tex_set_image_offset(struct brw_texture *tex,
+                         GLuint level, GLuint img,
+                         GLuint x, GLuint y,
+                         GLuint offset)
+{
+   assert((x == 0 && y == 0) || img != 0 || level != 0);
+   assert(img < tex->nr_images[level]);
+
+   tex->image_offset[level][img] = (x + y * tex->pitch) * tex->cpp + offset;
+
+   /* Report after the store: before it, the table entry is
+    * uninitialised heap memory for every image except the first.
+    */
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s level %d img %d pos %d,%d image_offset %x\n",
+                   __FUNCTION__, level, img, x, y,
+                   tex->image_offset[level][img]);
+}
+
+
+
+/* Lay out the mip levels of a texture with one image per level.
+ *
+ * Level 0 sits at the origin, level 1 directly below it, and the
+ * remaining levels are stacked to the right of level 1.  Sets
+ * tex->pitch and tex->total_height and records each level with
+ * brw_tex_set_level_info().
+ *
+ * NOTE(review): only level_offset[] receives the real placement here;
+ * image_offset[level][0] is left at 0, yet create_in_place_view() uses
+ * image_offset[][] alone as the surface offset -- confirm that levels
+ * above 0 end up at the right address.
+ */
+static void brw_layout_2d( struct brw_texture *tex )
+{
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = tex->base.width0;
+ GLuint height = tex->base.height0;
+
+ tex->pitch = tex->base.width0;
+ brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+ if (tex->compressed) {
+ tex->pitch = align(tex->base.width0, align_w);
+ }
+
+ /* May need to adjust pitch to accommodate the placement of
+ * the 2nd mipmap. This occurs when the alignment
+ * constraints of mipmap placement push the right edge of the
+ * 2nd mipmap out past the width of its parent.
+ */
+ if (tex->base.last_level > 0) {
+ GLuint mip1_width;
+
+ if (tex->compressed) {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ align(u_minify(tex->base.width0, 2), align_w));
+ } else {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ u_minify(tex->base.width0, 2));
+ }
+
+ if (mip1_width > tex->pitch) {
+ tex->pitch = mip1_width;
+ }
+ }
+
+ /* Pitch must be a whole number of dwords, even though we
+ * express it in texels.
+ */
+ tex->pitch = brw_tex_pitch_align (tex, tex->pitch);
+ tex->total_height = 0;
+
+ for ( level = 0 ; level <= tex->base.last_level ; level++ ) {
+ GLuint img_height;
+
+ brw_tex_set_level_info(tex, level, 1, x, y, width, height, 1);
+
+ /* Rows occupied by this level: a quarter of the height (at least
+ * one) when compressed, otherwise the height rounded up to align_h.
+ */
+ if (tex->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = align(height, align_h);
+
+
+ /* Because the images are packed better, the final offset
+ * might not be the maximal one:
+ */
+ tex->total_height = MAX2(tex->total_height, y + img_height);
+
+ /* Layout_below: step right after second mipmap.
+ */
+ if (level == 1) {
+ x += align(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ }
+}
+
+
+/* Lay out a cube map for chipsets flagged is_igdng.
+ *
+ * Mip levels are placed as in brw_layout_2d() (level 1 below level 0,
+ * later levels to the right of level 1).  All six faces of a level
+ * share that (x, y) placement; face q is additionally displaced by
+ * q * qpitch bytes.  Sets tex->pitch and tex->total_height.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+brw_layout_cubemap_idgng( struct brw_texture *tex )
+{
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = tex->base.width0;
+ GLuint height = tex->base.height0;
+ GLuint qpitch = 0;
+ GLuint y_pitch = 0;
+
+ tex->pitch = tex->base.width0;
+ brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+ y_pitch = align(height, align_h);
+
+ if (tex->compressed) {
+ tex->pitch = align(tex->base.width0, align_w);
+ }
+
+ /* Widen the pitch if levels 1 and 2 side by side need more room
+ * than level 0.
+ */
+ if (tex->base.last_level != 0) {
+ GLuint mip1_width;
+
+ if (tex->compressed) {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ align(u_minify(tex->base.width0, 2), align_w));
+ } else {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ u_minify(tex->base.width0, 2));
+ }
+
+ if (mip1_width > tex->pitch) {
+ tex->pitch = mip1_width;
+ }
+ }
+
+ tex->pitch = brw_tex_pitch_align(tex, tex->pitch);
+
+ /* qpitch is the byte distance between consecutive faces;
+ * total_height is the same per-face row count times six.  The row
+ * count is divided by 4 for compressed formats.
+ */
+ if (tex->compressed) {
+ qpitch = ((y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) / 4) * tex->pitch * tex->cpp;
+
+ tex->total_height = ((y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) / 4) * 6;
+ } else {
+ qpitch = (y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) * tex->pitch * tex->cpp;
+
+ tex->total_height = (y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) * 6;
+ }
+
+ for (level = 0; level <= tex->base.last_level; level++) {
+ GLuint img_height;
+ GLuint nr_images = 6;
+ GLuint q = 0;
+
+ brw_tex_set_level_info(tex, level, nr_images, x, y, width, height, 1);
+
+ for (q = 0; q < nr_images; q++)
+ brw_tex_set_image_offset(tex, level, q, x, y, q * qpitch);
+
+ if (tex->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = align(height, align_h);
+
+ /* Same stepping as brw_layout_2d(): right after level 1, down
+ * otherwise.
+ */
+ if (level == 1) {
+ x += align(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ }
+
+ return TRUE;
+}
+
+
+/* Lay out a 3D texture, or a cube map on chipsets that do not use
+ * brw_layout_cubemap_idgng().
+ *
+ * Each mip level starts at row tex->total_height.  Its images (depth
+ * slices for PIPE_TEXTURE_3D, six faces otherwise) are placed
+ * pack_x_nr per row, pack_x_pitch texels apart horizontally and
+ * pack_y_pitch rows apart vertically; the packing gets denser for the
+ * smaller levels.  Sets tex->pitch and tex->total_height.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+brw_layout_3d_cube( struct brw_texture *tex )
+{
+ GLuint width = tex->base.width0;
+ GLuint height = tex->base.height0;
+ GLuint depth = tex->base.depth0;
+ GLuint pack_x_pitch, pack_x_nr;
+ GLuint pack_y_pitch;
+ GLuint level;
+ GLuint align_h = 2;
+ GLuint align_w = 4;
+
+ tex->total_height = 0;
+ brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+ if (tex->compressed) {
+ tex->pitch = align(width, align_w);
+ pack_y_pitch = (height + 3) / 4;
+ } else {
+ tex->pitch = brw_tex_pitch_align(tex, tex->base.width0);
+ pack_y_pitch = align(tex->base.height0, align_h);
+ }
+
+ /* Level 0 has one image per row. */
+ pack_x_pitch = width;
+ pack_x_nr = 1;
+
+ for (level = 0 ; level <= tex->base.last_level ; level++) {
+ GLuint nr_images = tex->base.target == PIPE_TEXTURE_3D ? depth : 6;
+ GLint x = 0;
+ GLint y = 0;
+ GLint q, j;
+
+ brw_tex_set_level_info(tex, level, nr_images,
+ 0, tex->total_height,
+ width, height, depth);
+
+ /* Place the images row by row, pack_x_nr images to a row; x and y
+ * are relative to the start of the level.
+ */
+ for (q = 0; q < nr_images;) {
+ for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+ brw_tex_set_image_offset(tex, level, q, x, y, 0);
+ x += pack_x_pitch;
+ }
+
+ x = 0;
+ y += pack_y_pitch;
+ }
+
+
+ tex->total_height += y;
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
+
+ /* Work out the packing for the next, smaller level. */
+ if (tex->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > align(width, align_w)) {
+ pack_x_pitch = align(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr <= tex->pitch);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = align(pack_y_pitch, align_h);
+ }
+ }
+ }
+
+ /* The 965's sampler lays cachelines out according to how accesses
+ * in the texture surfaces run, so they may be "vertical" through
+ * memory. As a result, the docs say in Surface Padding Requirements:
+ * Sampling Engine Surfaces that two extra rows of padding are required.
+ */
+ if (tex->base.target == PIPE_TEXTURE_CUBE)
+ tex->total_height += 2;
+
+ return TRUE;
+}
+
+
+
+/* Compute the memory layout of a texture.
+ *
+ * Picks the layout routine from the texture target (and, for cube
+ * maps, from the chipset), which fills in pitch, total_height and the
+ * per-level offset tables of 'tex'.  Always returns GL_TRUE.
+ */
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+                             struct brw_texture *tex )
+{
+   const boolean is_cube = (tex->base.target == PIPE_TEXTURE_CUBE);
+   const boolean is_3d = (tex->base.target == PIPE_TEXTURE_3D);
+
+   if (is_cube && brw_screen->chipset.is_igdng)
+      brw_layout_cubemap_idgng( tex );
+   else if (is_cube || is_3d)
+      brw_layout_3d_cube( tex );
+   else
+      brw_layout_2d( tex );
+
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+                   tex->pitch,
+                   tex->total_height,
+                   tex->cpp,
+                   tex->pitch * tex->total_height * tex->cpp );
+
+   return GL_TRUE;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
new file mode 100644
index 0000000000..feb9d5f765
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -0,0 +1,573 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "util/u_format.h"
+
+#include "brw_screen.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+#include "brw_winsys.h"
+
+
+
+/* Convert a gallium PIPE_TEXTURE_* target into the matching BRW_SURFACE_*
+ * surface type.  An unknown target trips an assert and falls back to
+ * BRW_SURFACE_1D.
+ */
+static GLuint translate_tex_target( unsigned target )
+{
+   if (target == PIPE_TEXTURE_1D)
+      return BRW_SURFACE_1D;
+
+   if (target == PIPE_TEXTURE_2D)
+      return BRW_SURFACE_2D;
+
+   if (target == PIPE_TEXTURE_3D)
+      return BRW_SURFACE_3D;
+
+   if (target == PIPE_TEXTURE_CUBE)
+      return BRW_SURFACE_CUBE;
+
+   /* Not a target this driver knows about. */
+   assert(0);
+   return BRW_SURFACE_1D;
+}
+
+
+static GLuint translate_tex_format( enum pipe_format pf )
+{ /* Map a gallium pipe_format onto the BRW_SURFACEFORMAT_* value that is
+   * written into the surface state.  Any format without a case below
+   * yields BRW_SURFACEFORMAT_INVALID, which brw_is_format_supported()
+   * uses to reject the format.
+   */
+ switch( pf ) {
+ case PIPE_FORMAT_L8_UNORM:
+ return BRW_SURFACEFORMAT_L8_UNORM;
+
+ case PIPE_FORMAT_I8_UNORM:
+ return BRW_SURFACEFORMAT_I8_UNORM;
+
+ case PIPE_FORMAT_A8_UNORM:
+ return BRW_SURFACEFORMAT_A8_UNORM;
+
+ case PIPE_FORMAT_L16_UNORM:
+ return BRW_SURFACEFORMAT_L16_UNORM;
+
+ /* XXX: Add these to gallium
+ case PIPE_FORMAT_I16_UNORM:
+ return BRW_SURFACEFORMAT_I16_UNORM;
+
+ case PIPE_FORMAT_A16_UNORM:
+ return BRW_SURFACEFORMAT_A16_UNORM;
+ */
+
+ case PIPE_FORMAT_A8L8_UNORM:
+ return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; /* NOTE(review): A8R8G8B8 below maps to B8G8R8A8, so R8G8B8X8 here may swap red and blue -- confirm against the hardware format list */
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ /*
+ * Video formats
+ */
+
+ case PIPE_FORMAT_YCBCR_REV:
+ return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+ case PIPE_FORMAT_YCBCR:
+ return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+ /*
+ * Compressed formats.
+ */
+ /* XXX: Add FXT to gallium?
+ case PIPE_FORMAT_FXT1_RGBA:
+ return BRW_SURFACEFORMAT_FXT1;
+ */
+
+ case PIPE_FORMAT_DXT1_RGB:
+ return BRW_SURFACEFORMAT_DXT1_RGB;
+
+ case PIPE_FORMAT_DXT1_RGBA:
+ return BRW_SURFACEFORMAT_BC1_UNORM;
+
+ case PIPE_FORMAT_DXT3_RGBA:
+ return BRW_SURFACEFORMAT_BC2_UNORM;
+
+ case PIPE_FORMAT_DXT5_RGBA:
+ return BRW_SURFACEFORMAT_BC3_UNORM;
+
+ /*
+ * sRGB formats
+ */
+
+ case PIPE_FORMAT_R8G8B8A8_SRGB:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; /* NOTE(review): channel order in the two names differs (RGBA vs BGRA) -- confirm this is intended */
+
+ case PIPE_FORMAT_A8L8_SRGB:
+ return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+ case PIPE_FORMAT_L8_SRGB:
+ return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
+ case PIPE_FORMAT_DXT1_SRGB:
+ return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+ /*
+ * Depth formats: each one is mapped to an intensity (I*) surface format.
+ */
+
+ case PIPE_FORMAT_Z16_UNORM:
+ return BRW_SURFACEFORMAT_I16_UNORM;
+
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return BRW_SURFACEFORMAT_I24X8_UNORM; /* both 24-bit depth layouts share one surface format */
+
+ case PIPE_FORMAT_Z32_FLOAT:
+ return BRW_SURFACEFORMAT_I32_FLOAT;
+
+ /* XXX: presumably for bump mapping. Add this to mesa state
+ * tracker?
+ *
+ * XXX: Add flipped versions of these formats to Gallium.
+ */
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ default:
+ return BRW_SURFACEFORMAT_INVALID; /* unsupported format */
+ }
+}
+
+
+
+
+
+static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
+ const struct pipe_texture *templ )
+
+{ /* Create a texture from templ: copy the template, pick a tiling mode,
+   * compute the mipmap layout, allocate a buffer object and pre-fill the
+   * surface state kept in tex->ss.
+   *
+   * Returns the embedded pipe_texture, or NULL when the structure cannot
+   * be allocated, the layout fails or bo_alloc reports an error.
+   */
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_texture *tex;
+ enum brw_buffer_type buffer_type;
+ enum pipe_error ret;
+
+ tex = CALLOC_STRUCT(brw_texture);
+ if (tex == NULL)
+ return NULL;
+
+ memcpy(&tex->base, templ, sizeof *templ);
+ pipe_reference_init(&tex->base.reference, 1);
+ tex->base.screen = screen;
+
+ /* XXX: compressed textures need special treatment here
+ */
+ tex->cpp = util_format_get_blocksize(tex->base.format);
+ tex->compressed = util_format_is_compressed(tex->base.format);
+
+ make_empty_list(&tex->views[0]);
+ make_empty_list(&tex->views[1]);
+
+ /* XXX: No tiling with compressed textures??
+ */
+ if (tex->compressed == 0 &&
+ !bscreen->no_tiling)
+ { /* Y tiling for depth/stencil formats on 965, X tiling otherwise */
+ if (bscreen->chipset.is_965 &&
+ util_format_is_depth_or_stencil(templ->format))
+ tex->tiling = BRW_TILING_Y;
+ else
+ tex->tiling = BRW_TILING_X;
+ }
+ else {
+ tex->tiling = BRW_TILING_NONE;
+ }
+
+
+
+
+ if (!brw_texture_layout( bscreen, tex ))
+ goto fail;
+
+
+ if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+ PIPE_TEXTURE_USAGE_PRIMARY)) {
+ buffer_type = BRW_BUFFER_TYPE_SCANOUT; /* display targets and primary surfaces */
+ }
+ else {
+ buffer_type = BRW_BUFFER_TYPE_TEXTURE;
+ }
+
+ ret = bscreen->sws->bo_alloc( bscreen->sws,
+ buffer_type,
+ tex->pitch * tex->total_height * tex->cpp, /* presumably the size in bytes -- confirm against bo_alloc */
+ 64, /* presumably the alignment -- confirm against bo_alloc */
+ &tex->bo );
+ if (ret)
+ goto fail;
+
+ tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+ tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+ assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
+
+ /* This is ok for all textures with channel width 8bit or less:
+ */
+/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+
+ /* XXX: what happens when tex->bo->offset changes???
+ */
+ tex->ss.ss1.base_addr = 0; /* reloc */
+ tex->ss.ss2.mip_count = tex->base.last_level;
+ tex->ss.ss2.width = tex->base.width0 - 1; /* width, height, depth and pitch are all stored minus one */
+ tex->ss.ss2.height = tex->base.height0 - 1;
+
+ switch (tex->tiling) {
+ case BRW_TILING_NONE:
+ tex->ss.ss3.tiled_surface = 0;
+ tex->ss.ss3.tile_walk = 0;
+ break;
+ case BRW_TILING_X:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ break;
+ case BRW_TILING_Y:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+ break;
+ }
+
+ tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; /* pitch in bytes, minus one */
+ tex->ss.ss3.depth = tex->base.depth0 - 1;
+
+ tex->ss.ss4.min_lod = 0;
+
+ if (tex->base.target == PIPE_TEXTURE_CUBE) { /* enable all six cube faces */
+ tex->ss.ss0.cube_pos_x = 1;
+ tex->ss.ss0.cube_pos_y = 1;
+ tex->ss.ss0.cube_pos_z = 1;
+ tex->ss.ss0.cube_neg_x = 1;
+ tex->ss.ss0.cube_neg_y = 1;
+ tex->ss.ss0.cube_neg_z = 1;
+ }
+
+ return &tex->base;
+
+fail: /* reached after a layout or bo_alloc failure; releases whatever is in tex->bo and the texture itself */
+ bo_reference(&tex->bo, NULL);
+ FREE(tex);
+ return NULL;
+}
+
+/* texture_blanket hook installed by brw_screen_tex_init().  Wrapping a
+ * generic pipe_buffer is not implemented here, so every request is
+ * refused by returning NULL.
+ */
+static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen,
+                                                const struct pipe_texture *templ,
+                                                const unsigned *stride,
+                                                struct pipe_buffer *buffer)
+{
+   /* None of the arguments are consulted. */
+   (void) screen;
+   (void) templ;
+   (void) stride;
+   (void) buffer;
+
+   return NULL;
+}
+
+/* Destroy a texture: drop its reference on the backing buffer object,
+ * then free the texture structure itself.
+ */
+static void brw_texture_destroy(struct pipe_texture *pt)
+{
+   struct brw_texture *btex = brw_texture(pt);
+
+   bo_reference(&btex->bo, NULL);
+
+   FREE(pt);
+}
+
+
+/* Report whether a format can be used.  The answer depends only on
+ * whether translate_tex_format() knows a hardware surface format for it;
+ * target, usage and geometry flags are not consulted.
+ */
+static boolean brw_is_format_supported( struct pipe_screen *screen,
+                                        enum pipe_format format,
+                                        enum pipe_texture_target target,
+                                        unsigned tex_usage,
+                                        unsigned geom_flags )
+{
+   const GLuint hw_format = translate_tex_format(format);
+
+   if (hw_format == BRW_SURFACEFORMAT_INVALID)
+      return 0;
+
+   return 1;
+}
+
+
+/* Ask the winsys whether buffer 'bo' references the storage of a texture
+ * image.
+ *
+ * The texture's own buffer object is checked first.  After that, every
+ * surface view of the requested face/level that lives in a buffer other
+ * than the texture's own is checked as well.
+ */
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+                                         struct pipe_texture *texture,
+                                         unsigned face,
+                                         unsigned level,
+                                         struct brw_winsys_buffer *bo )
+{
+   struct brw_texture *tex = brw_texture(texture);
+   struct brw_surface *view;
+   int list;
+
+   /* XXX: this is subject to false positives if the underlying
+    * texture BO is referenced, we can't tell whether the sub-region
+    * we care about participates in that.
+    */
+   if (brw_screen->sws->bo_references( bo, tex->bo ))
+      return TRUE;
+
+   /* Walk both view lists looking for a matching face/level whose
+    * private buffer is referenced.
+    */
+   for (list = 0; list < 2; list++) {
+      foreach (view, &tex->views[list]) {
+         if (view->bo != tex->bo &&
+             view->id.bits.face == face &&
+             view->id.bits.level == level &&
+             brw_screen->sws->bo_references( bo, view->bo))
+            return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
+
+/*
+ * Transfer functions
+ */
+
+/* Create a transfer object describing a rectangle of one texture image.
+ *
+ * The image is selected by 'face' for cube maps and by 'zslice' for 3D
+ * textures; every other target uses image 0 of the level (and asserts
+ * that face and zslice are zero).  The transfer takes a reference on the
+ * texture and records the image's byte offset together with the row
+ * stride (pitch * cpp).
+ *
+ * Returns the new transfer, or NULL if it cannot be allocated.
+ */
+static struct pipe_transfer*
+brw_get_tex_transfer(struct pipe_screen *screen,
+                     struct pipe_texture *texture,
+                     unsigned face, unsigned level, unsigned zslice,
+                     enum pipe_transfer_usage usage, unsigned x, unsigned y,
+                     unsigned w, unsigned h)
+{
+   struct brw_texture *tex = brw_texture(texture);
+   struct brw_transfer *trans;
+   unsigned offset; /* in bytes */
+
+   if (texture->target == PIPE_TEXTURE_CUBE) {
+      offset = tex->image_offset[level][face];
+   } else if (texture->target == PIPE_TEXTURE_3D) {
+      offset = tex->image_offset[level][zslice];
+   } else {
+      offset = tex->image_offset[level][0];
+      assert(face == 0);
+      assert(zslice == 0);
+   }
+
+   trans = CALLOC_STRUCT(brw_transfer);
+   if (trans == NULL) {
+      /* Do not form &trans->base from a NULL pointer; report the
+       * allocation failure explicitly.
+       */
+      return NULL;
+   }
+
+   pipe_texture_reference(&trans->base.texture, texture);
+   trans->base.x = x;
+   trans->base.y = y;
+   trans->base.width = w;
+   trans->base.height = h;
+   trans->base.stride = tex->pitch * tex->cpp;
+   trans->offset = offset;
+   trans->base.usage = usage;
+
+   return &trans->base;
+}
+
+/* Map the buffer object behind a transfer and return a pointer to the
+ * first byte of the transfer rectangle, or NULL if the mapping fails.
+ *
+ * The whole buffer (offset 0, length bo->size) is mapped; the write flag
+ * follows PIPE_TRANSFER_WRITE and the two trailing flags are never set.
+ */
+static void *
+brw_transfer_map(struct pipe_screen *screen,
+                 struct pipe_transfer *transfer)
+{
+   struct brw_texture *tex = brw_texture(transfer->texture);
+   struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+   const unsigned usage = transfer->usage;
+   char *ptr;
+
+   ptr = sws->bo_map(tex->bo,
+                     BRW_DATA_OTHER,
+                     0,
+                     tex->bo->size,
+                     (usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE,
+                     FALSE,   /* placeholder: no usage bit drives this flag */
+                     FALSE);  /* placeholder: no usage bit drives this flag */
+   if (ptr == NULL)
+      return NULL;
+
+   /* XXX: blocksize and compressed textures
+    */
+   ptr += brw_transfer(transfer)->offset;       /* start of the image */
+   ptr += transfer->y * transfer->stride;       /* down to row y */
+   ptr += transfer->x * tex->cpp;               /* across to column x */
+
+   return ptr;
+}
+
+/* Undo brw_transfer_map(): unmap the buffer object of the transfer's
+ * texture through the winsys.
+ */
+static void
+brw_transfer_unmap(struct pipe_screen *screen,
+                   struct pipe_transfer *transfer)
+{
+   struct brw_winsys_screen *winsys = brw_screen(screen)->sws;
+   struct brw_texture *btex = brw_texture(transfer->texture);
+
+   winsys->bo_unmap(btex->bo);
+}
+
+/* Release a transfer: drop the texture reference it holds, then free the
+ * transfer structure.
+ */
+static void
+brw_tex_transfer_destroy(struct pipe_transfer *trans)
+{
+   /* Passing NULL unreferences the texture currently held. */
+   pipe_texture_reference(&trans->texture, NULL);
+
+   FREE(trans);
+}
+
+
+/*
+ * Functions exported to the winsys
+ */
+
+/* Hand the winsys the buffer object backing a texture.
+ *
+ * *buffer receives tex->bo (no reference is added here).  When 'stride'
+ * is non-NULL it receives the row stride in bytes (pitch * cpp).
+ * Always returns TRUE.
+ */
+boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
+                                      struct brw_winsys_buffer **buffer,
+                                      unsigned *stride)
+{
+   struct brw_texture *btex = brw_texture(texture);
+
+   *buffer = btex->bo;
+
+   if (stride != NULL) {
+      *stride = btex->pitch * btex->cpp;
+   }
+
+   return TRUE;
+}
+
+struct pipe_texture *
+brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
+ const struct pipe_texture *templ,
+ unsigned pitch,
+ unsigned tiling,
+ struct brw_winsys_buffer *buffer)
+{ /* Wrap an existing winsys buffer in a texture.
+   *
+   * Only non-compressed 2D textures with a single mip level and depth 1
+   * are accepted.  'pitch' is divided by cpp before being compared with
+   * and stored into tex->pitch; 'tiling' is stored as given.
+   *
+   * Returns the new texture, or NULL when the template is rejected, the
+   * allocation or layout fails, or the supplied pitch is smaller than
+   * the pitch computed by the layout.
+   */
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_texture *tex;
+
+ if (templ->target != PIPE_TEXTURE_2D ||
+ templ->last_level != 0 ||
+ templ->depth0 != 1)
+ return NULL;
+
+ if (util_format_is_compressed(templ->format))
+ return NULL;
+
+ tex = CALLOC_STRUCT(brw_texture);
+ if (!tex)
+ return NULL;
+
+ memcpy(&tex->base, templ, sizeof *templ);
+ pipe_reference_init(&tex->base.reference, 1);
+ tex->base.screen = screen;
+
+ /* XXX: cpp vs. blocksize
+ */
+ tex->cpp = util_format_get_blocksize(tex->base.format);
+ tex->tiling = tiling;
+
+ make_empty_list(&tex->views[0]);
+ make_empty_list(&tex->views[1]);
+
+ if (!brw_texture_layout(bscreen, tex))
+ goto fail;
+
+ /* XXX Maybe some more checks? */
+ if ((pitch / tex->cpp) < tex->pitch) /* caller's pitch must cover the layout's pitch */
+ goto fail;
+
+ tex->pitch = pitch / tex->cpp; /* adopt the caller's pitch */
+
+ tex->bo = buffer; /* NOTE(review): no reference is taken here although brw_texture_destroy() drops one -- confirm ownership is transferred by the caller */
+
+ /* fix this warning */
+#if 0
+ if (tex->size > buffer->size)
+ goto fail;
+#endif
+
+ tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+ tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+ assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
+
+ /* This is ok for all textures with channel width 8bit or less:
+ */
+/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+
+ /* XXX: what happens when tex->bo->offset changes???
+ */
+ tex->ss.ss1.base_addr = 0; /* reloc */
+ tex->ss.ss2.mip_count = tex->base.last_level;
+ tex->ss.ss2.width = tex->base.width0 - 1; /* width, height, depth and pitch are all stored minus one */
+ tex->ss.ss2.height = tex->base.height0 - 1;
+
+ switch (tex->tiling) {
+ case BRW_TILING_NONE:
+ tex->ss.ss3.tiled_surface = 0;
+ tex->ss.ss3.tile_walk = 0;
+ break;
+ case BRW_TILING_X:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ break;
+ case BRW_TILING_Y:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+ break;
+ }
+
+ tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; /* pitch in bytes, minus one */
+ tex->ss.ss3.depth = tex->base.depth0 - 1;
+
+ tex->ss.ss4.min_lod = 0;
+
+ return &tex->base;
+
+fail: /* both jumps here happen before tex->bo is assigned, so only the texture is freed */
+ FREE(tex);
+ return NULL;
+}
+
+/* Install the texture and transfer entry points of this file into the
+ * screen's function table.
+ */
+void brw_screen_tex_init( struct brw_screen *brw_screen )
+{
+   /* Format query */
+   brw_screen->base.is_format_supported = brw_is_format_supported;
+
+   /* Texture lifetime */
+   brw_screen->base.texture_create = brw_texture_create;
+   brw_screen->base.texture_blanket = brw_texture_blanket;
+   brw_screen->base.texture_destroy = brw_texture_destroy;
+
+   /* Transfers */
+   brw_screen->base.get_tex_transfer = brw_get_tex_transfer;
+   brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy;
+   brw_screen->base.transfer_map = brw_transfer_map;
+   brw_screen->base.transfer_unmap = brw_transfer_unmap;
+}
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
new file mode 100644
index 0000000000..e1986a9dbb
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -0,0 +1,216 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_pipe_rast.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+/* Compile the SF (setup) program described by 'key' and upload it to the
+ * program cache.
+ *
+ * With a single attribute the null setup is emitted; otherwise the
+ * emitter matching key->primitive is used.  On success the cache buffer
+ * is returned through bo_out.  Returns PIPE_OK, PIPE_ERROR_BAD_INPUT for
+ * an unknown primitive, or the error reported by brw_get_program() /
+ * brw_upload_cache().
+ */
+static enum pipe_error compile_sf_prog( struct brw_context *brw,
+                                        struct brw_sf_prog_key *key,
+                                        struct brw_winsys_buffer **bo_out )
+{
+   struct brw_sf_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   enum pipe_error ret;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.key = *key;
+
+   /* Two attributes are packed per register, hence the rounding up. */
+   c.nr_attrs = c.key.nr_attrs;
+   c.nr_attr_regs = (c.nr_attrs + 1) / 2;
+   c.nr_setup_attrs = c.key.nr_attrs;
+   c.nr_setup_regs = (c.nr_setup_attrs + 1) / 2;
+
+   c.prog_data.urb_read_length = c.nr_attr_regs;
+   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+   if (c.nr_attrs == 1) {
+      /* Special case when there are no attributes to setup.
+       *
+       * XXX: should be able to set nr_setup_attrs to nr_attrs-1 -- but
+       * breaks vp-tris.c
+       */
+      c.nr_verts = 0;
+      brw_emit_null_setup( &c );
+   }
+   else if (key->primitive == SF_TRIANGLES) {
+      c.nr_verts = 3;
+      brw_emit_tri_setup( &c, GL_TRUE );
+   }
+   else if (key->primitive == SF_LINES) {
+      c.nr_verts = 2;
+      brw_emit_line_setup( &c, GL_TRUE );
+   }
+   else if (key->primitive == SF_POINTS) {
+      c.nr_verts = 1;
+      if (key->do_point_sprite)
+         brw_emit_point_sprite_setup( &c, GL_TRUE );
+      else
+         brw_emit_point_setup( &c, GL_TRUE );
+   }
+   else if (key->primitive == SF_UNFILLED_TRIS) {
+      c.nr_verts = 3;
+      brw_emit_anyprim_setup( &c );
+   }
+   else {
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* get the program
+    */
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
+
+   /* Upload
+    */
+   ret = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->sf.prog_data,
+                           bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ *
+ * Builds the SF program key from the current fragment-shader signature,
+ * the reduced primitive and the rasterizer state, then looks the program
+ * up in the cache and compiles it on a miss.
+ *
+ * Attribute 0 is the position; fragment-shader input i is attribute
+ * i + 1.  The interpolation masks are 32 bits wide, so an attribute
+ * index of 32 or more cannot be recorded and is left out of both masks.
+ *
+ * Returns PIPE_OK or the error from compile_sf_prog().
+ */
+static enum pipe_error upload_sf_prog(struct brw_context *brw)
+{
+   const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+   struct brw_sf_prog_key key;
+   enum pipe_error ret;
+   unsigned i;
+
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key, noting state dependencies:
+    */
+
+   /* XXX: Add one to account for the position input.
+    */
+   /* PIPE_NEW_FRAGMENT_SIGNATURE */
+   key.nr_attrs = sig->nr_inputs + 1;
+
+
+   /* XXX: why is position required to be linear?  why do we care
+    * about it at all?
+    */
+   key.linear_attrs = 1;        /* position -- but why? */
+
+   for (i = 0; i < sig->nr_inputs; i++) {
+      /* Unsigned shift: a signed '1 << 31' is undefined, and so is any
+       * shift by 32 or more, so out-of-range attributes get no bit.
+       */
+      const GLuint attr_bit = (i + 1 < 32) ? (1u << (i + 1)) : 0;
+
+      switch (sig->input[i].interp) {
+      case TGSI_INTERPOLATE_CONSTANT:
+         break;
+      case TGSI_INTERPOLATE_LINEAR:
+         key.linear_attrs |= attr_bit;
+         break;
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+         key.persp_attrs |= attr_bit;
+         break;
+      }
+   }
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   switch (brw->reduced_primitive) {
+   case PIPE_PRIM_TRIANGLES:
+      /* PIPE_NEW_RAST
+       */
+      if (brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL ||
+          brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL)
+         key.primitive = SF_UNFILLED_TRIS;
+      else
+         key.primitive = SF_TRIANGLES;
+      break;
+   case PIPE_PRIM_LINES:
+      key.primitive = SF_LINES;
+      break;
+   case PIPE_PRIM_POINTS:
+      key.primitive = SF_POINTS;
+      break;
+   }
+   /* Any other value leaves key.primitive at the zero set by memset. */
+
+   key.do_point_sprite = brw->curr.rast->templ.point_sprite;
+   key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */
+   key.do_flat_shading = brw->curr.rast->templ.flatshade;
+   key.do_twoside_color = brw->curr.rast->templ.light_twoside;
+
+   if (key.do_twoside_color) {
+      key.frontface_ccw = (brw->curr.rast->templ.front_winding ==
+                           PIPE_WINDING_CCW);
+   }
+
+   /* Cache hit: prog_data and prog_bo are filled in by the search. */
+   if (brw_search_cache(&brw->cache, BRW_SF_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->sf.prog_data,
+                        &brw->sf.prog_bo))
+      return PIPE_OK;
+
+   ret = compile_sf_prog( brw, &key, &brw->sf.prog_bo );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+
+const struct brw_tracked_state brw_sf_prog = { /* tracked-state entry for the SF program; the flags below match the state upload_sf_prog() reads */
+ .dirty = {
+ .mesa = (PIPE_NEW_RAST | PIPE_NEW_FRAGMENT_SIGNATURE), /* rasterizer state and fragment-shader signature */
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE), /* reduced primitive type */
+ .cache = 0
+ },
+ .prepare = upload_sf_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
new file mode 100644
index 0000000000..a895c7d2f6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -0,0 +1,122 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+#define SF_POINTS 0
+#define SF_LINES 1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS 3
+
+struct brw_sf_prog_key { /* cache key describing one SF program; compared and hashed as raw bytes via sizeof(key) */
+
+ /* Bitmask of linear and perspective interpolated inputs, 0..nr
+ * Bit 0 is the position; upload_sf_prog() puts fragment-shader input i
+ * at bit i+1.
+ */
+ GLuint persp_attrs:32;
+ GLuint linear_attrs:32;
+ GLuint point_coord_replace_attrs:32; /* NOTE(review): not written by upload_sf_prog() -- confirm where this is set */
+
+ GLuint nr_attrs:8; /* number of fragment-shader inputs plus one for the position */
+ GLuint primitive:2; /* one of the SF_* values above */
+ GLuint do_twoside_color:1;
+ GLuint do_flat_shading:1;
+ GLuint frontface_ccw:1; /* only filled in when do_twoside_color is set */
+ GLuint do_point_sprite:1;
+ GLuint sprite_origin_lower_left:1;
+ GLuint pad:17; /* rounds the flag word up to 32 bits */
+
+ GLuint attr_col0:8; /* colour attribute indices; 0 is treated as "not present" by brw_sf_emit.c */
+ GLuint attr_col1:8;
+ GLuint attr_bfc0:8; /* back-face colour attribute indices, same convention */
+ GLuint attr_bfc1:8;
+};
+
+struct brw_sf_point_tex { /* NOTE(review): not referenced by any code visible alongside this header -- confirm it is still needed */
+ GLboolean CoordReplace;
+};
+
+struct brw_sf_compile { /* working state for compiling one SF program */
+ struct brw_compile func; /* instruction emitter state */
+ struct brw_sf_prog_key key; /* copy of the key being compiled */
+ struct brw_sf_prog_data prog_data; /* results uploaded with the program */
+
+ struct brw_reg pv; /* pv..dy2 are assigned to fixed locations in r1 by alloc_regs() */
+ struct brw_reg det;
+ struct brw_reg dx0;
+ struct brw_reg dx2;
+ struct brw_reg dy0;
+ struct brw_reg dy2;
+
+ /* z and 1/w passed in separately:
+ */
+ struct brw_reg z[3];
+ struct brw_reg inv_w[3];
+
+ /* The vertices:
+ */
+ struct brw_reg vert[3];
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ struct brw_reg inv_det;
+ struct brw_reg a1_sub_a0;
+ struct brw_reg a2_sub_a0;
+ struct brw_reg tmp;
+
+ struct brw_reg m1Cx; /* message registers 1..3 holding the interpolation coefficients */
+ struct brw_reg m2Cy;
+ struct brw_reg m3C0;
+
+ GLuint nr_verts; /* vertices in the primitive being set up (0 for the null setup) */
+ GLuint nr_attrs;
+ GLuint nr_attr_regs; /* (nr_attrs + 1) / 2: two attributes share a register */
+ GLuint nr_setup_attrs;
+ GLuint nr_setup_regs; /* (nr_setup_attrs + 1) / 2 */
+
+ GLuint point_coord_replace_mask;
+};
+
+
+void brw_emit_null_setup( struct brw_sf_compile *c ); /* used when there are no attributes to set up */
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate ); /* SF_TRIANGLES; a true 'allocate' runs register allocation first */
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate ); /* SF_LINES */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate ); /* SF_POINTS without point sprites */
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate ); /* SF_POINTS with do_point_sprite set */
+void brw_emit_anyprim_setup( struct brw_sf_compile *c ); /* SF_UNFILLED_TRIS */
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
new file mode 100644
index 0000000000..3b85725e36
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -0,0 +1,765 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+/* Return the 4-wide register region holding attribute 'attr' of a vertex.
+ *
+ * Attributes are packed two per register starting at vert.nr: even
+ * attributes sit at sub-register 0 and odd ones at sub-register 4.
+ */
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+                                    struct brw_reg vert,
+                                    GLuint attr)
+{
+   const GLuint reg_offset = attr / 2;
+   const GLuint second_half = attr % 2;
+
+   return brw_vec4_grf(vert.nr + reg_offset, second_half * 4);
+}
+
+
+/***********************************************************************
+ * Twoside lighting
+ */
+/* Emit MOVs that overwrite a vertex's front colours with its back-face
+ * colours.  A colour pair is copied only when both its front and its
+ * back attribute index are non-zero.
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+                      struct brw_reg vert )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint front[2] = { c->key.attr_col0, c->key.attr_col1 };
+   const GLuint back[2] = { c->key.attr_bfc0, c->key.attr_bfc1 };
+   GLuint i;
+
+   for (i = 0; i < 2; i++) {
+      if (front[i] && back[i]) {
+         brw_MOV(p,
+                 get_vert_attr(c, vert, front[i]),
+                 get_vert_attr(c, vert, back[i]));
+      }
+   }
+}
+
+
+static void do_twoside_color( struct brw_sf_compile *c )
+{ /* Emit code that, when the sign of the determinant matches the
+   * back-face condition, replaces the front colours of every vertex of
+   * the primitive with the back-face colours (see copy_bfc).
+   */
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ /* XXX: What happens if BFC isn't present? This could only happen
+ * for user-supplied vertex programs, as t_vp_build.c always does
+ * the right thing.
+ */
+ if (!(c->key.attr_col0 && c->key.attr_bfc0) &&
+ !(c->key.attr_col1 && c->key.attr_bfc1))
+ return;
+
+ /* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisons.
+ */
+ brw_push_insn_state(p);
+ brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_4);
+ {
+ switch (c->nr_verts) { /* cases fall through on purpose: every vertex up to nr_verts is copied */
+ case 3: copy_bfc(c, c->vert[2]); /* fallthrough */
+ case 2: copy_bfc(c, c->vert[1]); /* fallthrough */
+ case 1: copy_bfc(c, c->vert[0]);
+ }
+ }
+ brw_ENDIF(p, if_insn);
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+ (1<<VERT_RESULT_COL1)) /* NOTE(review): not used by the flat-shading code visible below, which tests key.attr_col0/attr_col1 instead -- confirm whether this macro is still needed */
+
+/* Emit one MOV per colour attribute present in the key (attr_col0, then
+ * attr_col1), copying that attribute from vertex 'src' to vertex 'dst'.
+ * An attribute index of zero means the colour is absent and is skipped.
+ */
+static void copy_colors( struct brw_sf_compile *c,
+                         struct brw_reg dst,
+                         struct brw_reg src)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint color_attr[2] = { c->key.attr_col0, c->key.attr_col1 };
+   GLuint i;
+
+   for (i = 0; i < 2; i++) {
+      if (!color_attr[i])
+         continue;
+
+      brw_MOV(p,
+              get_vert_attr(c, dst, color_attr[i]),
+              get_vert_attr(c, src, color_attr[i]));
+   }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ *
+ * Three copy blocks are emitted, one per possible provoking vertex.
+ * Each copy_colors() call emits 'nr' MOVs (one per colour attribute
+ * present), so the first two blocks are 2*nr MOVs plus one JMPI that
+ * skips the remaining blocks, and the last block is 2*nr MOVs.  c->pv is
+ * multiplied by the block length (2*nr + 1) and used as the distance of
+ * the initial jump, which therefore lands on block number pv.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint jmpi = 1; /* scale applied to every jump distance */
+ GLuint nr = 0; /* number of colour attributes present */
+
+ if (c->key.attr_col0)
+ nr++;
+
+ if (c->key.attr_col1)
+ nr++;
+
+ if (nr == 0)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2; /* jump distances are doubled on IGDNG; presumably it counts them in different units -- confirm */
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+
+ copy_colors(c, c->vert[1], c->vert[0]); /* block 0: vertex 0 provokes */
+ copy_colors(c, c->vert[2], c->vert[0]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); /* skip block 1 (2*nr + 1) and block 2 (2*nr) */
+
+ copy_colors(c, c->vert[0], c->vert[1]); /* block 1: vertex 1 provokes */
+ copy_colors(c, c->vert[2], c->vert[1]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); /* skip block 2 */
+
+ copy_colors(c, c->vert[0], c->vert[2]); /* block 2: vertex 2 provokes */
+ copy_colors(c, c->vert[1], c->vert[2]);
+
+ brw_pop_insn_state(p);
+}
+
+
+static void do_flatshade_line( struct brw_sf_compile *c )
+{ /* Line version of the flat-shading copy: two blocks are emitted, one
+   * per possible provoking vertex.  The first is 'nr' MOVs plus a JMPI
+   * over the second, so c->pv is scaled by (nr + 1) to form the initial
+   * jump distance.
+   */
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint jmpi = 1; /* scale applied to every jump distance */
+ GLuint nr = 0; /* number of colour attributes present */
+
+ if (c->key.attr_col0)
+ nr++;
+
+ if (c->key.attr_col1)
+ nr++;
+
+ if (nr == 0)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2; /* jump distances are doubled on IGDNG; presumably it counts them in different units -- confirm */
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+ copy_colors(c, c->vert[1], c->vert[0]); /* block 0: vertex 0 provokes */
+
+ brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); /* skip block 1; NOTE(review): brw_imm_ud here, brw_imm_d in the triangle path -- confirm intentional */
+ copy_colors(c, c->vert[0], c->vert[1]); /* block 1: vertex 1 provokes */
+
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+
+static void alloc_regs( struct brw_sf_compile *c )
+{ /* Assign registers for one SF program: the fixed payload values in r1
+   * and r2, c->nr_verts vertices of c->nr_attr_regs registers each
+   * starting at r3, four temporaries after the last vertex, and message
+   * registers m1..m3 for the results.  Records the register count in
+   * prog_data.total_grf.
+   */
+ GLuint reg, i;
+
+ /* Values computed by fixed function unit:
+ */
+ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
+ c->det = brw_vec1_grf(1, 2);
+ c->dx0 = brw_vec1_grf(1, 3);
+ c->dx2 = brw_vec1_grf(1, 4);
+ c->dy0 = brw_vec1_grf(1, 5);
+ c->dy2 = brw_vec1_grf(1, 6);
+
+ /* z and 1/w passed in separately:
+ */
+ c->z[0] = brw_vec1_grf(2, 0);
+ c->inv_w[0] = brw_vec1_grf(2, 1);
+ c->z[1] = brw_vec1_grf(2, 2);
+ c->inv_w[1] = brw_vec1_grf(2, 3);
+ c->z[2] = brw_vec1_grf(2, 4);
+ c->inv_w[2] = brw_vec1_grf(2, 5);
+
+ /* The vertices:
+ */
+ reg = 3;
+ for (i = 0; i < c->nr_verts; i++) {
+ c->vert[i] = brw_vec8_grf(reg, 0);
+ reg += c->nr_attr_regs;
+ }
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ c->inv_det = brw_vec1_grf(reg, 0); reg++;
+ c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->tmp = brw_vec8_grf(reg, 0); reg++;
+
+ /* Note grf allocation:
+ */
+ c->prog_data.total_grf = reg;
+
+
+ /* Outputs of this program - interpolation coefficients for
+ * rasterization:
+ */
+ c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+ c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+ c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+/* For every vertex, emit a 2-wide MOV that copies the separately supplied
+ * z and 1/w pair (which sit next to each other in the payload) into the
+ * vertex register at sub-offset 2.
+ */
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint vert;
+
+   brw_push_insn_state(p);
+
+   /* Copy both scalars with a single MOV:
+    */
+   for (vert = 0; vert < c->nr_verts; vert++) {
+      brw_MOV(p,
+              vec2(suboffset(c->vert[vert], 2)),
+              vec2(c->z[vert]));
+   }
+
+   brw_pop_insn_state(p);
+}
+
+
+/* Emit a math instruction that computes 1/det into c->inv_det, using the
+ * scalar data type, full precision and no saturation.
+ */
+static void invert_det( struct brw_sf_compile *c)
+{
+   struct brw_compile *p = &c->func;
+
+   /* Looks like we invert all 8 elements just to get 1/det in
+    * position 2 !?!
+    */
+   brw_math(p,
+            c->inv_det,
+            BRW_MATH_FUNCTION_INV,
+            BRW_MATH_SATURATE_NONE,
+            0,
+            c->det,
+            BRW_MATH_DATA_SCALAR,
+            BRW_MATH_PRECISION_FULL);
+}
+
+
+/* Two attributes packed into a wide register.  Figure out if either
+ * or both of them need linear/perspective interpolation.  Constant
+ * regs are left as-is.
+ *
+ * 'reg' is the index of the register pair; it holds attributes reg*2 and
+ * reg*2+1.  Each output is a channel mask in which 0xf covers the first
+ * attribute and 0xf0 the second:
+ *
+ *   *pc         channels to process at all; the upper half is included
+ *               only when the second attribute exists
+ *   *pc_persp   channels whose attribute is set in key.persp_attrs
+ *   *pc_linear  channels whose attribute is set in key.persp_attrs or
+ *               key.linear_attrs
+ *
+ * The key masks are 32 bits wide, so an attribute index of 32 or more is
+ * treated as set in neither mask.
+ *
+ * Returns true when 'reg' is the last setup register.
+ */
+static GLboolean calculate_masks( struct brw_sf_compile *c,
+                                  GLuint reg,
+                                  GLushort *pc,
+                                  GLushort *pc_persp,
+                                  GLushort *pc_linear)
+{
+   GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
+   GLuint persp_mask = c->key.persp_attrs;
+   GLuint linear_mask = (c->key.persp_attrs | c->key.linear_attrs);
+   const GLuint attr0 = reg*2;
+   const GLuint attr1 = reg*2+1;
+
+   /* Unsigned shifts, bounded to the mask width: a signed '1 << 31' and
+    * any shift by 32 or more are undefined.
+    */
+   const GLuint bit0 = (attr0 < 32) ? (1u << attr0) : 0;
+   const GLuint bit1 = (attr1 < 32) ? (1u << attr1) : 0;
+
+   *pc_persp = 0;
+   *pc_linear = 0;
+   *pc = 0xf;
+
+   if (persp_mask & bit0)
+      *pc_persp = 0xf;
+
+   if (linear_mask & bit0)
+      *pc_linear = 0xf;
+
+   /* Maybe only process one attribute on the final round:
+    */
+   if (attr1 < c->nr_setup_attrs) {
+      *pc |= 0xf0;
+
+      if (persp_mask & bit1)
+         *pc_persp |= 0xf0;
+
+      if (linear_mask & bit1)
+         *pc_linear |= 0xf0;
+   }
+
+   return is_last_attr;
+}
+
+
+/* Emit a lone URB write of message length 1 with end-of-thread and
+ * writes-complete set; no setup coefficients are produced.
+ */
+void brw_emit_null_setup( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+
+   /* m0 is implicitly copied from r0 in the send instruction: */
+   struct brw_reg r0 = brw_vec8_grf(0, 0);
+
+   brw_urb_WRITE(p,
+                 brw_null_reg(),
+                 0,
+                 r0,     /* r0, will be copied to m0 */
+                 0,      /* allocate */
+                 1,      /* used */
+                 1,      /* msg len */
+                 0,      /* response len */
+                 1,      /* eot */
+                 1,      /* writes complete */
+                 0,      /* offset */
+                 BRW_URB_SWIZZLE_TRANSPOSE);
+}
+
+/* Emit the SF setup program for triangles.
+ *
+ * Sets c->nr_verts to 3, optionally allocates registers, emits the
+ * determinant inversion and the z / 1/w copy, applies two-sided colour
+ * and flat shading when the key requests them, and then, for each setup
+ * register (a pair of attributes), emits the perspective multiply, the
+ * dA/dx and dA/dy coefficients and a URB write of m0..m3.
+ *
+ * allocate: when true, alloc_regs(c) is called first.
+ * brw_emit_anyprim_setup() passes GL_FALSE because it has already
+ * allocated registers itself.
+ */
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 3;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_twoside_color)
+ do_twoside_color(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_triangle(c);
+
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ struct brw_reg a2 = offset(c->vert[2], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ /* Multiply the perspective-interpolated channels of each vertex
+ * by that vertex's 1/w, in place:
+ */
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ brw_MUL(p, a2, a2, c->inv_w[2]);
+ }
+
+
+ /* Calculate coefficients for interpolated values:
+ */
+ if (pc_linear)
+ {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+ brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+ /* calculate dA/dx
+ *
+ * NOTE(review): the MUL targets the null register and is followed
+ * by a MAC -- presumably the product is carried in the accumulator
+ * so that tmp = a1_sub_a0*dy2 - a2_sub_a0*dy0; confirm against the
+ * hardware documentation.
+ */
+ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+ brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* calculate dA/dy
+ */
+ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+ brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
+ * the send instruction:
+ *
+ * `last` (true only for the final setup register) is passed as
+ * both the eot and the writes-complete argument.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* offset */
+ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ }
+ }
+}
+
+
+
+/* Emit the SF setup program for lines.
+ *
+ * Sets c->nr_verts to 2, optionally allocates registers, emits the
+ * determinant inversion and the z / 1/w copy, applies flat shading when
+ * the key requests it, and then, for each setup register (a pair of
+ * attributes), emits the perspective multiply, the two gradient
+ * coefficients and a URB write of m0..m3.
+ *
+ * allocate: when true, alloc_regs(c) is called first.
+ */
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+
+ c->nr_verts = 2;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_line(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ /* Multiply the perspective-interpolated channels of each vertex
+ * by that vertex's 1/w, in place:
+ */
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ }
+
+ /* Calculate coefficients for position, color:
+ */
+ if (pc_linear) {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+
+ /* m1Cx = (a1 - a0) * dx0 * inv_det */
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* m2Cy = (a1 - a0) * dy0 * inv_det */
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB.
+ *
+ * `last` (true only for the final setup register) is passed as
+ * both the eot and the writes-complete argument.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Emit the SF setup program for point sprites.
+ *
+ * Sets c->nr_verts to 1, optionally allocates registers, emits the
+ * z / 1/w copy and then, for each setup register, either generates
+ * sprite coordinate coefficients (when the register's bit is set in
+ * key.point_coord_replace_attrs) or passes the attribute through as a
+ * constant with zero gradients, followed by a URB write of m0..m3.
+ *
+ * allocate: when true, alloc_regs(c) is called first.
+ */
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 1;
+
+ if (allocate)
+ alloc_regs(c);
+
+ copy_z_inv_w(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* XXX: only seems to check point_coord_replace_attrs for every
+ * second attribute?!?
+ */
+ boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i)));
+ struct brw_reg a0 = offset(c->vert[0], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ /* NOTE(review): the 1/w multiply is emitted only when coord_replace
+ * is set; brw_emit_point_setup() emits it whenever pc_persp is
+ * non-zero -- confirm this condition is intended.
+ */
+ if (pc_persp)
+ {
+ if (coord_replace) {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+ }
+
+ if (coord_replace) {
+ /* Calculate 1.0/PointWidth */
+ brw_math(&c->func,
+ c->tmp,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->dx0,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+ /* The two branches differ only in the sign of the inv_w operand
+ * used for m2Cy.
+ */
+ if (c->key.sprite_origin_lower_left) {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ }
+ else {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ }
+ }
+ else {
+ /* Attribute is not replaced: zero gradients. */
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ if (coord_replace) {
+ if (c->key.sprite_origin_lower_left) {
+ brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+ brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+ }
+ else {
+ brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+ }
+ }
+ else {
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+ }
+
+ /* Copy m0..m3 to URB.
+ *
+ * `last` (true only for the final setup register) is passed as
+ * both the eot and the writes-complete argument.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ *
+ * Sets c->nr_verts to 1, optionally allocates registers, emits the
+ * z / 1/w copy, zeroes both gradient message registers once, and then
+ * for each setup register emits the perspective multiply (if any) and
+ * a URB write of m0..m3.
+ *
+ * allocate: when true, alloc_regs(c) is called first.
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 1;
+
+ if (allocate)
+ alloc_regs(c);
+
+ copy_z_inv_w(c);
+
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_reg a0 = offset(c->vert[0], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ /* This seems odd as the values are all constant, but the
+ * fragment shader will be expecting it:
+ */
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+
+
+ /* The delta values are always zero, just send the starting
+ * coordinate. Again, this is to fit in with the interpolation
+ * code in the fragment shader.
+ */
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+
+ /* Copy m0..m3 to URB.
+ *
+ * `last` (true only for the final setup register) is passed as
+ * both the eot and the writes-complete argument.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Emit a single SF program that handles any primitive type by testing
+ * the primitive at run time and branching to the matching setup code.
+ *
+ * Registers are allocated once here (with nr_verts = 3); each of the
+ * per-primitive emitters is then called with allocate = GL_FALSE.  The
+ * emitted sequence is: triangle setup, line setup, point-sprite setup,
+ * and finally plain point setup as the fall-through case.
+ */
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+ struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
+ struct brw_reg primmask;
+ struct brw_instruction *jmp;
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+
+ GLuint saveflag;
+
+ c->nr_verts = 3;
+ alloc_regs(c);
+
+ /* Element 0 of the tmp register, viewed as UD, holds the mask. */
+ primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+
+ /* primmask = 1 << payload_prim */
+ brw_MOV(p, primmask, brw_imm_ud(1));
+ brw_SHL(p, primmask, primmask, payload_prim);
+
+ /* Test primmask against the triangle-type primitives.
+ * NOTE(review): presumably the conditional modifier makes the JMPI
+ * skip the block below when the AND result is zero -- confirm.
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+ (1<<_3DPRIM_TRISTRIP) |
+ (1<<_3DPRIM_TRIFAN) |
+ (1<<_3DPRIM_TRISTRIP_REVERSE) |
+ (1<<_3DPRIM_POLYGON) |
+ (1<<_3DPRIM_RECTLIST) |
+ (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_tri_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine, so must
+ * restore the flag which is changed when building
+ * the subroutine. fix #13240
+ */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* Same test for the line-type primitives. */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+ (1<<_3DPRIM_LINESTRIP) |
+ (1<<_3DPRIM_LINELOOP) |
+ (1<<_3DPRIM_LINESTRIP_CONT) |
+ (1<<_3DPRIM_LINESTRIP_BF) |
+ (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_line_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* Point sprites are selected by the BRW_SPRITE_POINT_ENABLE bit of
+ * the payload dword rather than by primitive type.
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_point_sprite_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* Everything else falls through to plain point setup. */
+ brw_emit_point_setup( c, GL_FALSE );
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
new file mode 100644
index 0000000000..25dc2b52e0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -0,0 +1,333 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
+
+/* Build the SF viewport structure from the current viewport and
+ * scissor state and hand it to the state cache, which returns the
+ * backing buffer in brw->sf.vp_bo.
+ *
+ * Returns PIPE_OK on success, otherwise the error reported by
+ * brw_cache_data().
+ */
+static enum pipe_error upload_sf_vp(struct brw_context *brw)
+{
+   const struct pipe_viewport_state *viewport = &brw->curr.viewport;
+   const struct pipe_scissor_state *clip = &brw->curr.scissor;
+   struct brw_sf_viewport sfv;
+   enum pipe_error err;
+
+   memset(&sfv, 0, sizeof(sfv));
+
+   /* PIPE_NEW_VIEWPORT: scale on the diagonal, translate in row 3 */
+   sfv.viewport.m00 = viewport->scale[0];
+   sfv.viewport.m30 = viewport->translate[0];
+   sfv.viewport.m11 = viewport->scale[1];
+   sfv.viewport.m31 = viewport->translate[1];
+   sfv.viewport.m22 = viewport->scale[2];
+   sfv.viewport.m32 = viewport->translate[2];
+
+   /* PIPE_NEW_SCISSOR: the max edges are stored minus one (the
+    * original author marked this adjustment with a "?")
+    */
+   sfv.scissor.xmin = clip->minx;
+   sfv.scissor.ymin = clip->miny;
+   sfv.scissor.xmax = clip->maxx - 1;
+   sfv.scissor.ymax = clip->maxy - 1;
+
+   err = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0,
+                         &brw->sf.vp_bo );
+
+   return err ? err : PIPE_OK;
+}
+
+/* Tracked state: SF viewport.  Prepared by upload_sf_vp() whenever the
+ * viewport or scissor state is flagged dirty.
+ */
+const struct brw_tracked_state brw_sf_vp = {
+   .prepare = upload_sf_vp,
+   .dirty = {
+      .mesa = PIPE_NEW_VIEWPORT | PIPE_NEW_SCISSOR,
+      .brw = 0,
+      .cache = 0
+   }
+};
+
+/* Cache key for the SF unit state.
+ *
+ * sf_unit_populate_key() clears the whole struct with memset before
+ * filling it in, and the struct is handed to brw_search_cache() and
+ * brw_upload_cache() together with its sizeof -- presumably so that the
+ * raw bytes, padding included, can be compared; confirm against the
+ * cache implementation before changing the layout.
+ */
+struct brw_sf_unit_key {
+ /* From the SF program data (CACHE_NEW_SF_PROG): */
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+ /* From the URB layout (BRW_NEW_URB_FENCE): */
+ unsigned int nr_urb_entries, urb_size, sfsize;
+
+ /* From the rasterizer state (PIPE_NEW_RAST): */
+ unsigned scissor:1;
+ unsigned line_smooth:1;
+ unsigned point_sprite:1;
+ unsigned point_attenuated:1;
+ unsigned front_face:2;
+ unsigned cull_mode:2;
+ unsigned flatshade_first:1;
+ unsigned gl_rasterization_rules:1;
+ unsigned line_last_pixel_enable:1;
+ float line_width;
+ /* Already clamped to the rasterizer's min/max point size: */
+ float point_size;
+};
+
+/* Fill `key` from the current context: SF program data, URB layout and
+ * rasterizer template.  The key is zeroed first so that every byte not
+ * explicitly assigned below is 0.
+ */
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+   const struct pipe_rasterizer_state *templ = &brw->curr.rast->templ;
+
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_SF_PROG */
+   key->total_grf = brw->sf.prog_data->total_grf;
+   key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_sf_entries;
+   key->urb_size = brw->urb.vsize;
+   key->sfsize = brw->urb.sfsize;
+
+   /* PIPE_NEW_RAST: culling and general rasterization */
+   key->scissor = templ->scissor;
+   key->front_face = templ->front_winding;
+   key->cull_mode = templ->cull_mode;
+   key->flatshade_first = templ->flatshade_first;
+   key->gl_rasterization_rules = templ->gl_rasterization_rules;
+
+   /* PIPE_NEW_RAST: lines */
+   key->line_smooth = templ->line_smooth;
+   key->line_width = templ->line_width;
+   key->line_last_pixel_enable = templ->line_last_pixel;
+
+   /* PIPE_NEW_RAST: points */
+   key->point_sprite = templ->point_sprite;
+   key->point_attenuated = templ->point_size_per_vertex;
+   key->point_size = CLAMP(templ->point_size,
+                           templ->point_size_min,
+                           templ->point_size_max);
+}
+
+/* Build the SF unit hardware state described by `key` and upload it
+ * through the state cache.
+ *
+ * brw:    driver context (cache, chipset and debug flags are read)
+ * key:    populated cache key; also stored with the cache entry
+ * reloc:  array of two relocations (SF program, SF viewport); the
+ *         kernel start pointer and viewport state offset are left 0
+ *         here and supplied through these relocations
+ * bo_out: receives the buffer holding the uploaded state
+ *
+ * Returns PIPE_OK on success, otherwise the error reported by
+ * brw_upload_cache().
+ */
+static enum pipe_error
+sf_unit_create_from_key(struct brw_context *brw,
+                        struct brw_sf_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
+                        struct brw_winsys_buffer **bo_out)
+{
+   struct brw_sf_unit_state sf;
+   enum pipe_error ret;
+   int chipset_max_threads;
+
+   memset(&sf, 0, sizeof(sf));
+
+   /* GRF count is programmed in blocks of 16, minus one. */
+   sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   /* reloc */
+   sf.thread0.kernel_start_pointer = 0;
+
+   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   sf.thread3.dispatch_grf_start_reg = 3;
+
+   if (BRW_IS_IGDNG(brw))
+      sf.thread3.urb_entry_read_offset = 3;
+   else
+      sf.thread3.urb_entry_read_offset = 1;
+
+   sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   sf.thread4.nr_urb_entries = key->nr_urb_entries;
+   sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+
+   /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
+    * 48(IGDNG) threads
+    */
+   if (BRW_IS_IGDNG(brw))
+      chipset_max_threads = 48;
+   else
+      chipset_max_threads = 24;
+
+   sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+      sf.thread4.max_threads = 0;
+
+   if (BRW_DEBUG & DEBUG_STATS)
+      sf.thread4.stats_enable = 1;
+
+   /* CACHE_NEW_SF_VP */
+   /* reloc */
+   sf.sf5.sf_viewport_state_offset = 0;
+
+   sf.sf5.viewport_transform = 1;
+
+   if (key->scissor)
+      sf.sf6.scissor = 1;
+
+   if (key->front_face == PIPE_WINDING_CCW)
+      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+   else
+      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+   /* Culling a single winding means culling front faces when that
+    * winding is the front-face winding, back faces otherwise.
+    */
+   switch (key->cull_mode) {
+   case PIPE_WINDING_CCW:
+   case PIPE_WINDING_CW:
+      sf.sf6.cull_mode = (key->front_face == key->cull_mode ?
+                          BRW_CULLMODE_FRONT :
+                          BRW_CULLMODE_BACK);
+      break;
+   case PIPE_WINDING_BOTH:
+      sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+      break;
+   case PIPE_WINDING_NONE:
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      break;
+   default:
+      assert(0);
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      break;
+   }
+
+   /* _NEW_LINE */
+   /* XXX use ctx->Const.Min/MaxLineWidth here */
+   sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+
+   sf.sf6.line_endcap_aa_region_width = 1;
+   if (key->line_smooth)
+      sf.sf6.aa_enable = 1;
+   else if (sf.sf6.line_width <= 0x2)
+      sf.sf6.line_width = 0;
+
+   /* XXX: gl_rasterization_rules? something else?
+    *
+    * Only the literal value 1 is programmed.  BRW_RASTRULE_UPPER_RIGHT
+    * and BRW_RASTRULE_LOWER_RIGHT are the named candidates for this
+    * field -- TODO confirm which of them (if either) equals 1 and use
+    * the name.
+    */
+   sf.sf6.point_rast_rule = 1;
+
+   /* XXX clamp max depends on AA vs. non-AA */
+
+   /* _NEW_POINT */
+   sf.sf7.sprite_point = key->point_sprite;
+   sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
+   sf.sf7.use_point_size_state = !key->point_attenuated;
+   sf.sf7.aa_line_distance_mode = 0;
+
+   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+    */
+   if (!key->flatshade_first) {
+      sf.sf7.trifan_pv = 2;
+      sf.sf7.linestrip_pv = 1;
+      sf.sf7.tristrip_pv = 2;
+   } else {
+      sf.sf7.trifan_pv = 1;
+      sf.sf7.linestrip_pv = 0;
+      sf.sf7.tristrip_pv = 0;
+   }
+
+   sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable;
+
+   /* Set bias for OpenGL rasterization rules:
+    */
+   if (key->gl_rasterization_rules) {
+      sf.sf6.dest_org_vbias = 0x8;
+      sf.sf6.dest_org_hbias = 0x8;
+   }
+   else {
+      sf.sf6.dest_org_vbias = 0x0;
+      sf.sf6.dest_org_hbias = 0x0;
+   }
+
+   ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+                          key, sizeof(*key),
+                          reloc, 2,
+                          &sf, sizeof(sf),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/* Look up, or create and upload, the SF unit state for the current
+ * context and store its buffer in brw->sf.state_bo.
+ *
+ * Returns PIPE_OK on a cache hit or successful upload, otherwise the
+ * error reported by sf_unit_create_from_key().
+ */
+static enum pipe_error upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_key key;
+   struct brw_winsys_reloc reloc[2];
+   unsigned grf_blocks;
+   unsigned xform_enable;
+   unsigned winding;
+   enum pipe_error ret;
+
+   sf_unit_populate_key(brw, &key);
+
+   /* XXX: cut this crap and pre calculate the key:
+    *
+    * These repeat the values sf_unit_create_from_key() writes into
+    * the thread0 and sf5 dwords; they are passed to make_reloc() as
+    * the delta for each relocation.
+    */
+   grf_blocks = (align(key.total_grf, 16) / 16 - 1);
+   xform_enable = 1;
+   if (key.front_face == PIPE_WINDING_CCW)
+      winding = BRW_FRONTWINDING_CCW;
+   else
+      winding = BRW_FRONTWINDING_CW;
+
+   /* Emit SF program relocation */
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              grf_blocks << 1,
+              offsetof(struct brw_sf_unit_state, thread0),
+              brw->sf.prog_bo);
+
+   /* Emit SF viewport relocation */
+   make_reloc(&reloc[1],
+              BRW_USAGE_STATE,
+              winding | (xform_enable << 1),
+              offsetof(struct brw_sf_unit_state, sf5),
+              brw->sf.vp_bo);
+
+   /* Only build new state on a cache miss. */
+   if (!brw_search_cache(&brw->cache, BRW_SF_UNIT,
+                         &key, sizeof(key),
+                         reloc, 2,
+                         NULL,
+                         &brw->sf.state_bo)) {
+      ret = sf_unit_create_from_key(brw, &key,
+                                    reloc,
+                                    &brw->sf.state_bo);
+      if (ret)
+         return ret;
+   }
+
+   return PIPE_OK;
+}
+
+/* Tracked state: SF unit.  Prepared by upload_sf_unit() when the
+ * rasterizer state, the URB fence, the SF viewport or the SF program
+ * is flagged dirty.
+ */
+const struct brw_tracked_state brw_sf_unit = {
+   .prepare = upload_sf_unit,
+   .dirty = {
+      .mesa = PIPE_NEW_RAST,
+      .brw = BRW_NEW_URB_FENCE,
+      .cache = CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG
+   }
+};
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
new file mode 100644
index 0000000000..d2bbd0123d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -0,0 +1,174 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+
+/* Append `bo` to the context's list of validated buffers, taking a
+ * reference on it.  A NULL `bo` is ignored.  The list has a fixed
+ * capacity, which is only checked by the assert.
+ */
+static INLINE void
+brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
+{
+   assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos));
+
+   if (bo == NULL)
+      return;
+
+   bo_reference( &brw->state.validated_bos[brw->state.validated_bo_count++],
+                 bo );
+}
+
+/* Tracked-state atoms, each defined once in its own source file.
+ *
+ * These must be declared `extern`: without it every translation unit
+ * that includes this header gets its own tentative definition of each
+ * object, which fails to link when common symbols are disabled.
+ */
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_cc_vp;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_curbe_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+
+
+/***********************************************************************
+ * brw_state.c
+ */
+int brw_validate_state(struct brw_context *brw);
+int brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+enum pipe_error brw_cache_data(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ struct brw_winsys_buffer **bo_out );
+
+enum pipe_error brw_cache_data_sz(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ GLuint data_size,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ struct brw_winsys_buffer **bo_out);
+
+enum pipe_error brw_upload_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ const void *data,
+ GLuint data_sz,
+ const void *aux,
+ void *aux_return ,
+ struct brw_winsys_buffer **bo_out);
+
+boolean brw_search_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ void *aux_return,
+ struct brw_winsys_buffer **bo_out);
+
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo);
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+/* Emit the struct pointed to by `s` into the batch, unconditionally
+ * (BRW_BATCH_STRUCT) or only when it differs from the cached copy of
+ * the same packet (BRW_CACHED_BATCH_STRUCT).  Both arguments are
+ * parenthesized so that expression arguments expand correctly; `s` is
+ * evaluated twice, so it must not have side effects.
+ */
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( (brw)->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( (brw), (s), sizeof(*(s)) )
+
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ GLuint sz );
+void brw_destroy_batch_cache( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+
+/***********************************************************************
+ * brw_wm_surface_state.c
+ */
+
+/***********************************************************************
+ * brw_state_debug.c
+ */
+void brw_update_dirty_counts( unsigned mesa,
+ unsigned brw,
+ unsigned cache );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
new file mode 100644
index 0000000000..7d212e5c24
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+
+
+
+/* A facility similar to the data caching code above, which aims to
+ * prevent identical commands being issued repeatedly.
+ */
+/* Emit `data` (`sz` bytes, beginning with a struct header) into the
+ * batch unless an identical packet with the same opcode was the last
+ * one emitted through this function.
+ *
+ * brw:  driver context; owns the list of cached packets
+ * data: packet to emit
+ * sz:   size of the packet in bytes
+ *
+ * Returns GL_TRUE if the packet was written to the batch, GL_FALSE if
+ * it was skipped because it matched the cached copy.
+ *
+ * Caching is best-effort: if memory for the cached copy cannot be
+ * allocated the packet is still emitted and GL_TRUE is returned; the
+ * packet is simply not remembered.
+ */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+                                   const void *data,
+                                   GLuint sz )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+   const struct header *newheader = (const struct header *)data;
+
+   if (brw->flags.always_emit_state) {
+      brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
+      return GL_TRUE;
+   }
+
+   while (item) {
+      if (item->header->opcode == newheader->opcode) {
+         if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+            return GL_FALSE;
+         if (item->sz != sz) {
+            /* Allocate the replacement before releasing the old copy so
+             * the entry never ends up with a NULL header.
+             */
+            void *resized = MALLOC(sz);
+
+            if (resized == NULL) {
+               /* Keep the old buffer (its opcode still identifies the
+                * entry) but mark it stale so it can never match.
+                */
+               item->sz = 0;
+               goto emit_uncached;
+            }
+            FREE(item->header);
+            item->header = resized;
+            item->sz = sz;
+         }
+         goto emit;
+      }
+      item = item->next;
+   }
+
+   assert(!item);
+   item = CALLOC_STRUCT(brw_cached_batch_item);
+   if (item == NULL)
+      goto emit_uncached;
+
+   item->header = MALLOC(sz);
+   if (item->header == NULL) {
+      FREE(item);
+      goto emit_uncached;
+   }
+   item->sz = sz;
+   item->next = brw->cached_batch_items;
+   brw->cached_batch_items = item;
+
+ emit:
+   memcpy(item->header, newheader, sz);
+ emit_uncached:
+   brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
+   return GL_TRUE;
+}
+
+/* Release every cached packet and leave the context with an empty
+ * cache list.
+ *
+ * The entries are allocated with MALLOC/CALLOC_STRUCT in
+ * brw_cached_batch_struct(), so they are released with the matching
+ * FREE wrapper rather than with free() directly.
+ */
+void brw_clear_batch_cache( struct brw_context *brw )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+   while (item) {
+      /* Read the link before the node is released. */
+      struct brw_cached_batch_item *next = item->next;
+
+      FREE(item->header);
+      FREE(item);
+      item = next;
+   }
+
+   brw->cached_batch_items = NULL;
+}
+
+/* Tear down the batch packet cache; this is the same operation as
+ * clearing it.
+ */
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+   brw_clear_batch_cache( brw );
+}
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
new file mode 100644
index 0000000000..16b643ceb2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -0,0 +1,617 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965. The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object (plus associated auxiliary data) in
+ * return.
+ *
+ * The inner workings are a simple hash table based on a CRC of the key data.
+ * The cache_id and relocation target buffers associated with the state
+ * buffer are included as auxiliary key data, but are not part of the hash
+ * value (this should be fixed, but will likely be fixed instead by making
+ * consumers use structured keys).
+ *
+ * Replacement is not implemented. Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc structs need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match. The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
+#include "util/u_memory.h"
+
+#include "brw_debug.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+
+/* XXX: Fixme - have to include these to get the sizes of the prog_key
+ * structs:
+ */
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+
+/**
+ * Compute the hash used to pick a cache bucket.
+ *
+ * The key bytes and then the raw bytes of the relocation array are
+ * consumed one 32-bit word at a time; each word is xor-ed into the hash,
+ * which is then rotated left by five bits.
+ *
+ * \param key        key data; key_size must be a multiple of four
+ * \param key_size   size of the key in bytes
+ * \param relocs     relocation entries mixed into the hash after the key
+ * \param nr_relocs  number of entries in relocs
+ */
+static GLuint
+hash_key(const void *key, GLuint key_size,
+         struct brw_winsys_reloc *relocs, GLuint nr_relocs)
+{
+   const GLuint *words = (const GLuint *)key;
+   GLuint reloc_bytes = nr_relocs * sizeof(struct brw_winsys_reloc);
+   GLuint result = 0;
+   GLuint w;
+
+   assert(key_size % 4 == 0);
+
+   /* I'm sure this can be improved on:
+    */
+   for (w = 0; w < key_size / 4; w++) {
+      result ^= words[w];
+      result = (result << 5) | (result >> 27);
+   }
+
+   /* Include the BO pointers as key data as well */
+   words = (const GLuint *)relocs;
+   for (w = 0; w < reloc_bytes / 4; w++) {
+      result ^= words[w];
+      result = (result << 5) | (result >> 27);
+   }
+
+   return result;
+}
+
+
+/**
+ * Record bo as the buffer most recently chosen for cache_id.
+ *
+ * When the buffer differs from the one recorded before, the reference in
+ * cache->last_bo[] is moved to it and the matching bit is set in the
+ * context's dirty.cache flags.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+                  struct brw_winsys_buffer *bo)
+{
+   if (cache->last_bo[cache_id] != bo) {
+      bo_reference( &cache->last_bo[cache_id], bo );
+
+      cache->brw->state.dirty.cache |= 1 << cache_id;
+   }
+}
+
+
+/**
+ * Walk the bucket selected by hash and return the item whose cache id,
+ * hash, key bytes and relocation list all match, or NULL if there is
+ * none.
+ */
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+             GLuint hash, const void *key, GLuint key_size,
+             struct brw_winsys_reloc *relocs, GLuint nr_relocs)
+{
+   struct brw_cache_item *candidate;
+
+#if 0
+   int bucketcount = 0;
+
+   for (candidate = cache->items[hash % cache->size]; candidate;
+        candidate = candidate->next)
+      bucketcount++;
+
+   debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size,
+                cache->size, bucketcount, cache->n_items);
+#endif
+
+   candidate = cache->items[hash % cache->size];
+
+   while (candidate) {
+      /* Cheap scalar comparisons first, byte comparisons last. */
+      if (candidate->cache_id == cache_id &&
+          candidate->hash == hash &&
+          candidate->key_size == key_size &&
+          memcmp(candidate->key, key, key_size) == 0 &&
+          candidate->nr_relocs == nr_relocs &&
+          memcmp(candidate->relocs, relocs,
+                 nr_relocs * sizeof *relocs) == 0)
+         break;
+
+      candidate = candidate->next;
+   }
+
+   return candidate;
+}
+
+
+/**
+ * Grow the hash table to three times its current number of buckets and
+ * redistribute every item according to its stored hash.
+ *
+ * If the larger bucket array cannot be allocated the table is left
+ * untouched: lookups keep working, only with longer bucket chains.
+ */
+static void
+rehash(struct brw_cache *cache)
+{
+   const GLuint new_size = cache->size * 3;
+   struct brw_cache_item **buckets;
+   struct brw_cache_item *c, *next;
+   GLuint i;
+
+   buckets = (struct brw_cache_item **) CALLOC(new_size, sizeof(*buckets));
+   if (!buckets)
+      return;
+
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+         /* Save the successor before the item is relinked. */
+         next = c->next;
+         c->next = buckets[c->hash % new_size];
+         buckets[c->hash % new_size] = c;
+      }
+   }
+
+   FREE(cache->items);
+   cache->items = buckets;
+   cache->size = new_size;
+}
+
+
+/**
+ * Look up a cached state buffer by cache_id, key and relocation list.
+ *
+ * On a hit a reference to the matching buffer is stored in *bo_out, the
+ * buffer is recorded as the most recent one for cache_id and, when
+ * aux_return is non-NULL, a pointer to the data stored directly after
+ * the key is written through it.
+ *
+ * \return TRUE if a matching entry was found, FALSE otherwise.
+ */
+boolean
+brw_search_cache(struct brw_cache *cache,
+                 enum brw_cache_id cache_id,
+                 const void *key,
+                 GLuint key_size,
+                 struct brw_winsys_reloc *relocs,
+                 GLuint nr_relocs,
+                 void *aux_return,
+                 struct brw_winsys_buffer **bo_out)
+{
+   const GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
+   struct brw_cache_item *hit =
+      search_cache(cache, cache_id, hash, key, key_size, relocs, nr_relocs);
+
+   if (!hit)
+      return FALSE;
+
+   if (aux_return) {
+      char *aux = (char *)hit->key + hit->key_size;
+      *(void **)aux_return = (void *)aux;
+   }
+
+   update_cache_last(cache, cache_id, hit->bo);
+   bo_reference(bo_out, hit->bo);
+   return TRUE;
+}
+
+
+/**
+ * Create a new cache entry: allocate a buffer object, upload data into
+ * it and remember it under (cache_id, key, relocs).
+ *
+ * The key, the aux data (cache->aux_size[cache_id] bytes read from aux)
+ * and a copy of the relocation array are stored in one allocation owned
+ * by the cache item.  The item takes a reference on the new buffer and
+ * bumps the reference count of every relocation target.
+ *
+ * The cache entry is only inserted once the upload has succeeded, so a
+ * failed call never leaves behind an entry for a buffer without data,
+ * and no bookkeeping memory is leaked on any failure path.
+ *
+ * \param aux_return  if non-NULL, receives a pointer to the stored copy
+ *                    of the aux data
+ * \param bo_out      receives the buffer returned by the winsys bo_alloc
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the bookkeeping
+ *         allocations fail, otherwise the error reported by the winsys
+ *         bo_alloc or bo_subdata call.
+ */
+enum pipe_error
+brw_upload_cache( struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *key,
+                  GLuint key_size,
+                  struct brw_winsys_reloc *relocs,
+                  GLuint nr_relocs,
+                  const void *data,
+                  GLuint data_size,
+                  const void *aux,
+                  void *aux_return,
+                  struct brw_winsys_buffer **bo_out)
+{
+   GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
+   GLuint relocs_size = nr_relocs * sizeof relocs[0];
+   GLuint aux_size = cache->aux_size[cache_id];
+   GLuint tmp_size = key_size + aux_size + relocs_size;
+   struct brw_cache_item *item;
+   enum pipe_error ret;
+   char *tmp;
+   GLuint i;
+
+   /* Bookkeeping memory first, so that running out of memory is detected
+    * before any buffer object has been created.  A zero-sized request is
+    * allowed to come back as NULL.
+    */
+   item = CALLOC_STRUCT(brw_cache_item);
+   tmp = MALLOC(tmp_size);
+   if (!item || (!tmp && tmp_size)) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto fail;
+   }
+
+   /* Create the buffer object to contain the data.  For now, use a
+    * single buffer type to describe all cached state atoms.  Later,
+    * may want to take advantage of hardware distinctions between
+    * these various entities.
+    */
+   ret = cache->sws->bo_alloc(cache->sws,
+                              cache->buffer_type,
+                              data_size, 1 << 6,
+                              bo_out);
+   if (ret)
+      goto fail;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("upload %s: %d bytes to cache id %d\n",
+                   cache->name[cache_id],
+                   data_size, cache_id);
+
+   /* Copy data to the buffer */
+   ret = cache->sws->bo_subdata(*bo_out,
+                                cache_id,
+                                0, data_size, data,
+                                relocs, nr_relocs);
+   if (ret)
+      goto fail;
+
+   /* Set up the memory containing the key, aux_data, and relocs.  Empty
+    * sections are skipped so memcpy never sees a zero-length copy from a
+    * possibly-NULL pointer.
+    */
+   if (key_size)
+      memcpy(tmp, key, key_size);
+   if (aux_size)
+      memcpy(tmp + key_size, aux, aux_size);
+   if (relocs_size)
+      memcpy(tmp + key_size + aux_size, relocs, relocs_size);
+
+   for (i = 0; i < nr_relocs; i++) {
+      p_atomic_inc(&relocs[i].bo->reference.count);
+   }
+
+   item->cache_id = cache_id;
+   item->key = tmp;
+   item->hash = hash;
+   item->key_size = key_size;
+   item->relocs = (struct brw_winsys_reloc *)(tmp + key_size + aux_size);
+   item->nr_relocs = nr_relocs;
+   bo_reference( &item->bo, *bo_out );
+   item->data_size = data_size;
+
+   if (cache->n_items > cache->size * 1.5)
+      rehash(cache);
+
+   hash %= cache->size;
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+   cache->n_items++;
+
+   if (aux_return) {
+      assert(cache->aux_size[cache_id]);
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   }
+
+   update_cache_last(cache, cache_id, item->bo);
+
+   return PIPE_OK;
+
+fail:
+   FREE(tmp);
+   FREE(item);
+   return ret;
+}
+
+
+/**
+ * Return a buffer holding data, using the data itself as the cache key.
+ *
+ * A matching cache entry is reused if one exists; otherwise the data is
+ * uploaded as a new entry with no aux data.
+ *
+ * This doesn't really work with aux data.  Use search/upload instead.
+ */
+enum pipe_error
+brw_cache_data_sz(struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *data,
+                  GLuint data_size,
+                  struct brw_winsys_reloc *relocs,
+                  GLuint nr_relocs,
+                  struct brw_winsys_buffer **bo_out)
+{
+   const GLuint hash = hash_key(data, data_size, relocs, nr_relocs);
+   struct brw_cache_item *hit =
+      search_cache(cache, cache_id, hash, data, data_size,
+                   relocs, nr_relocs);
+
+   if (!hit) {
+      /* Miss: the data doubles as key and as buffer contents. */
+      return brw_upload_cache(cache, cache_id,
+                              data, data_size,
+                              relocs, nr_relocs,
+                              data, data_size,
+                              NULL, NULL,
+                              bo_out);
+   }
+
+   update_cache_last(cache, cache_id, hit->bo);
+
+   bo_reference(bo_out, hit->bo);
+   return PIPE_OK;
+}
+
+
+/**
+ * Convenience form of brw_cache_data_sz() that takes the data size from
+ * the key size registered for cache_id.
+ *
+ * An earlier note here recommended brw_search_cache()/brw_upload_cache()
+ * whenever nr_relocs is nonzero, because changing offsets in the
+ * data-used-as-key would cause excessive cache misses.
+ *
+ * XXX: that note is no longer true -- can we remove some code?
+ */
+enum pipe_error
+brw_cache_data(struct brw_cache *cache,
+               enum brw_cache_id cache_id,
+               const void *data,
+               struct brw_winsys_reloc *relocs,
+               GLuint nr_relocs,
+               struct brw_winsys_buffer **bo_out)
+{
+   const GLuint canonical_size = cache->key_size[cache_id];
+
+   return brw_cache_data_sz(cache, cache_id, data, canonical_size,
+                            relocs, nr_relocs, bo_out);
+}
+
+
+/**
+ * Register the name, key size and aux data size of one cache id.
+ *
+ * The name is copied with MALLOC because brw_destroy_cache() releases it
+ * with FREE; strdup() memory is only guaranteed to pair with free().
+ * NOTE(review): if the copy cannot be allocated the name is left NULL,
+ * which the debug print in brw_upload_cache() would then receive --
+ * confirm debug_printf tolerates that.
+ *
+ * \param name      human-readable name of the cache id
+ * \param id        cache id being described
+ * \param key_size  canonical key size in bytes (0 where callers pass
+ *                  their own size)
+ * \param aux_size  size in bytes of the aux data stored with each entry
+ */
+static void
+brw_init_cache_id(struct brw_cache *cache,
+                  const char *name,
+                  enum brw_cache_id id,
+                  GLuint key_size,
+                  GLuint aux_size)
+{
+   size_t len = strlen(name) + 1;
+   char *copy = MALLOC(len);
+
+   if (copy)
+      memcpy(copy, name, len);
+
+   cache->name[id] = copy;
+   cache->key_size[id] = key_size;
+   cache->aux_size[id] = aux_size;
+}
+
+
+/**
+ * Set up brw->cache, the cache for general-state buffers.
+ *
+ * Starts with an empty seven-bucket hash table and registers the key and
+ * aux sizes of every unit-state, viewport, sampler and program cache id
+ * stored in it.
+ *
+ * NOTE(review): the bucket allocation is not checked, matching the
+ * original; this function has no way to report failure.
+ */
+static void
+brw_init_general_state_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->cache;
+
+   cache->brw = brw;
+   cache->sws = brw->sws;
+
+   cache->buffer_type = BRW_BUFFER_TYPE_GENERAL_STATE;
+
+   /* The bucket array holds item pointers, so each slot is sized by the
+    * pointer element type rather than by the item struct.
+    */
+   cache->size = 7;
+   cache->n_items = 0;
+   cache->items = (struct brw_cache_item **)
+      CALLOC(cache->size, sizeof(*cache->items));
+
+   brw_init_cache_id(cache, "CC_VP", BRW_CC_VP,
+                     sizeof(struct brw_cc_viewport), 0);
+
+   brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT,
+                     sizeof(struct brw_cc_unit_state), 0);
+
+   brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG,
+                     sizeof(struct brw_wm_prog_key),
+                     sizeof(struct brw_wm_prog_data));
+
+   brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR",
+                     BRW_SAMPLER_DEFAULT_COLOR,
+                     sizeof(struct brw_sampler_default_color), 0);
+
+   brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER,
+                     0,		/* variable key/data size */
+                     0);
+
+   brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT,
+                     sizeof(struct brw_wm_unit_state), 0);
+
+   brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG,
+                     sizeof(struct brw_sf_prog_key),
+                     sizeof(struct brw_sf_prog_data));
+
+   brw_init_cache_id(cache, "SF_VP", BRW_SF_VP,
+                     sizeof(struct brw_sf_viewport), 0);
+
+   brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT,
+                     sizeof(struct brw_sf_unit_state), 0);
+
+   brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT,
+                     sizeof(struct brw_vs_unit_state), 0);
+
+   brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG,
+                     sizeof(struct brw_vs_prog_key),
+                     sizeof(struct brw_vs_prog_data));
+
+   brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT,
+                     sizeof(struct brw_clip_unit_state), 0);
+
+   brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG,
+                     sizeof(struct brw_clip_prog_key),
+                     sizeof(struct brw_clip_prog_data));
+
+   brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT,
+                     sizeof(struct brw_gs_unit_state), 0);
+
+   brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG,
+                     sizeof(struct brw_gs_prog_key),
+                     sizeof(struct brw_gs_prog_data));
+}
+
+
+/**
+ * Set up brw->surface_cache, the cache for surface-state buffers.
+ *
+ * Starts with an empty seven-bucket hash table and registers the
+ * SS_SURFACE and SS_SURF_BIND cache ids.
+ *
+ * NOTE(review): the bucket allocation is not checked, matching the
+ * original; this function has no way to report failure.
+ */
+static void
+brw_init_surface_state_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->surface_cache;
+
+   cache->brw = brw;
+   cache->sws = brw->sws;
+
+   cache->buffer_type = BRW_BUFFER_TYPE_SURFACE_STATE;
+
+   /* The bucket array holds item pointers, so each slot is sized by the
+    * pointer element type rather than by the item struct.
+    */
+   cache->size = 7;
+   cache->n_items = 0;
+   cache->items = (struct brw_cache_item **)
+      CALLOC(cache->size, sizeof(*cache->items));
+
+   brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE,
+                     sizeof(struct brw_surface_state), 0);
+
+   /* Key size 0: no canonical size is registered for this id. */
+   brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND,
+                     0, 0);
+}
+
+
+/**
+ * Initialise both state caches of the context: general state first,
+ * then surface state.
+ */
+void
+brw_init_caches( struct brw_context *brw )
+{
+   brw_init_general_state_cache( brw );
+   brw_init_surface_state_cache( brw );
+}
+
+
+/**
+ * Throw away every entry of one cache.
+ *
+ * Each item drops its references on the relocation targets and on its
+ * own buffer before being freed.  Afterwards the saved curbe buffer is
+ * released and every dirty flag of the context is raised.
+ */
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   GLuint bucket;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
+
+   for (bucket = 0; bucket < cache->size; bucket++) {
+      struct brw_cache_item *walk = cache->items[bucket];
+
+      while (walk) {
+         struct brw_cache_item *doomed = walk;
+         int r;
+
+         /* Step past the item before it is freed. */
+         walk = walk->next;
+
+         for (r = 0; r < doomed->nr_relocs; r++)
+            bo_reference(&doomed->relocs[r].bo, NULL);
+
+         bo_reference(&doomed->bo, NULL);
+         FREE((void *)doomed->key);
+         FREE(doomed);
+      }
+
+      cache->items[bucket] = NULL;
+   }
+
+   cache->n_items = 0;
+
+   if (brw->curbe.last_buf) {
+      FREE(brw->curbe.last_buf);
+      brw->curbe.last_buf = NULL;
+   }
+
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+/* Remove from the cache every entry for which the winsys reports that
+ * the entry's buffer references the given bo.
+ *
+ * This lets us release memory for reuse earlier for known-dead buffers,
+ * at the cost of walking the entire hash table.
+ */
+void
+brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
+{
+   GLuint bucket;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
+
+   for (bucket = 0; bucket < cache->size; bucket++) {
+      /* link always points at the pointer that leads to the item under
+       * inspection, so unlinking needs no separate "previous" item.
+       */
+      struct brw_cache_item **link = &cache->items[bucket];
+
+      while (*link) {
+         struct brw_cache_item *entry = *link;
+         int r;
+
+         if (!cache->sws->bo_references(entry->bo, bo)) {
+            link = &entry->next;
+            continue;
+         }
+
+         *link = entry->next;
+
+         for (r = 0; r < entry->nr_relocs; r++)
+            bo_reference(&entry->relocs[r].bo, NULL);
+
+         bo_reference(&entry->bo, NULL);
+
+         FREE((void *)entry->key);
+         FREE(entry);
+         cache->n_items--;
+      }
+   }
+}
+
+/**
+ * Empty either state cache once it holds more than 1000 items.
+ */
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+   struct brw_cache *general = &brw->cache;
+   struct brw_cache *surface = &brw->surface_cache;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s (n_items=%d)\n", __FUNCTION__, general->n_items);
+
+   /* un-tuned guess.  We've got around 20 state objects for a total of
+    * around 32k, so 1000 of them is around 1.5MB.
+    */
+   if (general->n_items > 1000)
+      brw_clear_cache(brw, general);
+
+   if (surface->n_items > 1000)
+      brw_clear_cache(brw, surface);
+}
+
+
+/**
+ * Release everything owned by one cache: all entries, the per-id "last
+ * buffer" references and names, and finally the bucket array itself.
+ */
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   GLuint id;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
+
+   brw_clear_cache(brw, cache);
+
+   for (id = 0; id < BRW_MAX_CACHE; id++) {
+      bo_reference(&cache->last_bo[id], NULL);
+      FREE(cache->name[id]);
+   }
+
+   FREE(cache->items);
+   cache->items = NULL;
+   cache->size = 0;
+}
+
+
+/**
+ * Destroy both state caches of the context: general state first, then
+ * surface state.
+ */
+void
+brw_destroy_caches( struct brw_context *brw )
+{
+   brw_destroy_cache( brw, &brw->cache );
+   brw_destroy_cache( brw, &brw->surface_cache );
+}
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
new file mode 100644
index 0000000000..049c278c93
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -0,0 +1,153 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+
+/* One row of a dirty-flag statistics table: the flag bit, its printable
+ * name and the number of times it has been counted.  The name always
+ * comes from a string literal, hence const.
+ */
+struct dirty_bit_map {
+   uint32_t bit;
+   const char *name;
+   uint32_t count;
+};
+
+/* Build a table row from a flag macro: its value, its spelling as a
+ * string, and a zero count.
+ */
+#define DEFINE_BIT(name) {name, #name, 0}
+/* Statistics table for the PIPE_NEW_* dirty flags; passed to the count
+ * and print helpers by brw_update_dirty_counts().  The all-zero row
+ * terminates the table.
+ */
+static struct dirty_bit_map mesa_bits[] = {
+ DEFINE_BIT(PIPE_NEW_DEPTH_STENCIL_ALPHA),
+ DEFINE_BIT(PIPE_NEW_RAST),
+ DEFINE_BIT(PIPE_NEW_BLEND),
+ DEFINE_BIT(PIPE_NEW_VIEWPORT),
+ DEFINE_BIT(PIPE_NEW_SAMPLERS),
+ DEFINE_BIT(PIPE_NEW_VERTEX_BUFFER),
+ DEFINE_BIT(PIPE_NEW_VERTEX_ELEMENT),
+ DEFINE_BIT(PIPE_NEW_FRAGMENT_SHADER),
+ DEFINE_BIT(PIPE_NEW_VERTEX_SHADER),
+ DEFINE_BIT(PIPE_NEW_FRAGMENT_CONSTANTS),
+ DEFINE_BIT(PIPE_NEW_VERTEX_CONSTANTS),
+ DEFINE_BIT(PIPE_NEW_CLIP),
+ DEFINE_BIT(PIPE_NEW_INDEX_BUFFER),
+ DEFINE_BIT(PIPE_NEW_INDEX_RANGE),
+ DEFINE_BIT(PIPE_NEW_BLEND_COLOR),
+ DEFINE_BIT(PIPE_NEW_POLYGON_STIPPLE),
+ DEFINE_BIT(PIPE_NEW_FRAMEBUFFER_DIMENSIONS),
+ DEFINE_BIT(PIPE_NEW_DEPTH_BUFFER),
+ DEFINE_BIT(PIPE_NEW_COLOR_BUFFERS),
+ DEFINE_BIT(PIPE_NEW_QUERY),
+ DEFINE_BIT(PIPE_NEW_SCISSOR),
+ DEFINE_BIT(PIPE_NEW_BOUND_TEXTURES),
+ DEFINE_BIT(PIPE_NEW_NR_CBUFS),
+ {0, 0, 0}
+};
+/* Statistics table for the BRW_NEW_* dirty flags; passed to the count
+ * and print helpers by brw_update_dirty_counts().  The all-zero row
+ * terminates the table.
+ */
+static struct dirty_bit_map brw_bits[] = {
+ DEFINE_BIT(BRW_NEW_URB_FENCE),
+ DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+ DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+ DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+ DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_CONTEXT),
+ DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_PSP),
+ DEFINE_BIT(BRW_NEW_WM_SURFACES),
+ DEFINE_BIT(BRW_NEW_xxx),
+ DEFINE_BIT(BRW_NEW_INDICES),
+ {0, 0, 0}
+};
+/* Statistics table for the CACHE_NEW_* dirty flags; passed to the count
+ * and print helpers by brw_update_dirty_counts().  The all-zero row
+ * terminates the table.
+ */
+static struct dirty_bit_map cache_bits[] = {
+ DEFINE_BIT(CACHE_NEW_CC_VP),
+ DEFINE_BIT(CACHE_NEW_CC_UNIT),
+ DEFINE_BIT(CACHE_NEW_WM_PROG),
+ DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+ DEFINE_BIT(CACHE_NEW_SAMPLER),
+ DEFINE_BIT(CACHE_NEW_WM_UNIT),
+ DEFINE_BIT(CACHE_NEW_SF_PROG),
+ DEFINE_BIT(CACHE_NEW_SF_VP),
+ DEFINE_BIT(CACHE_NEW_SF_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_PROG),
+ DEFINE_BIT(CACHE_NEW_GS_UNIT),
+ DEFINE_BIT(CACHE_NEW_GS_PROG),
+ DEFINE_BIT(CACHE_NEW_CLIP_VP),
+ DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+ DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+ DEFINE_BIT(CACHE_NEW_SURFACE),
+ DEFINE_BIT(CACHE_NEW_SURF_BIND),
+ {0, 0, 0}
+};
+
+
+/* Increment the counter of every table row whose flag is set in bits.
+ * Scanning stops at the terminating zero row or after 32 rows.
+ */
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int row;
+
+   for (row = 0; row < 32 && bit_map[row].bit != 0; row++) {
+      if (bit_map[row].bit & bits)
+         bit_map[row].count++;
+   }
+}
+
+/* Print flag value, accumulated count and name for every table row, up
+ * to the terminating zero row or 32 rows.  The bits argument is not used.
+ */
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   const struct dirty_bit_map *row = bit_map;
+   int remaining = 32;
+
+   (void) bits;
+
+   while (remaining > 0 && row->bit != 0) {
+      debug_printf("0x%08x: %12d (%s)\n",
+                   row->bit, row->count, row->name);
+      row++;
+      remaining--;
+   }
+}
+
+/* Add one sample of the three dirty-flag words to the statistics tables.
+ * The tables are printed on the first call and on every 1000th call
+ * after it.
+ */
+void
+brw_update_dirty_counts( unsigned mesa,
+                         unsigned brw,
+                         unsigned cache )
+{
+   static int dirty_count = 0;
+   int report;
+
+   brw_update_dirty_count(mesa_bits, mesa);
+   brw_update_dirty_count(brw_bits, brw);
+   brw_update_dirty_count(cache_bits, cache);
+
+   report = (dirty_count % 1000 == 0);
+   dirty_count++;
+
+   if (!report)
+      return;
+
+   brw_print_dirty_count(mesa_bits, mesa);
+   brw_print_dirty_count(brw_bits, brw);
+   brw_print_dirty_count(cache_bits, cache);
+   debug_printf("\n");
+}
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
new file mode 100644
index 0000000000..f8b91eff81
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -0,0 +1,270 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_debug.h"
+/* Ordered list of state atoms.  brw_validate_state() walks it calling
+ * each matching atom's prepare() hook and brw_upload_state() walks it
+ * calling each matching atom's emit() hook, so the order of the entries
+ * is significant (see the per-entry notes).
+ */
+const struct brw_tracked_state *atoms[] =
+{
+/* &brw_wm_input_sizes, */
+ &brw_vs_prog,
+ &brw_gs_prog,
+ &brw_clip_prog,
+ &brw_sf_prog,
+ &brw_wm_prog,
+
+ /* Once all the programs are done, we know how large urb entry
+ * sizes need to be and can decide if we need to change the urb
+ * layout.
+ */
+ &brw_curbe_offsets,
+ &brw_recalculate_urb_fence,
+
+ &brw_cc_vp,
+ &brw_cc_unit,
+
+ &brw_vs_surfaces, /* must do before unit */
+ /*&brw_wm_constant_surface,*/ /* must do before wm surfaces/bind bo */
+ &brw_wm_surfaces, /* must do before samplers and unit */
+ &brw_wm_samplers,
+
+ &brw_wm_unit,
+ &brw_sf_vp,
+ &brw_sf_unit,
+ &brw_vs_unit, /* always required, enabled or not */
+ &brw_clip_unit,
+ &brw_gs_unit,
+
+ /* Command packets:
+ */
+ &brw_invarient_state,
+ &brw_state_base_address,
+
+ &brw_binding_table_pointers,
+ &brw_blend_constant_color,
+
+ &brw_depthbuffer,
+ &brw_polygon_stipple,
+ &brw_line_stipple,
+
+ &brw_psp_urb_cbs,
+
+ &brw_drawing_rect,
+ &brw_indices,
+ &brw_index_buffer,
+ &brw_vertices,
+
+ &brw_curbe_buffer
+};
+
+
+/* Set up the state-tracking machinery of a context; currently that is
+ * only the state caches.
+ */
+void
+brw_init_state( struct brw_context *brw )
+{
+   brw_init_caches( brw );
+}
+
+
+/* Release the state caches, then the command packet cache. */
+void
+brw_destroy_state( struct brw_context *brw )
+{
+   brw_destroy_caches( brw );
+   brw_destroy_batch_cache( brw );
+}
+
+/***********************************************************************
+ */
+
+/* Report whether two flag sets share a bit in any of the three words. */
+static GLboolean check_state( const struct brw_state_flags *a,
+                              const struct brw_state_flags *b )
+{
+   if (a->mesa & b->mesa)
+      return 1;
+
+   if (a->brw & b->brw)
+      return 1;
+
+   if (a->cache & b->cache)
+      return 1;
+
+   return 0;
+}
+
+/* Merge the flags of b into a, word by word. */
+static void accumulate_state( struct brw_state_flags *a,
+                              const struct brw_state_flags *b )
+{
+   a->mesa  = a->mesa  | b->mesa;
+   a->brw   = a->brw   | b->brw;
+   a->cache = a->cache | b->cache;
+}
+
+
+/* Store in result the flags that differ between a and b. */
+static void xor_states( struct brw_state_flags *result,
+                        const struct brw_state_flags *a,
+                        const struct brw_state_flags *b )
+{
+   result->mesa  = b->mesa  ^ a->mesa;
+   result->brw   = b->brw   ^ a->brw;
+   result->cache = b->cache ^ a->cache;
+}
+
+/* Drop the references collected in brw->state.validated_bos[] during the
+ * previous round and reset the list to empty.
+ */
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+   int slot = 0;
+
+   /* Clear the last round of validated bos */
+   while (slot < brw->state.validated_bo_count) {
+      bo_reference(&brw->state.validated_bos[slot], NULL);
+      slot++;
+   }
+
+   brw->state.validated_bo_count = 0;
+}
+
+
+/***********************************************************************
+ * Prepare all state:
+ *
+ * Restarts the validated-buffer list with the batch buffer, then runs
+ * the prepare() hook of every atom whose dirty flags intersect the
+ * context's.  Returns the first non-zero hook result, or 0 when nothing
+ * is dirty or every hook succeeded.
+ */
+enum pipe_error brw_validate_state( struct brw_context *brw )
+{
+   struct brw_state_flags *dirty = &brw->state.dirty;
+   const struct brw_fragment_shader *fp;
+   GLuint idx;
+
+   brw_clear_validated_bos(brw);
+   brw_add_validated_bo(brw, brw->batch->buf);
+
+   if (brw->flags.always_emit_state) {
+      dirty->mesa |= ~0;
+      dirty->brw |= ~0;
+      dirty->cache |= ~0;
+   }
+
+   if (!dirty->mesa && !dirty->cache && !dirty->brw)
+      return 0;
+
+   if (dirty->brw & BRW_NEW_CONTEXT)
+      brw_clear_batch_cache(brw);
+
+   /* do prepare stage for all atoms */
+   for (idx = 0; idx < Elements(atoms); idx++) {
+      const struct brw_tracked_state *atom = atoms[idx];
+      int err;
+
+      if (!atom->prepare || !check_state(dirty, &atom->dirty))
+         continue;
+
+      err = atom->prepare(brw);
+      if (err)
+         return err;
+   }
+
+   /* Make sure that the textures which are referenced by the current
+    * brw fragment program are actually present/valid.
+    * If this fails, we can experience GPU lock-ups.
+    */
+   fp = brw->curr.fragment_shader;
+   if (fp) {
+      assert(fp->info.file_max[TGSI_FILE_SAMPLER] < (int)brw->curr.num_samplers);
+      /*assert(fp->info.texture_max <= brw->curr.num_textures);*/
+   }
+
+   return 0;
+}
+
+/**
+ * Emit all dirty state.
+ *
+ * Walks the atoms list in order and calls the emit() hook of every atom
+ * whose dirty flags intersect the context's dirty flags.  The first
+ * non-zero hook result is returned immediately, leaving the dirty flags
+ * as they are; otherwise the dirty flags are zeroed and 0 is returned.
+ *
+ * When BRW_DEBUG is non-zero the walk additionally asserts that every
+ * atom declares at least one dirty flag, and that no emit() hook raises
+ * a flag that an atom examined so far (including itself) depends on.
+ */
+enum pipe_error brw_upload_state(struct brw_context *brw)
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ int ret;
+ int i;
+
+ brw_clear_validated_bos(brw);
+
+ if (BRW_DEBUG) {
+ /* Debug version which enforces various sanity checks on the
+ * state flags which are generated and checked to help ensure
+ * state atoms are ordered correctly in the list.
+ */
+ struct brw_state_flags examined, prev;
+ memset(&examined, 0, sizeof(examined));
+ prev = *state;
+
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+ struct brw_state_flags generated;
+
+ assert(atom->dirty.mesa ||
+ atom->dirty.brw ||
+ atom->dirty.cache);
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->emit) {
+ ret = atom->emit( brw );
+ if (ret)
+ return ret;
+ }
+ }
+
+ accumulate_state(&examined, &atom->dirty);
+
+ /* generated = (prev ^ state)
+ * if (examined & generated)
+ * fail;
+ */
+ xor_states(&generated, &prev, state);
+ assert(!check_state(&examined, &generated));
+ prev = *state;
+ }
+ }
+ else {
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->emit) {
+ ret = atom->emit( brw );
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_STATE) {
+ brw_update_dirty_counts( state->mesa,
+ state->brw,
+ state->cache );
+ }
+
+ /* Clear dirty flags:
+ */
+ memset(state, 0, sizeof(*state));
+ return 0;
+}
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
new file mode 100644
index 0000000000..bf10bc04de
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -0,0 +1,1576 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+#include "brw_types.h"
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
+/* Command packets:
+ *
+ * Generic packet header: a 16-bit length field and a 16-bit opcode
+ * field, 32 bits of bit-fields in total.
+ */
+struct header
+{
+ GLuint length:16;
+ GLuint opcode:16;
+};
+
+/* Overlays a struct header with a single GLuint dword. */
+union header_union
+{
+ struct header bits;
+ GLuint dword;
+};
+/* Four-dword packet: a 32-bit header of bit-fields, a destination dword
+ * (2 pad bits, 1 address-type bit, 29 address bits) and two raw dwords.
+ */
+struct brw_3d_control
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint notify_enable:1;
+ GLuint pad:3;
+ GLuint wc_flush_enable:1;
+ GLuint depth_stall_enable:1;
+ GLuint operation:2;
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint pad:2;
+ GLuint dest_addr_type:1;
+ GLuint dest_addr:29;
+ } dest;
+
+ GLuint dword2;
+ GLuint dword3;
+};
+
+/* Primitive packet: a 32-bit header carrying topology and indexed
+ * fields, followed by five dwords of vertex/instance counts and offsets.
+ */
+struct brw_3d_primitive
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint pad:2;
+ GLuint topology:5;
+ GLuint indexed:1;
+ GLuint opcode:16;
+ } header;
+
+ GLuint verts_per_instance;
+ GLuint start_vert_location;
+ GLuint instance_count;
+ GLuint start_instance_location;
+ GLuint base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE 0x1
+#define BRW_FLUSH_STATE_CACHE 0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
+/* One-dword flush packet: 4 flag bits (presumably the BRW_*FLUSH* values
+ * defined above -- confirm), 12 pad bits and a 16-bit opcode.
+ */
+struct brw_mi_flush
+{
+ GLuint flags:4;
+ GLuint pad:12;
+ GLuint opcode:16;
+};
+/* One-dword packet: a statistics enable bit, 15 pad bits, 16-bit opcode. */
+struct brw_vf_statistics
+{
+ GLuint statistics_enable:1;
+ GLuint pad:15;
+ GLuint opcode:16;
+};
+
+
+/* Packet header followed by one dword per stage: vs, gs, clp, sf, wm. */
+struct brw_binding_table_pointers
+{
+ struct header header;
+ GLuint vs;
+ GLuint gs;
+ GLuint clp;
+ GLuint sf;
+ GLuint wm;
+};
+
+/* Packet header followed by four floats of blend constant color. */
+struct brw_blend_constant_color
+{
+ struct header header;
+ GLfloat blend_constant_color[4];
+};
+
+/* Depth buffer packet, five dwords: header, a format/tiling dword, the
+ * base address, a size dword and a depth/array dword.  Each bit-field
+ * dword is a union with a raw GLuint so it can be accessed either way.
+ */
+struct brw_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ GLuint pitch:18;
+ GLuint format:3;
+ GLuint pad:2;
+ GLuint software_tiled_rendering_mode:2;
+ GLuint depth_offset_disable:1;
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad2:1;
+ GLuint surface_type:3;
+ } bits;
+ GLuint dword;
+ } dword1;
+
+ GLuint dword2_base_addr;
+
+ union {
+ struct {
+ GLuint pad:1;
+ GLuint mipmap_layout:1;
+ GLuint lod:4;
+ GLuint width:13;
+ GLuint height:13;
+ } bits;
+ GLuint dword;
+ } dword3;
+
+ union {
+ struct {
+ GLuint pad:10;
+ GLuint min_array_element:11;
+ GLuint depth:11;
+ } bits;
+ GLuint dword;
+ } dword4;
+};
+/* Same first five dwords as struct brw_depthbuffer, plus a sixth dword
+ * holding 16-bit x and y offsets.
+ */
+struct brw_depthbuffer_g4x
+{
+ union header_union header;
+
+ union {
+ struct {
+ GLuint pitch:18;
+ GLuint format:3;
+ GLuint pad:2;
+ GLuint software_tiled_rendering_mode:2;
+ GLuint depth_offset_disable:1;
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad2:1;
+ GLuint surface_type:3;
+ } bits;
+ GLuint dword;
+ } dword1;
+
+ GLuint dword2_base_addr;
+
+ union {
+ struct {
+ GLuint pad:1;
+ GLuint mipmap_layout:1;
+ GLuint lod:4;
+ GLuint width:13;
+ GLuint height:13;
+ } bits;
+ GLuint dword;
+ } dword3;
+
+ union {
+ struct {
+ GLuint pad:10;
+ GLuint min_array_element:11;
+ GLuint depth:11;
+ } bits;
+ GLuint dword;
+ } dword4;
+
+ union {
+ struct {
+ GLuint xoffset:16;
+ GLuint yoffset:16;
+ } bits;
+ GLuint dword;
+ } dword5; /* NEW in Integrated Graphics Device */
+};
+/* Packet header followed by three pairs of 16-bit fields: minimum
+ * corner, maximum corner and origin.
+ */
+struct brw_drawrect
+{
+ struct header header;
+ GLuint xmin:16;
+ GLuint ymin:16;
+ GLuint xmax:16;
+ GLuint ymax:16;
+ GLuint xorg:16;
+ GLuint yorg:16;
+};
+
+
+
+/* Packet header followed by a single float clamp value. */
+struct brw_global_depth_offset_clamp
+{
+ struct header header;
+ GLfloat depth_offset_clamp;
+};
+/* Index buffer packet: a header accessible as bit-fields or as a raw
+ * dword, followed by the buffer start and end dwords.
+ */
+struct brw_indexbuffer
+{
+ union {
+ struct
+ {
+ GLuint length:8;
+ GLuint index_format:2;
+ GLuint cut_index_enable:1;
+ GLuint pad:5;
+ GLuint opcode:16;
+ } bits;
+ GLuint dword;
+
+ } header;
+
+ GLuint buffer_start;
+ GLuint buffer_end;
+};
+
+/* NEW in Integrated Graphics Device */
+struct brw_aa_line_parameters /* struct header plus two 32-bit groups, each holding two 8-bit values separated by 8 pad bits. */
+{
+ struct header header;
+
+ struct {
+ GLuint aa_coverage_scope:8; /* NOTE(review): possibly a typo for "slope" (cf. aa_coverage_endcap_slope below); renaming would change the interface -- confirm first */
+ GLuint pad0:8;
+ GLuint aa_coverage_bias:8;
+ GLuint pad1:8;
+ } bits0; /* widths sum to 32 */
+
+ struct {
+ GLuint aa_coverage_endcap_slope:8;
+ GLuint pad0:8;
+ GLuint aa_coverage_endcap_bias:8;
+ GLuint pad1:8;
+ } bits1; /* widths sum to 32 */
+};
+
+struct brw_line_stipple /* struct header plus two 32-bit groups: the pattern, and the repeat counts. */
+{
+ struct header header;
+
+ struct
+ {
+ GLuint pattern:16;
+ GLuint pad:16;
+ } bits0; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint repeat_count:9;
+ GLuint pad:7;
+ GLuint inverse_repeat_count:16; /* NOTE(review): numeric encoding of the inverse is not stated here -- confirm */
+ } bits1; /* widths sum to 32 */
+};
+
+
+struct brw_pipelined_state_pointers /* struct header plus six 32-bit entries (vs, gs, clp, sf, wm, cc), each ending in a 27-bit offset. */
+{
+ struct header header;
+
+ struct {
+ GLuint pad:5; /* the first 5 bits of every entry are not part of the offset */
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } vs;
+
+ struct
+ {
+ GLuint enable:1; /* only gs and clp carry an enable flag; it takes one of the 5 non-offset bits */
+ GLuint pad:4;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } gs;
+
+ struct
+ {
+ GLuint enable:1;
+ GLuint pad:4;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } clp;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } sf;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } wm;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */
+ } cc;
+};
+
+
+struct brw_polygon_stipple_offset /* struct header plus one 32-bit group holding two 5-bit offsets (each can represent 0..31). */
+{
+ struct header header;
+
+ struct {
+ GLuint y_offset:5;
+ GLuint pad:3;
+ GLuint x_offset:5;
+ GLuint pad0:19;
+ } bits0; /* widths sum to 32 */
+};
+
+
+
+struct brw_polygon_stipple /* struct header followed by 32 words of pattern data. */
+{
+ struct header header;
+ GLuint stipple[32]; /* NOTE(review): presumably one word per row of a 32x32 pattern -- confirm */
+};
+
+
+
+struct brw_pipeline_select /* Consists of a single locally-declared 32-bit header; there is no payload. */
+{
+ struct
+ {
+ GLuint pipeline_select:1; /* NOTE(review): meaning of 0 vs 1 is not stated here -- confirm */
+ GLuint pad:15;
+ GLuint opcode:16;
+ } header; /* widths sum to 32 */
+};
+
+
+struct brw_pipe_control /* Locally-declared header carrying the control flags, one address group, and two data words. */
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint notify_enable:1;
+ GLuint texture_cache_flush_enable:1;
+ GLuint indirect_state_pointers_disable:1;
+ GLuint instruction_state_cache_flush_enable:1;
+ GLuint write_cache_flush_enable:1;
+ GLuint depth_stall_enable:1;
+ GLuint post_sync_operation:2;
+
+ GLuint opcode:16;
+ } header; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint pad:2;
+ GLuint dest_addr_type:1;
+ GLuint dest_addr:29; /* 29 bits: the address shares its word with 3 other bits */
+ } bits1; /* widths sum to 32 */
+
+ GLuint data0;
+ GLuint data1;
+};
+
+
+struct brw_urb_fence /* Locally-declared header with one realloc flag per unit, then two groups of per-unit fence values. */
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint vs_realloc:1;
+ GLuint gs_realloc:1;
+ GLuint clp_realloc:1;
+ GLuint sf_realloc:1;
+ GLuint vfe_realloc:1;
+ GLuint cs_realloc:1;
+ GLuint pad:2;
+ GLuint opcode:16;
+ } header; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint vs_fence:10;
+ GLuint gs_fence:10;
+ GLuint clp_fence:10;
+ GLuint pad:2;
+ } bits0; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint sf_fence:10;
+ GLuint vf_fence:10; /* note: the header flag for this unit is spelled vfe_realloc */
+ GLuint cs_fence:11; /* one bit wider than the other five fences */
+ GLuint pad:1;
+ } bits1; /* widths sum to 32 */
+};
+
+struct brw_cs_urb_state /* struct header plus one 32-bit group: entry count and entry size. */
+{
+ struct header header;
+
+ struct
+ {
+ GLuint nr_urb_entries:3;
+ GLuint pad:1;
+ GLuint urb_entry_size:5; /* NOTE(review): units / bias of the size encoding are not stated here -- confirm */
+ GLuint pad0:23;
+ } bits0; /* widths sum to 32 */
+};
+
+struct brw_constant_buffer /* Locally-declared header with a valid flag, then one word packing buffer length and address. */
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint valid:1;
+ GLuint pad:7;
+ GLuint opcode:16;
+ } header; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint buffer_length:6;
+ GLuint buffer_address:26; /* 26 bits: shares its word with the 6-bit length */
+ } bits0; /* widths sum to 32 */
+};
+
+struct brw_state_base_address /* struct header plus five 32-bit groups: three base addresses (bits0-2) and two upper bounds (bits3-4). */
+{
+ struct header header;
+
+ struct
+ {
+ GLuint modify_enable:1; /* every group starts with its own modify_enable flag */
+ GLuint pad:4;
+ GLuint general_state_address:27;
+ } bits0; /* 1 + 4 + 27 = 32 */
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:11;
+ GLuint general_state_upper_bound:20; /* bounds are 20 bits wide, addresses above are 27 */
+ } bits3; /* 1 + 11 + 20 = 32 */
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:11;
+ GLuint indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct brw_state_prefetch /* struct header plus one 32-bit group: a 3-bit count and a 26-bit pointer. */
+{
+ struct header header;
+
+ struct
+ {
+ GLuint prefetch_count:3;
+ GLuint pad:3;
+ GLuint prefetch_pointer:26;
+ } bits0; /* widths sum to 32 */
+};
+
+struct brw_system_instruction_pointer /* struct header plus one 32-bit group holding a 28-bit pointer. */
+{
+ struct header header;
+
+ struct
+ {
+ GLuint pad:4;
+ GLuint system_instruction_pointer:28;
+ } bits0; /* widths sum to 32 */
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0 /* First word of the clip/sf/gs/vs/wm unit-state structs below; widths sum to 32. */
+{
+ GLuint pad0:1;
+ GLuint grf_reg_count:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
+};
+
+struct thread1 /* Second word of the sf/gs/vs/wm unit-state structs (brw_clip_unit_state declares its own variant); widths sum to 32. */
+{
+ GLuint ext_halt_exception_enable:1;
+ GLuint sw_exception_enable:1;
+ GLuint mask_stack_exception_enable:1;
+ GLuint timeout_exception_enable:1;
+ GLuint illegal_op_exception_enable:1;
+ GLuint pad0:3;
+ GLuint depth_coef_urb_read_offset:6; /* WM only */
+ GLuint pad1:2;
+ GLuint floating_point_mode:1;
+ GLuint thread_priority:1;
+ GLuint binding_table_entry_count:8;
+ GLuint pad3:5; /* there is no pad2 in this struct; the numbering skips from pad1 to pad3 */
+ GLuint single_program_flow:1;
+};
+
+struct thread2 /* Third word of the unit-state structs below: scratch-space size and base; widths sum to 32. */
+{
+ GLuint per_thread_scratch_space:4; /* NOTE(review): size encoding (presumably not a raw byte count, given 4 bits) -- confirm */
+ GLuint pad0:6;
+ GLuint scratch_space_base_pointer:22;
+};
+
+
+struct thread3 /* Fourth word of the unit-state structs below: dispatch register plus URB / constant-URB read offset and length; widths sum to 32. */
+{
+ GLuint dispatch_grf_start_reg:4;
+ GLuint urb_entry_read_offset:6;
+ GLuint pad0:1;
+ GLuint urb_entry_read_length:6;
+ GLuint pad1:1;
+ GLuint const_urb_entry_read_offset:6;
+ GLuint pad2:1;
+ GLuint const_urb_entry_read_length:6;
+ GLuint pad3:1;
+};
+
+
+
+struct brw_clip_unit_state /* Seven 32-bit words (thread0-4, clip5, clip6) followed by four floats. */
+{
+ struct thread0 thread0;
+ struct /* local layout used instead of struct thread1: only three of the exception enables appear, at different positions */
+ {
+ GLuint pad0:7;
+ GLuint sw_exception_enable:1;
+ GLuint pad1:3;
+ GLuint mask_stack_exception_enable:1;
+ GLuint pad2:1;
+ GLuint illegal_op_exception_enable:1;
+ GLuint pad3:2;
+ GLuint floating_point_mode:1;
+ GLuint thread_priority:1;
+ GLuint binding_table_entry_count:8;
+ GLuint pad4:5;
+ GLuint single_program_flow:1;
+ } thread1; /* widths sum to 32 */
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:9;
+ GLuint gs_output_stats:1; /* not always */
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:5; /* may be less */
+ GLuint pad3:2;
+ } thread4; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint pad0:13;
+ GLuint clip_mode:3;
+ GLuint userclip_enable_flags:8;
+ GLuint userclip_must_clip:1;
+ GLuint negative_w_clip_test:1;
+ GLuint guard_band_enable:1;
+ GLuint viewport_z_clip_enable:1;
+ GLuint viewport_xy_clip_enable:1;
+ GLuint vertex_position_space:1;
+ GLuint api_mode:1;
+ GLuint pad2:1; /* there is no pad1 in this group */
+ } clip5; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint pad0:5;
+ GLuint clipper_viewport_state_ptr:27;
+ } clip6; /* widths sum to 32 */
+
+
+ GLfloat viewport_xmin; /* same four names, in the same order, as the members of struct brw_clipper_viewport */
+ GLfloat viewport_xmax;
+ GLfloat viewport_ymin;
+ GLfloat viewport_ymax;
+};
+
+
+
+struct brw_cc_unit_state /* Eight words cc0..cc7; unlike the other unit states each word has a named struct tag (brw_cc0..brw_cc7) and there are no thread0-3 members. */
+{
+ struct brw_cc0
+ {
+ GLuint pad0:3;
+ GLuint bf_stencil_pass_depth_pass_op:3; /* bf_* members parallel the un-prefixed stencil members; NOTE(review): presumably "back face" -- confirm */
+ GLuint bf_stencil_pass_depth_fail_op:3;
+ GLuint bf_stencil_fail_op:3;
+ GLuint bf_stencil_func:3;
+ GLuint bf_stencil_enable:1;
+ GLuint pad1:2;
+ GLuint stencil_write_enable:1;
+ GLuint stencil_pass_depth_pass_op:3;
+ GLuint stencil_pass_depth_fail_op:3;
+ GLuint stencil_fail_op:3;
+ GLuint stencil_func:3;
+ GLuint stencil_enable:1;
+ } cc0; /* widths sum to 32 */
+
+
+ struct brw_cc1
+ {
+ GLuint bf_stencil_ref:8;
+ GLuint stencil_write_mask:8;
+ GLuint stencil_test_mask:8;
+ GLuint stencil_ref:8;
+ } cc1; /* four 8-bit fields */
+
+
+ struct brw_cc2
+ {
+ GLuint logicop_enable:1;
+ GLuint pad0:10;
+ GLuint depth_write_enable:1;
+ GLuint depth_test_function:3;
+ GLuint depth_test:1;
+ GLuint bf_stencil_write_mask:8; /* the bf_ masks live here, the un-prefixed masks in cc1 */
+ GLuint bf_stencil_test_mask:8;
+ } cc2; /* widths sum to 32 */
+
+
+ struct brw_cc3
+ {
+ GLuint pad0:8;
+ GLuint alpha_test_func:3;
+ GLuint alpha_test:1;
+ GLuint blend_enable:1;
+ GLuint ia_blend_enable:1; /* ia_* members parallel the blend members of cc6; NOTE(review): presumably "independent alpha" -- confirm */
+ GLuint pad1:1;
+ GLuint alpha_test_format:1; /* NOTE(review): presumably selects which member of cc7.alpha_ref is used -- confirm */
+ GLuint pad2:16;
+ } cc3; /* widths sum to 32 */
+
+ struct brw_cc4
+ {
+ GLuint pad0:5;
+ GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } cc4;
+
+ struct brw_cc5
+ {
+ GLuint pad0:2;
+ GLuint ia_dest_blend_factor:5;
+ GLuint ia_src_blend_factor:5;
+ GLuint ia_blend_function:3;
+ GLuint statistics_enable:1;
+ GLuint logicop_func:4;
+ GLuint pad1:11;
+ GLuint dither_enable:1;
+ } cc5; /* widths sum to 32 */
+
+ struct brw_cc6
+ {
+ GLuint clamp_post_alpha_blend:1;
+ GLuint clamp_pre_alpha_blend:1;
+ GLuint clamp_range:2;
+ GLuint pad0:11;
+ GLuint y_dither_offset:2;
+ GLuint x_dither_offset:2;
+ GLuint dest_blend_factor:5;
+ GLuint src_blend_factor:5;
+ GLuint blend_function:3;
+ } cc6; /* widths sum to 32 */
+
+ struct brw_cc7 {
+ union {
+ GLfloat f;
+ GLubyte ub[4]; /* byte-wise view of the same storage as f */
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct brw_sf_unit_state /* Eight 32-bit words: the shared thread0-3, a local thread4, then sf5..sf7. */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:10;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:6;
+ GLuint pad3:1;
+ } thread4; /* widths sum to 32; same layout as brw_vs_unit_state.thread4 */
+
+ struct
+ {
+ GLuint front_winding:1;
+ GLuint viewport_transform:1;
+ GLuint pad0:3;
+ GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } sf5; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint pad0:9;
+ GLuint dest_org_vbias:4;
+ GLuint dest_org_hbias:4;
+ GLuint scissor:1;
+ GLuint disable_2x2_trifilter:1;
+ GLuint disable_zero_pix_trifilter:1;
+ GLuint point_rast_rule:2;
+ GLuint line_endcap_aa_region_width:2;
+ GLuint line_width:4; /* NOTE(review): fixed-point format of the 4-bit width is not stated here -- confirm */
+ GLuint fast_scissor_disable:1;
+ GLuint cull_mode:2;
+ GLuint aa_enable:1;
+ } sf6; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint point_size:11; /* NOTE(review): fixed-point format is not stated here -- confirm */
+ GLuint use_point_size_state:1;
+ GLuint subpixel_precision:1;
+ GLuint sprite_point:1;
+ GLuint pad0:10;
+ GLuint aa_line_distance_mode:1;
+ GLuint trifan_pv:2; /* NOTE(review): *_pv presumably "provoking vertex" -- confirm */
+ GLuint linestrip_pv:2;
+ GLuint tristrip_pv:2;
+ GLuint line_last_pixel_enable:1;
+ } sf7; /* widths sum to 32 */
+
+};
+
+
+struct brw_gs_unit_state /* Seven 32-bit words: the shared thread0-3, a local thread4, then gs5 and gs6. */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:8;
+ GLuint rendering_enable:1; /* for IGDNG */
+ GLuint pad4:1; /* named pad4 although it is declared second */
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:5; /* 5 bits here; the sf and vs variants use 6 */
+ GLuint pad3:2;
+ } thread4; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint sampler_count:3;
+ GLuint pad0:2;
+ GLuint sampler_state_pointer:27;
+ } gs5; /* widths sum to 32; same layout as brw_vs_unit_state.vs5 */
+
+
+ struct
+ {
+ GLuint max_vp_index:4;
+ GLuint pad0:12;
+ GLuint svbi_post_inc_value:10;
+ GLuint pad1:1;
+ GLuint svbi_post_inc_enable:1;
+ GLuint svbi_payload:1;
+ GLuint discard_adjaceny:1; /* (sic) misspelling of "adjacency"; renaming would change the struct's interface */
+ GLuint reorder_enable:1;
+ GLuint pad2:1;
+ } gs6; /* widths sum to 32 */
+};
+
+
+struct brw_vs_unit_state /* Seven 32-bit words: the shared thread0-3, a local thread4, then vs5 and vs6. */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:10;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:6;
+ GLuint pad3:1;
+ } thread4; /* widths sum to 32; same layout as brw_sf_unit_state.thread4 */
+
+ struct
+ {
+ GLuint sampler_count:3;
+ GLuint pad0:2;
+ GLuint sampler_state_pointer:27;
+ } vs5; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint vs_enable:1;
+ GLuint vert_cache_disable:1; /* note the inverted sense: this flag disables, vs_enable enables */
+ GLuint pad0:30;
+ } vs6; /* widths sum to 32 */
+};
+
+
+struct brw_wm_unit_state /* The shared thread0-3, then wm4, wm5, two floats, and wm8..wm10 (no members are named wm6 or wm7). */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ GLuint stats_enable:1;
+ GLuint depth_buffer_clear:1;
+ GLuint sampler_count:3;
+ GLuint sampler_state_pointer:27;
+ } wm4; /* widths sum to 32; there is no padding in this word */
+
+ struct
+ {
+ GLuint enable_8_pix:1;
+ GLuint enable_16_pix:1;
+ GLuint enable_32_pix:1;
+ GLuint enable_con_32_pix:1; /* NOTE(review): "con" presumably "contiguous" -- confirm */
+ GLuint enable_con_64_pix:1;
+ GLuint pad0:5;
+ GLuint legacy_global_depth_bias:1;
+ GLuint line_stipple:1;
+ GLuint depth_offset:1;
+ GLuint polygon_stipple:1;
+ GLuint line_aa_region_width:2;
+ GLuint line_endcap_aa_region_width:2;
+ GLuint early_depth_test:1;
+ GLuint thread_dispatch_enable:1;
+ GLuint program_uses_depth:1;
+ GLuint program_computes_depth:1;
+ GLuint program_uses_killpixel:1;
+ GLuint legacy_line_rast: 1;
+ GLuint transposed_urb_read_enable:1;
+ GLuint max_threads:7;
+ } wm5; /* widths sum to 32 */
+
+ GLfloat global_depth_offset_constant; /* these two floats occupy the word positions between wm5 and wm8 */
+ GLfloat global_depth_offset_scale;
+
+ /* for IGDNG only */
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_1:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_1:26;
+ } wm8; /* same field widths and order as struct thread0, with a _1 suffix */
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_2:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_2:26;
+ } wm9; /* as wm8, with a _2 suffix */
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_3:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_3:26;
+ } wm10; /* as wm8, with a _3 suffix */
+};
+
+struct brw_sampler_default_color { /* Four floats and nothing else; NOTE(review): presumably the target of brw_sampler_state.ss2.default_color_pointer -- confirm */
+ GLfloat color[4];
+};
+
+struct brw_sampler_state /* Four 32-bit words ss0..ss3, each with a named struct tag (brw_ss0..brw_ss3). */
+{
+
+ struct brw_ss0
+ {
+ GLuint shadow_function:3;
+ GLuint lod_bias:11; /* NOTE(review): signedness / fixed-point format is not stated here (declared unsigned) -- confirm */
+ GLuint min_filter:3;
+ GLuint mag_filter:3;
+ GLuint mip_filter:2;
+ GLuint base_level:5;
+ GLuint pad:1;
+ GLuint lod_preclamp:1;
+ GLuint default_color_mode:1;
+ GLuint pad0:1;
+ GLuint disable:1; /* note the inverted sense: set to turn the sampler off */
+ } ss0; /* widths sum to 32 */
+
+ struct brw_ss1
+ {
+ GLuint r_wrap_mode:3;
+ GLuint t_wrap_mode:3;
+ GLuint s_wrap_mode:3;
+ GLuint pad:3;
+ GLuint max_lod:10; /* NOTE(review): fixed-point format is not stated here -- confirm */
+ GLuint min_lod:10;
+ } ss1; /* widths sum to 32 */
+
+
+ struct brw_ss2
+ {
+ GLuint pad:5;
+ GLuint default_color_pointer:27;
+ } ss2; /* widths sum to 32 */
+
+ struct brw_ss3
+ {
+ GLuint pad:19;
+ GLuint max_aniso:3;
+ GLuint chroma_key_mode:1;
+ GLuint chroma_key_index:2;
+ GLuint chroma_key_enable:1;
+ GLuint monochrome_filter_width:3;
+ GLuint monochrome_filter_height:3;
+ } ss3; /* widths sum to 32 */
+};
+
+
+struct brw_clipper_viewport /* Four floats: x range then y range. NOTE(review): presumably the target of brw_clip_unit_state.clip6.clipper_viewport_state_ptr -- confirm */
+{
+ GLfloat xmin;
+ GLfloat xmax;
+ GLfloat ymin;
+ GLfloat ymax;
+};
+
+struct brw_cc_viewport /* Two floats giving a depth range. NOTE(review): presumably the target of brw_cc_unit_state.cc4.cc_viewport_state_offset -- confirm */
+{
+ GLfloat min_depth;
+ GLfloat max_depth;
+};
+
+struct brw_sf_viewport /* Six floats of viewport matrix terms followed by four 16-bit scissor coordinates. */
+{
+ struct {
+ GLfloat m00; /* NOTE(review): m00/m11/m22 presumably the x/y/z scale terms and m30/m31/m32 the translate terms -- confirm */
+ GLfloat m11;
+ GLfloat m22;
+ GLfloat m30;
+ GLfloat m31;
+ GLfloat m32;
+ } viewport;
+
+ /* scissor coordinates are inclusive */
+ struct {
+ GLshort xmin; /* signed 16-bit members, unlike the unsigned bit-fields of struct brw_drawrect */
+ GLshort ymin;
+ GLshort xmax;
+ GLshort ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct brw_surface_state /* Six 32-bit words ss0..ss5, each with a named struct tag (brw_surf_ss0..brw_surf_ss5). */
+{
+ struct brw_surf_ss0 {
+ GLuint cube_pos_z:1; /* six one-bit cube-face flags, declared +z,-z,+y,-y,+x,-x */
+ GLuint cube_neg_z:1;
+ GLuint cube_pos_y:1;
+ GLuint cube_neg_y:1;
+ GLuint cube_pos_x:1;
+ GLuint cube_neg_x:1;
+ GLuint pad:4;
+ GLuint mipmap_layout_mode:1;
+ GLuint vert_line_stride_ofs:1;
+ GLuint vert_line_stride:1;
+ GLuint color_blend:1;
+ GLuint writedisable_blue:1; /* inverted sense: set to block writes to the channel; declared blue, green, red, alpha */
+ GLuint writedisable_green:1;
+ GLuint writedisable_red:1;
+ GLuint writedisable_alpha:1;
+ GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */
+ GLuint data_return_format:1;
+ GLuint pad0:1;
+ GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */
+ } ss0; /* widths sum to 32 */
+
+ struct brw_surf_ss1 {
+ GLuint base_addr; /* a full word, not a bit-field */
+ } ss1;
+
+ struct brw_surf_ss2 {
+ GLuint pad:2;
+ GLuint mip_count:4;
+ GLuint width:13; /* NOTE(review): whether width/height are stored minus one is not stated here -- confirm */
+ GLuint height:13;
+ } ss2; /* widths sum to 32 */
+
+ struct brw_surf_ss3 {
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad:1;
+ GLuint pitch:18;
+ GLuint depth:11;
+ } ss3; /* widths sum to 32 */
+
+ struct brw_surf_ss4 {
+ GLuint multisample_position_palette_index:3;
+ GLuint pad1:1;
+ GLuint num_multisamples:3;
+ GLuint pad0:1;
+ GLuint render_target_view_extent:9;
+ GLuint min_array_elt:11;
+ GLuint min_lod:4;
+ } ss4; /* widths sum to 32 */
+
+ struct brw_surf_ss5 {
+ GLuint pad1:16;
+ GLuint llc_mapping:1;
+ GLuint mlc_mapping:1;
+ GLuint gfdt:1;
+ GLuint gfdt_src:1;
+ GLuint y_offset:4;
+ GLuint pad0:1;
+ GLuint x_offset:7; /* x_offset is 7 bits wide, y_offset only 4 */
+ } ss5; /* New in G4X */
+
+};
+
+
+
+struct brw_vertex_buffer_state /* Four words per vertex buffer: one bit-field word (vb0) and three plain words. */
+{
+ struct {
+ GLuint pitch:11;
+ GLuint pad:15;
+ GLuint access_type:1;
+ GLuint vb_index:5; /* 5 bits can represent indices 0..31 */
+ } vb0; /* widths sum to 32 */
+
+ GLuint start_addr;
+ GLuint max_index;
+#if 1 /* always compiled in; the guard only marks the member as questionable (see the comment on it) */
+ GLuint instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17 /* capacity of brw_vb_array_state.vb[] */
+
+struct brw_vb_array_state { /* struct header followed by a fixed-capacity array of BRW_VBP_MAX vertex-buffer entries. */
+ struct header header;
+ struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; /* NOTE(review): how many entries are actually in use is not recorded in this struct -- presumably conveyed via the header; confirm */
+};
+
+
+struct brw_vertex_element_state /* Two 32-bit words per vertex element: source description (ve0) and destination description (ve1). */
+{
+ struct
+ {
+ GLuint src_offset:11;
+ GLuint pad:5;
+ GLuint src_format:9; /* same width as brw_surface_state.ss0.surface_format; NOTE(review): presumably the same BRW_SURFACEFORMAT_x enum -- confirm */
+ GLuint pad0:1;
+ GLuint valid:1;
+ GLuint vertex_buffer_index:5; /* same width as brw_vertex_buffer_state.vb0.vb_index */
+ } ve0; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint dst_offset:8;
+ GLuint pad:8;
+ GLuint vfcomponent3:4; /* components are declared in descending order: 3, 2, 1, 0 */
+ GLuint vfcomponent2:4;
+ GLuint vfcomponent1:4;
+ GLuint vfcomponent0:4;
+ } ve1; /* widths sum to 32 */
+};
+
+#define BRW_VEP_MAX 18 /* capacity of brw_vertex_element_packet.ve[] */
+
+struct brw_vertex_element_packet { /* struct header followed by a fixed-capacity array of BRW_VEP_MAX vertex elements. */
+ struct header header;
+ struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct brw_urb_immediate { /* One 32-bit word (widths sum to 32); also embedded as the bits3.urb member of struct brw_instruction. */
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint response_length:4; /* the last five members match the tail of brw_instruction.bits3.generic */
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct brw_instruction /* Four 32-bit words: header, bits1 (dest + register file/type), bits2 (src0), bits3 (src1, or a message descriptor, or an immediate). */
+{
+ struct
+ {
+ GLuint opcode:7;
+ GLuint pad:1;
+ GLuint access_mode:1; /* NOTE(review): presumably selects between the *1 and *16 variants of bits1..bits3 below -- confirm */
+ GLuint mask_control:1;
+ GLuint dependency_control:2;
+ GLuint compression_control:2;
+ GLuint thread_control:2;
+ GLuint predicate_control:4;
+ GLuint predicate_inverse:1;
+ GLuint execution_size:3;
+ GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
+ GLuint pad0:2;
+ GLuint debug_control:1;
+ GLuint saturate:1;
+ } header; /* widths sum to 32 */
+
+ union { /* four alternative views of the same word; NOTE(review): da/ia presumably direct/indirect addressing -- confirm */
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+ GLuint dest_subreg_nr:5;
+ GLuint dest_reg_nr:8;
+ GLuint dest_horiz_stride:2;
+ GLuint dest_address_mode:1;
+ } da1; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2; /* 0x00000c00 */
+ GLuint src1_reg_type:3; /* 0x00007000 */
+ GLuint pad:1;
+ GLint dest_indirect_offset:10; /* offset against the deref'd address reg */
+ GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ GLuint dest_horiz_stride:2;
+ GLuint dest_address_mode:1;
+ } ia1; /* widths sum to 32; the mask comments above imply bit-fields are allocated from the least significant bit */
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+ GLuint dest_writemask:4;
+ GLuint dest_subreg_nr:1;
+ GLuint dest_reg_nr:8;
+ GLuint pad1:2;
+ GLuint dest_address_mode:1;
+ } da16; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint pad0:6; /* covers the positions where the other three views have src1_reg_file, src1_reg_type and pad */
+ GLuint dest_writemask:4;
+ GLint dest_indirect_offset:6; /* signed, and 6 bits here versus 10 in ia1 */
+ GLuint dest_subreg_nr:3;
+ GLuint pad1:2;
+ GLuint dest_address_mode:1;
+ } ia16; /* widths sum to 32 */
+ } bits1;
+
+
+ union { /* src0 operand: four addressing views plus the IGDNG send view */
+ struct
+ {
+ GLuint src0_subreg_nr:5;
+ GLuint src0_reg_nr:8;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_horiz_stride:2;
+ GLuint src0_width:3;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad:6;
+ } da1; /* widths sum to 32 */
+
+ struct
+ {
+ GLint src0_indirect_offset:10; /* signed bit-field */
+ GLuint src0_subreg_nr:3;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_horiz_stride:2;
+ GLuint src0_width:3;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad:6;
+ } ia1; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint src0_swz_x:2;
+ GLuint src0_swz_y:2;
+ GLuint src0_subreg_nr:1;
+ GLuint src0_reg_nr:8;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_swz_z:2; /* the swizzle is split: x,y before the register fields, z,w after */
+ GLuint src0_swz_w:2;
+ GLuint pad0:1;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } da16; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint src0_swz_x:2;
+ GLuint src0_swz_y:2;
+ GLint src0_indirect_offset:6; /* signed bit-field */
+ GLuint src0_subreg_nr:3;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_swz_z:2;
+ GLuint src0_swz_w:2;
+ GLuint pad0:1;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } ia16; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint pad:26;
+ GLuint end_of_thread:1;
+ GLuint pad1:1;
+ GLuint sfid:4;
+ } send_igdng; /* for IGDNG only */
+
+ } bits2;
+
+ union /* src1 operand views, message-descriptor views (paired legacy / _igdng layouts), or a raw immediate (d, ud, f) */
+ {
+ struct
+ {
+ GLuint src1_subreg_nr:5;
+ GLuint src1_reg_nr:8;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_horiz_stride:2;
+ GLuint src1_width:3;
+ GLuint src1_vert_stride:4;
+ GLuint pad0:7; /* unlike bits2.da1 there is no flag_reg_nr member; its position is padding here */
+ } da1; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint src1_swz_x:2;
+ GLuint src1_swz_y:2;
+ GLuint src1_subreg_nr:1;
+ GLuint src1_reg_nr:8;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_swz_z:2;
+ GLuint src1_swz_w:2;
+ GLuint pad1:1;
+ GLuint src1_vert_stride:4;
+ GLuint pad2:7; /* no flag_reg_nr member in this view either */
+ } da16; /* widths sum to 32 */
+
+ struct
+ {
+ GLint src1_indirect_offset:10; /* signed bit-field */
+ GLuint src1_subreg_nr:3;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_horiz_stride:2;
+ GLuint src1_width:3;
+ GLuint src1_vert_stride:4;
+ GLuint flag_reg_nr:1; /* the indirect src1 views do declare flag_reg_nr, unlike da1/da16 above */
+ GLuint pad1:6;
+ } ia1; /* widths sum to 32 */
+
+ struct
+ {
+ GLuint src1_swz_x:2;
+ GLuint src1_swz_y:2;
+ GLint src1_indirect_offset:6; /* signed bit-field */
+ GLuint src1_subreg_nr:3;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint pad0:1; /* padding where the other three src1 views have src1_address_mode */
+ GLuint src1_swz_z:2;
+ GLuint src1_swz_w:2;
+ GLuint pad1:1;
+ GLuint src1_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad2:6;
+ } ia16; /* widths sum to 32 */
+
+
+ struct
+ {
+ GLint jump_count:16; /* note: signed */
+ GLuint pop_count:4;
+ GLuint pad0:12;
+ } if_else; /* widths sum to 32 */
+
+ struct { /* legacy message tail shared by math/sampler/urb/dp_read/dp_write/generic: response_length:4, msg_length:4, msg_target:4, pad:3, end_of_thread:1 */
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint pad0:8;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } math; /* widths sum to 32 */
+
+ struct { /* _igdng message tail shared by all *_igdng views: header_present:1, response_length:5, msg_length:4, pad:2, end_of_thread:1 (no msg_target) */
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint snapshot:1;
+ GLuint pad0:10;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } math_igdng; /* widths sum to 32 */
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint return_format:2;
+ GLuint msg_type:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } sampler; /* widths sum to 32 */
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4; /* 4 bits wide here; takes the space of return_format:2 + msg_type:2 in the plain sampler view */
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } sampler_g4x; /* widths sum to 32 */
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4;
+ GLuint simd_mode:2;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } sampler_igdng; /* widths sum to 32 */
+
+ struct brw_urb_immediate urb; /* legacy URB view, declared as a stand-alone struct earlier in this file */
+
+ struct {
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } urb_igdng; /* widths sum to 32; first seven members match struct brw_urb_immediate */
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:4;
+ GLuint msg_type:2;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read; /* widths sum to 32 */
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3; /* msg_control shrinks to 3 bits and msg_type grows to 3 relative to dp_read */
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_read_igdng; /* widths sum to 32 */
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_write; /* widths sum to 32 */
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_write_igdng; /* widths sum to 32; first 16 bits match dp_write */
+
+ struct {
+ GLuint pad:16; /* the target-specific low 16 bits are opaque in this view */
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } generic; /* widths sum to 32 */
+
+ struct {
+ GLuint pad:19;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } generic_igdng; /* widths sum to 32 */
+
+ GLint d; /* whole-word views of bits3: signed integer, unsigned integer, float */
+ GLuint ud;
+ float f; /* declared as plain float, not GLfloat like the rest of the file */
+ } bits3;
+};
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c
new file mode 100644
index 0000000000..cd40fc6d61
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.c
@@ -0,0 +1,1247 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Dump i965 data structures.
+ *
+ * Generated automatically from brw_structs.h by brw_structs_dump.py.
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_types.h"
+#include "brw_structs.h"
+#include "brw_structs_dump.h"
+
+/**
+ * Dump a brw_3d_control: the header bit-fields, the dest fields, then
+ * dword2 and dword3, one debug_printf line per field, values in hex.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_3d_control(const struct brw_3d_control *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.notify_enable = 0x%x\n", ptr->header.notify_enable);
+ debug_printf("\t\t.header.wc_flush_enable = 0x%x\n", ptr->header.wc_flush_enable);
+ debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", ptr->header.depth_stall_enable);
+ debug_printf("\t\t.header.operation = 0x%x\n", ptr->header.operation);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.dest.dest_addr_type = 0x%x\n", ptr->dest.dest_addr_type);
+ debug_printf("\t\t.dest.dest_addr = 0x%x\n", ptr->dest.dest_addr);
+ debug_printf("\t\t.dword2 = 0x%x\n", ptr->dword2);
+ debug_printf("\t\t.dword3 = 0x%x\n", ptr->dword3);
+}
+
+/**
+ * Dump a brw_3d_primitive: the header bit-fields followed by the
+ * vertex/instance count and location members, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_3d_primitive(const struct brw_3d_primitive *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.topology = 0x%x\n", ptr->header.topology);
+ debug_printf("\t\t.header.indexed = 0x%x\n", ptr->header.indexed);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.verts_per_instance = 0x%x\n", ptr->verts_per_instance);
+ debug_printf("\t\t.start_vert_location = 0x%x\n", ptr->start_vert_location);
+ debug_printf("\t\t.instance_count = 0x%x\n", ptr->instance_count);
+ debug_printf("\t\t.start_instance_location = 0x%x\n", ptr->start_instance_location);
+ debug_printf("\t\t.base_vert_location = 0x%x\n", ptr->base_vert_location);
+}
+
+/**
+ * Dump a brw_aa_line_parameters: header length/opcode, then the
+ * bits0 and bits1 coverage fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", ptr->bits0.aa_coverage_scope);
+ debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", ptr->bits0.aa_coverage_bias);
+ debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", ptr->bits1.aa_coverage_endcap_slope);
+ debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", ptr->bits1.aa_coverage_endcap_bias);
+}
+
+/**
+ * Dump a brw_binding_table_pointers: header length/opcode and the
+ * vs, gs, clp, sf and wm members, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.vs = 0x%x\n", ptr->vs);
+ debug_printf("\t\t.gs = 0x%x\n", ptr->gs);
+ debug_printf("\t\t.clp = 0x%x\n", ptr->clp);
+ debug_printf("\t\t.sf = 0x%x\n", ptr->sf);
+ debug_printf("\t\t.wm = 0x%x\n", ptr->wm);
+}
+
+/**
+ * Dump a brw_blend_constant_color: header length/opcode in hex, then
+ * the four blend_constant_color[] elements formatted with %f.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.blend_constant_color[0] = %f\n", ptr->blend_constant_color[0]);
+ debug_printf("\t\t.blend_constant_color[1] = %f\n", ptr->blend_constant_color[1]);
+ debug_printf("\t\t.blend_constant_color[2] = %f\n", ptr->blend_constant_color[2]);
+ debug_printf("\t\t.blend_constant_color[3] = %f\n", ptr->blend_constant_color[3]);
+}
+
+/**
+ * Dump a brw_cc0: the bf_stencil_* fields followed by the stencil_*
+ * fields, one hex value per debug_printf line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc0(const struct brw_cc0 *ptr)
+{
+ debug_printf("\t\t.bf_stencil_pass_depth_pass_op = 0x%x\n", ptr->bf_stencil_pass_depth_pass_op);
+ debug_printf("\t\t.bf_stencil_pass_depth_fail_op = 0x%x\n", ptr->bf_stencil_pass_depth_fail_op);
+ debug_printf("\t\t.bf_stencil_fail_op = 0x%x\n", ptr->bf_stencil_fail_op);
+ debug_printf("\t\t.bf_stencil_func = 0x%x\n", ptr->bf_stencil_func);
+ debug_printf("\t\t.bf_stencil_enable = 0x%x\n", ptr->bf_stencil_enable);
+ debug_printf("\t\t.stencil_write_enable = 0x%x\n", ptr->stencil_write_enable);
+ debug_printf("\t\t.stencil_pass_depth_pass_op = 0x%x\n", ptr->stencil_pass_depth_pass_op);
+ debug_printf("\t\t.stencil_pass_depth_fail_op = 0x%x\n", ptr->stencil_pass_depth_fail_op);
+ debug_printf("\t\t.stencil_fail_op = 0x%x\n", ptr->stencil_fail_op);
+ debug_printf("\t\t.stencil_func = 0x%x\n", ptr->stencil_func);
+ debug_printf("\t\t.stencil_enable = 0x%x\n", ptr->stencil_enable);
+}
+
+/**
+ * Dump a brw_cc1: bf_stencil_ref, the stencil write/test masks and
+ * stencil_ref, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc1(const struct brw_cc1 *ptr)
+{
+ debug_printf("\t\t.bf_stencil_ref = 0x%x\n", ptr->bf_stencil_ref);
+ debug_printf("\t\t.stencil_write_mask = 0x%x\n", ptr->stencil_write_mask);
+ debug_printf("\t\t.stencil_test_mask = 0x%x\n", ptr->stencil_test_mask);
+ debug_printf("\t\t.stencil_ref = 0x%x\n", ptr->stencil_ref);
+}
+
+/**
+ * Dump a brw_cc2: logicop_enable, the depth_* fields and the
+ * bf_stencil write/test masks, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc2(const struct brw_cc2 *ptr)
+{
+ debug_printf("\t\t.logicop_enable = 0x%x\n", ptr->logicop_enable);
+ debug_printf("\t\t.depth_write_enable = 0x%x\n", ptr->depth_write_enable);
+ debug_printf("\t\t.depth_test_function = 0x%x\n", ptr->depth_test_function);
+ debug_printf("\t\t.depth_test = 0x%x\n", ptr->depth_test);
+ debug_printf("\t\t.bf_stencil_write_mask = 0x%x\n", ptr->bf_stencil_write_mask);
+ debug_printf("\t\t.bf_stencil_test_mask = 0x%x\n", ptr->bf_stencil_test_mask);
+}
+
+/**
+ * Dump a brw_cc3: the alpha_test_* fields and the two blend enable
+ * fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc3(const struct brw_cc3 *ptr)
+{
+ debug_printf("\t\t.alpha_test_func = 0x%x\n", ptr->alpha_test_func);
+ debug_printf("\t\t.alpha_test = 0x%x\n", ptr->alpha_test);
+ debug_printf("\t\t.blend_enable = 0x%x\n", ptr->blend_enable);
+ debug_printf("\t\t.ia_blend_enable = 0x%x\n", ptr->ia_blend_enable);
+ debug_printf("\t\t.alpha_test_format = 0x%x\n", ptr->alpha_test_format);
+}
+
+/**
+ * Dump a brw_cc4: its single cc_viewport_state_offset field, in hex.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc4(const struct brw_cc4 *ptr)
+{
+ debug_printf("\t\t.cc_viewport_state_offset = 0x%x\n", ptr->cc_viewport_state_offset);
+}
+
+/**
+ * Dump a brw_cc5: the ia_* blend fields, statistics_enable,
+ * logicop_func and dither_enable, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc5(const struct brw_cc5 *ptr)
+{
+ debug_printf("\t\t.ia_dest_blend_factor = 0x%x\n", ptr->ia_dest_blend_factor);
+ debug_printf("\t\t.ia_src_blend_factor = 0x%x\n", ptr->ia_src_blend_factor);
+ debug_printf("\t\t.ia_blend_function = 0x%x\n", ptr->ia_blend_function);
+ debug_printf("\t\t.statistics_enable = 0x%x\n", ptr->statistics_enable);
+ debug_printf("\t\t.logicop_func = 0x%x\n", ptr->logicop_func);
+ debug_printf("\t\t.dither_enable = 0x%x\n", ptr->dither_enable);
+}
+
+/**
+ * Dump a brw_cc6: the clamp_* fields, the x/y dither offsets and the
+ * blend factor/function fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc6(const struct brw_cc6 *ptr)
+{
+ debug_printf("\t\t.clamp_post_alpha_blend = 0x%x\n", ptr->clamp_post_alpha_blend);
+ debug_printf("\t\t.clamp_pre_alpha_blend = 0x%x\n", ptr->clamp_pre_alpha_blend);
+ debug_printf("\t\t.clamp_range = 0x%x\n", ptr->clamp_range);
+ debug_printf("\t\t.y_dither_offset = 0x%x\n", ptr->y_dither_offset);
+ debug_printf("\t\t.x_dither_offset = 0x%x\n", ptr->x_dither_offset);
+ debug_printf("\t\t.dest_blend_factor = 0x%x\n", ptr->dest_blend_factor);
+ debug_printf("\t\t.src_blend_factor = 0x%x\n", ptr->src_blend_factor);
+ debug_printf("\t\t.blend_function = 0x%x\n", ptr->blend_function);
+}
+
+/**
+ * Dump a brw_cc7: alpha_ref is printed twice over, once through its
+ * .f member with %f and once as the four .ub[] elements in hex.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc7(const struct brw_cc7 *ptr)
+{
+ debug_printf("\t\t.alpha_ref.f = %f\n", ptr->alpha_ref.f);
+ debug_printf("\t\t.alpha_ref.ub[0] = 0x%x\n", ptr->alpha_ref.ub[0]);
+ debug_printf("\t\t.alpha_ref.ub[1] = 0x%x\n", ptr->alpha_ref.ub[1]);
+ debug_printf("\t\t.alpha_ref.ub[2] = 0x%x\n", ptr->alpha_ref.ub[2]);
+ debug_printf("\t\t.alpha_ref.ub[3] = 0x%x\n", ptr->alpha_ref.ub[3]);
+}
+
+/**
+ * Dump a brw_cc_unit_state: every field of the cc0 through cc7
+ * members in that order, each label prefixed with its member name.
+ * Values are printed in hex except cc7.alpha_ref.f, which uses %f.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr)
+{
+ debug_printf("\t\t.cc0.bf_stencil_pass_depth_pass_op = 0x%x\n", ptr->cc0.bf_stencil_pass_depth_pass_op);
+ debug_printf("\t\t.cc0.bf_stencil_pass_depth_fail_op = 0x%x\n", ptr->cc0.bf_stencil_pass_depth_fail_op);
+ debug_printf("\t\t.cc0.bf_stencil_fail_op = 0x%x\n", ptr->cc0.bf_stencil_fail_op);
+ debug_printf("\t\t.cc0.bf_stencil_func = 0x%x\n", ptr->cc0.bf_stencil_func);
+ debug_printf("\t\t.cc0.bf_stencil_enable = 0x%x\n", ptr->cc0.bf_stencil_enable);
+ debug_printf("\t\t.cc0.stencil_write_enable = 0x%x\n", ptr->cc0.stencil_write_enable);
+ debug_printf("\t\t.cc0.stencil_pass_depth_pass_op = 0x%x\n", ptr->cc0.stencil_pass_depth_pass_op);
+ debug_printf("\t\t.cc0.stencil_pass_depth_fail_op = 0x%x\n", ptr->cc0.stencil_pass_depth_fail_op);
+ debug_printf("\t\t.cc0.stencil_fail_op = 0x%x\n", ptr->cc0.stencil_fail_op);
+ debug_printf("\t\t.cc0.stencil_func = 0x%x\n", ptr->cc0.stencil_func);
+ debug_printf("\t\t.cc0.stencil_enable = 0x%x\n", ptr->cc0.stencil_enable);
+ debug_printf("\t\t.cc1.bf_stencil_ref = 0x%x\n", ptr->cc1.bf_stencil_ref);
+ debug_printf("\t\t.cc1.stencil_write_mask = 0x%x\n", ptr->cc1.stencil_write_mask);
+ debug_printf("\t\t.cc1.stencil_test_mask = 0x%x\n", ptr->cc1.stencil_test_mask);
+ debug_printf("\t\t.cc1.stencil_ref = 0x%x\n", ptr->cc1.stencil_ref);
+ debug_printf("\t\t.cc2.logicop_enable = 0x%x\n", ptr->cc2.logicop_enable);
+ debug_printf("\t\t.cc2.depth_write_enable = 0x%x\n", ptr->cc2.depth_write_enable);
+ debug_printf("\t\t.cc2.depth_test_function = 0x%x\n", ptr->cc2.depth_test_function);
+ debug_printf("\t\t.cc2.depth_test = 0x%x\n", ptr->cc2.depth_test);
+ debug_printf("\t\t.cc2.bf_stencil_write_mask = 0x%x\n", ptr->cc2.bf_stencil_write_mask);
+ debug_printf("\t\t.cc2.bf_stencil_test_mask = 0x%x\n", ptr->cc2.bf_stencil_test_mask);
+ debug_printf("\t\t.cc3.alpha_test_func = 0x%x\n", ptr->cc3.alpha_test_func);
+ debug_printf("\t\t.cc3.alpha_test = 0x%x\n", ptr->cc3.alpha_test);
+ debug_printf("\t\t.cc3.blend_enable = 0x%x\n", ptr->cc3.blend_enable);
+ debug_printf("\t\t.cc3.ia_blend_enable = 0x%x\n", ptr->cc3.ia_blend_enable);
+ debug_printf("\t\t.cc3.alpha_test_format = 0x%x\n", ptr->cc3.alpha_test_format);
+ debug_printf("\t\t.cc4.cc_viewport_state_offset = 0x%x\n", ptr->cc4.cc_viewport_state_offset);
+ debug_printf("\t\t.cc5.ia_dest_blend_factor = 0x%x\n", ptr->cc5.ia_dest_blend_factor);
+ debug_printf("\t\t.cc5.ia_src_blend_factor = 0x%x\n", ptr->cc5.ia_src_blend_factor);
+ debug_printf("\t\t.cc5.ia_blend_function = 0x%x\n", ptr->cc5.ia_blend_function);
+ debug_printf("\t\t.cc5.statistics_enable = 0x%x\n", ptr->cc5.statistics_enable);
+ debug_printf("\t\t.cc5.logicop_func = 0x%x\n", ptr->cc5.logicop_func);
+ debug_printf("\t\t.cc5.dither_enable = 0x%x\n", ptr->cc5.dither_enable);
+ debug_printf("\t\t.cc6.clamp_post_alpha_blend = 0x%x\n", ptr->cc6.clamp_post_alpha_blend);
+ debug_printf("\t\t.cc6.clamp_pre_alpha_blend = 0x%x\n", ptr->cc6.clamp_pre_alpha_blend);
+ debug_printf("\t\t.cc6.clamp_range = 0x%x\n", ptr->cc6.clamp_range);
+ debug_printf("\t\t.cc6.y_dither_offset = 0x%x\n", ptr->cc6.y_dither_offset);
+ debug_printf("\t\t.cc6.x_dither_offset = 0x%x\n", ptr->cc6.x_dither_offset);
+ debug_printf("\t\t.cc6.dest_blend_factor = 0x%x\n", ptr->cc6.dest_blend_factor);
+ debug_printf("\t\t.cc6.src_blend_factor = 0x%x\n", ptr->cc6.src_blend_factor);
+ debug_printf("\t\t.cc6.blend_function = 0x%x\n", ptr->cc6.blend_function);
+ debug_printf("\t\t.cc7.alpha_ref.f = %f\n", ptr->cc7.alpha_ref.f);
+ debug_printf("\t\t.cc7.alpha_ref.ub[0] = 0x%x\n", ptr->cc7.alpha_ref.ub[0]);
+ debug_printf("\t\t.cc7.alpha_ref.ub[1] = 0x%x\n", ptr->cc7.alpha_ref.ub[1]);
+ debug_printf("\t\t.cc7.alpha_ref.ub[2] = 0x%x\n", ptr->cc7.alpha_ref.ub[2]);
+ debug_printf("\t\t.cc7.alpha_ref.ub[3] = 0x%x\n", ptr->cc7.alpha_ref.ub[3]);
+}
+
+/**
+ * Dump a brw_cc_viewport: min_depth and max_depth, formatted with %f.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cc_viewport(const struct brw_cc_viewport *ptr)
+{
+ debug_printf("\t\t.min_depth = %f\n", ptr->min_depth);
+ debug_printf("\t\t.max_depth = %f\n", ptr->max_depth);
+}
+
+/**
+ * Dump a brw_clip_unit_state: the thread0..thread4, clip5 and clip6
+ * bit-fields in hex, followed by the four viewport_* bounds with %f.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr)
+{
+ debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+ debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+ debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+ debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+ debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+ debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+ debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+ debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+ debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+ debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+ debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+ debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+ debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+ debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+ debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+ debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+ debug_printf("\t\t.thread4.gs_output_stats = 0x%x\n", ptr->thread4.gs_output_stats);
+ debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+ debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+ debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+ debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+ debug_printf("\t\t.clip5.clip_mode = 0x%x\n", ptr->clip5.clip_mode);
+ debug_printf("\t\t.clip5.userclip_enable_flags = 0x%x\n", ptr->clip5.userclip_enable_flags);
+ debug_printf("\t\t.clip5.userclip_must_clip = 0x%x\n", ptr->clip5.userclip_must_clip);
+ debug_printf("\t\t.clip5.negative_w_clip_test = 0x%x\n", ptr->clip5.negative_w_clip_test);
+ debug_printf("\t\t.clip5.guard_band_enable = 0x%x\n", ptr->clip5.guard_band_enable);
+ debug_printf("\t\t.clip5.viewport_z_clip_enable = 0x%x\n", ptr->clip5.viewport_z_clip_enable);
+ debug_printf("\t\t.clip5.viewport_xy_clip_enable = 0x%x\n", ptr->clip5.viewport_xy_clip_enable);
+ debug_printf("\t\t.clip5.vertex_position_space = 0x%x\n", ptr->clip5.vertex_position_space);
+ debug_printf("\t\t.clip5.api_mode = 0x%x\n", ptr->clip5.api_mode);
+ debug_printf("\t\t.clip6.clipper_viewport_state_ptr = 0x%x\n", ptr->clip6.clipper_viewport_state_ptr);
+ debug_printf("\t\t.viewport_xmin = %f\n", ptr->viewport_xmin);
+ debug_printf("\t\t.viewport_xmax = %f\n", ptr->viewport_xmax);
+ debug_printf("\t\t.viewport_ymin = %f\n", ptr->viewport_ymin);
+ debug_printf("\t\t.viewport_ymax = %f\n", ptr->viewport_ymax);
+}
+
+/**
+ * Dump a brw_clipper_viewport: xmin, xmax, ymin and ymax, each
+ * formatted with %f.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr)
+{
+ debug_printf("\t\t.xmin = %f\n", ptr->xmin);
+ debug_printf("\t\t.xmax = %f\n", ptr->xmax);
+ debug_printf("\t\t.ymin = %f\n", ptr->ymin);
+ debug_printf("\t\t.ymax = %f\n", ptr->ymax);
+}
+
+/**
+ * Dump a brw_constant_buffer: header length/valid/opcode and the
+ * bits0 buffer length and address, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_constant_buffer(const struct brw_constant_buffer *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.valid = 0x%x\n", ptr->header.valid);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.bits0.buffer_length = 0x%x\n", ptr->bits0.buffer_length);
+ debug_printf("\t\t.bits0.buffer_address = 0x%x\n", ptr->bits0.buffer_address);
+}
+
+/**
+ * Dump a brw_cs_urb_state: header length/opcode and the bits0
+ * nr_urb_entries and urb_entry_size fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.bits0.nr_urb_entries = 0x%x\n", ptr->bits0.nr_urb_entries);
+ debug_printf("\t\t.bits0.urb_entry_size = 0x%x\n", ptr->bits0.urb_entry_size);
+}
+
+/**
+ * Dump a brw_depthbuffer: the .bits view of header, dword1, dword3
+ * and dword4, plus dword2_base_addr, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_depthbuffer(const struct brw_depthbuffer *ptr)
+{
+ debug_printf("\t\t.header.bits.length = 0x%x\n", ptr->header.bits.length);
+ debug_printf("\t\t.header.bits.opcode = 0x%x\n", ptr->header.bits.opcode);
+ debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", ptr->dword1.bits.pitch);
+ debug_printf("\t\t.dword1.bits.format = 0x%x\n", ptr->dword1.bits.format);
+ debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", ptr->dword1.bits.software_tiled_rendering_mode);
+ debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", ptr->dword1.bits.depth_offset_disable);
+ debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", ptr->dword1.bits.tile_walk);
+ debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", ptr->dword1.bits.tiled_surface);
+ debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", ptr->dword1.bits.surface_type);
+ debug_printf("\t\t.dword2_base_addr = 0x%x\n", ptr->dword2_base_addr);
+ debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", ptr->dword3.bits.mipmap_layout);
+ debug_printf("\t\t.dword3.bits.lod = 0x%x\n", ptr->dword3.bits.lod);
+ debug_printf("\t\t.dword3.bits.width = 0x%x\n", ptr->dword3.bits.width);
+ debug_printf("\t\t.dword3.bits.height = 0x%x\n", ptr->dword3.bits.height);
+ debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", ptr->dword4.bits.min_array_element);
+ debug_printf("\t\t.dword4.bits.depth = 0x%x\n", ptr->dword4.bits.depth);
+}
+
+/**
+ * Dump a brw_depthbuffer_g4x: the .bits view of header, dword1,
+ * dword3 and dword4, dword2_base_addr, and finally the dword5
+ * xoffset/yoffset fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr)
+{
+ debug_printf("\t\t.header.bits.length = 0x%x\n", ptr->header.bits.length);
+ debug_printf("\t\t.header.bits.opcode = 0x%x\n", ptr->header.bits.opcode);
+ debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", ptr->dword1.bits.pitch);
+ debug_printf("\t\t.dword1.bits.format = 0x%x\n", ptr->dword1.bits.format);
+ debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", ptr->dword1.bits.software_tiled_rendering_mode);
+ debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", ptr->dword1.bits.depth_offset_disable);
+ debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", ptr->dword1.bits.tile_walk);
+ debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", ptr->dword1.bits.tiled_surface);
+ debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", ptr->dword1.bits.surface_type);
+ debug_printf("\t\t.dword2_base_addr = 0x%x\n", ptr->dword2_base_addr);
+ debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", ptr->dword3.bits.mipmap_layout);
+ debug_printf("\t\t.dword3.bits.lod = 0x%x\n", ptr->dword3.bits.lod);
+ debug_printf("\t\t.dword3.bits.width = 0x%x\n", ptr->dword3.bits.width);
+ debug_printf("\t\t.dword3.bits.height = 0x%x\n", ptr->dword3.bits.height);
+ debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", ptr->dword4.bits.min_array_element);
+ debug_printf("\t\t.dword4.bits.depth = 0x%x\n", ptr->dword4.bits.depth);
+ debug_printf("\t\t.dword5.bits.xoffset = 0x%x\n", ptr->dword5.bits.xoffset);
+ debug_printf("\t\t.dword5.bits.yoffset = 0x%x\n", ptr->dword5.bits.yoffset);
+}
+
+/**
+ * Dump a brw_drawrect: header length/opcode, then the min, max and
+ * org x/y members, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_drawrect(const struct brw_drawrect *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.xmin = 0x%x\n", ptr->xmin);
+ debug_printf("\t\t.ymin = 0x%x\n", ptr->ymin);
+ debug_printf("\t\t.xmax = 0x%x\n", ptr->xmax);
+ debug_printf("\t\t.ymax = 0x%x\n", ptr->ymax);
+ debug_printf("\t\t.xorg = 0x%x\n", ptr->xorg);
+ debug_printf("\t\t.yorg = 0x%x\n", ptr->yorg);
+}
+
+/**
+ * Dump a brw_global_depth_offset_clamp: header length/opcode in hex
+ * and depth_offset_clamp formatted with %f.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.depth_offset_clamp = %f\n", ptr->depth_offset_clamp);
+}
+
+/**
+ * Dump a brw_gs_unit_state: the thread0..thread4 bit-fields followed
+ * by the gs5 and gs6 fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr)
+{
+ debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+ debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+ debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", ptr->thread1.ext_halt_exception_enable);
+ debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+ debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+ debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", ptr->thread1.timeout_exception_enable);
+ debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+ debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", ptr->thread1.depth_coef_urb_read_offset);
+ debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+ debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+ debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+ debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+ debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+ debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+ debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+ debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+ debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+ debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+ debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+ debug_printf("\t\t.thread4.rendering_enable = 0x%x\n", ptr->thread4.rendering_enable);
+ debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+ debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+ debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+ debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+ debug_printf("\t\t.gs5.sampler_count = 0x%x\n", ptr->gs5.sampler_count);
+ debug_printf("\t\t.gs5.sampler_state_pointer = 0x%x\n", ptr->gs5.sampler_state_pointer);
+ debug_printf("\t\t.gs6.max_vp_index = 0x%x\n", ptr->gs6.max_vp_index);
+ debug_printf("\t\t.gs6.svbi_post_inc_value = 0x%x\n", ptr->gs6.svbi_post_inc_value);
+ debug_printf("\t\t.gs6.svbi_post_inc_enable = 0x%x\n", ptr->gs6.svbi_post_inc_enable);
+ debug_printf("\t\t.gs6.svbi_payload = 0x%x\n", ptr->gs6.svbi_payload);
+ debug_printf("\t\t.gs6.discard_adjaceny = 0x%x\n", ptr->gs6.discard_adjaceny);
+ debug_printf("\t\t.gs6.reorder_enable = 0x%x\n", ptr->gs6.reorder_enable);
+}
+
+/**
+ * Dump a brw_indexbuffer: the header.bits fields followed by
+ * buffer_start and buffer_end, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_indexbuffer(const struct brw_indexbuffer *ptr)
+{
+ debug_printf("\t\t.header.bits.length = 0x%x\n", ptr->header.bits.length);
+ debug_printf("\t\t.header.bits.index_format = 0x%x\n", ptr->header.bits.index_format);
+ debug_printf("\t\t.header.bits.cut_index_enable = 0x%x\n", ptr->header.bits.cut_index_enable);
+ debug_printf("\t\t.header.bits.opcode = 0x%x\n", ptr->header.bits.opcode);
+ debug_printf("\t\t.buffer_start = 0x%x\n", ptr->buffer_start);
+ debug_printf("\t\t.buffer_end = 0x%x\n", ptr->buffer_end);
+}
+
+/**
+ * Dump a brw_line_stipple: header length/opcode, bits0.pattern and
+ * the two bits1 repeat-count fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_line_stipple(const struct brw_line_stipple *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.bits0.pattern = 0x%x\n", ptr->bits0.pattern);
+ debug_printf("\t\t.bits1.repeat_count = 0x%x\n", ptr->bits1.repeat_count);
+ debug_printf("\t\t.bits1.inverse_repeat_count = 0x%x\n", ptr->bits1.inverse_repeat_count);
+}
+
+/**
+ * Dump a brw_mi_flush: its flags and opcode fields, in hex.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_mi_flush(const struct brw_mi_flush *ptr)
+{
+ debug_printf("\t\t.flags = 0x%x\n", ptr->flags);
+ debug_printf("\t\t.opcode = 0x%x\n", ptr->opcode);
+}
+
+/**
+ * Dump a brw_pipe_control: the header bit-fields, the bits1
+ * destination address fields, then data0 and data1, one hex value
+ * per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_pipe_control(const struct brw_pipe_control *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.notify_enable = 0x%x\n", ptr->header.notify_enable);
+ debug_printf("\t\t.header.texture_cache_flush_enable = 0x%x\n", ptr->header.texture_cache_flush_enable);
+ debug_printf("\t\t.header.indirect_state_pointers_disable = 0x%x\n", ptr->header.indirect_state_pointers_disable);
+ debug_printf("\t\t.header.instruction_state_cache_flush_enable = 0x%x\n", ptr->header.instruction_state_cache_flush_enable);
+ debug_printf("\t\t.header.write_cache_flush_enable = 0x%x\n", ptr->header.write_cache_flush_enable);
+ debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", ptr->header.depth_stall_enable);
+ debug_printf("\t\t.header.post_sync_operation = 0x%x\n", ptr->header.post_sync_operation);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.bits1.dest_addr_type = 0x%x\n", ptr->bits1.dest_addr_type);
+ debug_printf("\t\t.bits1.dest_addr = 0x%x\n", ptr->bits1.dest_addr);
+ debug_printf("\t\t.data0 = 0x%x\n", ptr->data0);
+ debug_printf("\t\t.data1 = 0x%x\n", ptr->data1);
+}
+
+/**
+ * Dump a brw_pipeline_select: the header pipeline_select and opcode
+ * fields, in hex.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_pipeline_select(const struct brw_pipeline_select *ptr)
+{
+ debug_printf("\t\t.header.pipeline_select = 0x%x\n", ptr->header.pipeline_select);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+}
+
+/**
+ * Dump a brw_pipelined_state_pointers: header length/opcode, then the
+ * offset field of vs, gs, clp, sf, wm and cc (gs and clp also have an
+ * enable field, printed first), one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.vs.offset = 0x%x\n", ptr->vs.offset);
+ debug_printf("\t\t.gs.enable = 0x%x\n", ptr->gs.enable);
+ debug_printf("\t\t.gs.offset = 0x%x\n", ptr->gs.offset);
+ debug_printf("\t\t.clp.enable = 0x%x\n", ptr->clp.enable);
+ debug_printf("\t\t.clp.offset = 0x%x\n", ptr->clp.offset);
+ debug_printf("\t\t.sf.offset = 0x%x\n", ptr->sf.offset);
+ debug_printf("\t\t.wm.offset = 0x%x\n", ptr->wm.offset);
+ debug_printf("\t\t.cc.offset = 0x%x\n", ptr->cc.offset);
+}
+
+/**
+ * Dump a brw_polygon_stipple: header length/opcode, then stipple[0]
+ * through stipple[31], one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.stipple[0] = 0x%x\n", ptr->stipple[0]);
+ debug_printf("\t\t.stipple[1] = 0x%x\n", ptr->stipple[1]);
+ debug_printf("\t\t.stipple[2] = 0x%x\n", ptr->stipple[2]);
+ debug_printf("\t\t.stipple[3] = 0x%x\n", ptr->stipple[3]);
+ debug_printf("\t\t.stipple[4] = 0x%x\n", ptr->stipple[4]);
+ debug_printf("\t\t.stipple[5] = 0x%x\n", ptr->stipple[5]);
+ debug_printf("\t\t.stipple[6] = 0x%x\n", ptr->stipple[6]);
+ debug_printf("\t\t.stipple[7] = 0x%x\n", ptr->stipple[7]);
+ debug_printf("\t\t.stipple[8] = 0x%x\n", ptr->stipple[8]);
+ debug_printf("\t\t.stipple[9] = 0x%x\n", ptr->stipple[9]);
+ debug_printf("\t\t.stipple[10] = 0x%x\n", ptr->stipple[10]);
+ debug_printf("\t\t.stipple[11] = 0x%x\n", ptr->stipple[11]);
+ debug_printf("\t\t.stipple[12] = 0x%x\n", ptr->stipple[12]);
+ debug_printf("\t\t.stipple[13] = 0x%x\n", ptr->stipple[13]);
+ debug_printf("\t\t.stipple[14] = 0x%x\n", ptr->stipple[14]);
+ debug_printf("\t\t.stipple[15] = 0x%x\n", ptr->stipple[15]);
+ debug_printf("\t\t.stipple[16] = 0x%x\n", ptr->stipple[16]);
+ debug_printf("\t\t.stipple[17] = 0x%x\n", ptr->stipple[17]);
+ debug_printf("\t\t.stipple[18] = 0x%x\n", ptr->stipple[18]);
+ debug_printf("\t\t.stipple[19] = 0x%x\n", ptr->stipple[19]);
+ debug_printf("\t\t.stipple[20] = 0x%x\n", ptr->stipple[20]);
+ debug_printf("\t\t.stipple[21] = 0x%x\n", ptr->stipple[21]);
+ debug_printf("\t\t.stipple[22] = 0x%x\n", ptr->stipple[22]);
+ debug_printf("\t\t.stipple[23] = 0x%x\n", ptr->stipple[23]);
+ debug_printf("\t\t.stipple[24] = 0x%x\n", ptr->stipple[24]);
+ debug_printf("\t\t.stipple[25] = 0x%x\n", ptr->stipple[25]);
+ debug_printf("\t\t.stipple[26] = 0x%x\n", ptr->stipple[26]);
+ debug_printf("\t\t.stipple[27] = 0x%x\n", ptr->stipple[27]);
+ debug_printf("\t\t.stipple[28] = 0x%x\n", ptr->stipple[28]);
+ debug_printf("\t\t.stipple[29] = 0x%x\n", ptr->stipple[29]);
+ debug_printf("\t\t.stipple[30] = 0x%x\n", ptr->stipple[30]);
+ debug_printf("\t\t.stipple[31] = 0x%x\n", ptr->stipple[31]);
+}
+
+/**
+ * Dump a brw_polygon_stipple_offset: header length/opcode and the
+ * bits0 y_offset and x_offset fields, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+ debug_printf("\t\t.bits0.y_offset = 0x%x\n", ptr->bits0.y_offset);
+ debug_printf("\t\t.bits0.x_offset = 0x%x\n", ptr->bits0.x_offset);
+}
+
+/**
+ * Dump a brw_sampler_default_color: color[0] through color[3], each
+ * formatted with %f.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr)
+{
+ debug_printf("\t\t.color[0] = %f\n", ptr->color[0]);
+ debug_printf("\t\t.color[1] = %f\n", ptr->color[1]);
+ debug_printf("\t\t.color[2] = %f\n", ptr->color[2]);
+ debug_printf("\t\t.color[3] = %f\n", ptr->color[3]);
+}
+
+/**
+ * Dump a brw_sampler_state: the fields of ss0, ss1, ss2 and ss3 in
+ * that order, one hex value per line.
+ *
+ * @param ptr  structure to print; dereferenced unconditionally.
+ */
+void
+brw_dump_sampler_state(const struct brw_sampler_state *ptr)
+{
+ debug_printf("\t\t.ss0.shadow_function = 0x%x\n", ptr->ss0.shadow_function);
+ debug_printf("\t\t.ss0.lod_bias = 0x%x\n", ptr->ss0.lod_bias);
+ debug_printf("\t\t.ss0.min_filter = 0x%x\n", ptr->ss0.min_filter);
+ debug_printf("\t\t.ss0.mag_filter = 0x%x\n", ptr->ss0.mag_filter);
+ debug_printf("\t\t.ss0.mip_filter = 0x%x\n", ptr->ss0.mip_filter);
+ debug_printf("\t\t.ss0.base_level = 0x%x\n", ptr->ss0.base_level);
+ debug_printf("\t\t.ss0.lod_preclamp = 0x%x\n", ptr->ss0.lod_preclamp);
+ debug_printf("\t\t.ss0.default_color_mode = 0x%x\n", ptr->ss0.default_color_mode);
+ debug_printf("\t\t.ss0.disable = 0x%x\n", ptr->ss0.disable);
+ debug_printf("\t\t.ss1.r_wrap_mode = 0x%x\n", ptr->ss1.r_wrap_mode);
+ debug_printf("\t\t.ss1.t_wrap_mode = 0x%x\n", ptr->ss1.t_wrap_mode);
+ debug_printf("\t\t.ss1.s_wrap_mode = 0x%x\n", ptr->ss1.s_wrap_mode);
+ debug_printf("\t\t.ss1.max_lod = 0x%x\n", ptr->ss1.max_lod);
+ debug_printf("\t\t.ss1.min_lod = 0x%x\n", ptr->ss1.min_lod);
+ debug_printf("\t\t.ss2.default_color_pointer = 0x%x\n", ptr->ss2.default_color_pointer);
+ debug_printf("\t\t.ss3.max_aniso = 0x%x\n", ptr->ss3.max_aniso);
+ debug_printf("\t\t.ss3.chroma_key_mode = 0x%x\n", ptr->ss3.chroma_key_mode);
+ debug_printf("\t\t.ss3.chroma_key_index = 0x%x\n", ptr->ss3.chroma_key_index);
+ debug_printf("\t\t.ss3.chroma_key_enable = 0x%x\n", ptr->ss3.chroma_key_enable);
+ debug_printf("\t\t.ss3.monochrome_filter_width = 0x%x\n", ptr->ss3.monochrome_filter_width);
+ debug_printf("\t\t.ss3.monochrome_filter_height = 0x%x\n", ptr->ss3.monochrome_filter_height);
+}
+
+/**
+ * Dump a brw_sf_unit_state via debug_printf().
+ *
+ * Prints every field of the thread0..thread4 and sf5..sf7 members in that
+ * order, one line per field, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr)
+{
+   /* thread0 */
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+
+   /* thread1 */
+   debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", ptr->thread1.ext_halt_exception_enable);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", ptr->thread1.timeout_exception_enable);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", ptr->thread1.depth_coef_urb_read_offset);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+
+   /* thread2 */
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+
+   /* thread3 */
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+
+   /* thread4 */
+   debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+   debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+   debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+   debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+
+   /* sf5 */
+   debug_printf("\t\t.sf5.front_winding = 0x%x\n", ptr->sf5.front_winding);
+   debug_printf("\t\t.sf5.viewport_transform = 0x%x\n", ptr->sf5.viewport_transform);
+   debug_printf("\t\t.sf5.sf_viewport_state_offset = 0x%x\n", ptr->sf5.sf_viewport_state_offset);
+
+   /* sf6 */
+   debug_printf("\t\t.sf6.dest_org_vbias = 0x%x\n", ptr->sf6.dest_org_vbias);
+   debug_printf("\t\t.sf6.dest_org_hbias = 0x%x\n", ptr->sf6.dest_org_hbias);
+   debug_printf("\t\t.sf6.scissor = 0x%x\n", ptr->sf6.scissor);
+   debug_printf("\t\t.sf6.disable_2x2_trifilter = 0x%x\n", ptr->sf6.disable_2x2_trifilter);
+   debug_printf("\t\t.sf6.disable_zero_pix_trifilter = 0x%x\n", ptr->sf6.disable_zero_pix_trifilter);
+   debug_printf("\t\t.sf6.point_rast_rule = 0x%x\n", ptr->sf6.point_rast_rule);
+   debug_printf("\t\t.sf6.line_endcap_aa_region_width = 0x%x\n", ptr->sf6.line_endcap_aa_region_width);
+   debug_printf("\t\t.sf6.line_width = 0x%x\n", ptr->sf6.line_width);
+   debug_printf("\t\t.sf6.fast_scissor_disable = 0x%x\n", ptr->sf6.fast_scissor_disable);
+   debug_printf("\t\t.sf6.cull_mode = 0x%x\n", ptr->sf6.cull_mode);
+   debug_printf("\t\t.sf6.aa_enable = 0x%x\n", ptr->sf6.aa_enable);
+
+   /* sf7 */
+   debug_printf("\t\t.sf7.point_size = 0x%x\n", ptr->sf7.point_size);
+   debug_printf("\t\t.sf7.use_point_size_state = 0x%x\n", ptr->sf7.use_point_size_state);
+   debug_printf("\t\t.sf7.subpixel_precision = 0x%x\n", ptr->sf7.subpixel_precision);
+   debug_printf("\t\t.sf7.sprite_point = 0x%x\n", ptr->sf7.sprite_point);
+   debug_printf("\t\t.sf7.aa_line_distance_mode = 0x%x\n", ptr->sf7.aa_line_distance_mode);
+   debug_printf("\t\t.sf7.trifan_pv = 0x%x\n", ptr->sf7.trifan_pv);
+   debug_printf("\t\t.sf7.linestrip_pv = 0x%x\n", ptr->sf7.linestrip_pv);
+   debug_printf("\t\t.sf7.tristrip_pv = 0x%x\n", ptr->sf7.tristrip_pv);
+   debug_printf("\t\t.sf7.line_last_pixel_enable = 0x%x\n", ptr->sf7.line_last_pixel_enable);
+}
+
+/**
+ * Dump a brw_sf_viewport via debug_printf().
+ *
+ * The six viewport.m* members are printed with "%f"; the four scissor
+ * bounds are printed in hex.  One line per field.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_sf_viewport(const struct brw_sf_viewport *ptr)
+{
+   /* viewport members */
+   debug_printf("\t\t.viewport.m00 = %f\n", ptr->viewport.m00);
+   debug_printf("\t\t.viewport.m11 = %f\n", ptr->viewport.m11);
+   debug_printf("\t\t.viewport.m22 = %f\n", ptr->viewport.m22);
+   debug_printf("\t\t.viewport.m30 = %f\n", ptr->viewport.m30);
+   debug_printf("\t\t.viewport.m31 = %f\n", ptr->viewport.m31);
+   debug_printf("\t\t.viewport.m32 = %f\n", ptr->viewport.m32);
+
+   /* scissor bounds */
+   debug_printf("\t\t.scissor.xmin = 0x%x\n", ptr->scissor.xmin);
+   debug_printf("\t\t.scissor.ymin = 0x%x\n", ptr->scissor.ymin);
+   debug_printf("\t\t.scissor.xmax = 0x%x\n", ptr->scissor.xmax);
+   debug_printf("\t\t.scissor.ymax = 0x%x\n", ptr->scissor.ymax);
+}
+
+/**
+ * Dump a brw_ss0 via debug_printf().
+ *
+ * Prints each field on its own line, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_ss0(const struct brw_ss0 *ptr)
+{
+   debug_printf("\t\t.shadow_function = 0x%x\n", ptr->shadow_function);
+   debug_printf("\t\t.lod_bias = 0x%x\n", ptr->lod_bias);
+   debug_printf("\t\t.min_filter = 0x%x\n", ptr->min_filter);
+   debug_printf("\t\t.mag_filter = 0x%x\n", ptr->mag_filter);
+   debug_printf("\t\t.mip_filter = 0x%x\n", ptr->mip_filter);
+   debug_printf("\t\t.base_level = 0x%x\n", ptr->base_level);
+   debug_printf("\t\t.lod_preclamp = 0x%x\n", ptr->lod_preclamp);
+   debug_printf("\t\t.default_color_mode = 0x%x\n", ptr->default_color_mode);
+   debug_printf("\t\t.disable = 0x%x\n", ptr->disable);
+}
+
+/**
+ * Dump a brw_ss1 via debug_printf().
+ *
+ * Prints the three wrap modes and the max/min LOD fields, one line per
+ * field, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_ss1(const struct brw_ss1 *ptr)
+{
+   debug_printf("\t\t.r_wrap_mode = 0x%x\n", ptr->r_wrap_mode);
+   debug_printf("\t\t.t_wrap_mode = 0x%x\n", ptr->t_wrap_mode);
+   debug_printf("\t\t.s_wrap_mode = 0x%x\n", ptr->s_wrap_mode);
+   debug_printf("\t\t.max_lod = 0x%x\n", ptr->max_lod);
+   debug_printf("\t\t.min_lod = 0x%x\n", ptr->min_lod);
+}
+
+/**
+ * Dump a brw_ss2 via debug_printf().
+ *
+ * Prints the single default_color_pointer field in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_ss2(const struct brw_ss2 *ptr)
+{
+   debug_printf("\t\t.default_color_pointer = 0x%x\n", ptr->default_color_pointer);
+}
+
+/**
+ * Dump a brw_ss3 via debug_printf().
+ *
+ * Prints each field on its own line, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_ss3(const struct brw_ss3 *ptr)
+{
+   debug_printf("\t\t.max_aniso = 0x%x\n", ptr->max_aniso);
+   debug_printf("\t\t.chroma_key_mode = 0x%x\n", ptr->chroma_key_mode);
+   debug_printf("\t\t.chroma_key_index = 0x%x\n", ptr->chroma_key_index);
+   debug_printf("\t\t.chroma_key_enable = 0x%x\n", ptr->chroma_key_enable);
+   debug_printf("\t\t.monochrome_filter_width = 0x%x\n", ptr->monochrome_filter_width);
+   debug_printf("\t\t.monochrome_filter_height = 0x%x\n", ptr->monochrome_filter_height);
+}
+
+/**
+ * Dump a brw_state_base_address via debug_printf().
+ *
+ * Prints the header fields, then the modify_enable flag and the address
+ * or upper-bound field of each of bits0..bits4, one line per field, each
+ * value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_state_base_address(const struct brw_state_base_address *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+
+   debug_printf("\t\t.bits0.modify_enable = 0x%x\n", ptr->bits0.modify_enable);
+   debug_printf("\t\t.bits0.general_state_address = 0x%x\n", ptr->bits0.general_state_address);
+
+   debug_printf("\t\t.bits1.modify_enable = 0x%x\n", ptr->bits1.modify_enable);
+   debug_printf("\t\t.bits1.surface_state_address = 0x%x\n", ptr->bits1.surface_state_address);
+
+   debug_printf("\t\t.bits2.modify_enable = 0x%x\n", ptr->bits2.modify_enable);
+   debug_printf("\t\t.bits2.indirect_object_state_address = 0x%x\n", ptr->bits2.indirect_object_state_address);
+
+   debug_printf("\t\t.bits3.modify_enable = 0x%x\n", ptr->bits3.modify_enable);
+   debug_printf("\t\t.bits3.general_state_upper_bound = 0x%x\n", ptr->bits3.general_state_upper_bound);
+
+   debug_printf("\t\t.bits4.modify_enable = 0x%x\n", ptr->bits4.modify_enable);
+   debug_printf("\t\t.bits4.indirect_object_state_upper_bound = 0x%x\n", ptr->bits4.indirect_object_state_upper_bound);
+}
+
+/**
+ * Dump a brw_state_prefetch via debug_printf().
+ *
+ * Prints the header fields and the prefetch count/pointer bits, one line
+ * per field, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_state_prefetch(const struct brw_state_prefetch *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.prefetch_count = 0x%x\n", ptr->bits0.prefetch_count);
+   debug_printf("\t\t.bits0.prefetch_pointer = 0x%x\n", ptr->bits0.prefetch_pointer);
+}
+
+/**
+ * Dump a brw_surf_ss0 via debug_printf().
+ *
+ * Prints each field on its own line, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr)
+{
+   debug_printf("\t\t.cube_pos_z = 0x%x\n", ptr->cube_pos_z);
+   debug_printf("\t\t.cube_neg_z = 0x%x\n", ptr->cube_neg_z);
+   debug_printf("\t\t.cube_pos_y = 0x%x\n", ptr->cube_pos_y);
+   debug_printf("\t\t.cube_neg_y = 0x%x\n", ptr->cube_neg_y);
+   debug_printf("\t\t.cube_pos_x = 0x%x\n", ptr->cube_pos_x);
+   debug_printf("\t\t.cube_neg_x = 0x%x\n", ptr->cube_neg_x);
+   debug_printf("\t\t.mipmap_layout_mode = 0x%x\n", ptr->mipmap_layout_mode);
+   debug_printf("\t\t.vert_line_stride_ofs = 0x%x\n", ptr->vert_line_stride_ofs);
+   debug_printf("\t\t.vert_line_stride = 0x%x\n", ptr->vert_line_stride);
+   debug_printf("\t\t.color_blend = 0x%x\n", ptr->color_blend);
+   debug_printf("\t\t.writedisable_blue = 0x%x\n", ptr->writedisable_blue);
+   debug_printf("\t\t.writedisable_green = 0x%x\n", ptr->writedisable_green);
+   debug_printf("\t\t.writedisable_red = 0x%x\n", ptr->writedisable_red);
+   debug_printf("\t\t.writedisable_alpha = 0x%x\n", ptr->writedisable_alpha);
+   debug_printf("\t\t.surface_format = 0x%x\n", ptr->surface_format);
+   debug_printf("\t\t.data_return_format = 0x%x\n", ptr->data_return_format);
+   debug_printf("\t\t.surface_type = 0x%x\n", ptr->surface_type);
+}
+
+/**
+ * Dump a brw_surf_ss1 via debug_printf().
+ *
+ * Prints the single base_addr field in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr)
+{
+   debug_printf("\t\t.base_addr = 0x%x\n", ptr->base_addr);
+}
+
+/**
+ * Dump a brw_surf_ss2 via debug_printf().
+ *
+ * Prints mip_count, width and height, one line per field, each value in
+ * hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr)
+{
+   debug_printf("\t\t.mip_count = 0x%x\n", ptr->mip_count);
+   debug_printf("\t\t.width = 0x%x\n", ptr->width);
+   debug_printf("\t\t.height = 0x%x\n", ptr->height);
+}
+
+/**
+ * Dump a brw_surf_ss3 via debug_printf().
+ *
+ * Prints tile_walk, tiled_surface, pitch and depth, one line per field,
+ * each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr)
+{
+   debug_printf("\t\t.tile_walk = 0x%x\n", ptr->tile_walk);
+   debug_printf("\t\t.tiled_surface = 0x%x\n", ptr->tiled_surface);
+   debug_printf("\t\t.pitch = 0x%x\n", ptr->pitch);
+   debug_printf("\t\t.depth = 0x%x\n", ptr->depth);
+}
+
+/**
+ * Dump a brw_surf_ss4 via debug_printf().
+ *
+ * Prints each field on its own line, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr)
+{
+   debug_printf("\t\t.multisample_position_palette_index = 0x%x\n", ptr->multisample_position_palette_index);
+   debug_printf("\t\t.num_multisamples = 0x%x\n", ptr->num_multisamples);
+   debug_printf("\t\t.render_target_view_extent = 0x%x\n", ptr->render_target_view_extent);
+   debug_printf("\t\t.min_array_elt = 0x%x\n", ptr->min_array_elt);
+   debug_printf("\t\t.min_lod = 0x%x\n", ptr->min_lod);
+}
+
+/**
+ * Dump a brw_surf_ss5 via debug_printf().
+ *
+ * Prints each field on its own line, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr)
+{
+   debug_printf("\t\t.llc_mapping = 0x%x\n", ptr->llc_mapping);
+   debug_printf("\t\t.mlc_mapping = 0x%x\n", ptr->mlc_mapping);
+   debug_printf("\t\t.gfdt = 0x%x\n", ptr->gfdt);
+   debug_printf("\t\t.gfdt_src = 0x%x\n", ptr->gfdt_src);
+   debug_printf("\t\t.y_offset = 0x%x\n", ptr->y_offset);
+   debug_printf("\t\t.x_offset = 0x%x\n", ptr->x_offset);
+}
+
+/**
+ * Dump a brw_surface_state via debug_printf().
+ *
+ * Prints every field of the ss0..ss5 members in that order, one line per
+ * field, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_surface_state(const struct brw_surface_state *ptr)
+{
+   /* ss0 */
+   debug_printf("\t\t.ss0.cube_pos_z = 0x%x\n", ptr->ss0.cube_pos_z);
+   debug_printf("\t\t.ss0.cube_neg_z = 0x%x\n", ptr->ss0.cube_neg_z);
+   debug_printf("\t\t.ss0.cube_pos_y = 0x%x\n", ptr->ss0.cube_pos_y);
+   debug_printf("\t\t.ss0.cube_neg_y = 0x%x\n", ptr->ss0.cube_neg_y);
+   debug_printf("\t\t.ss0.cube_pos_x = 0x%x\n", ptr->ss0.cube_pos_x);
+   debug_printf("\t\t.ss0.cube_neg_x = 0x%x\n", ptr->ss0.cube_neg_x);
+   debug_printf("\t\t.ss0.mipmap_layout_mode = 0x%x\n", ptr->ss0.mipmap_layout_mode);
+   debug_printf("\t\t.ss0.vert_line_stride_ofs = 0x%x\n", ptr->ss0.vert_line_stride_ofs);
+   debug_printf("\t\t.ss0.vert_line_stride = 0x%x\n", ptr->ss0.vert_line_stride);
+   debug_printf("\t\t.ss0.color_blend = 0x%x\n", ptr->ss0.color_blend);
+   debug_printf("\t\t.ss0.writedisable_blue = 0x%x\n", ptr->ss0.writedisable_blue);
+   debug_printf("\t\t.ss0.writedisable_green = 0x%x\n", ptr->ss0.writedisable_green);
+   debug_printf("\t\t.ss0.writedisable_red = 0x%x\n", ptr->ss0.writedisable_red);
+   debug_printf("\t\t.ss0.writedisable_alpha = 0x%x\n", ptr->ss0.writedisable_alpha);
+   debug_printf("\t\t.ss0.surface_format = 0x%x\n", ptr->ss0.surface_format);
+   debug_printf("\t\t.ss0.data_return_format = 0x%x\n", ptr->ss0.data_return_format);
+   debug_printf("\t\t.ss0.surface_type = 0x%x\n", ptr->ss0.surface_type);
+
+   /* ss1 */
+   debug_printf("\t\t.ss1.base_addr = 0x%x\n", ptr->ss1.base_addr);
+
+   /* ss2 */
+   debug_printf("\t\t.ss2.mip_count = 0x%x\n", ptr->ss2.mip_count);
+   debug_printf("\t\t.ss2.width = 0x%x\n", ptr->ss2.width);
+   debug_printf("\t\t.ss2.height = 0x%x\n", ptr->ss2.height);
+
+   /* ss3 */
+   debug_printf("\t\t.ss3.tile_walk = 0x%x\n", ptr->ss3.tile_walk);
+   debug_printf("\t\t.ss3.tiled_surface = 0x%x\n", ptr->ss3.tiled_surface);
+   debug_printf("\t\t.ss3.pitch = 0x%x\n", ptr->ss3.pitch);
+   debug_printf("\t\t.ss3.depth = 0x%x\n", ptr->ss3.depth);
+
+   /* ss4 */
+   debug_printf("\t\t.ss4.multisample_position_palette_index = 0x%x\n", ptr->ss4.multisample_position_palette_index);
+   debug_printf("\t\t.ss4.num_multisamples = 0x%x\n", ptr->ss4.num_multisamples);
+   debug_printf("\t\t.ss4.render_target_view_extent = 0x%x\n", ptr->ss4.render_target_view_extent);
+   debug_printf("\t\t.ss4.min_array_elt = 0x%x\n", ptr->ss4.min_array_elt);
+   debug_printf("\t\t.ss4.min_lod = 0x%x\n", ptr->ss4.min_lod);
+
+   /* ss5 */
+   debug_printf("\t\t.ss5.llc_mapping = 0x%x\n", ptr->ss5.llc_mapping);
+   debug_printf("\t\t.ss5.mlc_mapping = 0x%x\n", ptr->ss5.mlc_mapping);
+   debug_printf("\t\t.ss5.gfdt = 0x%x\n", ptr->ss5.gfdt);
+   debug_printf("\t\t.ss5.gfdt_src = 0x%x\n", ptr->ss5.gfdt_src);
+   debug_printf("\t\t.ss5.y_offset = 0x%x\n", ptr->ss5.y_offset);
+   debug_printf("\t\t.ss5.x_offset = 0x%x\n", ptr->ss5.x_offset);
+}
+
+/**
+ * Dump a brw_system_instruction_pointer via debug_printf().
+ *
+ * Prints the header fields and bits0.system_instruction_pointer, one line
+ * per field, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.system_instruction_pointer = 0x%x\n", ptr->bits0.system_instruction_pointer);
+}
+
+/**
+ * Dump a brw_urb_fence via debug_printf().
+ *
+ * Prints the header fields (length, the per-unit *_realloc flags, opcode)
+ * followed by the fence fields in bits0 and bits1, one line per field,
+ * each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_urb_fence(const struct brw_urb_fence *ptr)
+{
+   /* header */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.vs_realloc = 0x%x\n", ptr->header.vs_realloc);
+   debug_printf("\t\t.header.gs_realloc = 0x%x\n", ptr->header.gs_realloc);
+   debug_printf("\t\t.header.clp_realloc = 0x%x\n", ptr->header.clp_realloc);
+   debug_printf("\t\t.header.sf_realloc = 0x%x\n", ptr->header.sf_realloc);
+   debug_printf("\t\t.header.vfe_realloc = 0x%x\n", ptr->header.vfe_realloc);
+   debug_printf("\t\t.header.cs_realloc = 0x%x\n", ptr->header.cs_realloc);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+
+   /* bits0 */
+   debug_printf("\t\t.bits0.vs_fence = 0x%x\n", ptr->bits0.vs_fence);
+   debug_printf("\t\t.bits0.gs_fence = 0x%x\n", ptr->bits0.gs_fence);
+   debug_printf("\t\t.bits0.clp_fence = 0x%x\n", ptr->bits0.clp_fence);
+
+   /* bits1 */
+   debug_printf("\t\t.bits1.sf_fence = 0x%x\n", ptr->bits1.sf_fence);
+   debug_printf("\t\t.bits1.vf_fence = 0x%x\n", ptr->bits1.vf_fence);
+   debug_printf("\t\t.bits1.cs_fence = 0x%x\n", ptr->bits1.cs_fence);
+}
+
+/**
+ * Dump a brw_urb_immediate via debug_printf().
+ *
+ * Prints each field on its own line, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_urb_immediate(const struct brw_urb_immediate *ptr)
+{
+   debug_printf("\t\t.opcode = 0x%x\n", ptr->opcode);
+   debug_printf("\t\t.offset = 0x%x\n", ptr->offset);
+   debug_printf("\t\t.swizzle_control = 0x%x\n", ptr->swizzle_control);
+   debug_printf("\t\t.allocate = 0x%x\n", ptr->allocate);
+   debug_printf("\t\t.used = 0x%x\n", ptr->used);
+   debug_printf("\t\t.complete = 0x%x\n", ptr->complete);
+   debug_printf("\t\t.response_length = 0x%x\n", ptr->response_length);
+   debug_printf("\t\t.msg_length = 0x%x\n", ptr->msg_length);
+   debug_printf("\t\t.msg_target = 0x%x\n", ptr->msg_target);
+   debug_printf("\t\t.end_of_thread = 0x%x\n", ptr->end_of_thread);
+}
+
+/**
+ * Dump a brw_vb_array_state via debug_printf().
+ *
+ * Prints the header fields, then six fields for each of the entries
+ * vb[0] .. vb[16]: vb0.pitch, vb0.access_type, vb0.vb_index, start_addr,
+ * max_index and instance_data_step_rate.  One line per field, each value
+ * in hex.
+ *
+ * The array index is formatted at run time ("%u") rather than being
+ * spelled out in a separate string literal per entry, so the emitted text
+ * is the same as a fully unrolled dump of the same 17 entries.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_vb_array_state(const struct brw_vb_array_state *ptr)
+{
+   /* Number of vb[] entries dumped, i.e. indices 0 through 16. */
+   const unsigned num_vb = 17;
+   unsigned i;
+
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+
+   for (i = 0; i < num_vb; i++) {
+      debug_printf("\t\t.vb[%u].vb0.pitch = 0x%x\n", i, ptr->vb[i].vb0.pitch);
+      debug_printf("\t\t.vb[%u].vb0.access_type = 0x%x\n", i, ptr->vb[i].vb0.access_type);
+      debug_printf("\t\t.vb[%u].vb0.vb_index = 0x%x\n", i, ptr->vb[i].vb0.vb_index);
+      debug_printf("\t\t.vb[%u].start_addr = 0x%x\n", i, ptr->vb[i].start_addr);
+      debug_printf("\t\t.vb[%u].max_index = 0x%x\n", i, ptr->vb[i].max_index);
+      debug_printf("\t\t.vb[%u].instance_data_step_rate = 0x%x\n", i, ptr->vb[i].instance_data_step_rate);
+   }
+}
+
+/**
+ * Dump a brw_vertex_buffer_state via debug_printf().
+ *
+ * Prints the three vb0 fields followed by start_addr, max_index and
+ * instance_data_step_rate, one line per field, each value in hex.
+ *
+ * \param ptr  structure to dump; dereferenced unconditionally.
+ */
+void
+brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr)
+{
+   debug_printf("\t\t.vb0.pitch = 0x%x\n", ptr->vb0.pitch);
+   debug_printf("\t\t.vb0.access_type = 0x%x\n", ptr->vb0.access_type);
+   debug_printf("\t\t.vb0.vb_index = 0x%x\n", ptr->vb0.vb_index);
+   debug_printf("\t\t.start_addr = 0x%x\n", ptr->start_addr);
+   debug_printf("\t\t.max_index = 0x%x\n", ptr->max_index);
+   debug_printf("\t\t.instance_data_step_rate = 0x%x\n", ptr->instance_data_step_rate);
+}
+
+void
+brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.ve[0].ve0.src_offset = 0x%x\n", (*ptr).ve[0].ve0.src_offset);
+ debug_printf("\t\t.ve[0].ve0.src_format = 0x%x\n", (*ptr).ve[0].ve0.src_format);
+ debug_printf("\t\t.ve[0].ve0.valid = 0x%x\n", (*ptr).ve[0].ve0.valid);
+ debug_printf("\t\t.ve[0].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[0].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[0].ve1.dst_offset = 0x%x\n", (*ptr).ve[0].ve1.dst_offset);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[1].ve0.src_offset = 0x%x\n", (*ptr).ve[1].ve0.src_offset);
+ debug_printf("\t\t.ve[1].ve0.src_format = 0x%x\n", (*ptr).ve[1].ve0.src_format);
+ debug_printf("\t\t.ve[1].ve0.valid = 0x%x\n", (*ptr).ve[1].ve0.valid);
+ debug_printf("\t\t.ve[1].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[1].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[1].ve1.dst_offset = 0x%x\n", (*ptr).ve[1].ve1.dst_offset);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[2].ve0.src_offset = 0x%x\n", (*ptr).ve[2].ve0.src_offset);
+ debug_printf("\t\t.ve[2].ve0.src_format = 0x%x\n", (*ptr).ve[2].ve0.src_format);
+ debug_printf("\t\t.ve[2].ve0.valid = 0x%x\n", (*ptr).ve[2].ve0.valid);
+ debug_printf("\t\t.ve[2].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[2].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[2].ve1.dst_offset = 0x%x\n", (*ptr).ve[2].ve1.dst_offset);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[3].ve0.src_offset = 0x%x\n", (*ptr).ve[3].ve0.src_offset);
+ debug_printf("\t\t.ve[3].ve0.src_format = 0x%x\n", (*ptr).ve[3].ve0.src_format);
+ debug_printf("\t\t.ve[3].ve0.valid = 0x%x\n", (*ptr).ve[3].ve0.valid);
+ debug_printf("\t\t.ve[3].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[3].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[3].ve1.dst_offset = 0x%x\n", (*ptr).ve[3].ve1.dst_offset);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[4].ve0.src_offset = 0x%x\n", (*ptr).ve[4].ve0.src_offset);
+ debug_printf("\t\t.ve[4].ve0.src_format = 0x%x\n", (*ptr).ve[4].ve0.src_format);
+ debug_printf("\t\t.ve[4].ve0.valid = 0x%x\n", (*ptr).ve[4].ve0.valid);
+ debug_printf("\t\t.ve[4].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[4].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[4].ve1.dst_offset = 0x%x\n", (*ptr).ve[4].ve1.dst_offset);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[5].ve0.src_offset = 0x%x\n", (*ptr).ve[5].ve0.src_offset);
+ debug_printf("\t\t.ve[5].ve0.src_format = 0x%x\n", (*ptr).ve[5].ve0.src_format);
+ debug_printf("\t\t.ve[5].ve0.valid = 0x%x\n", (*ptr).ve[5].ve0.valid);
+ debug_printf("\t\t.ve[5].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[5].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[5].ve1.dst_offset = 0x%x\n", (*ptr).ve[5].ve1.dst_offset);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[6].ve0.src_offset = 0x%x\n", (*ptr).ve[6].ve0.src_offset);
+ debug_printf("\t\t.ve[6].ve0.src_format = 0x%x\n", (*ptr).ve[6].ve0.src_format);
+ debug_printf("\t\t.ve[6].ve0.valid = 0x%x\n", (*ptr).ve[6].ve0.valid);
+ debug_printf("\t\t.ve[6].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[6].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[6].ve1.dst_offset = 0x%x\n", (*ptr).ve[6].ve1.dst_offset);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[7].ve0.src_offset = 0x%x\n", (*ptr).ve[7].ve0.src_offset);
+ debug_printf("\t\t.ve[7].ve0.src_format = 0x%x\n", (*ptr).ve[7].ve0.src_format);
+ debug_printf("\t\t.ve[7].ve0.valid = 0x%x\n", (*ptr).ve[7].ve0.valid);
+ debug_printf("\t\t.ve[7].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[7].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[7].ve1.dst_offset = 0x%x\n", (*ptr).ve[7].ve1.dst_offset);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[8].ve0.src_offset = 0x%x\n", (*ptr).ve[8].ve0.src_offset);
+ debug_printf("\t\t.ve[8].ve0.src_format = 0x%x\n", (*ptr).ve[8].ve0.src_format);
+ debug_printf("\t\t.ve[8].ve0.valid = 0x%x\n", (*ptr).ve[8].ve0.valid);
+ debug_printf("\t\t.ve[8].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[8].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[8].ve1.dst_offset = 0x%x\n", (*ptr).ve[8].ve1.dst_offset);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[9].ve0.src_offset = 0x%x\n", (*ptr).ve[9].ve0.src_offset);
+ debug_printf("\t\t.ve[9].ve0.src_format = 0x%x\n", (*ptr).ve[9].ve0.src_format);
+ debug_printf("\t\t.ve[9].ve0.valid = 0x%x\n", (*ptr).ve[9].ve0.valid);
+ debug_printf("\t\t.ve[9].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[9].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[9].ve1.dst_offset = 0x%x\n", (*ptr).ve[9].ve1.dst_offset);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[10].ve0.src_offset = 0x%x\n", (*ptr).ve[10].ve0.src_offset);
+ debug_printf("\t\t.ve[10].ve0.src_format = 0x%x\n", (*ptr).ve[10].ve0.src_format);
+ debug_printf("\t\t.ve[10].ve0.valid = 0x%x\n", (*ptr).ve[10].ve0.valid);
+ debug_printf("\t\t.ve[10].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[10].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[10].ve1.dst_offset = 0x%x\n", (*ptr).ve[10].ve1.dst_offset);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[11].ve0.src_offset = 0x%x\n", (*ptr).ve[11].ve0.src_offset);
+ debug_printf("\t\t.ve[11].ve0.src_format = 0x%x\n", (*ptr).ve[11].ve0.src_format);
+ debug_printf("\t\t.ve[11].ve0.valid = 0x%x\n", (*ptr).ve[11].ve0.valid);
+ debug_printf("\t\t.ve[11].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[11].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[11].ve1.dst_offset = 0x%x\n", (*ptr).ve[11].ve1.dst_offset);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[12].ve0.src_offset = 0x%x\n", (*ptr).ve[12].ve0.src_offset);
+ debug_printf("\t\t.ve[12].ve0.src_format = 0x%x\n", (*ptr).ve[12].ve0.src_format);
+ debug_printf("\t\t.ve[12].ve0.valid = 0x%x\n", (*ptr).ve[12].ve0.valid);
+ debug_printf("\t\t.ve[12].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[12].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[12].ve1.dst_offset = 0x%x\n", (*ptr).ve[12].ve1.dst_offset);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[13].ve0.src_offset = 0x%x\n", (*ptr).ve[13].ve0.src_offset);
+ debug_printf("\t\t.ve[13].ve0.src_format = 0x%x\n", (*ptr).ve[13].ve0.src_format);
+ debug_printf("\t\t.ve[13].ve0.valid = 0x%x\n", (*ptr).ve[13].ve0.valid);
+ debug_printf("\t\t.ve[13].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[13].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[13].ve1.dst_offset = 0x%x\n", (*ptr).ve[13].ve1.dst_offset);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[14].ve0.src_offset = 0x%x\n", (*ptr).ve[14].ve0.src_offset);
+ debug_printf("\t\t.ve[14].ve0.src_format = 0x%x\n", (*ptr).ve[14].ve0.src_format);
+ debug_printf("\t\t.ve[14].ve0.valid = 0x%x\n", (*ptr).ve[14].ve0.valid);
+ debug_printf("\t\t.ve[14].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[14].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[14].ve1.dst_offset = 0x%x\n", (*ptr).ve[14].ve1.dst_offset);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[15].ve0.src_offset = 0x%x\n", (*ptr).ve[15].ve0.src_offset);
+ debug_printf("\t\t.ve[15].ve0.src_format = 0x%x\n", (*ptr).ve[15].ve0.src_format);
+ debug_printf("\t\t.ve[15].ve0.valid = 0x%x\n", (*ptr).ve[15].ve0.valid);
+ debug_printf("\t\t.ve[15].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[15].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[15].ve1.dst_offset = 0x%x\n", (*ptr).ve[15].ve1.dst_offset);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[16].ve0.src_offset = 0x%x\n", (*ptr).ve[16].ve0.src_offset);
+ debug_printf("\t\t.ve[16].ve0.src_format = 0x%x\n", (*ptr).ve[16].ve0.src_format);
+ debug_printf("\t\t.ve[16].ve0.valid = 0x%x\n", (*ptr).ve[16].ve0.valid);
+ debug_printf("\t\t.ve[16].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[16].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[16].ve1.dst_offset = 0x%x\n", (*ptr).ve[16].ve1.dst_offset);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[17].ve0.src_offset = 0x%x\n", (*ptr).ve[17].ve0.src_offset);
+ debug_printf("\t\t.ve[17].ve0.src_format = 0x%x\n", (*ptr).ve[17].ve0.src_format);
+ debug_printf("\t\t.ve[17].ve0.valid = 0x%x\n", (*ptr).ve[17].ve0.valid);
+ debug_printf("\t\t.ve[17].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[17].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[17].ve1.dst_offset = 0x%x\n", (*ptr).ve[17].ve1.dst_offset);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent0);
+}
+
+void
+brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr)
+{  /* One debug_printf() line per field of *ptr. */
+   debug_printf("\t\t.ve0.src_offset = 0x%x\n", ptr->ve0.src_offset);
+   debug_printf("\t\t.ve0.src_format = 0x%x\n", ptr->ve0.src_format);
+   debug_printf("\t\t.ve0.valid = 0x%x\n", ptr->ve0.valid);
+   debug_printf("\t\t.ve0.vertex_buffer_index = 0x%x\n", ptr->ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve1.dst_offset = 0x%x\n", ptr->ve1.dst_offset);
+   debug_printf("\t\t.ve1.vfcomponent3 = 0x%x\n", ptr->ve1.vfcomponent3);
+   debug_printf("\t\t.ve1.vfcomponent2 = 0x%x\n", ptr->ve1.vfcomponent2);
+   debug_printf("\t\t.ve1.vfcomponent1 = 0x%x\n", ptr->ve1.vfcomponent1);
+   debug_printf("\t\t.ve1.vfcomponent0 = 0x%x\n", ptr->ve1.vfcomponent0);
+}
+
+void
+brw_dump_vf_statistics(const struct brw_vf_statistics *ptr)
+{  /* One debug_printf() line per field of *ptr. */
+   debug_printf("\t\t.statistics_enable = 0x%x\n", ptr->statistics_enable);
+   debug_printf("\t\t.opcode = 0x%x\n", ptr->opcode);
+}
+
+void
+brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr)
+{  /* One debug_printf() line per field of *ptr. */
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+   debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", ptr->thread1.ext_halt_exception_enable);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", ptr->thread1.timeout_exception_enable);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", ptr->thread1.depth_coef_urb_read_offset);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+   debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+   debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+   debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+   debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+   debug_printf("\t\t.vs5.sampler_count = 0x%x\n", ptr->vs5.sampler_count);
+   debug_printf("\t\t.vs5.sampler_state_pointer = 0x%x\n", ptr->vs5.sampler_state_pointer);
+   debug_printf("\t\t.vs6.vs_enable = 0x%x\n", ptr->vs6.vs_enable);
+   debug_printf("\t\t.vs6.vert_cache_disable = 0x%x\n", ptr->vs6.vert_cache_disable);
+}
+
+void
+brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr)
+{  /* One debug_printf() line per field of *ptr. */
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+   debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", ptr->thread1.ext_halt_exception_enable);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", ptr->thread1.timeout_exception_enable);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", ptr->thread1.depth_coef_urb_read_offset);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+   debug_printf("\t\t.wm4.stats_enable = 0x%x\n", ptr->wm4.stats_enable);
+   debug_printf("\t\t.wm4.depth_buffer_clear = 0x%x\n", ptr->wm4.depth_buffer_clear);
+   debug_printf("\t\t.wm4.sampler_count = 0x%x\n", ptr->wm4.sampler_count);
+   debug_printf("\t\t.wm4.sampler_state_pointer = 0x%x\n", ptr->wm4.sampler_state_pointer);
+   debug_printf("\t\t.wm5.enable_8_pix = 0x%x\n", ptr->wm5.enable_8_pix);
+   debug_printf("\t\t.wm5.enable_16_pix = 0x%x\n", ptr->wm5.enable_16_pix);
+   debug_printf("\t\t.wm5.enable_32_pix = 0x%x\n", ptr->wm5.enable_32_pix);
+   debug_printf("\t\t.wm5.enable_con_32_pix = 0x%x\n", ptr->wm5.enable_con_32_pix);
+   debug_printf("\t\t.wm5.enable_con_64_pix = 0x%x\n", ptr->wm5.enable_con_64_pix);
+   debug_printf("\t\t.wm5.legacy_global_depth_bias = 0x%x\n", ptr->wm5.legacy_global_depth_bias);
+   debug_printf("\t\t.wm5.line_stipple = 0x%x\n", ptr->wm5.line_stipple);
+   debug_printf("\t\t.wm5.depth_offset = 0x%x\n", ptr->wm5.depth_offset);
+   debug_printf("\t\t.wm5.polygon_stipple = 0x%x\n", ptr->wm5.polygon_stipple);
+   debug_printf("\t\t.wm5.line_aa_region_width = 0x%x\n", ptr->wm5.line_aa_region_width);
+   debug_printf("\t\t.wm5.line_endcap_aa_region_width = 0x%x\n", ptr->wm5.line_endcap_aa_region_width);
+   debug_printf("\t\t.wm5.early_depth_test = 0x%x\n", ptr->wm5.early_depth_test);
+   debug_printf("\t\t.wm5.thread_dispatch_enable = 0x%x\n", ptr->wm5.thread_dispatch_enable);
+   debug_printf("\t\t.wm5.program_uses_depth = 0x%x\n", ptr->wm5.program_uses_depth);
+   debug_printf("\t\t.wm5.program_computes_depth = 0x%x\n", ptr->wm5.program_computes_depth);
+   debug_printf("\t\t.wm5.program_uses_killpixel = 0x%x\n", ptr->wm5.program_uses_killpixel);
+   debug_printf("\t\t.wm5.legacy_line_rast = 0x%x\n", ptr->wm5.legacy_line_rast);
+   debug_printf("\t\t.wm5.transposed_urb_read_enable = 0x%x\n", ptr->wm5.transposed_urb_read_enable);
+   debug_printf("\t\t.wm5.max_threads = 0x%x\n", ptr->wm5.max_threads);
+   debug_printf("\t\t.global_depth_offset_constant = %f\n", ptr->global_depth_offset_constant);
+   debug_printf("\t\t.global_depth_offset_scale = %f\n", ptr->global_depth_offset_scale);
+   debug_printf("\t\t.wm8.grf_reg_count_1 = 0x%x\n", ptr->wm8.grf_reg_count_1);
+   debug_printf("\t\t.wm8.kernel_start_pointer_1 = 0x%x\n", ptr->wm8.kernel_start_pointer_1);
+   debug_printf("\t\t.wm9.grf_reg_count_2 = 0x%x\n", ptr->wm9.grf_reg_count_2);
+   debug_printf("\t\t.wm9.kernel_start_pointer_2 = 0x%x\n", ptr->wm9.kernel_start_pointer_2);
+   debug_printf("\t\t.wm10.grf_reg_count_3 = 0x%x\n", ptr->wm10.grf_reg_count_3);
+   debug_printf("\t\t.wm10.kernel_start_pointer_3 = 0x%x\n", ptr->wm10.kernel_start_pointer_3);
+}
+
diff --git a/src/gallium/drivers/i965/brw_structs_dump.h b/src/gallium/drivers/i965/brw_structs_dump.h
new file mode 100644
index 0000000000..7c02dbfe33
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.h
@@ -0,0 +1,276 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Dump i965 data structures.
+ *
+ * Generated automatically from brw_structs.h by brw_structs_dump.py.
+ */
+
+#ifndef BRW_STRUCTS_DUMP_H
+#define BRW_STRUCTS_DUMP_H
+
+struct brw_3d_control;
+struct brw_3d_primitive;
+struct brw_aa_line_parameters;
+struct brw_binding_table_pointers;
+struct brw_blend_constant_color;
+struct brw_cc0;
+struct brw_cc1;
+struct brw_cc2;
+struct brw_cc3;
+struct brw_cc4;
+struct brw_cc5;
+struct brw_cc6;
+struct brw_cc7;
+struct brw_cc_unit_state;
+struct brw_cc_viewport;
+struct brw_clip_unit_state;
+struct brw_clipper_viewport;
+struct brw_constant_buffer;
+struct brw_cs_urb_state;
+struct brw_depthbuffer;
+struct brw_depthbuffer_g4x;
+struct brw_drawrect;
+struct brw_global_depth_offset_clamp;
+struct brw_gs_unit_state;
+struct brw_indexbuffer;
+struct brw_line_stipple;
+struct brw_mi_flush;
+struct brw_pipe_control;
+struct brw_pipeline_select;
+struct brw_pipelined_state_pointers;
+struct brw_polygon_stipple;
+struct brw_polygon_stipple_offset;
+struct brw_sampler_default_color;
+struct brw_sampler_state;
+struct brw_sf_unit_state;
+struct brw_sf_viewport;
+struct brw_ss0;
+struct brw_ss1;
+struct brw_ss2;
+struct brw_ss3;
+struct brw_state_base_address;
+struct brw_state_prefetch;
+struct brw_surf_ss0;
+struct brw_surf_ss1;
+struct brw_surf_ss2;
+struct brw_surf_ss3;
+struct brw_surf_ss4;
+struct brw_surf_ss5;
+struct brw_surface_state;
+struct brw_system_instruction_pointer;
+struct brw_urb_fence;
+struct brw_urb_immediate;
+struct brw_vb_array_state;
+struct brw_vertex_buffer_state;
+struct brw_vertex_element_packet;
+struct brw_vertex_element_state;
+struct brw_vf_statistics;
+struct brw_vs_unit_state;
+struct brw_wm_unit_state;
+
+void
+brw_dump_3d_control(const struct brw_3d_control *ptr);
+
+void
+brw_dump_3d_primitive(const struct brw_3d_primitive *ptr);
+
+void
+brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr);
+
+void
+brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr);
+
+void
+brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr);
+
+void
+brw_dump_cc0(const struct brw_cc0 *ptr);
+
+void
+brw_dump_cc1(const struct brw_cc1 *ptr);
+
+void
+brw_dump_cc2(const struct brw_cc2 *ptr);
+
+void
+brw_dump_cc3(const struct brw_cc3 *ptr);
+
+void
+brw_dump_cc4(const struct brw_cc4 *ptr);
+
+void
+brw_dump_cc5(const struct brw_cc5 *ptr);
+
+void
+brw_dump_cc6(const struct brw_cc6 *ptr);
+
+void
+brw_dump_cc7(const struct brw_cc7 *ptr);
+
+void
+brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr);
+
+void
+brw_dump_cc_viewport(const struct brw_cc_viewport *ptr);
+
+void
+brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr);
+
+void
+brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr);
+
+void
+brw_dump_constant_buffer(const struct brw_constant_buffer *ptr);
+
+void
+brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr);
+
+void
+brw_dump_depthbuffer(const struct brw_depthbuffer *ptr);
+
+void
+brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr);
+
+void
+brw_dump_drawrect(const struct brw_drawrect *ptr);
+
+void
+brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr);
+
+void
+brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr);
+
+void
+brw_dump_indexbuffer(const struct brw_indexbuffer *ptr);
+
+void
+brw_dump_line_stipple(const struct brw_line_stipple *ptr);
+
+void
+brw_dump_mi_flush(const struct brw_mi_flush *ptr);
+
+void
+brw_dump_pipe_control(const struct brw_pipe_control *ptr);
+
+void
+brw_dump_pipeline_select(const struct brw_pipeline_select *ptr);
+
+void
+brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr);
+
+void
+brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr);
+
+void
+brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr);
+
+void
+brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr);
+
+void
+brw_dump_sampler_state(const struct brw_sampler_state *ptr);
+
+void
+brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr);
+
+void
+brw_dump_sf_viewport(const struct brw_sf_viewport *ptr);
+
+void
+brw_dump_ss0(const struct brw_ss0 *ptr);
+
+void
+brw_dump_ss1(const struct brw_ss1 *ptr);
+
+void
+brw_dump_ss2(const struct brw_ss2 *ptr);
+
+void
+brw_dump_ss3(const struct brw_ss3 *ptr);
+
+void
+brw_dump_state_base_address(const struct brw_state_base_address *ptr);
+
+void
+brw_dump_state_prefetch(const struct brw_state_prefetch *ptr);
+
+void
+brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr);
+
+void
+brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr);
+
+void
+brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr);
+
+void
+brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr);
+
+void
+brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr);
+
+void
+brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr);
+
+void
+brw_dump_surface_state(const struct brw_surface_state *ptr);
+
+void
+brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr);
+
+void
+brw_dump_urb_fence(const struct brw_urb_fence *ptr);
+
+void
+brw_dump_urb_immediate(const struct brw_urb_immediate *ptr);
+
+void
+brw_dump_vb_array_state(const struct brw_vb_array_state *ptr);
+
+void
+brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr);
+
+void
+brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr);
+
+void
+brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr);
+
+void
+brw_dump_vf_statistics(const struct brw_vf_statistics *ptr);
+
+void
+brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr);
+
+void
+brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr);
+
+
+#endif /* BRW_STRUCTS_DUMP_H */
diff --git a/src/gallium/drivers/i965/brw_structs_dump.py b/src/gallium/drivers/i965/brw_structs_dump.py
new file mode 100755
index 0000000000..6dba49ad91
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.py
@@ -0,0 +1,291 @@
+#!/usr/bin/env python
+'''
+Generates dumpers for the i965 state structures using pygccxml.
+
+Run as
+
+ PYTHONPATH=/path/to/pygccxml-1.0.0 python brw_structs_dump.py
+
+Jose Fonseca <jfonseca@vmware.com>
+'''
+
+copyright = '''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+ '''
+
+import os
+import sys
+import re
+
+from pygccxml import parser
+from pygccxml import declarations
+
+from pygccxml.declarations import algorithm
+from pygccxml.declarations import decl_visitor
+from pygccxml.declarations import type_traits
+from pygccxml.declarations import type_visitor
+
+
+enums = True
+
+
+def vars_filter(variable):
+    '''Return True unless the member's name starts with "pad" or is exactly "dword".'''
+    return not re.match(r'^pad\d*', variable.name) and variable.name != 'dword'
+
+
+class decl_dumper_t(decl_visitor.decl_visitor_t):
+    '''Declaration visitor that writes the C dump code for a struct, union or enum.'''
+    def __init__(self, stream, instance = '', decl = None):
+        decl_visitor.decl_visitor_t.__init__(self)
+        self.stream = stream
+        self._instance = instance
+        self.decl = decl
+
+    def clone(self):
+        return decl_dumper_t(self.stream, self._instance, self.decl)
+
+    def visit_class(self):
+        # Descend into each member accepted by vars_filter, extending the access path.
+        assert self.decl.class_type in ('struct', 'union')
+        for member in self.decl.variables(recursive = False):
+            if vars_filter(member):
+                dump_type(self.stream, self._instance + '.' + member.name, member.type)
+
+    def visit_enumeration(self):
+        expr = "(*ptr)" + self._instance
+        if not enums:  # numeric value only, no switch over the enumerator names
+            self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, expr))
+            return
+        self.stream.write(' switch(%s) {\n' % (expr,))
+        for name, _ in self.decl.values:
+            self.stream.write(' case %s:\n' % (name,))
+            self.stream.write(' debug_printf("\\t\\t%s = %s\\n");\n' % (self._instance, name))
+            self.stream.write(' break;\n')
+        self.stream.write(' default:\n')
+        self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, expr))
+        self.stream.write(' break;\n')
+        self.stream.write(' }\n')
+
+
+def dump_decl(stream, instance, decl):
+    '''Apply a decl_dumper_t for the access path instance to decl, writing to stream.'''
+    algorithm.apply_visitor(decl_dumper_t(stream, instance, decl), decl)
+
+
+class type_dumper_t(type_visitor.type_visitor_t):
+    '''Type visitor that writes one debug_printf() statement per scalar member.'''
+    def __init__(self, stream, instance, type_):
+        type_visitor.type_visitor_t.__init__(self)
+        self.stream = stream
+        self.instance = instance
+        self.type = type_
+
+    def clone(self):
+        return type_dumper_t(self.stream, self.instance, self.type)
+
+    def visit_bool(self):
+        self.print_instance('%i')
+
+    def visit_char(self):
+        # Integers are printed in hex; the decimal alternative would be '%i'.
+        self.print_instance('0x%x')
+
+    def visit_unsigned_char(self):
+        # Hex rather than '%u'.
+        self.print_instance('0x%x')
+
+    def visit_signed_char(self):
+        # Hex rather than '%i'.
+        self.print_instance('0x%x')
+
+    def visit_wchar(self):
+        self.print_instance('0x%x')
+
+    def visit_short_int(self):
+        # Hex rather than '%i'.
+        self.print_instance('0x%x')
+
+    def visit_short_unsigned_int(self):
+        # Hex rather than '%u'.
+        self.print_instance('0x%x')
+
+    def visit_int(self):
+        # Hex rather than '%i'.
+        self.print_instance('0x%x')
+
+    def visit_unsigned_int(self):
+        # Hex rather than '%u'.
+        self.print_instance('0x%x')
+
+    def visit_long_int(self):
+        # Hex rather than '%li'.
+        self.print_instance('0x%lx')
+
+    def visit_long_unsigned_int(self):
+        # Hex rather than '%lu'; the 'l' length modifier must follow the '%'.
+        self.print_instance('0x%lx')
+
+    def visit_long_long_int(self):
+        # Hex rather than '%lli'; the 'll' length modifier must follow the '%'.
+        self.print_instance('0x%llx')
+
+    def visit_long_long_unsigned_int(self):
+        # Hex rather than '%llu'.
+        self.print_instance('0x%llx')
+
+    def visit_float(self):
+        self.print_instance('%f')
+
+    def visit_double(self):
+        self.print_instance('%f')
+
+    def visit_array(self):
+        for i in range(type_traits.array_size(self.type)):
+            dump_type(self.stream, self.instance + '[%i]' % i, type_traits.base_type(self.type))
+
+    def visit_pointer(self):
+        self.print_instance('%p')
+
+    def visit_declarated(self):
+        # Declared type: unwrap it and let dump_decl handle the declaration.
+        decl = type_traits.remove_declarated(self.type)
+        dump_decl(self.stream, self.instance, decl)
+
+    def print_instance(self, format):  # format: printf conversion used for this member
+        self.stream.write(' debug_printf("\\t\\t%s = %s\\n", %s);\n' % (self.instance, format, "(*ptr)" + self.instance))
+
+
+
+def dump_type(stream, instance, type_):
+    '''Apply a type_dumper_t to type_ after passing it through type_traits.remove_alias.'''
+    resolved = type_traits.remove_alias(type_)
+    algorithm.apply_visitor(type_dumper_t(stream, instance, resolved), resolved)
+
+
+def dump_struct_interface(stream, class_, suffix = ';'):
+    '''Write the prototype of the dump function for class_, followed by suffix.'''
+    struct = class_.name
+    assert struct.startswith('brw_')
+    stream.write('void\n')
+    stream.write('%s(const struct %s *ptr)%s\n' % (struct[:4] + 'dump_' + struct[4:], struct, suffix))
+
+
+def dump_struct_implementation(stream, decls, class_):
+    '''Write the complete C definition of the dump function for class_ (decls is unused).'''
+    dump_struct_interface(stream, class_, suffix = '')
+    stream.write('{\n')
+    dump_decl(stream, '', class_)
+    stream.write('}\n' '\n')
+
+
+def dump_header(stream):
+    '''Write the license text followed by the @file banner comment to stream.'''
+    stream.write(copyright.strip() + '\n')
+    banner = (
+        '\n', '/**\n', ' * @file\n', ' * Dump i965 data structures.\n', ' *\n',
+        ' * Generated automatically from brw_structs.h by brw_structs_dump.py.\n',
+        ' */\n', '\n',
+    )
+    for line in banner:
+        stream.write(line)
+
+
+def dump_interfaces(decls, global_ns, names):
+    '''Generate brw_structs_dump.h: forward declarations plus one prototype per name.'''
+    stream = open('brw_structs_dump.h', 'wt')
+    try:
+        dump_header(stream)
+        stream.write('#ifndef BRW_STRUCTS_DUMP_H\n')
+        stream.write('#define BRW_STRUCTS_DUMP_H\n')
+        stream.write('\n')
+        for name in names:
+            stream.write('struct %s;\n' % (name,))
+        stream.write('\n')
+        for name in names:
+            (class_,) = global_ns.classes(name = name)
+            dump_struct_interface(stream, class_)
+            stream.write('\n')
+        stream.write('\n')
+        stream.write('#endif /* BRW_STRUCTS_DUMP_H */\n')
+    finally:
+        # Close (and flush) the header even if generation fails part-way.
+        stream.close()
+
+
+def dump_implementations(decls, global_ns, names):
+    '''Generate brw_structs_dump.c: the includes plus one dump function per name.'''
+    stream = open('brw_structs_dump.c', 'wt')
+    try:
+        dump_header(stream)
+        stream.write('#include "util/u_debug.h"\n')
+        stream.write('\n')
+        for header in ('brw_types.h', 'brw_structs.h', 'brw_structs_dump.h'):
+            stream.write('#include "%s"\n' % (header,))
+        stream.write('\n')
+        for name in names:
+            (class_,) = global_ns.classes(name = name)
+            dump_struct_implementation(stream, decls, class_)
+    finally:
+        stream.close()  # always close (and flush) the generated file
+
+
+def decl_filter(decl):
+    '''Accept declarations whose name starts with brw_, except brw_instruction.'''
+    excluded = ('brw_instruction',)
+    return decl.name not in excluded and decl.name.startswith('brw_')
+
+
+def main():
+    '''Parse the brw headers with pygccxml and write brw_structs_dump.h and .c.'''
+
+    headers = [
+        'brw_types.h',
+        'brw_structs.h',
+    ]
+
+    # Parser configuration: include path and compiler name.
+    config = parser.config_t(
+        include_paths = ['../../include'],
+        compiler = 'gcc',
+    )
+
+    # Parse both headers in one pass (COMPILATION_MODE.ALL_AT_ONCE).
+    decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE)
+    global_ns = declarations.get_global_namespace(decls)
+
+    # Alphabetical list of the class names accepted by decl_filter.
+    names = sorted(class_.name for class_ in global_ns.classes(decl_filter))
+
+    # Header first, then the implementation; both use the same name list.
+    dump_interfaces(decls, global_ns, names)
+    dump_implementations(decls, global_ns, names)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
new file mode 100644
index 0000000000..464013e7c4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -0,0 +1,95 @@
+
+#include "brw_context.h"
+#include "brw_pipe_rast.h"
+
+
+/**
+ * Decide whether the current state needs the software TNL fallback.
+ *
+ * Returns TRUE when a fallback is required, FALSE when the hardware path
+ * can be used. The flag overrides are tested first, then the vertex
+ * element count and the stride of the buffer feeding vertex element 0.
+ *
+ * NOTE(review): no caller of this static function exists in this file.
+ */
+static GLboolean need_swtnl( struct brw_context *brw )
+{
+ const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
+
+ /* If we don't require strict OpenGL conformance, never
+ * use fallbacks. If we're forcing fallbacks, always
+ * use fallbacks.
+ */
+ if (brw->flags.no_swtnl)
+ return FALSE;
+
+ if (brw->flags.force_swtnl)
+ return TRUE;
+
+ /* Exceeding hw limits on number of VS inputs?
+ */
+ if (brw->curr.num_vertex_elements == 0 ||
+ brw->curr.num_vertex_elements >= BRW_VEP_MAX) {
+ return TRUE;
+ }
+
+ /* Position array with zero stride?
+ *
+ * XXX: position isn't always at zero...
+ * XXX: eliminate zero-stride arrays
+ */
+ {
+ int ve0_vb = brw->curr.vertex_element[0].vertex_buffer_index;
+
+ if (brw->curr.vertex_buffer[ve0_vb].stride == 0)
+ return TRUE;
+ }
+
+ /* XXX: short-circuit
+ *
+ * Everything after this unconditional return is currently unreachable.
+ */
+ return FALSE;
+
+ if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) {
+ if (rast->poly_smooth)
+ return TRUE;
+
+ }
+
+ /* Lines, or triangles drawn in line fill mode on either face. */
+ if (brw->reduced_primitive == PIPE_PRIM_LINES ||
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+ (rast->fill_cw == PIPE_POLYGON_MODE_LINE ||
+ rast->fill_ccw == PIPE_POLYGON_MODE_LINE)))
+ {
+ /* BRW hardware will do AA lines, but they are non-conformant it
+ * seems. TBD whether we keep this fallback:
+ */
+ if (rast->line_smooth)
+ return TRUE;
+
+ /* XXX: was a fallback in mesa (gs doesn't get enough
+ * information to know when to reset stipple counter), but there
+ * must be a way around it.
+ */
+ if (rast->line_stipple_enable &&
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES ||
+ brw->primitive == PIPE_PRIM_LINE_LOOP ||
+ brw->primitive == PIPE_PRIM_LINE_STRIP))
+ return TRUE;
+ }
+
+
+ /* Points, or triangles drawn in point fill mode on either face. */
+ if (brw->reduced_primitive == PIPE_PRIM_POINTS ||
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+ (rast->fill_cw == PIPE_POLYGON_MODE_POINT ||
+ rast->fill_ccw == PIPE_POLYGON_MODE_POINT)))
+ {
+ if (rast->point_smooth)
+ return TRUE;
+ }
+
+ /* BRW hardware doesn't handle CLAMP texturing correctly;
+ * brw_wm_sampler_state:translate_wrap_mode() treats CLAMP
+ * as CLAMP_TO_EDGE instead. If we're using CLAMP, and
+ * we want strict conformance, force the fallback.
+ *
+ * XXX: need a workaround for this.
+ *
+ * No check is implemented for this case here.
+ */
+
+ /* Nothing stopping us from the fast path now */
+ return FALSE;
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h
new file mode 100644
index 0000000000..89e08a5c80
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_types.h
@@ -0,0 +1,21 @@
+/* GL-style scalar type names expressed as fixed-width integer and float
+ * types, plus GL_TRUE/GL_FALSE mapped onto TRUE/FALSE.
+ */
+#ifndef BRW_TYPES_H
+#define BRW_TYPES_H
+
+#include "pipe/p_compiler.h"
+
+typedef uint32_t GLuint;
+typedef uint8_t GLubyte;
+typedef uint16_t GLushort;
+typedef int32_t GLint;
+typedef int8_t GLbyte;
+typedef int16_t GLshort;
+typedef float GLfloat;
+
+/* no GLenum, translate all away */
+
+/* Booleans are stored in a single byte. */
+typedef uint8_t GLboolean;
+
+#define GL_FALSE FALSE
+#define GL_TRUE TRUE
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
new file mode 100644
index 0000000000..907ec56c6c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -0,0 +1,263 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+/* Row indices into the limits[] table defined in this file. */
+#define VS 0
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4
+
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one. So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5. There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide, in brw_*_state.c, the maximum
+ * number of threads it can support based on that.
+ *
+ * XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+/* Per-unit URB limits, indexed by VS, GS, CLP, SF and CS. */
+static const struct urb_limits {
+ GLuint min_nr_entries; /* entry count used when the layout is constrained */
+ GLuint preferred_nr_entries; /* entry count tried first */
+ GLuint min_entry_size; /* lower clamp applied to the requested entry size */
+ GLuint max_entry_size; /* not referenced anywhere in this file */
+} limits[CS+1] = {
+ { 16, 32, 1, 5 }, /* vs */
+ { 4, 8, 1, 5 }, /* gs */
+ { 5, 10, 1, 5 }, /* clp */
+ { 1, 8, 1, 12 }, /* sf */
+ { 1, 4, 1, 32 } /* cs */
+};
+
+
+/**
+ * Lay the five URB sections out back to back and test whether they fit.
+ *
+ * Writes the start offset of every section into brw->urb from the current
+ * entry counts and sizes, in the order VS, GS, CLIP, SF, CS. The VS, GS
+ * and CLIP sections all use vsize as their entry size.
+ *
+ * Returns non-zero when the end of the CS section does not exceed
+ * URB_SIZES(brw). The start offsets are written even when it does not fit.
+ */
+static GLboolean check_urb_layout( struct brw_context *brw )
+{
+ brw->urb.vs_start = 0;
+ brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+ brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
+ brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+ brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+ return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+}
+
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ *
+ * Recomputes the URB partitioning from the current CURBE, VS and SF entry
+ * sizes. The layout is only redone when one of the sizes outgrew the one
+ * stored in brw->urb, or when the previous layout was constrained and one
+ * of the sizes has shrunk. When it is redone, BRW_NEW_URB_FENCE is raised.
+ *
+ * Always returns 0. If even the minimum entry counts do not fit, the
+ * process is terminated with exit(1).
+ */
+static int recalculate_urb_fence( struct brw_context *brw )
+{
+ GLuint csize = brw->curbe.total_size;
+ GLuint vsize = brw->vs.prog_data->urb_entry_size;
+ GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+
+ /* Clamp each requested size up to the per-unit minimum. */
+ if (csize < limits[CS].min_entry_size)
+ csize = limits[CS].min_entry_size;
+
+ if (vsize < limits[VS].min_entry_size)
+ vsize = limits[VS].min_entry_size;
+
+ if (sfsize < limits[SF].min_entry_size)
+ sfsize = limits[SF].min_entry_size;
+
+ /* Redo the layout if any size grew, or if we are constrained and any
+ * size shrank.
+ */
+ if (brw->urb.vsize < vsize ||
+ brw->urb.sfsize < sfsize ||
+ brw->urb.csize < csize ||
+ (brw->urb.constrained && (brw->urb.vsize > vsize ||
+ brw->urb.sfsize > sfsize ||
+ brw->urb.csize > csize))) {
+
+
+ brw->urb.csize = csize;
+ brw->urb.sfsize = sfsize;
+ brw->urb.vsize = vsize;
+
+ /* Start from the preferred entry counts. */
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
+
+ brw->urb.constrained = 0;
+
+ /* IGDNG and G4X first try larger VS (and, on IGDNG, SF) entry counts;
+ * when those do not fit, fall back to the preferred counts and mark
+ * the layout constrained.
+ */
+ if (BRW_IS_IGDNG(brw)) {
+ brw->urb.nr_vs_entries = 128;
+ brw->urb.nr_sf_entries = 48;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ }
+ } else if (BRW_IS_G4X(brw)) {
+ brw->urb.nr_vs_entries = 64;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ }
+ }
+
+ /* Debug option: force the minimum entry counts. */
+ if (BRW_DEBUG & DEBUG_MIN_URB) {
+ brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+ brw->urb.constrained = 1;
+ }
+
+ /* This call also stores the section start offsets used below. */
+ if (!check_urb_layout(brw)) {
+ brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+
+ /* Mark us as operating with constrained nr_entries, so that next
+ * time we recalculate we'll resize the fences in the hope of
+ * escaping constrained mode and getting back to normal performance.
+ */
+ brw->urb.constrained = 1;
+
+ if (!check_urb_layout(brw)) {
+ /* This is impossible, given the maximal sizes of urb
+ * entries and the values for minimum nr of entries
+ * provided above.
+ */
+ debug_printf("couldn't calculate URB layout!\n");
+ exit(1);
+ }
+
+ if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+ debug_printf("URB CONSTRAINED\n");
+ }
+
+ /* Note that this label lives inside the enclosing if-body. */
+done:
+ if (BRW_DEBUG & DEBUG_URB)
+ debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+ brw->urb.vs_start,
+ brw->urb.gs_start,
+ brw->urb.clip_start,
+ brw->urb.sf_start,
+ brw->urb.cs_start,
+ URB_SIZES(brw));
+
+ brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+ }
+
+ return 0;
+}
+
+
+/* Tracked-state entry that installs recalculate_urb_fence() as its prepare
+ * hook, keyed on the CURBE offsets and on the VS and SF programs
+ * (presumably the hook is re-run whenever one of these flags is dirty --
+ * confirm against the state tracker).
+ */
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CURBE_OFFSETS,
+ .cache = (CACHE_NEW_VS_PROG |
+ CACHE_NEW_SF_PROG)
+ },
+ .prepare = recalculate_urb_fence
+};
+
+
+
+
+
+/**
+ * Build a URB_FENCE command from the section offsets stored in brw->urb
+ * and hand it to BRW_BATCH_STRUCT(). Always returns 0.
+ */
+int brw_upload_urb_fence(struct brw_context *brw)
+{
+ struct brw_urb_fence uf;
+ /* Start from an all-zero packet so unset fields are 0. */
+ memset(&uf, 0, sizeof(uf));
+
+ uf.header.opcode = CMD_URB_FENCE;
+ /* Length is expressed in dwords, minus two. */
+ uf.header.length = sizeof(uf)/4-2;
+ /* Request reallocation for every section. */
+ uf.header.vs_realloc = 1;
+ uf.header.gs_realloc = 1;
+ uf.header.clp_realloc = 1;
+ uf.header.sf_realloc = 1;
+ uf.header.vfe_realloc = 1;
+ uf.header.cs_realloc = 1;
+
+ /* The ordering below is correct, not the layout in the
+ * instruction.
+ *
+ * There are 256/384 urb reg pairs in total.
+ *
+ * Each fence is set to the start of the section that follows it;
+ * the CS fence is the total URB size.
+ */
+ uf.bits0.vs_fence = brw->urb.gs_start;
+ uf.bits0.gs_fence = brw->urb.clip_start;
+ uf.bits0.clp_fence = brw->urb.sf_start;
+ uf.bits1.sf_fence = brw->urb.cs_start;
+ uf.bits1.cs_fence = URB_SIZES(brw);
+
+ BRW_BATCH_STRUCT(brw, &uf);
+ return 0;
+}
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
new file mode 100644
index 0000000000..458058d668
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -0,0 +1,38 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_util.h"
+#include "brw_defines.h"
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h
new file mode 100644
index 0000000000..b5f9a36e7b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "brw_types.h"
+
+extern GLuint brw_count_bits( GLuint val );
+extern GLuint brw_translate_blend_factor( unsigned factor );
+extern GLuint brw_translate_blend_equation( unsigned mode );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
new file mode 100644
index 0000000000..e3ea5a3a13
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -0,0 +1,131 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "tgsi/tgsi_dump.h"
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_pipe_rast.h"
+
+
+
+/**
+ * Compile the vertex shader for one program key and store the result in
+ * the program cache.
+ *
+ * \param brw      driver context owning the cache
+ * \param vp       vertex shader to compile
+ * \param key      program key; copied bytewise into the compile state
+ * \param bo_out   receives the buffer filled in by brw_upload_cache()
+ *
+ * \return PIPE_OK on success, otherwise the error reported by
+ *         brw_get_program() or brw_upload_cache().
+ */
+static enum pipe_error do_vs_prog( struct brw_context *brw,
+                                   struct brw_vertex_shader *vp,
+                                   struct brw_vs_prog_key *key,
+                                   struct brw_winsys_buffer **bo_out)
+{
+   struct brw_vs_compile c;
+   const GLuint *code;
+   GLuint code_size;
+   enum pipe_error err;
+
+   /* Zero the whole compile state, then take a byte copy of the key so
+    * that any padding in it is carried over unchanged.
+    */
+   memset(&c, 0, sizeof(c));
+   memcpy(&c.key, key, sizeof(*key));
+
+   brw_init_compile(brw, &c.func);
+   c.vp = vp;
+
+   c.prog_data.nr_outputs = vp->info.num_outputs;
+   c.prog_data.nr_inputs = vp->info.num_inputs;
+
+   /* The TGSI token dump is unconditional at the moment. */
+   tgsi_dump(c.vp->tokens, 0);
+
+   /* Generate the GEN4 instructions. */
+   brw_vs_emit(&c);
+
+   /* Fetch the finished program. */
+   err = brw_get_program(&c.func, &code, &code_size);
+   if (err)
+      return err;
+
+   err = brw_upload_cache( &brw->cache, BRW_VS_PROG,
+                           &c.key, brw_vs_prog_key_size(&c.key),
+                           NULL, 0,
+                           code, code_size,
+                           &c.prog_data,
+                           &brw->vs.prog_data,
+                           bo_out);
+
+   return err ? err : PIPE_OK;
+}
+
+
+/**
+ * Make sure a compiled vertex program matching the current state is
+ * bound in brw->vs.
+ *
+ * Builds the program key from the current vertex shader id, the user
+ * clip plane count and the fragment shader signature, looks it up in the
+ * cache and compiles through do_vs_prog() on a miss.
+ *
+ * \return PIPE_OK on a cache hit or successful compile, otherwise the
+ *         error returned by do_vs_prog().
+ */
+static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
+{
+   struct brw_vertex_shader *shader = brw->curr.vertex_shader;
+   struct brw_fs_signature *fs_sig = &brw->curr.fragment_shader->signature;
+   struct brw_vs_prog_key key;
+
+   /* The key is zeroed first so that every byte not written below
+    * has a defined value.
+    */
+   memset(&key, 0, sizeof(key));
+
+   key.program_string_id = shader->id;
+   key.nr_userclip = brw->curr.ucp.nr;
+
+   memcpy(&key.fs_signature, fs_sig, brw_fs_signature_size(fs_sig));
+
+   /* Only compile when the cache has no program for this key. */
+   if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
+                         &key, brw_vs_prog_key_size(&key),
+                         NULL, 0,
+                         &brw->vs.prog_data,
+                         &brw->vs.prog_bo)) {
+      enum pipe_error err = do_vs_prog(brw, shader, &key, &brw->vs.prog_bo);
+
+      if (err)
+         return err;
+   }
+
+   return PIPE_OK;
+}
+
+
+/* Tracked-state entry that installs brw_upload_vs_prog() as its prepare
+ * hook, keyed on clip, rasterizer and fragment-signature state and on the
+ * bound vertex program.
+ */
+const struct brw_tracked_state brw_vs_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_CLIP |
+ PIPE_NEW_RAST |
+ PIPE_NEW_FRAGMENT_SIGNATURE),
+ .brw = BRW_NEW_VERTEX_PROGRAM,
+ .cache = 0
+ },
+ .prepare = brw_upload_vs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
new file mode 100644
index 0000000000..944d88c84c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -0,0 +1,106 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+/**
+ * Cache key identifying one compiled vertex program variant.
+ *
+ * fs_signature is the last member; only the part of it reported by
+ * brw_fs_signature_size() counts towards the key size (see the
+ * brw_vs_prog_key_size() macro).
+ */
+struct brw_vs_prog_key {
+ GLuint program_string_id;
+ GLuint nr_userclip:4;
+ /* NOTE(review): 4 + 26 bits leaves two bits of this 32-bit word unnamed;
+ * confirm whether pad was meant to be 28 bits wide.
+ */
+ GLuint pad:26;
+ struct brw_fs_signature fs_signature;
+};
+
+/* Size in bytes of the used part of a key: everything before fs_signature
+ * plus the used size of the signature itself. The argument s is a pointer
+ * to a struct brw_vs_prog_key.
+ */
+#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \
+ brw_fs_signature_size(&(s)->fs_signature))
+
+
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+
+/**
+ * Working state for compiling one vertex shader with one program key.
+ */
+struct brw_vs_compile {
+ struct brw_compile func; /* handed to brw_init_compile() / brw_get_program() */
+ struct brw_vs_prog_key key; /* byte copy of the key being compiled */
+ struct brw_vs_prog_data prog_data; /* uploaded to the cache with the program */
+ struct brw_chipset chipset;
+
+ struct brw_vertex_shader *vp; /* shader being compiled */
+
+ GLuint nr_inputs;
+ GLuint nr_outputs;
+ GLuint nr_immediates;
+ GLfloat immediate[128][4];
+
+ /* First GRF and number of GRFs set aside for outputs that do not fit
+ * in the message registers.
+ */
+ GLuint overflow_grf_start;
+ GLuint overflow_count;
+
+ /* Scratch register allocator: first_tmp is the base, last_tmp the next
+ * register to hand out.
+ */
+ GLuint first_tmp;
+ GLuint last_tmp;
+
+ struct brw_reg r0;
+ struct brw_reg r1;
+ /* Hardware register assigned to each TGSI register, by file and index. */
+ struct brw_reg regs[TGSI_FILE_COUNT][128];
+ struct brw_reg tmp;
+ struct brw_reg stack;
+
+ struct {
+ GLboolean used_in_src;
+ struct brw_reg reg;
+ } output_regs[128];
+
+ struct brw_reg userplane[6];
+
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
+
+ struct brw_instruction *if_inst[MAX_IF_DEPTH];
+ struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+ GLuint insn;
+ GLuint if_depth;
+ GLuint loop_depth;
+ GLuint end_offset;
+
+ struct brw_indirect stack_index;
+};
+
+
+void brw_vs_emit( struct brw_vs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
new file mode 100644
index 0000000000..8a16205d2f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -0,0 +1,1654 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+/* Pick the vec4 numbered 'slot' counting from GRF 'reg': every register
+ * contributes two slots, at sub-register offsets 0 and 4. The result is
+ * passed through stride() with the arguments (0, 4, 1).
+ */
+static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
+{
+   const int grf = reg + (slot >> 1);
+   const int offset = (slot & 1) ? 4 : 0;
+   struct brw_reg vec4 = brw_vec4_grf(grf, offset);
+
+   return stride(vec4, 0, 4, 1);
+}
+
+
+/**
+ * Hand out the next scratch GRF as a vec8 register.
+ *
+ * Advances c->last_tmp by one and raises c->prog_data.total_grf when the
+ * new value exceeds it.
+ */
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+   const GLuint nr = c->last_tmp;
+
+   c->last_tmp = nr + 1;
+   if (c->last_tmp > c->prog_data.total_grf) {
+      c->prog_data.total_grf = c->last_tmp;
+   }
+
+   return brw_vec8_grf(nr, 0);
+}
+
+/**
+ * Give a scratch register back. Only the most recently allocated one is
+ * actually freed; any other register is left allocated.
+ */
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+   const GLuint top = c->last_tmp - 1;
+
+   if (tmp.nr != top) {
+      return;
+   }
+
+   c->last_tmp = top;
+}
+
+/**
+ * Drop every scratch register by resetting the allocator to its base.
+ */
+static void release_tmps( struct brw_vs_compile *c )
+{
+   const GLuint base = c->first_tmp;
+
+   c->last_tmp = base;
+}
+
+
+/**
+ * Tell whether vertex shader output 'vs_output' carries the position,
+ * i.e. has semantic name TGSI_SEMANTIC_POSITION and semantic index 0.
+ */
+static boolean is_position_output( struct brw_vs_compile *c,
+                                   unsigned vs_output )
+{
+   const struct brw_vertex_shader *shader = c->vp;
+   const unsigned sem_name = shader->info.output_semantic_name[vs_output];
+   const unsigned sem_index = shader->info.output_semantic_index[vs_output];
+
+   if (sem_name != TGSI_SEMANTIC_POSITION) {
+      return FALSE;
+   }
+
+   return sem_index == 0;
+}
+
+
+/**
+ * Look up the fragment shader input that consumes a vertex shader output.
+ *
+ * The fragment signature stored in the program key is searched for the
+ * first input whose semantic name and index both match those of
+ * 'vs_output'.
+ *
+ * \param fs_input_slot  receives the matching input's position; left
+ *                       untouched when nothing matches
+ * \return TRUE when a match was found, FALSE otherwise
+ */
+static boolean find_output_slot( struct brw_vs_compile *c,
+                                  unsigned vs_output,
+                                  unsigned *fs_input_slot )
+{
+   const struct brw_vertex_shader *shader = c->vp;
+   const unsigned want_name = shader->info.output_semantic_name[vs_output];
+   const unsigned want_index = shader->info.output_semantic_index[vs_output];
+   unsigned slot = 0;
+
+   while (slot < c->key.fs_signature.nr_inputs) {
+      if (c->key.fs_signature.input[slot].semantic == want_name &&
+          c->key.fs_signature.input[slot].semantic_index == want_index) {
+         *fs_input_slot = slot;
+         return TRUE;
+      }
+      slot++;
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
+ * ahead of time.
+ *
+ * Registers are handed out in this order: r0, the curbe contents (user
+ * clip planes, immediates, constants), inputs, output overflow space,
+ * the position output, temporaries, address registers, constant-buffer
+ * scratch registers and the flow-control stack. The scratch allocator
+ * (first_tmp/last_tmp) starts right after those. The function also fills
+ * in nr_params, curb_read_length, urb_read_length, urb_entry_size and
+ * total_grf in c->prog_data, and sets c->vp->use_const_buffer.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c )
+{
+ GLuint i, reg = 0, subreg = 0, mrf;
+ int attributes_in_vue;
+
+ /* Determine whether to use a real constant buffer or use a block
+ * of GRF registers for constants. The latter is faster but only
+ * works if everything fits in the GRF.
+ * XXX this heuristic/check may need some fine tuning...
+ */
+ if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+ c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF)
+ c->vp->use_const_buffer = GL_TRUE;
+ else {
+ /* XXX: immediates can go elsewhere if necessary:
+ */
+ assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF);
+
+ c->vp->use_const_buffer = GL_FALSE;
+ }
+
+ /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
+
+ /* r0 -- reserved as usual
+ */
+ c->r0 = brw_vec8_grf(reg, 0);
+ reg++;
+
+ /* From here until "All regs allocated", subreg counts vec4 slots
+ * relative to reg; each GRF provides two of them.
+ */
+
+ /* User clip planes from curbe:
+ */
+ if (c->key.nr_userclip) {
+ /* Skip over fixed planes: Or never read them into vs unit?
+ */
+ subreg += 6;
+
+ for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
+ c->userplane[i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
+ }
+
+ /* Deal with curbe alignment:
+ */
+ subreg = align(subreg, 2);
+ /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
+ }
+
+
+ /* Immediates: always in the curbe.
+ *
+ * XXX: Can try to encode some immediates as brw immediates
+ * XXX: Make sure ureg sets minimal immediate size and respect it
+ * here.
+ */
+ for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
+ c->regs[TGSI_FILE_IMMEDIATE][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
+ }
+ /* Four parameters per immediate vec4. */
+ c->prog_data.nr_params = c->vp->info.immediate_count * 4;
+
+
+ /* Vertex constant buffer.
+ *
+ * Constants from the buffer can be either cached in the curbe or
+ * loaded as needed from the actual constant buffer.
+ */
+ if (!c->vp->use_const_buffer) {
+ GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+ for (i = 0; i < nr_params; i++, subreg++) {
+ c->regs[TGSI_FILE_CONSTANT][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
+ }
+
+ c->prog_data.nr_params += nr_params * 4;
+ }
+
+ /* All regs allocated
+ *
+ * Convert the vec4 slot count into whole registers, rounding up. The
+ * curbe read length excludes r0, hence the "- 1".
+ */
+ reg += (subreg + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+
+
+ /* Allocate input regs:
+ */
+ c->nr_inputs = c->vp->info.num_inputs;
+ for (i = 0; i < c->nr_inputs; i++) {
+ c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+ /* If there are no inputs, we'll still be reading one attribute's worth
+ * because it's required -- see urb_read_length setting.
+ */
+ if (c->nr_inputs == 0)
+ reg++;
+
+
+
+ /* Allocate outputs. The non-position outputs go straight into message regs.
+ */
+ c->nr_outputs = c->prog_data.nr_outputs;
+
+ /* First message register available for non-position outputs. */
+ if (c->chipset.is_igdng)
+ mrf = 8;
+ else
+ mrf = 4;
+
+
+ /* Reserve GRFs for outputs that cannot live in message registers.
+ *
+ * NOTE(review): this reservation is sized from nr_inputs - BRW_MAX_MRF
+ * and does not account for mrf, while the loop below diverts an output
+ * to the overflow area as soon as slot + mrf >= BRW_MAX_MRF -- confirm
+ * the two are meant to agree.
+ */
+ if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+ c->overflow_grf_start = reg;
+ c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+ reg += c->overflow_count;
+ }
+
+ /* XXX: need to access vertex output semantics here:
+ */
+ for (i = 0; i < c->nr_outputs; i++) {
+ unsigned slot;
+
+ /* XXX: Put output position in slot zero always. Clipper, etc,
+ * need access to this reg.
+ */
+ if (is_position_output(c, i)) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
+ reg++;
+ }
+ else if (find_output_slot(c, i, &slot)) {
+
+ if (0 /* is_psize_output(c, i) */ ) {
+ /* c->psize_out.grf = reg; */
+ /* c->psize_out.mrf = i; */
+ }
+
+ /* The first (16-4) outputs can go straight into the message regs.
+ */
+ if (slot + mrf < BRW_MAX_MRF) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+ }
+ else {
+ /* NOTE(review): for BRW_MAX_MRF - mrf <= slot < BRW_MAX_MRF the
+ * offset slot - BRW_MAX_MRF lands below overflow_grf_start.
+ */
+ int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+ }
+ }
+ else {
+ /* Output has no matching fragment shader input: write to null. */
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
+ }
+ }
+
+ /* Allocate program temporaries:
+ */
+
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1; i++) {
+ c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+ /* Address reg(s). Don't try to use the internal address reg until
+ * deref time.
+ */
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_ADDRESS]+1; i++) {
+ c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+ reg,
+ 0,
+ BRW_REGISTER_TYPE_D,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XXXX,
+ BRW_WRITEMASK_X);
+ reg++;
+ }
+
+ /* With a real constant buffer, set aside three registers for loaded
+ * constants; index -1 marks each one as holding nothing yet.
+ */
+ if (c->vp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
+#if 0
+ for (i = 0; i < 128; i++) {
+ if (c->output_regs[i].used_in_src) {
+ c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+#endif
+
+ /* The flow-control stack takes two registers. */
+ if (c->vp->has_flow_control) {
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+ }
+
+ /* Some opcodes need an internal temporary:
+ */
+ c->first_tmp = reg;
+ c->last_tmp = reg; /* for allocation purposes */
+
+ /* Each input reg holds data from two vertices. The
+ * urb_read_length is the number of registers read from *each*
+ * vertex urb, so is half the amount:
+ */
+ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+
+ /* Setting this field to 0 leads to undefined behavior according to
+ * the VS_STATE docs. Our VUEs will always have at least one attribute
+ * sitting in them, even if it's padding.
+ */
+ if (c->prog_data.urb_read_length == 0)
+ c->prog_data.urb_read_length = 1;
+
+ /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+ * them to fit the biggest thing they need to.
+ */
+ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
+ /* Add 6 (IGDNG) or 2 (others) to the attribute count, then divide by
+ * four rounding up.
+ */
+ if (c->chipset.is_igdng)
+ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+ else
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+
+ c->prog_data.total_grf = reg;
+
+ if (BRW_DEBUG & DEBUG_VS) {
+ debug_printf("%s NumAddrRegs %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_ADDRESS]+1);
+ debug_printf("%s NumTemps %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1);
+ debug_printf("%s reg = %d\n", __FUNCTION__, reg);
+ }
+}
+
+
+/**
+ * Run a one-source emitter, routing the result through a scratch register
+ * when the destination and the source name the same register (same file
+ * and same nr).
+ *
+ * In the aliased case func writes to a temporary carrying dst's
+ * writemask; the temporary is then copied to dst with a MOV and released.
+ */
+static void unalias1( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      void (*func)( struct brw_vs_compile *,
+                                    struct brw_reg,
+                                    struct brw_reg ))
+{
+   struct brw_reg scratch;
+
+   /* No overlap: emit straight into the destination. */
+   if (dst.file != arg0.file || dst.nr != arg0.nr) {
+      func(c, dst, arg0);
+      return;
+   }
+
+   scratch = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, scratch, arg0);
+   brw_MOV(&c->func, dst, scratch);
+   release_tmp(c, scratch);
+}
+
+/**
+ * \sa unalias1
+ * Two-source variant: the result goes through a scratch register when the
+ * destination shares register file and number with either source.
+ *
+ * \param c     vertex shader compile state
+ * \param dst   destination register; its writemask is applied to the scratch
+ * \param arg0  first source operand
+ * \param arg1  second source operand
+ * \param func  emitter, invoked as func(c, dst-or-scratch, arg0, arg1)
+ */
+static void unalias2( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      void (*func)( struct brw_vs_compile *,
+                                    struct brw_reg,
+                                    struct brw_reg,
+                                    struct brw_reg ))
+{
+   const GLboolean hits_arg0 = (dst.file == arg0.file && dst.nr == arg0.nr);
+   const GLboolean hits_arg1 = (dst.file == arg1.file && dst.nr == arg1.nr);
+   struct brw_reg scratch;
+
+   if (!hits_arg0 && !hits_arg1) {
+      func(c, dst, arg0, arg1);
+      return;
+   }
+
+   /* Emit into the scratch register, then copy the result into dst. */
+   scratch = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, scratch, arg0, arg1);
+   brw_MOV(&c->func, dst, scratch);
+   release_tmp(c, scratch);
+}
+
+/**
+ * \sa unalias1
+ * Three-source variant: the result goes through a scratch register when the
+ * destination shares register file and number with any of the sources.
+ *
+ * \param c     vertex shader compile state
+ * \param dst   destination register; its writemask is applied to the scratch
+ * \param arg0  first source operand
+ * \param arg1  second source operand
+ * \param arg2  third source operand
+ * \param func  emitter, invoked as func(c, dst-or-scratch, arg0, arg1, arg2)
+ */
+static void unalias3( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      struct brw_reg arg2,
+                      void (*func)( struct brw_vs_compile *,
+                                    struct brw_reg,
+                                    struct brw_reg,
+                                    struct brw_reg,
+                                    struct brw_reg ))
+{
+   const GLboolean hits_arg0 = (dst.file == arg0.file && dst.nr == arg0.nr);
+   const GLboolean hits_arg1 = (dst.file == arg1.file && dst.nr == arg1.nr);
+   const GLboolean hits_arg2 = (dst.file == arg2.file && dst.nr == arg2.nr);
+   struct brw_reg scratch;
+
+   if (!hits_arg0 && !hits_arg1 && !hits_arg2) {
+      func(c, dst, arg0, arg1, arg2);
+      return;
+   }
+
+   /* Emit into the scratch register, then copy the result into dst. */
+   scratch = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, scratch, arg0, arg1, arg2);
+   brw_MOV(&c->func, dst, scratch);
+   release_tmp(c, scratch);
+}
+
+/**
+ * Emit a "set on comparison" sequence: move 0.0 into dst, compare arg0
+ * against arg1 with \p cond (compare result discarded into the null
+ * register), move 1.0 into dst, then reset the predicate flag value to 0xff.
+ *
+ * NOTE(review): presumably the second MOV is predicated on the compare
+ * result, which is what turns this into "dst = cond ? 1.0 : 0.0" -- confirm
+ * against brw_CMP().
+ */
+static void emit_sop( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      GLuint cond)
+{
+   const struct brw_reg value_before = brw_imm_f(0.0f);
+   const struct brw_reg value_after = brw_imm_f(1.0f);
+
+   brw_MOV(p, dst, value_before);
+   brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+   brw_MOV(p, dst, value_after);
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/** SEQ: emit_sop() with the "equal" condition. */
+static void
+emit_seq( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+}
+
+/** SNE: emit_sop() with the "not equal" condition. */
+static void
+emit_sne( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+}
+/** SLT: emit_sop() with the "less than" condition. */
+static void
+emit_slt( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+}
+
+/** SLE: emit_sop() with the "less than or equal" condition. */
+static void
+emit_sle( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+}
+
+/** SGT: emit_sop() with the "greater than" condition. */
+static void
+emit_sgt( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+}
+
+/** SGE: emit_sop() with the "greater than or equal" condition. */
+static void
+emit_sge( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+}
+
+/**
+ * MAX: compare arg0 < arg1 (result discarded into the null register), then
+ * SEL with sources (arg1, arg0), and finally switch predication back off.
+ *
+ * NOTE(review): presumably SEL takes its first source where the compare
+ * held, giving dst = (arg0 < arg1) ? arg1 : arg0 -- confirm against brw_SEL().
+ */
+static void
+emit_max( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   const struct brw_reg discard = brw_null_reg();
+
+   brw_CMP(p, discard, BRW_CONDITIONAL_L, arg0, arg1);
+   brw_SEL(p, dst, arg1, arg0);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+/**
+ * MIN: compare arg0 < arg1 (result discarded into the null register), then
+ * SEL with sources (arg0, arg1), and finally switch predication back off.
+ *
+ * NOTE(review): presumably SEL takes its first source where the compare
+ * held, giving dst = (arg0 < arg1) ? arg0 : arg1 -- confirm against brw_SEL().
+ */
+static void
+emit_min( struct brw_compile *p, struct brw_reg dst,
+          struct brw_reg arg0, struct brw_reg arg1 )
+{
+   const struct brw_reg discard = brw_null_reg();
+
+   brw_CMP(p, discard, BRW_CONDITIONAL_L, arg0, arg1);
+   brw_SEL(p, dst, arg0, arg1);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+/**
+ * Emit a one-operand math instruction (brw_math() with scalar data and no
+ * saturation).
+ *
+ * The math result is written straight to dst only when dst is a general
+ * register with a full (0xf) writemask; otherwise it is produced in a
+ * temporary and copied to dst with a MOV.
+ *
+ * \param c          vertex shader compile state
+ * \param function   BRW_MATH_FUNCTION_x selector
+ * \param dst        destination register
+ * \param arg0       source operand
+ * \param precision  BRW_MATH_PRECISION_x selector
+ */
+static void emit_math1( struct brw_vs_compile *c,
+                        GLuint function,
+                        struct brw_reg dst,
+                        struct brw_reg arg0,
+                        GLuint precision)
+{
+   /* Kludge for writemasked math results: SEND shows odd behaviour on the
+    * simulator, and the GEN4 processor is documented not to do dependency
+    * control properly on SEND results.  Going through a temporary may be
+    * necessary whether or not the failures are a simulator bug.
+    */
+   struct brw_compile *p = &c->func;
+   const GLboolean direct = (dst.dw1.bits.writemask == 0xf &&
+                             dst.file == BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg result = dst;
+
+   if (!direct)
+      result = get_tmp(c);
+
+   brw_math(p, result, function, BRW_MATH_SATURATE_NONE, 2, arg0,
+            BRW_MATH_DATA_SCALAR, precision);
+
+   if (!direct) {
+      brw_MOV(p, dst, result);
+      release_tmp(c, result);
+   }
+}
+
+
+/**
+ * Emit a two-operand math instruction.
+ *
+ * The second operand is first copied into message register 3; the math
+ * instruction itself is then issued with arg0 exactly as in emit_math1().
+ * As there, the result only goes straight to dst when dst is a general
+ * register with a full (0xf) writemask; otherwise a temporary plus a MOV
+ * is used.
+ *
+ * \param c          vertex shader compile state
+ * \param function   BRW_MATH_FUNCTION_x selector
+ * \param dst        destination register
+ * \param arg0       first source operand
+ * \param arg1       second source operand (passed via message register 3)
+ * \param precision  BRW_MATH_PRECISION_x selector
+ */
+static void emit_math2( struct brw_vs_compile *c,
+                        GLuint function,
+                        struct brw_reg dst,
+                        struct brw_reg arg0,
+                        struct brw_reg arg1,
+                        GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean direct = (dst.dw1.bits.writemask == 0xf &&
+                             dst.file == BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg result = dst;
+
+   if (!direct)
+      result = get_tmp(c);
+
+   brw_MOV(p, brw_message_reg(3), arg1);
+
+   brw_math(p, result, function, BRW_MATH_SATURATE_NONE, 2, arg0,
+            BRW_MATH_DATA_SCALAR, precision);
+
+   if (!direct) {
+      brw_MOV(p, dst, result);
+      release_tmp(c, result);
+   }
+}
+
+
+/**
+ * Emit the EXP instruction, one piece per channel enabled in dst's
+ * writemask:
+ *
+ *   x: 2 ^ floor(arg0.x), built by placing floor(arg0.x) + 127 in the
+ *      float exponent field (shift left by 23)
+ *   y: arg0.x - floor(arg0.x)
+ *   z: mathbox EXP of arg0.x, full precision
+ *   w: 1.0
+ *
+ * emit_insn() calls this through unalias1().
+ */
+static void emit_exp_noalias( struct brw_vs_compile *c,
+                              struct brw_reg dst,
+                              struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint enabled = dst.dw1.bits.writemask;
+
+   if (enabled & BRW_WRITEMASK_X) {
+      struct brw_reg scratch = get_tmp(c);
+      struct brw_reg scratch_d = retype(scratch, BRW_REGISTER_TYPE_D);
+      struct brw_reg dst_x_d =
+         brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), BRW_WRITEMASK_X);
+
+      /* scratch_d = floor(arg0.x) */
+      brw_RNDD(p, scratch_d, brw_swizzle1(arg0, 0));
+
+      /* Bias the exponent for floating point: exp += 127 */
+      brw_ADD(p, brw_writemask(scratch_d, BRW_WRITEMASK_X),
+              scratch_d, brw_imm_d(127));
+
+      /* Install exponent and sign; excess bits drop off the edge. */
+      brw_SHL(p, dst_x_d, scratch_d, brw_imm_d(23));
+
+      release_tmp(c, scratch);
+   }
+
+   if (enabled & BRW_WRITEMASK_Y) {
+      /* result[1] = arg0.x - floor(arg0.x) */
+      brw_FRC(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0, 0));
+   }
+
+   if (enabled & BRW_WRITEMASK_Z) {
+      /* As with LOG, a taylor expansion might be the better choice here,
+       * given all the preparation work that is needed anyway.
+       *
+       * If mathbox partial precision turns out too low, also consider:
+       *   result[2] = result[0] * EXP(result[1])
+       */
+      emit_math1(c,
+                 BRW_MATH_FUNCTION_EXP,
+                 brw_writemask(dst, BRW_WRITEMASK_Z),
+                 brw_swizzle1(arg0, 0),
+                 BRW_MATH_PRECISION_FULL);
+   }
+
+   if (enabled & BRW_WRITEMASK_W) {
+      /* result[3] = 1.0 */
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), brw_imm_f(1));
+   }
+}
+
+/**
+ * Emit the LOG instruction, one piece per channel enabled in dst's
+ * writemask:
+ *
+ *   x: ((bits(arg0.x) & 0x7fffffff) >> 23) - 127, converted on the final
+ *      ADD from the integer view to the float-typed register
+ *   y: (bits(arg0.x) & 0x007fffff) | (127 << 23)
+ *   z: x + mathbox LOG of y, full precision
+ *   w: 1.0
+ *
+ * x and y are also computed when only z is requested, because z is built
+ * from them.  All work happens in dst itself when dst is a general register
+ * with a full (0xf) writemask; otherwise it happens in a temporary that is
+ * copied to dst (through dst's writemask) at the end.
+ *
+ * emit_insn() calls this through unalias1().
+ */
+static void emit_log_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+ GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp) {
+ tmp = get_tmp(c);
+ tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ }
+
+ /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
+ * according to spec:
+ *
+ * These almost look like they could be joined up, but that is not
+ * really practical:
+ *
+ * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
+ * result[1].i = (x.i & ((1<<23)-1)) + (127<<23)
+ */
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_XZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_X),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1U<<31)-1));
+
+ brw_SHR(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_X),
+ tmp_ud,
+ brw_imm_ud(23));
+
+ brw_ADD(p,
+ brw_writemask(tmp, BRW_WRITEMASK_X),
+ retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
+ brw_imm_d(-127));
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_YZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_Y),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1<<23)-1));
+
+ brw_OR(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_Y),
+ tmp_ud,
+ brw_imm_ud(127<<23));
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) {
+ /* result[2] = result[0] + LOG2(result[1]); */
+
+ /* Why bother? The above is just a hint how to do this with a
+ * taylor series. Maybe we *should* use a taylor series as by
+ * the time all the above has been done it's almost certainly
+ * quicker than calling the mathbox, even with low precision.
+ *
+ * Options are:
+ * - result[0] + mathbox.LOG2(result[1])
+ * - mathbox.LOG2(arg0.x)
+ * - result[0] + inline_taylor_approx(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_LOG,
+ brw_writemask(tmp, BRW_WRITEMASK_Z),
+ brw_swizzle1(tmp, 1),
+ BRW_MATH_PRECISION_FULL);
+
+ brw_ADD(p,
+ brw_writemask(tmp, BRW_WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(tmp, 0));
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_W), brw_imm_f(1));
+ }
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+/**
+ * Emit the DST instruction, one operation per channel enabled in dst's
+ * writemask:
+ *
+ *   x: 1.0
+ *   y: arg0 * arg1
+ *   z: arg0
+ *   w: arg1
+ *
+ * Callers need to unalias -- consider swizzles: r0 = DST r0.xxxx r1.
+ * emit_insn() calls this through unalias2().
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c,
+                              struct brw_reg dst,
+                              struct brw_reg arg0,
+                              struct brw_reg arg1)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint enabled = dst.dw1.bits.writemask;
+
+   /* There must be a better way to do this. */
+   if (enabled & BRW_WRITEMASK_X) {
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_X), brw_imm_f(1.0));
+   }
+   if (enabled & BRW_WRITEMASK_Y) {
+      brw_MUL(p, brw_writemask(dst, BRW_WRITEMASK_Y), arg0, arg1);
+   }
+   if (enabled & BRW_WRITEMASK_Z) {
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Z), arg0);
+   }
+   if (enabled & BRW_WRITEMASK_W) {
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), arg1);
+   }
+}
+
+
+/**
+ * XPD (cross product): a MUL of t.yzxw by u.zxyw into the null register,
+ * followed by a MAC of -t.zxyw by u.yzxw into dst.
+ *
+ * NOTE(review): this relies on the MUL leaving its product in the
+ * accumulator for the MAC to add to -- confirm against the EU docs.
+ */
+static void emit_xpd( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg t,
+                      struct brw_reg u)
+{
+   const struct brw_reg t_yzx = brw_swizzle(t, 1,2,0,3);
+   const struct brw_reg u_zxy = brw_swizzle(u, 2,0,1,3);
+   const struct brw_reg t_zxy = brw_swizzle(t, 2,0,1,3);
+   const struct brw_reg u_yzx = brw_swizzle(u, 1,2,0,3);
+
+   brw_MUL(p, brw_null_reg(), t_yzx, u_zxy);
+   brw_MAC(p, dst, negate(t_zxy), u_yzx);
+}
+
+
+/**
+ * Emit the LIT instruction.
+ *
+ * dst.yz is first set to 0 and dst.xw to 1.  Then, inside an IF on
+ * arg0.x > 0, dst.y is loaded with arg0.x and dst.z with
+ * POW(tmp.z, arg0.w), where tmp.z receives arg0.y after a compare of
+ * arg0.y > 0.
+ *
+ * tmp is dst itself when dst is a general register, otherwise a scratch
+ * register from get_tmp().  The scratch is only released when it was
+ * actually allocated here; releasing dst would hand a register this
+ * function never owned to the temporary allocator.
+ *
+ * NOTE(review): when a scratch is used, tmp.z is only written by the MOV
+ * that follows the arg0.y > 0 compare.  If that MOV is predicated, tmp.z
+ * is never initialised for arg0.y <= 0 -- verify.
+ *
+ * emit_insn() calls this through unalias1().
+ */
+static void emit_lit_noalias( struct brw_vs_compile *c,
+                              struct brw_reg dst,
+                              struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_YZ), brw_imm_f(0));
+   brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_XW), brw_imm_f(1));
+
+   /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_8);
+   {
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0,0));
+
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+      brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(arg0,1));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+      emit_math2(c,
+                 BRW_MATH_FUNCTION_POW,
+                 brw_writemask(dst, BRW_WRITEMASK_Z),
+                 brw_swizzle1(tmp, 2),
+                 brw_swizzle1(arg0, 3),
+                 BRW_MATH_PRECISION_PARTIAL);
+   }
+
+   brw_ENDIF(p, if_insn);
+
+   /* Only give back what get_tmp() handed out above. */
+   if (need_tmp)
+      release_tmp(c, tmp);
+}
+
+/**
+ * Emit the LRP instruction as three native operations:
+ *
+ *   dst  = 1.0 - arg0
+ *   null = dst * arg2
+ *   dst  = MAC(arg0, arg1)
+ *
+ * NOTE(review): this relies on the MUL into the null register leaving its
+ * product in the accumulator, so that the MAC yields
+ * arg0 * arg1 + (1 - arg0) * arg2 -- confirm against the EU docs.
+ *
+ * dst is written before arg2 and arg1 are read, which is why emit_insn()
+ * calls this through unalias3().
+ */
+static void emit_lrp_noalias(struct brw_vs_compile *c,
+                             struct brw_reg dst,
+                             struct brw_reg arg0,
+                             struct brw_reg arg1,
+                             struct brw_reg arg2)
+{
+   struct brw_compile *p = &c->func;
+   const struct brw_reg one = brw_imm_f(1.0);
+   const struct brw_reg minus_arg0 = negate(arg0);
+
+   brw_ADD(p, dst, minus_arg0, one);
+   brw_MUL(p, brw_null_reg(), dst, arg2);
+   brw_MAC(p, dst, arg0, arg1);
+}
+
+/**
+ * 3 or 4-component vector normalization.
+ *
+ * A scratch register receives dot(arg0, arg0), is replaced by its
+ * reciprocal square root (mathbox RSQ, full precision), and dst is then
+ * set to arg0 multiplied by that scratch.
+ *
+ * \param num_comps  3 selects a DP3 for the dot product; any other value
+ *                   selects a DP4
+ */
+static void emit_nrm( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      int num_comps)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg scale = get_tmp(c);
+
+   /* scale = dot(arg0, arg0) */
+   if (num_comps != 3) {
+      brw_DP4(p, scale, arg0, arg0);
+   }
+   else {
+      brw_DP3(p, scale, arg0, arg0);
+   }
+
+   /* scale = 1 / sqrt(scale) */
+   emit_math1(c, BRW_MATH_FUNCTION_RSQ, scale, scale, BRW_MATH_PRECISION_FULL);
+
+   /* dst = arg0 * scale */
+   brw_MUL(p, dst, arg0, scale);
+
+   release_tmp(c, scale);
+}
+
+/**
+ * Return a register holding constant \p index, read from the vertex
+ * constant buffer (SURF_INDEX_VERT_CONST_BUFFER) for source slot
+ * \p argIndex.
+ *
+ * Each of the three source slots has its own destination register and
+ * remembers the index it last fetched (c->current_const[argIndex]); the
+ * read is skipped when the same index is requested again without relative
+ * addressing.  With \p relAddr set the read is always issued, followed by
+ * a second read (oword 1, using the upper half of the address register)
+ * into a temporary whose upper four dwords are merged into the result.
+ * Without \p relAddr the returned region replicates the lower four floats
+ * (XYZWXYZW).
+ *
+ * \param argIndex  source operand slot, must be < 3
+ * \param index     constant index; the byte offset used is 16 * index
+ * \param relAddr   non-zero for address-register relative access
+ */
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+ GLuint argIndex,
+ GLuint index,
+ GLboolean relAddr)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg;
+ struct brw_reg const2_reg;
+
+ assert(argIndex < 3);
+
+ if (c->current_const[argIndex].index != index || relAddr) {
+ struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
+
+ c->current_const[argIndex].index = index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src.Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+ /* need to fetch the constant now */
+ brw_dp_READ_4_vs(p,
+ c->current_const[argIndex].reg,/* writeback dest */
+ 0, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+
+ if (relAddr) {
+ /* second read */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+ }
+ }
+
+ const_reg = c->current_const[argIndex].reg;
+
+ if (relAddr) {
+ /* merge the two Owords into the constant register; const2_reg is
+ * always set here because relAddr forces the fetch branch above */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
+ }
+ else {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
+
+ return const_reg;
+}
+
+
+
+/**
+ * Look up the statically allocated register for (file, index).
+ *
+ * Temporary, input, output and constant registers must already have been
+ * assigned a non-zero register number; only index 0 of the address file
+ * is valid.  The null file yields the null register, as does (after an
+ * assertion failure) any other file.
+ *
+ * TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+                               enum tgsi_file_type file,
+                               GLuint index )
+{
+   struct brw_reg result = brw_null_reg();
+
+   switch (file) {
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_OUTPUT:
+   case TGSI_FILE_CONSTANT:
+      assert(c->regs[file][index].nr != 0);
+      result = c->regs[file][index];
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      assert(index == 0);
+      result = c->regs[file][index];
+      break;
+
+   case TGSI_FILE_NULL:
+      /* undef values: keep the null register */
+      break;
+
+   default:
+      assert(0);
+      break;
+   }
+
+   return result;
+}
+
+
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ *
+ * The value is fetched in two halves into a temporary: for each half the
+ * hardware address register a0 is loaded with a word from the program's
+ * address register plus a constant byte offset, and four dwords are then
+ * moved through the indirect region.  The second half uses the address
+ * word found 8 sub-elements further on.
+ *
+ * \param arg     first register of the array being indexed; contributes
+ *                arg.nr * 32 + arg.subnr bytes to the offset
+ * \param offset  constant element offset; contributes offset * 16 bytes
+ * \return the temporary, viewed as a vec8.  It is not released here, so
+ *         it stays allocated until the caller releases its temporaries.
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+ struct brw_reg arg,
+ GLint offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg addr_reg = c->regs[TGSI_FILE_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+ GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+ {
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* This is pretty clunky - load the address register twice and
+ * fetch each 4-dword value in turn. There must be a way to do
+ * this in a single pass, but I couldn't get it to work.
+ */
+ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+ brw_MOV(p, tmp, indirect);
+
+ brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+ brw_MOV(p, suboffset(tmp, 4), indirect);
+
+ brw_pop_insn_state(p);
+ }
+
+ /* NOTE: tmp not released */
+ return vec8(tmp);
+}
+
+
+/**
+ * Get the brw reg for a source operand given as (file, index).
+ *
+ * - temporary / input / output: the statically allocated register, or a
+ *   deref() from the first register of the file when \p relAddr is set
+ * - immediate: the statically allocated register
+ * - constant: get_constant() when the program uses a constant buffer,
+ *   otherwise handled like the files above
+ * - address: only index 0 is valid
+ * - null: the null register (normal, callers loop over all three sources)
+ *
+ * Any other file asserts and yields the null register.
+ *
+ * \param argIndex  source operand slot, forwarded to get_constant()
+ * \param relAddr   non-zero for address-register relative access
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+             GLuint argIndex,
+             GLuint file,
+             GLint index,
+             GLboolean relAddr )
+{
+   if (file == TGSI_FILE_TEMPORARY ||
+       file == TGSI_FILE_INPUT ||
+       file == TGSI_FILE_OUTPUT) {
+      if (relAddr)
+         return deref(c, c->regs[file][0], index);
+
+      assert(c->regs[file][index].nr != 0);
+      return c->regs[file][index];
+   }
+
+   if (file == TGSI_FILE_IMMEDIATE)
+      return c->regs[file][index];
+
+   if (file == TGSI_FILE_CONSTANT) {
+      if (c->vp->use_const_buffer)
+         return get_constant(c, argIndex, index, relAddr);
+
+      if (relAddr)
+         return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
+
+      assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
+      return c->regs[TGSI_FILE_CONSTANT][index];
+   }
+
+   if (file == TGSI_FILE_ADDRESS) {
+      assert(index == 0);
+      return c->regs[file][index];
+   }
+
+   /* Anything other than the null file is unexpected here. */
+   if (file != TGSI_FILE_NULL)
+      assert(0);
+
+   return brw_null_reg();
+}
+
+
+/**
+ * Emit ARL (address register load): dst = RNDD(arg0) * 16.
+ *
+ * The rounded value is staged in dst itself when dst is a general
+ * register, otherwise in a scratch register.
+ *
+ * NOTE(review): the factor 16 presumably turns a vec4 index into a byte
+ * offset, matching the "* 16" byte offsets used by deref() and
+ * get_constant() -- confirm.
+ */
+static void emit_arl( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean use_scratch = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg rounded = use_scratch ? get_tmp(c) : dst;
+
+   brw_RNDD(p, rounded, arg0);                  /* rounded = RNDD(arg0) */
+   brw_MUL(p, dst, rounded, brw_imm_d(16));     /* dst = rounded * 16 */
+
+   if (use_scratch) {
+      release_tmp(c, rounded);
+   }
+}
+
+
+/**
+ * Return the brw reg for the given instruction's src argument.
+ *
+ * A null-file source yields the plain null register.  For every other
+ * source the register found by get_src_reg() gets the operand's swizzle
+ * and negate flag applied.
+ *
+ * \param src       TGSI source operand
+ * \param argIndex  position of the operand within the instruction
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+                               const struct tgsi_full_src_register *src,
+                               GLuint argIndex )
+{
+   struct brw_reg operand;
+
+   if (src->Register.File == TGSI_FILE_NULL) {
+      return brw_null_reg();
+   }
+
+   operand = get_src_reg(c, argIndex,
+                         src->Register.File,
+                         src->Register.Index,
+                         src->Register.Indirect);
+
+   /* Convert 3-bit swizzle to 2-bit. */
+   operand.dw1.bits.swizzle = BRW_SWIZZLE4(src->Register.SwizzleX,
+                                           src->Register.SwizzleY,
+                                           src->Register.SwizzleZ,
+                                           src->Register.SwizzleW);
+
+   if (src->Register.Negate)
+      operand.negate = 1;
+   else
+      operand.negate = 0;
+
+   /* XXX: abs, absneg */
+
+   return operand;
+}
+
+
+/**
+ * Get brw register for the given program dest register.
+ *
+ * Temporary and output registers must already have a non-zero register
+ * number assigned; only index 0 of the address file is valid.  The null
+ * file yields the null register, as does (after an assertion failure) any
+ * other file.  \p writemask is installed on whatever register is returned.
+ */
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+                               unsigned file,
+                               unsigned index,
+                               unsigned writemask )
+{
+   struct brw_reg target;
+
+   if (file == TGSI_FILE_TEMPORARY || file == TGSI_FILE_OUTPUT) {
+      assert(c->regs[file][index].nr != 0);
+      target = c->regs[file][index];
+   }
+   else if (file == TGSI_FILE_ADDRESS) {
+      assert(index == 0);
+      target = c->regs[file][index];
+   }
+   else {
+      /* The null file may be hit for OPCODE_END, OPCODE_KIL, etc;
+       * anything else is unexpected.
+       */
+      if (file != TGSI_FILE_NULL)
+         assert(0);
+      target = brw_null_reg();
+   }
+
+   target.dw1.bits.writemask = writemask;
+
+   return target;
+}
+
+
+
+
+/**
+ * Post-vertex-program processing. Send the results to the URB.
+ *
+ * In emission order:
+ *  - ndc is built in a temporary: all channels get 1 / pos.w, then xyz are
+ *    multiplied by pos (so ndc.w keeps 1 / pos.w);
+ *  - message register m1 receives the header: zero, or -- when point size
+ *    is written, user clip planes are enabled or the chip is a 965 -- a
+ *    dword whose w channel collects the scaled point size, one bit per
+ *    user clip plane and bit 6 for the negative-rhw workaround;
+ *  - m2 receives ndc and m3 receives pos; on IGDNG pos is written to m7 as
+ *    well and the header length becomes 6 instead of 2;
+ *  - one URB write is issued for the registers that fit in the MRF, with
+ *    end-of-thread set only when there are no overflowed outputs;
+ *  - overflowed outputs are copied from the GRF to the MRF starting at m4
+ *    and written with further URB writes, the last of which ends the
+ *    thread.
+ *
+ * NOTE(review): the GLuint i declared in the header branch shadows the
+ * function-level int i.
+ * NOTE(review): in the overflow loop the URB destination offset is i-1,
+ * which is -1 on the first pass, and 4+j can run past the MRF when nr
+ * approaches BRW_MAX_MRF -- both look unintended; verify against the URB
+ * write message definition.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg m0 = brw_message_reg(0);
+ struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
+ struct brw_reg ndc;
+ int eot;
+ int i;
+ GLuint len_vertext_header = 2;
+
+ /* Build ndc coords */
+ ndc = get_tmp(c);
+ /* ndc = 1.0 / pos.w */
+ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ /* ndc.xyz = pos * ndc */
+ brw_MUL(p, brw_writemask(ndc, BRW_WRITEMASK_XYZ), pos, ndc);
+
+ /* Update the header for point size, user clipping flags, and -ve rhw
+ * workaround.
+ */
+ if (c->prog_data.writes_psiz ||
+ c->key.nr_userclip ||
+ c->chipset.is_965)
+ {
+ struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ GLuint i;
+
+ brw_MOV(p, header1, brw_imm_ud(0));
+
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ if (c->prog_data.writes_psiz) {
+ struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_PSIZ];
+ brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+ }
+
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+ brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<i));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ /* i965 clipping workaround:
+ * 1) Test for -ve rhw
+ * 2) If set,
+ * set ndc = (0,0,0,0)
+ * set ucp[6] = 1
+ *
+ * Later, clipping will detect ucp[6] and ensure the primitive is
+ * clipped against all fixed planes.
+ */
+ if (c->chipset.is_965) {
+ brw_CMP(p,
+ vec8(brw_null_reg()),
+ BRW_CONDITIONAL_L,
+ brw_swizzle1(ndc, 3),
+ brw_imm_f(0));
+
+ brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_MOV(p, ndc, brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ brw_set_access_mode(p, BRW_ALIGN_1); /* why? */
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ release_tmp(c, header1);
+ }
+ else {
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+ }
+
+ /* Emit the (interleaved) headers for the two vertices - an 8-reg
+ * of zeros followed by two sets of NDC coordinates:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, offset(m0, 2), ndc);
+
+ if (c->chipset.is_igdng) {
+ /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
+ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
+ /* m4, m5 contain the distances from vertex to the user clip planes
+ * (XXX), which seem to be useless for us.
+ * m6 is used for aligning, so that the remainder of vertex element is
+ * reg-aligned.
+ */
+ brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
+ len_vertext_header = 6;
+ } else {
+ brw_MOV(p, offset(m0, 3), pos);
+ len_vertext_header = 2;
+ }
+
+ eot = (c->overflow_count == 0);
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 0, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
+ 0, /* response len */
+ eot, /* eot */
+ eot, /* writes complete */
+ 0, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+ unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+ GLuint j;
+
+ eot = (i + nr >= c->overflow_count);
+
+ /* XXX I'm not 100% sure about which MRF regs to use here. Starting
+ * at mrf[4] atm...
+ */
+ for (j = 0; j < nr; j++) {
+ brw_MOV(p, brw_message_reg(4+j),
+ brw_vec8_grf(c->overflow_grf_start + i + j, 0));
+ }
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 4, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ nr+1, /* msg len */
+ 0, /* response len */
+ eot, /* eot */
+ eot, /* writes complete */
+ i-1, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+ }
+}
+
+
+/**
+ * Called after code generation to resolve subroutine calls and patch the
+ * END instruction.
+ *
+ * When more than one instruction separates END from the vertex-write code,
+ * END's second source is set to that distance times 16 so that it jumps
+ * past subroutines, etc; otherwise END is turned into a NOP.
+ *
+ * \param end_inst  points to brw code for END instruction
+ * \param last_inst points to last instruction emitted before vertex write
+ */
+static void
+post_vs_emit( struct brw_vs_compile *c,
+              struct brw_instruction *end_inst,
+              struct brw_instruction *last_inst )
+{
+   GLint distance;
+
+   brw_resolve_cals(&c->func);
+
+   distance = last_inst - end_inst;
+   if (distance <= 1) {
+      /* Nothing to skip: END simply falls through. */
+      end_inst->header.opcode = BRW_OPCODE_NOP;
+      return;
+   }
+
+   brw_set_src1(end_inst, brw_imm_d(distance * 16));
+}
+/**
+ * Select the predicate control used for flow-control instructions
+ * (emit_insn() calls this for IF, BRK, CONT and BRA).
+ *
+ * The selection based on the instruction's condition mask and swizzle is
+ * compiled out, so BRW_PREDICATE_NORMAL is returned for every instruction
+ * and \p inst is currently unused.
+ */
+static uint32_t
+get_predicate(const struct tgsi_full_instruction *inst)
+{
+ /* XXX: disabling for now
+ */
+#if 0
+ if (inst->dst.CondMask == COND_TR)
+ return BRW_PREDICATE_NONE;
+
+ /* All of GLSL only produces predicates for COND_NE and one channel per
+ * vector. Fail badly if someone starts doing something else, as it might
+ * mean infinite looping or something.
+ *
+ * We'd like to support all the condition codes, but our hardware doesn't
+ * quite match the Mesa IR, which is modeled after the NV extensions. For
+ * those, the instruction may update the condition codes or not, then any
+ * later instruction may use one of those condition codes. For gen4, the
+ * instruction may update the flags register based on one of the condition
+ * codes output by the instruction, and then further instructions may
+ * predicate on that. We can probably support this, but it won't
+ * necessarily be easy.
+ */
+/* assert(inst->dst.CondMask == COND_NE); */
+
+ switch (inst->dst.CondSwizzle) {
+ case SWIZZLE_XXXX:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ case SWIZZLE_YYYY:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ case SWIZZLE_ZZZZ:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ case SWIZZLE_WWWW:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ default:
+ debug_printf("Unexpected predicate: 0x%08x\n",
+ inst->dst.CondMask);
+ return BRW_PREDICATE_NORMAL;
+ }
+#else
+ return BRW_PREDICATE_NORMAL;
+#endif
+}
+
+/**
+ * Translate one TGSI instruction into native code.
+ *
+ * The three source operands and the destination are resolved to brw
+ * registers first; the opcode then selects either a single native
+ * instruction or one of the emit_xxx() helper sequences.  Helpers whose
+ * sequences write dst before they have finished reading their sources are
+ * invoked through unalias1/2/3().  All temporaries are released on the way
+ * out.
+ *
+ * Saturation is not supported (a debug message is printed), and
+ * unsupported opcodes are reported but otherwise ignored.
+ *
+ * \param c     vertex shader compile state
+ * \param inst  TGSI instruction to translate
+ */
+static void emit_insn(struct brw_vs_compile *c,
+                      const struct tgsi_full_instruction *inst)
+{
+   unsigned opcode = inst->Instruction.Opcode;
+   unsigned label = inst->Label.Label;
+   struct brw_compile *p = &c->func;
+   struct brw_reg args[3], dst;
+   GLuint i;
+
+#if 0
+   printf("%d: ", insn);
+   _mesa_print_instruction(inst);
+#endif
+
+   /* Get argument regs.
+    */
+   for (i = 0; i < 3; i++) {
+      args[i] = get_arg(c, &inst->Src[i], i);
+   }
+
+   /* Get dest regs.  Note that it is possible for a reg to be both
+    * dst and arg, given the static allocation of registers.  So
+    * care needs to be taken emitting multi-operation instructions.
+    */
+   dst = get_dst(c,
+                 inst->Dst[0].Register.File,
+                 inst->Dst[0].Register.Index,
+                 inst->Dst[0].Register.WriteMask);
+
+   /* XXX: saturate
+    */
+   if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
+      debug_printf("Unsupported saturate in vertex shader");
+   }
+
+   switch (opcode) {
+   case TGSI_OPCODE_ABS:
+      brw_MOV(p, dst, brw_abs(args[0]));
+      break;
+   case TGSI_OPCODE_ADD:
+      brw_ADD(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_COS:
+      emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_DP3:
+      brw_DP3(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_DP4:
+      brw_DP4(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_DPH:
+      brw_DPH(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_NRM:
+      emit_nrm(c, dst, args[0], 3);
+      break;
+   case TGSI_OPCODE_NRM4:
+      emit_nrm(c, dst, args[0], 4);
+      break;
+   case TGSI_OPCODE_DST:
+      unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+      break;
+   case TGSI_OPCODE_EXP:
+      unalias1(c, dst, args[0], emit_exp_noalias);
+      break;
+   case TGSI_OPCODE_EX2:
+      emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_ARL:
+      emit_arl(c, dst, args[0]);
+      break;
+   case TGSI_OPCODE_FLR:
+      brw_RNDD(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_FRC:
+      brw_FRC(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_LOG:
+      unalias1(c, dst, args[0], emit_log_noalias);
+      break;
+   case TGSI_OPCODE_LG2:
+      emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_LIT:
+      unalias1(c, dst, args[0], emit_lit_noalias);
+      break;
+   case TGSI_OPCODE_LRP:
+      unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+      break;
+   case TGSI_OPCODE_MAD:
+      brw_MOV(p, brw_acc_reg(), args[2]);
+      brw_MAC(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_MAX:
+      emit_max(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_MIN:
+      emit_min(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_MOV:
+      brw_MOV(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_MUL:
+      brw_MUL(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_POW:
+      emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_RCP:
+      emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_RSQ:
+      emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst,
+                 brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_SEQ:
+      emit_seq(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SIN:
+      emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_SNE:
+      emit_sne(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SGE:
+      emit_sge(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SGT:
+      emit_sgt(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SLT:
+      emit_slt(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SLE:
+      emit_sle(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SUB:
+      brw_ADD(p, dst, args[0], negate(args[1]));
+      break;
+   case TGSI_OPCODE_TRUNC:
+      /* round toward zero */
+      brw_RNDZ(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_XPD:
+      emit_xpd(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_IF:
+      assert(c->if_depth < MAX_IF_DEPTH);
+      c->if_inst[c->if_depth] = brw_IF(p, BRW_EXECUTE_8);
+      /* Note that brw_IF smashes the predicate_control field. */
+      c->if_inst[c->if_depth]->header.predicate_control = get_predicate(inst);
+      c->if_depth++;
+      break;
+   case TGSI_OPCODE_ELSE:
+      /* An ELSE without an open IF would index if_inst[-1]. */
+      assert(c->if_depth > 0);
+      c->if_inst[c->if_depth-1] = brw_ELSE(p, c->if_inst[c->if_depth-1]);
+      break;
+   case TGSI_OPCODE_ENDIF:
+      assert(c->if_depth > 0);
+      brw_ENDIF(p, c->if_inst[--c->if_depth]);
+      break;
+   case TGSI_OPCODE_BGNLOOP:
+      c->loop_inst[c->loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+      break;
+   case TGSI_OPCODE_BRK:
+      brw_set_predicate_control(p, get_predicate(inst));
+      brw_BREAK(p);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_CONT:
+      brw_set_predicate_control(p, get_predicate(inst));
+      brw_CONT(p);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_ENDLOOP:
+      {
+         struct brw_instruction *inst0, *inst1;
+         GLuint br = 1;
+
+         /* An ENDLOOP without an open loop would index loop_inst[-1]. */
+         assert(c->loop_depth > 0);
+         c->loop_depth--;
+
+         if (c->chipset.is_igdng)
+            br = 2;
+
+         inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]);
+         /* Patch all the BREAK/CONT instructions from last BEGINLOOP.
+          *
+          * The walk is over native instructions, so the test has to use
+          * the hardware opcodes, not the TGSI ones.
+          *
+          * NOTE(review): BREAK/CONT instructions belonging to an already
+          * closed inner loop lie in the same range and get patched again
+          * here -- verify whether that is intended.
+          */
+         while (inst0 > c->loop_inst[c->loop_depth]) {
+            inst0--;
+            if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+               inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+               inst0->bits3.if_else.pop_count = 0;
+            }
+            else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+               inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+               inst0->bits3.if_else.pop_count = 0;
+            }
+         }
+      }
+      break;
+   case TGSI_OPCODE_BRA:
+      brw_set_predicate_control(p, get_predicate(inst));
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_CAL:
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_ADD(p, deref_1d(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_ADD(p, get_addr_reg(c->stack_index),
+              get_addr_reg(c->stack_index), brw_imm_d(4));
+      brw_save_call(p, label, p->nr_insn);
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      break;
+   case TGSI_OPCODE_RET:
+      brw_ADD(p, get_addr_reg(c->stack_index),
+              get_addr_reg(c->stack_index), brw_imm_d(-4));
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_MOV(p, brw_ip_reg(), deref_1d(c->stack_index, 0));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      break;
+   case TGSI_OPCODE_END:
+      c->end_offset = p->nr_insn;
+      /* this instruction will get patched later to jump past subroutine
+       * code, etc.
+       */
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      break;
+   case TGSI_OPCODE_BGNSUB:
+      brw_save_label(p, p->nr_insn, p->nr_insn);
+      break;
+   case TGSI_OPCODE_ENDSUB:
+      /* no-op */
+      break;
+   default:
+      debug_printf("Unsupported opcode %i (%s) in vertex shader",
+                   opcode,
+                   tgsi_get_opcode_name(opcode));
+   }
+
+   /* Set the predication update on the last instruction of the native
+    * instruction sequence.
+    *
+    * This would be problematic if it was set on a math instruction,
+    * but that shouldn't be the case with the current GLSL compiler.
+    */
+#if 0
+   /* XXX: disabled
+    */
+   if (inst->CondUpdate) {
+      struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+      assert(hw_insn->header.destreg__conditionalmod == 0);
+      hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+   }
+#endif
+
+   release_tmps(c);
+}
+
+
+/**
+ * Emit the native code for a vertex program.
+ *
+ * Registers are allocated statically first; when the program has flow
+ * control, the stack index register is then loaded with the address of
+ * the call stack.  Every TGSI instruction token is translated by
+ * emit_insn(), the vertex-write epilogue is appended, and finally calls
+ * and the END instruction are patched by post_vs_emit().  With DEBUG_VS
+ * set the TGSI input and the native result are dumped.
+ */
+void brw_vs_emit(struct brw_vs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   const struct tgsi_token *tokens = c->vp->tokens;
+   const GLboolean dump = (BRW_DEBUG & DEBUG_VS) ? GL_TRUE : GL_FALSE;
+   struct brw_instruction *end_inst, *last_inst;
+   struct tgsi_parse_context parse;
+
+   if (dump)
+      tgsi_dump(c->vp->tokens, 0);
+
+   c->stack_index = brw_indirect(0, 0);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   /* Static register allocation */
+   brw_vs_alloc_regs(c);
+
+   if (c->vp->has_flow_control) {
+      brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+   }
+
+   /* Instructions: declarations and immediates need no code here. */
+   for (tgsi_parse_init( &parse, tokens );
+        !tgsi_parse_end_of_tokens( &parse ); ) {
+      tgsi_parse_token( &parse );
+
+      if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+         emit_insn( c, &parse.FullToken.FullInstruction );
+      }
+      else if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION &&
+               parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_IMMEDIATE) {
+         assert( 0 );
+      }
+   }
+   tgsi_parse_free( &parse );
+
+   end_inst = &p->store[c->end_offset];
+   last_inst = &p->store[p->nr_insn];
+
+   /* The END instruction will be patched to jump to this code */
+   emit_vertex_write(c);
+
+   post_vs_emit(c, end_inst, last_inst);
+
+   if (dump) {
+      debug_printf("vs-native:\n");
+      brw_disasm(stderr, p->store, p->nr_insn);
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
new file mode 100644
index 0000000000..dadbb622e4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -0,0 +1,201 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+
+#include "brw_debug.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+/* Key used to look up, and if necessary create, the VS unit state in
+ * the state cache.  vs_unit_populate_key() zeroes the whole struct and
+ * then fills every field from the current context.
+ */
+struct brw_vs_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+
+ unsigned int curbe_offset;
+
+ unsigned int nr_urb_entries, urb_size;
+
+ unsigned int nr_surfaces;
+};
+
+/**
+ * Fill in the VS unit cache key from the current context state.
+ *
+ * The key is cleared before any field is written.  The tag above each
+ * group names the dirty flag that covers the state being read.
+ */
+static void
+vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   /* PIPE_NEW_CLIP
+    *
+    * Note that we read in the userclip planes as well, hence
+    * clip_start when any user clip planes are active.
+    */
+   key->curbe_offset = (brw->curr.ucp.nr ?
+                        brw->curbe.clip_start :
+                        brw->curbe.vs_start);
+
+   /* BRW_NEW_NR_VS_SURFACES */
+   key->nr_surfaces = brw->vs.nr_surfaces;
+
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
+   key->nr_urb_entries = brw->urb.nr_vs_entries;
+
+   /* CACHE_NEW_VS_PROG */
+   key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
+   key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+   key->total_grf = brw->vs.prog_data->total_grf;
+}
+
+/**
+ * Build the VS unit hardware state described by key and upload it to the
+ * state cache.
+ *
+ * reloc is the single relocation passed along with the upload (the
+ * kernel start pointer is left at zero for it to fill in); the resulting
+ * buffer is returned through bo_out.
+ *
+ * Returns PIPE_OK, or the error reported by brw_upload_cache().
+ */
+static enum pipe_error
+vs_unit_create_from_key(struct brw_context *brw,
+                        struct brw_vs_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
+                        struct brw_winsys_buffer **bo_out)
+{
+   struct brw_vs_unit_state vs;
+   enum pipe_error ret;
+   int chipset_max_threads;
+
+   /* Upper bound on VS threads for this chipset. */
+   if (BRW_IS_IGDNG(brw)) {
+      chipset_max_threads = 72;
+   }
+   else if (BRW_IS_G4X(brw)) {
+      chipset_max_threads = 32;
+   }
+   else {
+      chipset_max_threads = 16;
+   }
+
+   memset(&vs, 0, sizeof(vs));
+
+   vs.thread0.kernel_start_pointer = 0; /* reloc */
+   vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+
+   vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   /* Choosing multiple program flow means that we may get 2-vertex threads,
+    * which will have the channel mask for dwords 4-7 enabled in the thread,
+    * and those dwords will be written to the second URB handle when we
+    * brw_urb_WRITE() results.
+    */
+   vs.thread1.single_program_flow = 0;
+
+   /* A zero entry count is a hardware requirement on IGDNG. */
+   vs.thread1.binding_table_entry_count =
+      BRW_IS_IGDNG(brw) ? 0 : key->nr_surfaces;
+
+   vs.thread3.dispatch_grf_start_reg = 1;
+   vs.thread3.urb_entry_read_offset = 0;
+   vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+   vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+
+   vs.thread4.nr_urb_entries =
+      BRW_IS_IGDNG(brw) ? key->nr_urb_entries >> 2 : key->nr_urb_entries;
+   vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+   vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+                                  1, chipset_max_threads) - 1;
+
+   /* Debug override: must come after the normal max_threads setup. */
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD) {
+      vs.thread4.max_threads = 0;
+   }
+
+   if (BRW_DEBUG & DEBUG_STATS) {
+      vs.thread4.stats_enable = 1;
+   }
+
+   /* No samplers for ARB_vp programs.
+    * It has to be set to 0 for IGDNG.
+    */
+   vs.vs5.sampler_count = 0;
+
+   /* Vertex program always enabled:
+    */
+   vs.vs6.vs_enable = 1;
+
+   ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
+                          key, sizeof(*key),
+                          reloc, 1,
+                          &vs, sizeof(vs),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/**
+ * Prepare hook for the VS unit state atom.
+ *
+ * Builds the cache key and the program relocation, then either picks up
+ * a matching buffer from the cache or creates a new one, leaving the
+ * result in brw->vs.state_bo.
+ */
+static int prepare_vs_unit(struct brw_context *brw)
+{
+   struct brw_winsys_reloc reloc[1];
+   struct brw_vs_unit_key key;
+   unsigned grf_reg_count;
+
+   vs_unit_populate_key(brw, &key);
+
+   /* Same expression as thread0.grf_reg_count in
+    * vs_unit_create_from_key().
+    */
+   grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+
+   /* Emit VS program relocation */
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              grf_reg_count << 1,
+              offsetof(struct brw_vs_unit_state, thread0),
+              brw->vs.prog_bo);
+
+   if (!brw_search_cache(&brw->cache, BRW_VS_UNIT,
+                         &key, sizeof(key),
+                         reloc, 1,
+                         NULL,
+                         &brw->vs.state_bo)) {
+      /* Cache miss: build and upload fresh unit state. */
+      enum pipe_error ret;
+
+      ret = vs_unit_create_from_key(brw, &key, reloc, &brw->vs.state_bo);
+      if (ret)
+         return ret;
+   }
+
+   return PIPE_OK;
+}
+/* State atom for the VS unit state.  prepare_vs_unit() is installed as
+ * the prepare hook; the dirty masks list the pipe, brw and cache flags
+ * this atom is registered against.
+ */
+const struct brw_tracked_state brw_vs_unit = {
+ .dirty = {
+ .mesa = (PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_VS_SURFACES |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = prepare_vs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
new file mode 100644
index 0000000000..177a5170d2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -0,0 +1,232 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_winsys.h"
+
+/* XXX: disabled true constant buffer functionality
+ */
+
+
+/* Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ *
+ * NOTE(review): compiled out with #if 0.  As written, the bo_alloc() and
+ * bo_subdata() calls do not match the prototypes in brw_winsys.h
+ * (bo_alloc returns enum pipe_error and hands the buffer back through a
+ * bo_out argument; bo_subdata takes a data_type argument), and the
+ * result is held in a drm_intel_bo pointer although the function returns
+ * struct brw_winsys_buffer.  This needs porting before it is re-enabled.
+ */
+#if 0
+static struct brw_winsys_buffer *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+ /* XXX: true constant buffers
+ */
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ if (!vp->use_const_buffer)
+ return NULL;
+
+ const_buffer = brw->sws->bo_alloc(brw->sws,
+ BRW_BUFFER_TYPE_SHADER_CONSTANTS,
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ brw->sws->bo_subdata(const_buffer, 0, size, params->ParameterValues,
+ NULL, 0);
+
+ return const_buffer;
+}
+#endif
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
+ *
+ * Returns PIPE_OK on success (including the case where no constant
+ * buffer is needed and the surface BO is simply dropped), otherwise the
+ * error reported by one of the helpers.
+ *
+ * NOTE(review): still compiled out.  `vp` and `params` are not declared
+ * in this function, and the call to brw_vs_update_constant_buffer() does
+ * not match that function's (also disabled) definition, so further
+ * porting is needed before the #if 0 can be removed.
+ */
+#if 0
+static enum pipe_error
+brw_update_vs_constant_surface( struct brw_context *brw,
+                                GLuint surf)
+{
+   struct brw_surface_key key;
+   struct pipe_buffer *cb = brw->curr.vs_constants;
+   enum pipe_error ret;
+
+   assert(surf == 0);
+
+   /* If we're in this state update atom, we need to update VS constants, so
+    * free the old buffer and create a new one for the new contents.
+    */
+   ret = brw_vs_update_constant_buffer(brw, &vp->const_buffer);
+   if (ret)
+      return ret;
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.  bo_reference() takes the address of the slot being cleared.
+    */
+   if (vp->const_buffer == NULL) {
+      bo_reference(&brw->vs.surf_bo[surf], NULL);
+      return PIPE_OK;
+   }
+
+   memset(&key, 0, sizeof(key));
+
+   key.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   key.bo = vp->const_buffer;
+   key.depthmode = GL_NONE;
+   key.pitch = params->NumParameters;
+   key.width = params->NumParameters;
+   key.height = 1;
+   key.depth = 1;
+   key.cpp = 16;
+
+   /*
+   printf("%s:\n", __FUNCTION__);
+   printf(" width %d height %d depth %d cpp %d pitch %d\n",
+          key.width, key.height, key.depth, key.cpp, key.pitch);
+   */
+
+   if (brw_search_cache(&brw->surface_cache,
+                        BRW_SS_SURFACE,
+                        &key, sizeof(key),
+                        &key.bo, key.bo ? 1 : 0,
+                        NULL,
+                        &brw->vs.surf_bo[surf]))
+      return PIPE_OK;
+
+   ret = brw_create_constant_surface(brw, &key,
+                                     &brw->vs.surf_bo[surf]);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+#endif
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ *
+ * The real implementation is compiled out, so at present this returns
+ * PIPE_OK without writing *bo_out.
+ *
+ * The disabled path builds one relocation per VS surface slot and caches
+ * an all-zero table of BRW_VS_MAX_SURF entries with those relocations,
+ * returning the cached buffer through bo_out.
+ */
+static enum pipe_error
+brw_vs_get_binding_table(struct brw_context *brw,
+                         struct brw_winsys_buffer **bo_out)
+{
+#if 0
+   static GLuint data[BRW_VS_MAX_SURF]; /* always zero */
+   struct brw_winsys_reloc reloc[BRW_VS_MAX_SURF];
+   enum pipe_error ret;
+   int i;
+
+   /* Emit binding table relocations to surface state */
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      make_reloc(&reloc[i],
+                 BRW_USAGE_STATE,
+                 0,
+                 i * 4,
+                 brw->vs.surf_bo[i]);
+   }
+
+   ret = brw_cache_data( &brw->surface_cache,
+                         BRW_SS_SURF_BIND,
+                         NULL, 0,
+                         reloc, BRW_VS_MAX_SURF,
+                         data, sizeof data,
+                         NULL, NULL,
+                         bo_out);
+   if (ret)
+      return ret;
+
+   /* data has static storage, so there is nothing to FREE() here. */
+   return PIPE_OK;
+#else
+   return PIPE_OK;
+#endif
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * This consumes the state updates for the constant buffer needing
+ * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
+ * CACHE_NEW_SURF_BIND for the binding table upload.
+ *
+ * The constant-surface update is currently compiled out, so only the
+ * binding table is refreshed, and only while brw->vs.nr_surfaces is
+ * non-zero.
+ *
+ * Returns PIPE_OK, or the error from brw_vs_get_binding_table().
+ */
+static enum pipe_error prepare_vs_surfaces(struct brw_context *brw )
+{
+   enum pipe_error ret;
+
+#if 0
+   int i;
+   int nr_surfaces = 0;
+
+   /* The only context in scope here is brw. */
+   brw_update_vs_constant_surface(brw, SURF_INDEX_VERT_CONST_BUFFER);
+
+   /* The surface count is one past the highest slot in use. */
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      if (brw->vs.surf_bo[i] != NULL) {
+         nr_surfaces = i + 1;
+      }
+   }
+
+   if (brw->vs.nr_surfaces != nr_surfaces) {
+      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+      brw->vs.nr_surfaces = nr_surfaces;
+   }
+#endif
+
+   /* Note that we don't end up updating the bind_bo if we don't have a
+    * surface to be pointing at.  This should be relatively harmless, as it
+    * just slightly increases our working set size.
+    */
+   if (brw->vs.nr_surfaces != 0) {
+      ret = brw_vs_get_binding_table(brw, &brw->vs.bind_bo);
+      if (ret)
+         return ret;
+   }
+
+   return PIPE_OK;
+}
+/* State atom for the VS surfaces.  prepare_vs_surfaces() is installed
+ * as the prepare hook; the atom is registered against vertex constant
+ * and vertex shader changes only.
+ */
+const struct brw_tracked_state brw_vs_surfaces = {
+ .dirty = {
+ .mesa = (PIPE_NEW_VERTEX_CONSTANTS |
+ PIPE_NEW_VERTEX_SHADER),
+ .brw = 0,
+ .cache = 0
+ },
+ .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
new file mode 100644
index 0000000000..a242e31218
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -0,0 +1,309 @@
+/**************************************************************************
+ *
+ * Copyright © 2009 Jakob Bornecrantz
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_WINSYS_H
+#define BRW_WINSYS_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_refcnt.h"
+
+struct brw_winsys;
+struct pipe_fence_handle;
+
+/* Not sure why the winsys needs this:
+ */
+#define BRW_BATCH_SIZE (32*1024)
+
+struct brw_winsys_screen;
+
+/* Need a tiny bit of information inside the abstract buffer struct:
+ *
+ * reference - reference count, manipulated through bo_reference()
+ * sws - winsys screen whose bo_destroy() hook bo_reference() calls to
+ * destroy the buffer
+ * size - used as the map length by bo_map_read(); presumably the buffer
+ * size in bytes - TODO confirm units
+ */
+struct brw_winsys_buffer {
+ struct pipe_reference reference;
+ struct brw_winsys_screen *sws;
+ unsigned size;
+};
+
+
+/* Buffer creation types, passed as the type argument of
+ * brw_winsys_screen::bo_alloc().
+ *
+ * Should be possible to validate relocation usages (enum
+ * brw_buffer_usage) against these buffer creation types:
+ */
+enum brw_buffer_type
+{
+ BRW_BUFFER_TYPE_TEXTURE,
+ BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */
+ BRW_BUFFER_TYPE_VERTEX,
+ BRW_BUFFER_TYPE_CURBE,
+ BRW_BUFFER_TYPE_QUERY,
+ BRW_BUFFER_TYPE_SHADER_CONSTANTS,
+ BRW_BUFFER_TYPE_SHADER_SCRATCH,
+ BRW_BUFFER_TYPE_BATCH,
+ BRW_BUFFER_TYPE_GENERAL_STATE,
+ BRW_BUFFER_TYPE_SURFACE_STATE,
+ BRW_BUFFER_TYPE_PIXEL, /* image uploads, pbo's, etc */
+ BRW_BUFFER_TYPE_GENERIC, /* unknown */
+ BRW_BUFFER_TYPE_MAX /* Count of possible values */
+};
+
+
+/* Describe the usage of a particular buffer in a relocation. The DRM
+ * winsys will translate these back to GEM read/write domain flags.
+ *
+ * The comment after each value presumably lists the GEM (read domain,
+ * write domain) pair it translates to - TODO confirm against the winsys.
+ */
+enum brw_buffer_usage {
+ BRW_USAGE_STATE, /* INSTRUCTION, 0 */
+ BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */
+ BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */
+ BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */
+ BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */
+ BRW_USAGE_BLIT_DEST, /* RENDER, RENDER */
+ BRW_USAGE_SAMPLER, /* SAMPLER, 0 */
+ BRW_USAGE_VERTEX, /* VERTEX, 0 */
+ BRW_USAGE_SCRATCH, /* 0, 0 */
+ BRW_USAGE_MAX
+};
+/* Identifies the kind of data being uploaded or mapped.  Passed as the
+ * data_type argument of bo_subdata() and bo_map(), and used by
+ * brw_dump_data() to choose how to decode the data.
+ */
+enum brw_buffer_data_type {
+ BRW_DATA_GS_CC_VP,
+ BRW_DATA_GS_CC_UNIT,
+ BRW_DATA_GS_WM_PROG,
+ BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+ BRW_DATA_GS_SAMPLER,
+ BRW_DATA_GS_WM_UNIT,
+ BRW_DATA_GS_SF_PROG,
+ BRW_DATA_GS_SF_VP,
+ BRW_DATA_GS_SF_UNIT,
+ BRW_DATA_GS_VS_UNIT,
+ BRW_DATA_GS_VS_PROG,
+ BRW_DATA_GS_GS_UNIT,
+ BRW_DATA_GS_GS_PROG,
+ BRW_DATA_GS_CLIP_VP,
+ BRW_DATA_GS_CLIP_UNIT,
+ BRW_DATA_GS_CLIP_PROG,
+ BRW_DATA_SS_SURFACE,
+ BRW_DATA_SS_SURF_BIND,
+ BRW_DATA_CONSTANT_BUFFER,
+ BRW_DATA_BATCH_BUFFER,
+ BRW_DATA_OTHER,
+ BRW_DATA_MAX
+};
+
+
+/* Buffer tiling modes.
+ *
+ * NOTE(review): presumably the values expected by the tiling argument of
+ * brw_texture_blanket_winsys_buffer() - confirm.
+ *
+ * Matches the i915_drm definitions:
+ */
+#define BRW_TILING_NONE 0
+#define BRW_TILING_X 1
+#define BRW_TILING_Y 2
+
+
+/* Relocations to be applied with subdata in a call to sws->bo_subdata, below.
+ *
+ * Effectively this encodes:
+ *
+ * *(unsigned *)(subdata + offset) = bo->offset + delta
+ *
+ * Instances are normally filled in with make_reloc(), which stores the
+ * bo pointer without taking a reference on it.
+ */
+struct brw_winsys_reloc {
+ enum brw_buffer_usage usage; /* debug only */
+ unsigned delta;
+ unsigned offset;
+ struct brw_winsys_buffer *bo;
+};
+
+/**
+ * Fill in *reloc from the given arguments.
+ *
+ * Only the bo pointer is stored; no reference is taken on the buffer.
+ */
+static INLINE void make_reloc(struct brw_winsys_reloc *reloc,
+                              enum brw_buffer_usage usage,
+                              unsigned delta,
+                              unsigned offset,
+                              struct brw_winsys_buffer *bo)
+{
+   reloc->bo = bo;           /* Note - not taking a reference yet */
+   reloc->offset = offset;
+   reloc->delta = delta;
+   reloc->usage = usage;
+}
+
+
+
+struct brw_winsys_screen {
+ /* Table of callbacks through which the driver reaches the winsys. */
+
+ /**
+ * Buffer functions.
+ */
+
+ /*@{*/
+ /**
+ * Create a buffer.
+ *
+ * The new buffer is handed back through bo_out.
+ */
+ enum pipe_error (*bo_alloc)(struct brw_winsys_screen *sws,
+ enum brw_buffer_type type,
+ unsigned size,
+ unsigned alignment,
+ struct brw_winsys_buffer **bo_out);
+
+ /* Destroy a buffer when our refcount goes to zero:
+ *
+ * bo_reference() calls this when pipe_reference() returns true.
+ */
+ void (*bo_destroy)(struct brw_winsys_buffer *buffer);
+
+ /* delta -- added to b2->offset, and written into buffer
+ * offset -- location above value is written to within buffer
+ */
+ enum pipe_error (*bo_emit_reloc)(struct brw_winsys_buffer *buffer,
+ enum brw_buffer_usage usage,
+ unsigned delta,
+ unsigned offset,
+ struct brw_winsys_buffer *b2);
+
+ enum pipe_error (*bo_exec)(struct brw_winsys_buffer *buffer,
+ unsigned bytes_used);
+ /* reloc and nr_reloc: relocations applied with the data, see struct brw_winsys_reloc. */
+ enum pipe_error (*bo_subdata)(struct brw_winsys_buffer *buffer,
+ enum brw_buffer_data_type data_type,
+ size_t offset,
+ size_t size,
+ const void *data,
+ const struct brw_winsys_reloc *reloc,
+ unsigned nr_reloc );
+
+ boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer);
+ boolean (*bo_references)(struct brw_winsys_buffer *a,
+ struct brw_winsys_buffer *b);
+
+ /* XXX: couldn't this be handled by returning true/false on
+ * bo_emit_reloc?
+ */
+ enum pipe_error (*check_aperture_space)(struct brw_winsys_screen *iws,
+ struct brw_winsys_buffer **buffers,
+ unsigned count);
+
+ /**
+ * Map a buffer.
+ *
+ * bo_map_read() is a convenience wrapper that maps the whole buffer
+ * with write, discard and flush_explicit all FALSE.
+ */
+ void *(*bo_map)(struct brw_winsys_buffer *buffer,
+ enum brw_buffer_data_type data_type,
+ unsigned offset,
+ unsigned length,
+ boolean write,
+ boolean discard,
+ boolean flush_explicit);
+
+ void (*bo_flush_range)(struct brw_winsys_buffer *buffer,
+ unsigned offset,
+ unsigned length);
+
+ /**
+ * Unmap a buffer.
+ */
+ void (*bo_unmap)(struct brw_winsys_buffer *buffer);
+ /*@}*/
+
+
+ /* Wait for buffer to go idle. Similar to map+unmap, but doesn't
+ * mark buffer contents as dirty.
+ */
+ void (*bo_wait_idle)(struct brw_winsys_buffer *buffer);
+
+ /**
+ * Destroy the winsys.
+ */
+ void (*destroy)(struct brw_winsys_screen *iws);
+};
+
+/**
+ * Map the whole of buf through sws->bo_map(), with the write, discard
+ * and flush_explicit flags all FALSE.
+ */
+static INLINE void *
+bo_map_read(struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf)
+{
+   const unsigned offset = 0;
+   const unsigned length = buf->size;
+
+   return sws->bo_map(buf, BRW_DATA_OTHER,
+                      offset, length,
+                      FALSE,    /* write */
+                      FALSE,    /* discard */
+                      FALSE);   /* flush_explicit */
+}
+
+/**
+ * Make *ptr refer to buf, adjusting reference counts via
+ * pipe_reference().
+ *
+ * If pipe_reference() returns true, the buffer previously held in *ptr
+ * is destroyed through its winsys' bo_destroy() hook.
+ */
+static INLINE void
+bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf)
+{
+   struct brw_winsys_buffer *prev = *ptr;
+
+   if (pipe_reference(&prev->reference, &buf->reference)) {
+      prev->sws->bo_destroy(prev);
+   }
+
+   *ptr = buf;
+}
+
+
+/**
+ * Create brw pipe_screen.
+ */
+struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id);
+
+/**
+ * Create a brw pipe_context.
+ */
+struct pipe_context *brw_create_context(struct pipe_screen *screen);
+
+/**
+ * Get the brw_winsys buffer backing the texture.
+ *
+ * TODO UGLY
+ */
+struct pipe_texture;
+boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
+ struct brw_winsys_buffer **buffer,
+ unsigned *stride);
+
+/**
+ * Wrap a brw_winsys buffer with a texture blanket.
+ *
+ * TODO UGLY
+ */
+struct pipe_texture *
+brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
+ const struct pipe_texture *template,
+ unsigned pitch,
+ unsigned tiling,
+ struct brw_winsys_buffer *buffer);
+
+
+/*************************************************************************
+ * Cooperative dumping between winsys and driver. TODO: make this
+ * driver-only by wrapping calls to winsys->bo_subdata().
+ *
+ * BRW_DUMP is a mask of the DUMP_* bits below; it is a constant 0 unless
+ * DEBUG is defined.  brw_dump_data() tests it to decide what to print:
+ * DUMP_ASM disassembles program data, DUMP_STATE dumps state structures
+ * and DUMP_BATCH decodes batch buffers.
+ */
+
+#ifdef DEBUG
+extern int BRW_DUMP;
+#else
+#define BRW_DUMP 0
+#endif
+
+#define DUMP_ASM 0x1
+#define DUMP_STATE 0x2
+#define DUMP_BATCH 0x4
+
+void brw_dump_data( unsigned pci_id,
+ enum brw_buffer_data_type data_type,
+ unsigned offset,
+ const void *data,
+ size_t size );
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c
new file mode 100644
index 0000000000..f8f6a539bc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_winsys_debug.c
@@ -0,0 +1,87 @@
+#include "brw_winsys.h"
+#include "brw_disasm.h"
+#include "brw_structs_dump.h"
+#include "brw_structs.h"
+#include "intel_decode.h"
+
+
+/**
+ * Debug helper: print a block of data according to its type.
+ *
+ * What is printed depends on the BRW_DUMP mask:
+ *  - DUMP_ASM:   the five *_PROG types are disassembled to stderr;
+ *  - DUMP_BATCH: batch buffers are handed to intel_decode();
+ *  - DUMP_STATE: state structures are handed to the matching
+ *                brw_dump_*() helper.
+ * Each data type belongs to at most one of these groups, and types with
+ * no decoder print nothing.
+ */
+void brw_dump_data( unsigned pci_id,
+                    enum brw_buffer_data_type data_type,
+                    unsigned offset,
+                    const void *data,
+                    size_t size )
+{
+   const int dump = BRW_DUMP;
+
+   /* Programs and batch buffers first; neither has a state decoder. */
+   switch (data_type) {
+   case BRW_DATA_GS_WM_PROG:
+   case BRW_DATA_GS_SF_PROG:
+   case BRW_DATA_GS_VS_PROG:
+   case BRW_DATA_GS_GS_PROG:
+   case BRW_DATA_GS_CLIP_PROG:
+      if (dump & DUMP_ASM) {
+         brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
+      }
+      return;
+
+   case BRW_DATA_BATCH_BUFFER:
+      if (dump & DUMP_BATCH) {
+         intel_decode(data, size / 4, offset, pci_id);
+      }
+      return;
+
+   default:
+      break;
+   }
+
+   if (!(dump & DUMP_STATE)) {
+      return;
+   }
+
+   switch (data_type) {
+   case BRW_DATA_GS_CC_VP:
+      brw_dump_cc_viewport( data );
+      break;
+   case BRW_DATA_GS_CC_UNIT:
+      brw_dump_cc_unit_state( data );
+      break;
+   case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR:
+      brw_dump_sampler_default_color( data );
+      break;
+   case BRW_DATA_GS_SAMPLER:
+      brw_dump_sampler_state( data );
+      break;
+   case BRW_DATA_GS_WM_UNIT:
+      brw_dump_wm_unit_state( data );
+      break;
+   case BRW_DATA_GS_SF_VP:
+      brw_dump_sf_viewport( data );
+      break;
+   case BRW_DATA_GS_SF_UNIT:
+      brw_dump_sf_unit_state( data );
+      break;
+   case BRW_DATA_GS_VS_UNIT:
+      brw_dump_vs_unit_state( data );
+      break;
+   case BRW_DATA_GS_GS_UNIT:
+      brw_dump_gs_unit_state( data );
+      break;
+   case BRW_DATA_GS_CLIP_VP:
+      brw_dump_clipper_viewport( data );
+      break;
+   case BRW_DATA_GS_CLIP_UNIT:
+      brw_dump_clip_unit_state( data );
+      break;
+   case BRW_DATA_SS_SURFACE:
+      brw_dump_surface_state( data );
+      break;
+   case BRW_DATA_SS_SURF_BIND:
+   case BRW_DATA_OTHER:
+   case BRW_DATA_CONSTANT_BUFFER:
+      /* No decoder for these. */
+      break;
+   default:
+      break;
+   }
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
new file mode 100644
index 0000000000..fdf820a9aa
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -0,0 +1,319 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
+
+
+/**
+ * Return the number of source arguments taken by the given opcode.
+ *
+ * The WM_* pseudo-opcodes have fixed counts; every other opcode is looked
+ * up with tgsi_get_opcode_info().
+ */
+GLuint brw_wm_nr_args( GLuint opcode )
+{
+   GLuint nr_args;
+
+   switch (opcode) {
+   case WM_PIXELXY:
+   case WM_FRONTFACING:
+      nr_args = 0;
+      break;
+
+   case WM_DELTAXY:
+   case WM_WPOSXY:
+   case WM_CINTERP:
+      nr_args = 1;
+      break;
+
+   case WM_PIXELW:
+   case WM_LINTERP:
+      nr_args = 2;
+      break;
+
+   case WM_PINTERP:
+   case WM_FB_WRITE:
+      nr_args = 3;
+      break;
+
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXD:
+      /* sampler arg is held as a field in the instruction, not in an
+       * actual register, so it is not counted:
+       */
+      nr_args = tgsi_get_opcode_info(opcode)->num_src - 1;
+      break;
+
+   default:
+      assert(opcode < MAX_OPCODE);
+      nr_args = tgsi_get_opcode_info(opcode)->num_src;
+      break;
+   }
+
+   return nr_args;
+}
+
+
+/**
+ * Return 1 for the opcodes this driver treats as producing a scalar
+ * result, 0 for everything else.
+ */
+GLuint brw_wm_is_scalar_result( GLuint opcode )
+{
+   GLuint is_scalar = 0;
+
+   switch (opcode) {
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_EX2:
+   case TGSI_OPCODE_LG2:
+   case TGSI_OPCODE_POW:
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_SIN:
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_DPH:
+   case TGSI_OPCODE_DST:
+      is_scalar = 1;
+      break;
+
+   default:
+      break;
+   }
+
+   return is_scalar;
+}
+
+
+/**
+ * Code generation for fragment shaders without flow control.
+ *
+ * Such shaders can more readily be analysed for SSA-style optimizations,
+ * so they are run through the multi-pass compiler below before the
+ * native code is emitted.
+ */
+static void
+brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+   /* Augment fragment program.  Add instructions for pre- and
+    * post-fragment-program tasks such as interpolation and fogging.
+    */
+   brw_wm_pass_fp(c);
+
+   /* Translate to intermediate representation.  Build register usage
+    * chains.
+    */
+   brw_wm_pass0(c);
+
+   /* Dead code removal.
+    */
+   brw_wm_pass1(c);
+
+   /* Register allocation.
+    * Divide by two because we operate on 16 pixels at a time and require
+    * two GRF entries for each logical shader register.
+    */
+   c->grf_limit = BRW_WM_MAX_GRF / 2;
+   brw_wm_pass2(c);
+
+   /* how many general-purpose registers are used */
+   c->prog_data.total_grf = c->max_wm_grf;
+
+   /* Scratch space is used for register spilling; none is reserved
+    * unless something was spilled.
+    */
+   c->prog_data.total_scratch = (c->last_scratch ?
+                                 c->last_scratch + 0x40 :
+                                 0);
+
+   /* Emit GEN4 code.
+    */
+   brw_wm_emit(c);
+}
+
+
+/**
+ * Compile the fragment shader fp for the given key and upload the
+ * resulting program to the state cache.
+ *
+ * All fragment program -> GPU code generation goes through this function.
+ * Only shaders without flow control are supported at the moment: a shader
+ * that uses flow control is rejected with PIPE_ERROR_BAD_INPUT rather
+ * than terminating the whole process.
+ *
+ * \param brw     context; brw->wm.compile_data is allocated on first use
+ *                and reused by later compiles
+ * \param fp      fragment shader to compile
+ * \param key     program key, copied into the compile state
+ * \param bo_out  receives the buffer holding the uploaded program
+ * \return PIPE_OK on success, otherwise a pipe_error code
+ */
+static enum pipe_error do_wm_prog( struct brw_context *brw,
+                                   struct brw_fragment_shader *fp,
+                                   struct brw_wm_prog_key *key,
+                                   struct brw_winsys_buffer **bo_out)
+{
+   enum pipe_error ret;
+   struct brw_wm_compile *c;
+   const GLuint *program;
+   GLuint program_size;
+
+   /* XXX: GLSL support
+    *
+    * Shaders which use GLSL features such as flow control need a
+    * different code generator (brw_wm_branching_shader_emit, dispatch
+    * width 8) which is not hooked up yet.  Report the failure to the
+    * caller; driver code must not exit() the application.
+    */
+   if (fp->has_flow_control) {
+      debug_printf("%s: fragment shaders with flow control are not supported\n",
+                   __FUNCTION__);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   if (brw->wm.compile_data == NULL) {
+      brw->wm.compile_data = MALLOC(sizeof(*brw->wm.compile_data));
+      if (!brw->wm.compile_data)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   c = brw->wm.compile_data;
+   memset(c, 0, sizeof *c);
+
+   c->key = *key;
+   c->fp = fp;
+   c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/
+
+   brw_init_compile(brw, &c->func);
+
+   /* "Simple" shaders are compiled 16 pixels at a time. */
+   c->dispatch_width = 16;
+   brw_wm_linear_shader_emit(brw, c);
+
+   if (BRW_DEBUG & DEBUG_WM)
+      debug_printf("\n");
+
+   /* get the program
+    */
+   ret = brw_get_program(&c->func, &program, &program_size);
+   if (ret)
+      return ret;
+
+   ret = brw_upload_cache( &brw->cache, BRW_WM_PROG,
+                           &c->key, sizeof(c->key),
+                           NULL, 0,
+                           program, program_size,
+                           &c->prog_data,
+                           &brw->wm.prog_data,
+                           bo_out );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+
+
+/**
+ * Build the WM program key from the current context state.
+ *
+ * The key is cleared first; the tag above each group names the dirty
+ * flag(s) covering the state being read.
+ */
+static void brw_wm_populate_key( struct brw_context *brw,
+                                 struct brw_wm_prog_key *key )
+{
+   unsigned line_aa;
+   unsigned lookup;
+   unsigned unit;
+
+   memset(key, 0, sizeof(*key));
+
+   /* PIPE_NEW_RAST
+    * BRW_NEW_REDUCED_PRIMITIVE
+    */
+   if (brw->reduced_primitive == PIPE_PRIM_POINTS) {
+      line_aa = AA_NEVER;
+   }
+   else if (brw->reduced_primitive == PIPE_PRIM_LINES) {
+      if (brw->curr.rast->templ.line_smooth)
+         line_aa = AA_ALWAYS;
+      else
+         line_aa = AA_NEVER;
+   }
+   else {
+      line_aa = brw->curr.rast->unfilled_aa_line;
+   }
+
+   /* PIPE_NEW_FRAGMENT_SHADER
+    * PIPE_NEW_DEPTH_STENCIL_ALPHA
+    */
+   lookup = (brw->curr.zstencil->iz_lookup |
+             brw->curr.fragment_shader->iz_lookup);
+
+   brw_wm_lookup_iz(line_aa,
+                    lookup,
+                    brw->curr.fragment_shader->uses_depth,
+                    key);
+
+   /* PIPE_NEW_RAST */
+   key->flat_shade = brw->curr.rast->templ.flatshade;
+
+   /* PIPE_NEW_BOUND_TEXTURES: one mask bit per bound texture unit. */
+   for (unit = 0; unit < brw->curr.num_textures; unit++) {
+      const struct brw_texture *tex = brw_texture(brw->curr.texture[unit]);
+
+      if (tex->base.format == PIPE_FORMAT_YCBCR) {
+         key->yuvtex_mask |= 1 << unit;
+      }
+
+      if (tex->base.format == PIPE_FORMAT_YCBCR_REV) {
+         key->yuvtex_swap_mask |= 1 << unit;
+      }
+
+      /* XXX: shadow texture
+       */
+      /* key->shadowtex_mask |= 1<<i; */
+   }
+
+   /* CACHE_NEW_VS_PROG */
+   key->vp_nr_outputs = brw->vs.prog_data->nr_outputs;
+
+   key->nr_cbufs = brw->curr.fb.nr_cbufs;
+
+   key->nr_inputs = brw->curr.fragment_shader->info.num_inputs;
+
+   /* The unique fragment program ID */
+   key->program_string_id = brw->curr.fragment_shader->id;
+}
+
+
+/**
+ * Prepare hook for the WM program state atom.
+ *
+ * Builds the program key and looks it up in the cache; on a miss the
+ * current fragment shader is compiled with do_wm_prog().  Either way the
+ * program ends up in brw->wm.prog_bo.
+ */
+static enum pipe_error brw_prepare_wm_prog(struct brw_context *brw)
+{
+   struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+   struct brw_wm_prog_key key;
+
+   brw_wm_populate_key(brw, &key);
+
+   /* Make an early check for the key.
+    */
+   if (!brw_search_cache(&brw->cache, BRW_WM_PROG,
+                         &key, sizeof(key),
+                         NULL, 0,
+                         &brw->wm.prog_data,
+                         &brw->wm.prog_bo)) {
+      /* Not cached yet: compile and upload. */
+      enum pipe_error ret;
+
+      ret = do_wm_prog(brw, fs, &key, &brw->wm.prog_bo);
+      if (ret)
+         return ret;
+   }
+
+   return PIPE_OK;
+}
+
+/* State atom for the WM (fragment) program.  brw_prepare_wm_prog() is
+ * installed as the prepare hook; the dirty masks list the pipe, brw and
+ * cache flags this atom is registered against.
+ */
+const struct brw_tracked_state brw_wm_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_FRAGMENT_SHADER |
+ PIPE_NEW_DEPTH_STENCIL_ALPHA |
+ PIPE_NEW_RAST |
+ PIPE_NEW_NR_CBUFS |
+ PIPE_NEW_BOUND_TEXTURES),
+ .brw = (BRW_NEW_WM_INPUT_DIMENSIONS |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG,
+ },
+ .prepare = brw_prepare_wm_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
new file mode 100644
index 0000000000..f1ca9f6369
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -0,0 +1,344 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define SATURATE (1<<5)
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will be inserted before the main payload in the WM
+ * program execution. These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT 0x1
+#define IZ_PS_COMPUTES_DEPTH_BIT 0x2
+#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4
+#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
+#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
+#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
+#define IZ_BIT_MAX 0x40
+
+#define AA_NEVER 0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS 2
+
+struct brw_wm_prog_key { /* key used to look up BRW_WM_PROG entries in the program cache */
+ GLuint source_depth_reg:3;
+ GLuint aa_dest_stencil_reg:3;
+ GLuint dest_depth_reg:3;
+ GLuint nr_depth_regs:3;
+ GLuint computes_depth:1;
+ GLuint source_depth_to_render_target:1;
+ GLuint flat_shade:1; /* copied from the rasterizer template's flatshade */
+ GLuint runtime_check_aads_emit:1;
+
+ GLuint shadowtex_mask:16; /* XXX: its assignment is commented out in the bound-texture loop */
+ GLuint yuvtex_mask:16; /* bit i set when bound texture i is PIPE_FORMAT_YCBCR */
+ GLuint yuvtex_swap_mask:16; /* bit i set when bound texture i is PIPE_FORMAT_YCBCR_REV (UV swapped) */
+
+ GLuint vp_nr_outputs:6; /* nr_outputs of the current VS program data */
+ GLuint nr_inputs:6; /* info.num_inputs of the current fragment shader */
+ GLuint nr_cbufs:3; /* nr_cbufs of the current framebuffer state */
+ GLuint has_flow_control:1;
+
+ GLuint program_string_id; /* the unique fragment program ID */
+};
+
+
+/* A bit of a glossary:
+ *
+ * brw_wm_value: A computed value or program input. Values are
+ * constant, they are created once and are never modified. When a
+ * fragment program register is written or overwritten, new values are
+ * created fresh, preserving the rule that values are constant.
+ *
+ * brw_wm_ref: A reference to a value. Wherever a value is used by an
+ * instruction or as a program output, that is tracked with an
+ * instance of this struct. All references to a value occur after it
+ * is created. After the last reference, a value is dead and can be
+ * discarded.
+ *
+ * brw_wm_grf: Represents a physical hardware register. May be either
+ * empty or hold a value. Register allocation is the process of
+ * assigning values to grf registers. This occurs in pass2 and the
+ * brw_wm_grf struct is not used before that.
+ *
+ * Fragment program registers: These are time-varying constructs that
+ * are hard to reason about and which we translate away in pass0. A
+ * single fragment program register element (eg. temp[0].x) will be
+ * translated to one or more brw_wm_value structs, one for each time
+ * that temp[0].x is written to during the program.
+ */
+
+
+
+/* Used in pass2 to track register allocation.
+ */
+struct brw_wm_grf {
+ struct brw_wm_value *value;
+ GLuint nextuse;
+};
+
+struct brw_wm_value {
+ struct brw_reg hw_reg; /* emitted to this reg, may not always be there */
+ struct brw_wm_ref *lastuse;
+ struct brw_wm_grf *resident;
+ GLuint contributes_to_output:1;
+ GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */
+};
+
+struct brw_wm_ref {
+ struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */
+ struct brw_wm_value *value;
+ struct brw_wm_ref *prevuse;
+ GLuint unspill_reg:7; /* unspill to reg */
+ GLuint emitted:1;
+ GLuint insn:24;
+};
+
+struct brw_wm_instruction {
+ struct brw_wm_value *dst[4];
+ struct brw_wm_ref *src[3][4];
+ GLuint opcode:8;
+ GLuint saturate:1;
+ GLuint writemask:4;
+ GLuint sampler:4;
+ GLuint tex_unit:4; /* texture/sampler unit for texture instructions */
+ GLuint target:4; /* TGSI_TEXTURE_x for texture instructions,
+ * target binding table index for FB_WRITE
+ */
+ GLuint eot:1; /* End of thread indicator for FB_WRITE*/
+};
+
+
+#define BRW_WM_MAX_INSN 2048
+#define BRW_WM_MAX_GRF 128 /* hardware limit */
+#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
+#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
+#define BRW_WM_MAX_PARAM 256
+#define BRW_WM_MAX_CONST 256
+#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+#define BRW_WM_MAX_SUBROUTINE 16
+
+
+/* New opcodes to track internal operations required for WM unit.
+ * These are added early so that the registers used can be tracked,
+ * freed and reused like those of other instructions.
+ */
+#define MAX_OPCODE TGSI_OPCODE_LAST
+#define WM_PIXELXY (MAX_OPCODE)
+#define WM_DELTAXY (MAX_OPCODE + 1)
+#define WM_PIXELW (MAX_OPCODE + 2)
+#define WM_LINTERP (MAX_OPCODE + 3)
+#define WM_PINTERP (MAX_OPCODE + 4)
+#define WM_CINTERP (MAX_OPCODE + 5)
+#define WM_WPOSXY (MAX_OPCODE + 6)
+#define WM_FB_WRITE (MAX_OPCODE + 7)
+#define WM_FRONTFACING (MAX_OPCODE + 8)
+#define MAX_WM_OPCODE (MAX_OPCODE + 9)
+
+#define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT)
+#define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+struct brw_fp_src {
+ unsigned file:4;
+ unsigned index:16;
+ unsigned swizzle:8;
+ unsigned indirect:1;
+ unsigned negate:1;
+ unsigned abs:1;
+};
+
+struct brw_fp_dst {
+ unsigned file:4;
+ unsigned index:16;
+ unsigned writemask:4;
+ unsigned indirect:1;
+ unsigned saturate:1;
+};
+
+struct brw_fp_instruction {
+ struct brw_fp_dst dst;
+ struct brw_fp_src src[3];
+ unsigned opcode:8;
+ unsigned target:8; /* XXX: special usage for FB_WRITE */
+ unsigned tex_unit:4;
+ unsigned sampler:4;
+ unsigned pad:8;
+};
+
+
+struct brw_wm_compile {
+ struct brw_compile func;
+ struct brw_wm_prog_key key;
+ struct brw_wm_prog_data prog_data;
+
+ struct brw_fragment_shader *fp;
+
+ GLfloat (*env_param)[4];
+
+ enum {
+ START,
+ PASS2_DONE
+ } state;
+
+ /* Initial pass - translate fp instructions to fp instructions,
+ * simplifying and adding instructions for interpolation and
+ * framebuffer writes.
+ */
+ struct {
+ GLfloat v[4];
+ unsigned nr;
+ } immediate[BRW_WM_MAX_CONST+3];
+ GLuint nr_immediates;
+
+ struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN];
+ GLuint nr_fp_insns;
+ GLuint fp_temp;
+ GLuint fp_interp_emitted;
+ GLuint fp_fragcolor_emitted;
+ GLuint fp_first_internal_temp;
+
+ struct brw_fp_src fp_pixel_xy;
+ struct brw_fp_src fp_delta_xy;
+ struct brw_fp_src fp_pixel_w;
+
+
+ /* Subsequent passes using SSA representation:
+ */
+ struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+ GLuint nr_vreg;
+
+ struct brw_wm_value creg[BRW_WM_MAX_PARAM];
+ GLuint nr_creg;
+
+ struct {
+ struct brw_wm_value depth[4]; /* includes r0/r1 */
+ struct brw_wm_value input_interp[PIPE_MAX_SHADER_INPUTS];
+ } payload;
+
+
+ const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4];
+
+ struct brw_wm_ref undef_ref;
+ struct brw_wm_value undef_value;
+
+ struct brw_wm_ref refs[BRW_WM_MAX_REF];
+ GLuint nr_refs;
+
+ struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+ GLuint nr_insns;
+
+ struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+
+ GLuint grf_limit;
+ GLuint max_wm_grf;
+ GLuint last_scratch;
+
+ GLuint cur_inst; /**< index of current instruction */
+
+ GLboolean out_of_regs; /**< ran out of GRF registers? */
+
+ /** Mapping from Mesa registers to hardware registers */
+ struct {
+ GLboolean inited;
+ struct brw_reg reg;
+ } wm_regs[BRW_FILE_PAYLOAD+1][256][4];
+
+ GLboolean used_grf[BRW_WM_MAX_GRF];
+ GLuint first_free_grf;
+ struct brw_reg stack;
+ struct brw_reg emit_mask_reg;
+ GLuint tmp_regs[BRW_WM_MAX_GRF];
+ GLuint tmp_index;
+ GLuint tmp_max;
+ GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+ GLuint dispatch_width;
+
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
+
+ GLuint error;
+};
+
+
+GLuint brw_wm_nr_args( GLuint opcode );
+GLuint brw_wm_is_scalar_result( GLuint opcode );
+
+int brw_wm_pass_fp( struct brw_wm_compile *c );
+void brw_wm_pass0( struct brw_wm_compile *c );
+void brw_wm_pass1( struct brw_wm_compile *c );
+void brw_wm_pass2( struct brw_wm_compile *c );
+void brw_wm_emit( struct brw_wm_compile *c );
+
+void brw_wm_print_value( struct brw_wm_compile *c,
+ struct brw_wm_value *value );
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref );
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst );
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage );
+
+void brw_wm_print_fp_program( struct brw_wm_compile *c,
+ const char *stage );
+
+void brw_wm_lookup_iz( GLuint line_aa,
+ GLuint lookup,
+ GLboolean ps_uses_depth,
+ struct brw_wm_prog_key *key );
+
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c);
+
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0);
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
new file mode 100644
index 0000000000..6434c6acf7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
@@ -0,0 +1,165 @@
+/* XXX: Constant buffers disabled
+ */
+
+
+/**
+ * Create the constant buffer surface.  Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ * Builds the surface state from 'key' and hands bo_out to brw_upload_cache.
+ */
+enum pipe_error
+brw_create_constant_surface( struct brw_context *brw,
+                             struct brw_surface_key *key,
+                             struct brw_winsys_buffer **bo_out )
+{
+   const GLint w = key->width - 1;
+   struct brw_surface_state surf;   /* was used below without a declaration */
+   struct brw_winsys_reloc reloc[1];
+   enum pipe_error ret;
+
+   /* Emit relocation to surface contents */
+   make_reloc(&reloc[0],
+              BRW_USAGE_SAMPLER,
+              0,
+              offsetof(struct brw_surface_state, ss1),
+              key->bo);
+
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = BRW_SURFACE_BUFFER;
+   surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   surf.ss1.base_addr = 0; /* reloc */
+
+   surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
+   surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
+   surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+   brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+
+   ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+                          key, sizeof(*key),
+                          reloc, Elements(reloc),
+                          &surf, sizeof(surf),
+                          NULL, NULL,
+                          bo_out);   /* already a pointer-to-pointer; do not take its address */
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+
+
+/**
+ * Update the surface state for a WM constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static enum pipe_error
+brw_update_wm_constant_surface( struct brw_context *brw,
+                                GLuint surf)
+{
+   struct brw_surface_key key;
+   struct brw_fragment_shader *fp = brw->curr.fragment_shader;
+   struct pipe_buffer *cbuf = brw->curr.fragment_constants;
+   int pitch;   /* set below, only once cbuf is known to be non-NULL */
+   enum pipe_error ret;
+
+   /* If we're in this state update atom, we need to update WM constants, so
+    * free the old buffer and create a new one for the new contents.
+    */
+   ret = brw_wm_update_constant_buffer(brw, &fp->const_buffer);
+   if (ret)
+      return ret;
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (cbuf == NULL) {
+      bo_reference(&brw->wm.surf_bo[surf], NULL);
+      return PIPE_OK;
+   }
+
+   pitch = cbuf->size / (4 * sizeof(float));   /* buffer size in 4-float units */
+   memset(&key, 0, sizeof(key));
+
+   key.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   key.ss0.surface_type = BRW_SURFACE_BUFFER;
+   key.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   key.bo = brw_buffer(cbuf)->bo;
+
+   key.ss2.width = (pitch-1) & 0x7f; /* bits 6:0 of size or width */
+   key.ss2.height = ((pitch-1) >> 7) & 0x1fff; /* bits 19:7 of size or width */
+   key.ss3.depth = ((pitch-1) >> 20) & 0x7f; /* bits 26:20 of size or width */
+   key.ss3.pitch = (pitch * 4 * sizeof(float)) - 1; /* ignored?? */
+   /* NOTE(review): 'surf' is a GLuint index and 'key' is not a pointer here;
+    * this call looks copied from brw_create_constant_surface -- confirm. */
+   brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+
+   /*
+   printf("%s:\n", __FUNCTION__);
+   printf(" width %d height %d depth %d cpp %d pitch %d\n",
+          key.width, key.height, key.depth, key.cpp, key.pitch);
+   */
+
+   if (brw_search_cache(&brw->surface_cache,
+                        BRW_SS_SURFACE,
+                        &key, sizeof(key),
+                        &key.bo, 1,
+                        NULL,
+                        &brw->wm.surf_bo[surf]))
+      return PIPE_OK;
+
+   ret = brw_create_constant_surface(brw, &key, &brw->wm.surf_bo[surf]);
+   if (ret) return ret;
+
+   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+   return PIPE_OK;
+}
+
+/**
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
+ */
+static enum pipe_error prepare_wm_constant_surface(struct brw_context *brw )
+{
+   struct brw_fragment_shader *fp = brw->curr.fragment_shader;
+   enum pipe_error ret;
+   GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+
+   ret = brw_wm_update_constant_buffer(brw,
+                                       &fp->const_buffer);
+   if (ret)
+      return ret;
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (fp->const_buffer == 0) {
+      if (brw->wm.surf_bo[surf] != NULL) {
+         bo_reference(&brw->wm.surf_bo[surf], NULL);
+         brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+      }
+      return PIPE_OK;
+   }
+
+   ret = brw_update_wm_constant_surface(brw, surf);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_wm_constant_surface = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_wm_constant_surface,
+};
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
new file mode 100644
index 0000000000..3d11fa074c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -0,0 +1,256 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+static void print_writemask( unsigned writemask )
+{
+ if (writemask != BRW_WRITEMASK_XYZW)
+ debug_printf(".%s%s%s%s",
+ (writemask & BRW_WRITEMASK_X) ? "x" : "",
+ (writemask & BRW_WRITEMASK_Y) ? "y" : "",
+ (writemask & BRW_WRITEMASK_Z) ? "z" : "",
+ (writemask & BRW_WRITEMASK_W) ? "w" : "");
+}
+
+static void print_swizzle( unsigned swizzle )
+{
+ char *swz = "xyzw";
+ if (swizzle != BRW_SWIZZLE_XYZW)
+ debug_printf(".%c%c%c%c",
+ swz[BRW_GET_SWZ(swizzle, X)],
+ swz[BRW_GET_SWZ(swizzle, Y)],
+ swz[BRW_GET_SWZ(swizzle, Z)],
+ swz[BRW_GET_SWZ(swizzle, W)]);
+}
+
+static void print_opcode( unsigned opcode )
+{
+ switch (opcode) {
+ case WM_PIXELXY:
+ debug_printf("PIXELXY");
+ break;
+ case WM_DELTAXY:
+ debug_printf("DELTAXY");
+ break;
+ case WM_PIXELW:
+ debug_printf("PIXELW");
+ break;
+ case WM_WPOSXY:
+ debug_printf("WPOSXY");
+ break;
+ case WM_PINTERP:
+ debug_printf("PINTERP");
+ break;
+ case WM_LINTERP:
+ debug_printf("LINTERP");
+ break;
+ case WM_CINTERP:
+ debug_printf("CINTERP");
+ break;
+ case WM_FB_WRITE:
+ debug_printf("FB_WRITE");
+ break;
+ case WM_FRONTFACING:
+ debug_printf("FRONTFACING");
+ break;
+ default:
+ debug_printf("%s", tgsi_get_opcode_info(opcode)->mnemonic);
+ break;
+ }
+}
+
+/* Debug-print a short name for 'value': its hw register once pass2 is done,
+ * else undef, or r/c/i/d plus its index in vreg/creg/input_interp/depth. */
+void brw_wm_print_value( struct brw_wm_compile *c,
+                         struct brw_wm_value *value )
+{
+   assert(value);
+   if (c->state >= PASS2_DONE)
+      brw_print_reg(value->hw_reg);
+   else if( value == &c->undef_value )
+      debug_printf("undef");
+   else if( value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG)
+      debug_printf("r%d", (int)(value - c->vreg));
+   else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM)
+      debug_printf("c%d", (int)(value - c->creg));
+   else if (value - c->payload.input_interp >= 0 &&
+            value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS)
+      debug_printf("i%d", (int)(value - c->payload.input_interp));
+   else if (value - c->payload.depth >= 0 && value - c->payload.depth <
+            (int)(sizeof(c->payload.depth) / sizeof(c->payload.depth[0])))
+      debug_printf("d%d", (int)(value - c->payload.depth));
+   else
+      debug_printf("?");
+}
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref )
+{
+ struct brw_reg hw_reg = ref->hw_reg;
+
+ if (ref->unspill_reg)
+ debug_printf("UNSPILL(%x)/", ref->value->spill_slot);
+
+ if (c->state >= PASS2_DONE)
+ brw_print_reg(ref->hw_reg);
+ else {
+ debug_printf("%s", hw_reg.negate ? "-" : "");
+ debug_printf("%s", hw_reg.abs ? "abs/" : "");
+ brw_wm_print_value(c, ref->value);
+ if ((hw_reg.nr&1) || hw_reg.subnr) {
+ debug_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+ }
+ }
+}
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst )
+{
+ GLuint i, arg;
+ GLuint nr_args = brw_wm_nr_args(inst->opcode);
+
+ debug_printf("[");
+ for (i = 0; i < 4; i++) {
+ if (inst->dst[i]) {
+ brw_wm_print_value(c, inst->dst[i]);
+ if (inst->dst[i]->spill_slot)
+ debug_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+ }
+ else
+ debug_printf("#");
+ if (i < 3)
+ debug_printf(",");
+ }
+ debug_printf("]");
+ print_writemask(inst->writemask);
+
+ debug_printf(" = ");
+ print_opcode(inst->opcode);
+
+ if (inst->saturate)
+ debug_printf("_SAT");
+
+ for (arg = 0; arg < nr_args; arg++) {
+
+ debug_printf(" [");
+
+ for (i = 0; i < 4; i++) {
+ if (inst->src[arg][i]) {
+ brw_wm_print_ref(c, inst->src[arg][i]);
+ }
+ else
+ debug_printf("%%");
+
+ if (i < 3)
+ debug_printf(",");
+ else
+ debug_printf("]");
+ }
+ }
+ debug_printf("\n");
+}
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage )
+{
+ GLuint insn;
+
+ debug_printf("%s:\n", stage);
+ for (insn = 0; insn < c->nr_insns; insn++)
+ brw_wm_print_insn(c, &c->instruction[insn]);
+ debug_printf("\n");
+}
+
+static const char *file_strings[TGSI_FILE_COUNT+1] = {
+ "NULL",
+ "CONST",
+ "IN",
+ "OUT",
+ "TEMP",
+ "SAMPLER",
+ "ADDR",
+ "IMM",
+ "LOOP",
+ "PAYLOAD"
+};
+
+static void brw_wm_print_fp_insn( struct brw_wm_compile *c,
+ struct brw_fp_instruction *inst )
+{
+ GLuint i;
+ GLuint nr_args = brw_wm_nr_args(inst->opcode);
+
+ print_opcode(inst->opcode);
+ if (inst->dst.saturate)
+ debug_printf("_SAT");
+ debug_printf(" ");
+
+ if (inst->dst.indirect)
+ debug_printf("[");
+
+ debug_printf("%s[%d]",
+ file_strings[inst->dst.file],
+ inst->dst.index );
+ print_writemask(inst->dst.writemask);
+
+ if (inst->dst.indirect)
+ debug_printf("]");
+
+ debug_printf(nr_args ? ", " : "\n");
+
+ for (i = 0; i < nr_args; i++) {
+ debug_printf("%s%s%s[%d]%s",
+ inst->src[i].negate ? "-" : "",
+ inst->src[i].abs ? "ABS(" : "",
+ file_strings[inst->src[i].file],
+ inst->src[i].index,
+ inst->src[i].abs ? ")" : "");
+ print_swizzle(inst->src[i].swizzle);
+ debug_printf("%s", i == nr_args - 1 ? "\n" : ", ");
+ }
+}
+
+
+void brw_wm_print_fp_program( struct brw_wm_compile *c,
+ const char *stage )
+{
+ GLuint insn;
+
+ debug_printf("%s:\n", stage);
+ for (insn = 0; insn < c->nr_fp_insns; insn++)
+ brw_wm_print_fp_insn(c, &c->fp_instructions[insn]);
+ debug_printf("\n");
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
new file mode 100644
index 0000000000..7e57d0306b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -0,0 +1,1521 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+/* Not quite sure how correct this is - need to understand horiz
+ * vs. vertical strides a little better.
+ */
+static INLINE struct brw_reg sechalf( struct brw_reg reg )
+{
+ if (reg.vstride)
+ reg.nr++;
+ return reg;
+}
+
+/* Payload R0:
+ *
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
+ * corresponding to each of the 16 execution channels.
+ * R0.1..8 -- ?
+ * R1.0 -- triangle vertex 0.X
+ * R1.1 -- triangle vertex 0.Y
+ * R1.2 -- quad 0 x,y coords (2 packed uwords)
+ * R1.3 -- quad 1 x,y coords (2 packed uwords)
+ * R1.4 -- quad 2 x,y coords (2 packed uwords)
+ * R1.5 -- quad 3 x,y coords (2 packed uwords)
+ * R1.6 -- ?
+ * R1.7 -- ?
+ * R1.8 -- ?
+ */
+
+
+static void emit_pixel_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ if (mask & BRW_WRITEMASK_X) {
+ brw_ADD(p,
+ vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+ }
+
+ if (mask & BRW_WRITEMASK_Y) {
+ brw_ADD(p,
+ vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw,5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+}
+
+
+
+static void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & BRW_WRITEMASK_X) {
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ }
+
+ if (mask & BRW_WRITEMASK_Y) {
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+
+ }
+}
+
+/* Emit the window-position X and Y channels: for each of X/Y selected in
+ * mask, MOV that channel of arg0 (retyped to signed words) into the same
+ * channel of dst.
+ *
+ * c    - WM compile state; instructions are emitted through &c->func.
+ * dst  - destination registers, indexed by channel.
+ * mask - BRW_WRITEMASK_* bits; only X and Y are examined.
+ * arg0 - source registers, indexed by channel.
+ */
+static void emit_wpos_xy(struct brw_wm_compile *c,
+                         const struct brw_reg *dst,
+                         GLuint mask,
+                         const struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+
+   if (mask & BRW_WRITEMASK_X) {
+      /* X' = X */
+      brw_MOV(p, dst[0], retype(arg0[0], BRW_REGISTER_TYPE_W));
+   }
+
+   /* XXX: is this needed any more, or is this a NOOP?
+    *
+    * Y' = Y.  No "height - 1 - Y" flip is applied; the channel is passed
+    * through, so it must read arg0[1] and write dst[1], not channel 0.
+    */
+   if (mask & BRW_WRITEMASK_Y) {
+      brw_MOV(p, dst[1], retype(arg0[1], BRW_REGISTER_TYPE_W));
+   }
+}
+
+
+static void emit_pixel_w( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
+{
+ /* Don't need this if all you are doing is interpolating color, for
+ * instance.
+ */
+ if (mask & BRW_WRITEMASK_W) {
+ struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
+
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+
+ /* Calc w */
+ brw_math_16( p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+
+
+
+static void emit_linterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas )
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
+ }
+}
+
+
+static void emit_pinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
+ }
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MUL(p, dst[i], dst[i], w[3]);
+ }
+ }
+}
+
+
+static void emit_cinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
+ }
+ }
+}
+
+/* Sets the destination channels selected by mask to 1.0 or 0.0 according to
+ * glFrontFacing: 0.0 is written first, then 1.0 following the CMP below.
+ */
+static void emit_frontfacing( struct brw_compile *p,
+                              const struct brw_reg *dst,
+                              GLuint mask )
+{
+   struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+   GLuint i;
+
+   if (!(mask & BRW_WRITEMASK_XYZW))
+      return;
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+         brw_MOV(p, dst[i], brw_imm_f(0.0));
+      }
+   }
+   /* bit 31 is "primitive is back face", so checking < (1u << 31) gives us
+    * front face.  The constant is unsigned: 1 << 31 overflows a 32-bit int.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1u << 31));
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+         brw_MOV(p, dst[i], brw_imm_f(1.0));
+      }
+   }
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (q0.tr - q0.tl) (q0.tl - q0.bl)
+ * (q0.tr - q0.tl) (q0.tr - q0.br)
+ * (q0.br - q0.bl) (q0.tl - q0.bl)
+ * (q0.br - q0.bl) (q0.tr - q0.br)
+ * (q1.tr - q1.tl) (q1.tl - q1.bl)
+ * (q1.tr - q1.tl) (q1.tr - q1.br)
+ * (q1.br - q1.bl) (q1.tl - q1.bl)
+ * (q1.br - q1.bl) (q1.tr - q1.br)
+ *
+ * and add two more quads if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+static void emit_alu1( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ GLuint i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_alu2( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i], arg1[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+
+ /**
+  * MAD: for each enabled channel emit a MUL of arg0 and arg1 into dst,
+  * followed by an ADD of dst and arg2 back into dst.
+  *
+  * Only the final ADD is emitted with saturation enabled (when SATURATE is
+  * set in \p mask); saturation is switched off again right after it.
+  */
+ static void emit_mad( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1,
+                       const struct brw_reg *arg2 )
+ {
+    GLuint chan;
+
+    for (chan = 0; chan < 4; chan++) {
+       if (!(mask & (1<<chan)))
+          continue;
+
+       /* Unsaturated product goes through dst as a temporary. */
+       brw_MUL(p, dst[chan], arg0[chan], arg1[chan]);
+
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_ADD(p, dst[chan], dst[chan], arg2[chan]);
+       brw_set_saturate(p, 0);
+    }
+ }
+
+ /**
+  * TRUNC: emit brw_RNDZ for each enabled channel.
+  * The SATURATE bit of \p mask is not consulted here.
+  */
+ static void emit_trunc( struct brw_compile *p,
+                         const struct brw_reg *dst,
+                         GLuint mask,
+                         const struct brw_reg *arg0)
+ {
+    GLuint chan;
+
+    for (chan = 0; chan < 4; chan++) {
+       if (!(mask & (1<<chan)))
+          continue;
+
+       brw_RNDZ(p, dst[chan], arg0[chan]);
+    }
+ }
+
+ /**
+  * LRP: per enabled channel emit
+  *    ADD dst, -arg0, 1.0
+  *    MUL null, dst, arg2
+  *    MAC dst, arg0, arg1      (saturated if SATURATE is set)
+  *
+  * dst is used as a temporary for the (1 - arg0) term.
+  * NOTE(review): presumably the MUL-to-null leaves its product in the
+  * accumulator for the MAC to add to - confirm against the EU docs.
+  */
+ static void emit_lrp( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1,
+                       const struct brw_reg *arg2 )
+ {
+    GLuint chan;
+
+    for (chan = 0; chan < 4; chan++) {
+       if (!(mask & (1<<chan)))
+          continue;
+
+       /* Can I use the LINE instruction for this?
+        */
+       brw_ADD(p, dst[chan], negate(arg0[chan]), brw_imm_f(1.0));
+       brw_MUL(p, brw_null_reg(), dst[chan], arg2[chan]);
+
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_MAC(p, dst[chan], arg0[chan], arg1[chan]);
+       brw_set_saturate(p, 0);
+    }
+ }
+
+ /**
+  * Shared body of the "set on condition" opcodes (SLT, SLE, SGT, ...).
+  *
+  * Per enabled channel: MOV 0.0 into dst, CMP arg0 against arg1 with
+  * \p cond, MOV 1.0 into dst, then reset the predicate flag value to 0xff.
+  * NOTE(review): the second MOV is presumably predicated on the CMP result
+  * by state that brw_CMP leaves behind - confirm in brw_eu_emit.c.
+  */
+ static void emit_sop( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       GLuint cond,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    GLuint chan;
+
+    for (chan = 0; chan < 4; chan++) {
+       if (!(mask & (1<<chan)))
+          continue;
+
+       brw_MOV(p, dst[chan], brw_imm_f(0));
+       brw_CMP(p, brw_null_reg(), cond, arg0[chan], arg1[chan]);
+       brw_MOV(p, dst[chan], brw_imm_f(1.0));
+       brw_set_predicate_control_flag_value(p, 0xff);
+    }
+ }
+
+ /* The six set-on-condition opcodes below differ only in the condition
+  * code they hand to emit_sop().
+  */
+
+ /** SLT: emit_sop() with BRW_CONDITIONAL_L. */
+ static void emit_slt( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint cond = BRW_CONDITIONAL_L;
+    emit_sop(p, dst, mask, cond, arg0, arg1);
+ }
+
+ /** SLE: emit_sop() with BRW_CONDITIONAL_LE. */
+ static void emit_sle( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint cond = BRW_CONDITIONAL_LE;
+    emit_sop(p, dst, mask, cond, arg0, arg1);
+ }
+
+ /** SGT: emit_sop() with BRW_CONDITIONAL_G. */
+ static void emit_sgt( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint cond = BRW_CONDITIONAL_G;
+    emit_sop(p, dst, mask, cond, arg0, arg1);
+ }
+
+ /** SGE: emit_sop() with BRW_CONDITIONAL_GE. */
+ static void emit_sge( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint cond = BRW_CONDITIONAL_GE;
+    emit_sop(p, dst, mask, cond, arg0, arg1);
+ }
+
+ /** SEQ: emit_sop() with BRW_CONDITIONAL_EQ. */
+ static void emit_seq( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint cond = BRW_CONDITIONAL_EQ;
+    emit_sop(p, dst, mask, cond, arg0, arg1);
+ }
+
+ /** SNE: emit_sop() with BRW_CONDITIONAL_NEQ. */
+ static void emit_sne( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint cond = BRW_CONDITIONAL_NEQ;
+    emit_sop(p, dst, mask, cond, arg0, arg1);
+ }
+
+ /**
+  * CMP: per enabled channel, MOV arg2 into dst, compare arg0 < 0.0, then
+  * MOV arg1 into dst.  Both MOVs honour the SATURATE bit of \p mask.
+  * NOTE(review): the second MOV is presumably predicated on the compare
+  * (i.e. dst = arg0 < 0 ? arg1 : arg2) - confirm in brw_eu_emit.c.
+  */
+ static void emit_cmp( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1,
+                       const struct brw_reg *arg2 )
+ {
+    GLuint chan;
+
+    for (chan = 0; chan < 4; chan++) {
+       if (!(mask & (1<<chan)))
+          continue;
+
+       /* Default value first ... */
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_MOV(p, dst[chan], arg2[chan]);
+       brw_set_saturate(p, 0);
+
+       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], brw_imm_f(0));
+
+       /* ... then the alternative. */
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_MOV(p, dst[chan], arg1[chan]);
+       brw_set_saturate(p, 0);
+       brw_set_predicate_control_flag_value(p, 0xff);
+    }
+ }
+
+ /**
+  * MAX: per enabled channel, MOV arg0 into dst, compare arg0 < arg1, then
+  * MOV arg1 into dst.  Both MOVs honour the SATURATE bit of \p mask.
+  * NOTE(review): the second MOV is presumably predicated on the compare -
+  * confirm in brw_eu_emit.c.
+  */
+ static void emit_max( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    GLuint chan;
+
+    for (chan = 0; chan < 4; chan++) {
+       if (!(mask & (1<<chan)))
+          continue;
+
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_MOV(p, dst[chan], arg0[chan]);
+       brw_set_saturate(p, 0);
+
+       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], arg1[chan]);
+
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_MOV(p, dst[chan], arg1[chan]);
+       brw_set_saturate(p, 0);
+       brw_set_predicate_control_flag_value(p, 0xff);
+    }
+ }
+
+ /**
+  * MIN: per enabled channel, MOV arg1 into dst, compare arg0 < arg1, then
+  * MOV arg0 into dst.  Both MOVs honour the SATURATE bit of \p mask.
+  * NOTE(review): the second MOV is presumably predicated on the compare -
+  * confirm in brw_eu_emit.c.
+  */
+ static void emit_min( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    GLuint chan;
+
+    for (chan = 0; chan < 4; chan++) {
+       if (!(mask & (1<<chan)))
+          continue;
+
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_MOV(p, dst[chan], arg1[chan]);
+       brw_set_saturate(p, 0);
+
+       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], arg1[chan]);
+
+       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+       brw_MOV(p, dst[chan], arg0[chan]);
+       brw_set_saturate(p, 0);
+       brw_set_predicate_control_flag_value(p, 0xff);
+    }
+ }
+
+
+ /**
+  * DP3: MUL/MAC/MAC over channels 0..2 of arg0 and arg1, with only the last
+  * MAC writing a real register: the single channel enabled in \p mask.
+  *
+  * Nothing is emitted when no XYZW channel is enabled; more than one
+  * enabled channel trips the assert.
+  */
+ static void emit_dp3( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint chans = mask & BRW_WRITEMASK_XYZW;
+    int dst_chan;
+
+    if (!chans)
+       return; /* Do not emit dead code */
+
+    assert(util_is_power_of_two(chans));
+    dst_chan = ffs(chans) - 1;
+
+    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+
+    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+    brw_set_saturate(p, 0);
+ }
+
+
+ /**
+  * DP4: MUL/MAC/MAC/MAC over channels 0..3 of arg0 and arg1, with only the
+  * last MAC writing a real register: the single channel enabled in \p mask.
+  *
+  * Nothing is emitted when no XYZW channel is enabled; more than one
+  * enabled channel trips the assert.
+  */
+ static void emit_dp4( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint chans = mask & BRW_WRITEMASK_XYZW;
+    int dst_chan;
+
+    if (!chans)
+       return; /* Do not emit dead code */
+
+    assert(util_is_power_of_two(chans));
+    dst_chan = ffs(chans) - 1;
+
+    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
+
+    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
+    brw_set_saturate(p, 0);
+ }
+
+
+ /**
+  * DPH: MUL/MAC/MAC over channels 0..2 of arg0 and arg1 (the last MAC
+  * writes the destination channel), followed by an ADD of arg1[3] to that
+  * destination.  Only the ADD is emitted with saturation enabled.
+  *
+  * Nothing is emitted when no XYZW channel is enabled; more than one
+  * enabled channel trips the assert.
+  */
+ static void emit_dph( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    const GLuint chans = mask & BRW_WRITEMASK_XYZW;
+    int dst_chan;
+
+    if (!chans)
+       return; /* Do not emit dead code */
+
+    assert(util_is_power_of_two(chans));
+    dst_chan = ffs(chans) - 1;
+
+    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+
+    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
+    brw_set_saturate(p, 0);
+ }
+
+
+ /**
+  * XPD: cross product of arg0 and arg1 for the enabled X/Y/Z channels.
+  *
+  * For channel i, with i1 = (i+1)%3 and i2 = (i+2)%3, emits
+  *    MUL null, -arg0[i2], arg1[i1]
+  *    MAC dst[i], arg0[i1], arg1[i2]     (saturated if SATURATE is set)
+  *
+  * The W channel is never written; requesting it is a caller error.
+  */
+ static void emit_xpd( struct brw_compile *p,
+                       const struct brw_reg *dst,
+                       GLuint mask,
+                       const struct brw_reg *arg0,
+                       const struct brw_reg *arg1 )
+ {
+    GLuint i;
+
+    /* Previously written as "!(mask & W) == BRW_WRITEMASK_X", which only
+     * expressed this because logical-not yields 0/1 and the X mask is 1.
+     */
+    assert((mask & BRW_WRITEMASK_W) == 0);
+
+    for (i = 0 ; i < 3; i++) {
+       if (mask & (1<<i)) {
+          GLuint i2 = (i+2)%3;
+          GLuint i1 = (i+1)%3;
+
+          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
+
+          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
+          brw_set_saturate(p, 0);
+       }
+    }
+ }
+
+
+ /**
+  * Emit a one-operand math operation for a single destination channel.
+  *
+  * arg0[0] is copied to message register m2 and brw_math_16() is called
+  * with full precision; saturation is requested through the math message
+  * rather than through brw_set_saturate().
+  *
+  * Nothing is emitted when no XYZW channel is enabled; more than one
+  * enabled channel trips the assert.
+  */
+ static void emit_math1( struct brw_compile *p,
+                         GLuint function,
+                         const struct brw_reg *dst,
+                         GLuint mask,
+                         const struct brw_reg *arg0 )
+ {
+    const GLuint chans = mask & BRW_WRITEMASK_XYZW;
+    int dst_chan;
+
+    if (!chans)
+       return; /* Do not emit dead code */
+
+    assert(util_is_power_of_two(chans));
+    dst_chan = ffs(chans) - 1;
+
+    brw_MOV(p, brw_message_reg(2), arg0[0]);
+
+    /* Send two messages to perform all 16 operations:
+     */
+    brw_math_16(p,
+                dst[dst_chan],
+                function,
+                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+                2,
+                brw_null_reg(),
+                BRW_MATH_PRECISION_FULL);
+ }
+
+
+ /**
+  * Emit a two-operand math operation for a single destination channel.
+  *
+  * arg0[0] and arg1[0] are copied into message registers m2..m5, first and
+  * second halves separately, and two brw_math() calls are issued: one
+  * reading from m2 and writing dst, one reading from m4 and writing
+  * offset(dst, 1).  Instruction state is pushed on entry and popped on exit.
+  *
+  * Nothing is emitted when no XYZW channel is enabled in \p mask; more than
+  * one enabled channel trips the assert.
+  */
+ static void emit_math2( struct brw_compile *p,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
+ {
+ int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+
+ if (!(mask & BRW_WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+
+ brw_push_insn_state(p);
+
+ /* First operand: first half to m2, second half to m4. */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(2), arg0[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+
+ /* Second operand: first half to m3, second half to m5. */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(3), arg1[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+
+
+ /* Send two messages to perform all 16 operations:
+ */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p,
+ dst[dst_chan],
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 4,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_pop_insn_state(p);
+ }
+
+
+
+ /**
+  * Emit a SIMD16 sample message for TEX (sample-compare for shadow targets).
+  *
+  * One message register is loaded per coordinate slot, at m2, m4, m6, ...;
+  * slots not present in the per-target mask are filled with 0.0.  For the
+  * shadow targets the fourth slot is loaded from coord[2].
+  *
+  * \param inst       supplies target, tex_unit and writemask
+  * \param dst        dst[0] is passed to brw_SAMPLE as the destination
+  * \param dst_flags  unused
+  * \param coord      texture coordinate channels
+  * \param sampler    sampler index forwarded to brw_SAMPLE
+  *
+  * Aborts on a texture target that is not listed in the switch.
+  */
+ static void emit_tex( struct brw_wm_compile *c,
+                       const struct brw_wm_instruction *inst,
+                       struct brw_reg *dst,
+                       GLuint dst_flags,
+                       struct brw_reg *coord,
+                       GLuint sampler)
+ {
+    struct brw_compile *p = &c->func;
+    GLuint msgLength, responseLength;
+    GLuint i, nr;
+    GLuint emit;
+    GLuint msg_type;
+    GLboolean shadow = FALSE;
+
+    /* How many input regs are there?
+     */
+    switch (inst->target) {
+    case TGSI_TEXTURE_1D:
+       emit = BRW_WRITEMASK_X;
+       nr = 1;
+       break;
+    case TGSI_TEXTURE_SHADOW1D:
+       emit = BRW_WRITEMASK_XW;
+       nr = 4;
+       shadow = TRUE;
+       break;
+    case TGSI_TEXTURE_2D:
+    case TGSI_TEXTURE_RECT:
+       /* RECT takes two coordinates like 2D, matching emit_txb(); it used
+        * to fall through to the abort() below.
+        */
+       emit = BRW_WRITEMASK_XY;
+       nr = 2;
+       break;
+    case TGSI_TEXTURE_SHADOW2D:
+    case TGSI_TEXTURE_SHADOWRECT:
+       emit = BRW_WRITEMASK_XYW;
+       nr = 4;
+       shadow = TRUE;
+       break;
+    case TGSI_TEXTURE_3D:
+    case TGSI_TEXTURE_CUBE:
+       emit = BRW_WRITEMASK_XYZ;
+       nr = 3;
+       break;
+    default:
+       /* unexpected target */
+       abort();
+    }
+
+    msgLength = 1;
+
+    for (i = 0; i < nr; i++) {
+       /* Slot 3 (the shadow reference) is sourced from coord[2]. */
+       static const GLuint swz[4] = {0,1,2,2};
+       if (emit & (1<<i))
+          brw_MOV(p, brw_message_reg(msgLength+1), coord[swz[i]]);
+       else
+          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
+       msgLength += 2;
+    }
+
+    responseLength = 8;		/* always */
+
+    if (BRW_IS_IGDNG(p->brw)) {
+       if (shadow)
+          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+       else
+          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+    } else {
+       if (shadow)
+          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+       else
+          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+    }
+
+    brw_SAMPLE(p,
+               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+               1,
+               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+               BTI_TEXTURE(inst->tex_unit),
+               sampler,		/* sampler index */
+               inst->writemask,
+               msg_type,
+               responseLength,
+               msgLength,
+               0,
+               1,
+               BRW_SAMPLER_SIMD_MODE_SIMD16);
+ }
+
+
+ /**
+  * Emit a SIMD16 sample-with-bias message for TXB.
+  *
+  * Message registers m2, m4 and m6 receive the first one, two or three
+  * coordinates (depending on the target) with 0.0 in the unused slots, and
+  * m8 receives coord[3].  Shadow targets are treated like their non-shadow
+  * counterparts.
+  *
+  * \param inst       supplies target, tex_unit and writemask
+  * \param dst        dst[0] is passed to brw_SAMPLE as the destination
+  * \param dst_flags  unused
+  * \param sampler    sampler index forwarded to brw_SAMPLE
+  *
+  * Aborts on a texture target that is not listed in the switch.
+  */
+ static void emit_txb( struct brw_wm_compile *c,
+                       const struct brw_wm_instruction *inst,
+                       struct brw_reg *dst,
+                       GLuint dst_flags,
+                       struct brw_reg *coord,
+                       GLuint sampler )
+ {
+    struct brw_compile *p = &c->func;
+    GLuint nr_coords;
+    GLuint slot;
+    GLuint msg_type;
+
+    /* Shadow ignored for txb.
+     */
+    switch (inst->target) {
+    case TGSI_TEXTURE_1D:
+    case TGSI_TEXTURE_SHADOW1D:
+       nr_coords = 1;
+       break;
+    case TGSI_TEXTURE_2D:
+    case TGSI_TEXTURE_RECT:
+    case TGSI_TEXTURE_SHADOW2D:
+    case TGSI_TEXTURE_SHADOWRECT:
+       nr_coords = 2;
+       break;
+    case TGSI_TEXTURE_3D:
+    case TGSI_TEXTURE_CUBE:
+       nr_coords = 3;
+       break;
+    default:
+       /* unexpected target */
+       abort();
+    }
+
+    /* m2, m4, m6: coordinates, zero-filled past nr_coords. */
+    for (slot = 0; slot < 3; slot++) {
+       if (slot < nr_coords)
+          brw_MOV(p, brw_message_reg(2 + 2 * slot), coord[slot]);
+       else
+          brw_MOV(p, brw_message_reg(2 + 2 * slot), brw_imm_f(0));
+    }
+
+    /* m8: the fourth coordinate channel. */
+    brw_MOV(p, brw_message_reg(8), coord[3]);
+
+    if (BRW_IS_IGDNG(p->brw))
+       msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+    else
+       msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+
+    brw_SAMPLE(p,
+               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+               1,
+               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+               BTI_TEXTURE(inst->tex_unit),
+               sampler,		/* sampler index */
+               inst->writemask,
+               msg_type,
+               8,		/* responseLength */
+               9,		/* msgLength */
+               0,
+               1,
+               BRW_SAMPLER_SIMD_MODE_SIMD16);
+ }
+
+
+ /**
+  * LIT, restricted to the Y and Z result channels (the assert rejects a
+  * mask containing X or W).
+  *
+  * Y is a MOV of arg0[0]; Z is a POW of arg0[1] and arg0[3] through
+  * emit_math2().  Afterwards arg0[0] is compared against 0.0 with LE and
+  * 0.0 is MOVed into the enabled channels.
+  * NOTE(review): those two MOVs are presumably predicated on the compare,
+  * so they only take effect where arg0[0] <= 0 - confirm in brw_eu_emit.c.
+  */
+ static void emit_lit( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+ {
+ assert((mask & BRW_WRITEMASK_XW) == 0);
+
+ if (mask & BRW_WRITEMASK_Y) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[1], arg0[0]);
+ brw_set_saturate(p, 0);
+ }
+
+ if (mask & BRW_WRITEMASK_Z) {
+ /* emit_math2() writes a single channel, so hand it &dst[2] with an
+ * X-only mask; the SATURATE bit is carried over.
+ */
+ emit_math2(p, BRW_MATH_FUNCTION_POW,
+ &dst[2],
+ BRW_WRITEMASK_X | (mask & SATURATE),
+ &arg0[1],
+ &arg0[3]);
+ }
+
+ /* Ordinarily you'd use an iff statement to skip or shortcircuit
+ * some of the POW calculations above, but 16-wide iff statements
+ * seem to lock c1 hardware, so this is a nasty workaround:
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
+ {
+ if (mask & BRW_WRITEMASK_Y)
+ brw_MOV(p, dst[1], brw_imm_f(0));
+
+ if (mask & BRW_WRITEMASK_Z)
+ brw_MOV(p, dst[2], brw_imm_f(0));
+ }
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+
+ /**
+  * Kill pixel - set execution mask to zero for those pixels which fail.
+  *
+  * Each of the four channels of arg0 is compared against 0.0 with GE and
+  * the flag register is ANDed into the low word of r0.0.  Every iteration
+  * runs inside its own push/pop of the instruction state.
+  */
+ static void emit_kil( struct brw_wm_compile *c,
+                       struct brw_reg *arg0)
+ {
+    struct brw_compile *p = &c->func;
+    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+    GLuint chan = 0;
+
+    /* XXX - usually won't need 4 compares!
+     */
+    while (chan < 4) {
+       brw_push_insn_state(p);
+       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[chan], brw_imm_f(0));
+       brw_set_predicate_control_flag_value(p, 0xff);
+       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+       brw_pop_insn_state(p);
+
+       chan++;
+    }
+ }
+
+ /**
+  * KILLP kills the pixels that are currently executing, not based on a test
+  * of the arguments.
+  *
+  * With mask control disabled, c->emit_mask_reg is loaded with the NOT of
+  * brw_mask_reg(1) and then ANDed into the low word of r0.0.
+  */
+ static void emit_killp( struct brw_wm_compile *c )
+ {
+    struct brw_compile *p = &c->func;
+    struct brw_reg kill_mask = c->emit_mask_reg;
+    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+    brw_push_insn_state(p);
+    brw_set_mask_control(p, BRW_MASK_DISABLE);
+    brw_NOT(p, kill_mask, brw_mask_reg(1)); /* IMASK */
+    brw_AND(p, r0uw, kill_mask, r0uw);
+    brw_pop_insn_state(p);
+ }
+
+ /**
+  * Copy r1 into message register base_reg + 1 and send the framebuffer
+  * write message.
+  *
+  * \param base_reg  first message register of the write
+  * \param nr        forwarded to brw_fb_WRITE (callers pass the count of
+  *                  message registers they filled)
+  * \param target    forwarded to brw_fb_WRITE
+  * \param eot       forwarded to brw_fb_WRITE
+  */
+ static void fire_fb_write( struct brw_wm_compile *c,
+ GLuint base_reg,
+ GLuint nr,
+ GLuint target,
+ GLuint eot )
+ {
+ struct brw_compile *p = &c->func;
+
+ /* Pass through control information:
+ */
+/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+
+ /* Send framebuffer write message: */
+/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
+ brw_fb_WRITE(p,
+ retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ target,
+ nr,
+ 0,
+ eot);
+ }
+
+
+ /**
+  * Copy the register selected by c->key.aa_dest_stencil_reg into message
+  * register \p reg.
+  *
+  * The key value is split into an index into arg1 (value / 2) and a
+  * register offset within that entry (value % 2).
+  */
+ static void emit_aa( struct brw_wm_compile *c,
+                      struct brw_reg *arg1,
+                      GLuint reg )
+ {
+    struct brw_compile *p = &c->func;
+    const GLuint aa_reg = c->key.aa_dest_stencil_reg;
+    struct brw_reg aa = offset(arg1[aa_reg / 2], aa_reg % 2);
+
+    brw_push_insn_state(p);
+    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
+    brw_MOV(p, brw_message_reg(reg), aa);
+    brw_pop_insn_state(p);
+ }
+
+
+ /* Post-fragment-program processing. Send the results to the
+ * framebuffer.
+ *
+ * Message registers are filled in this order, with nr tracking the next
+ * free one: an optional slot for AA, eight registers of color, two for
+ * source depth if the key asks for it, two for destination depth if the
+ * key asks for it.  The write itself is issued through fire_fb_write().
+ *
+ * \param arg0 the fragment color
+ * \param arg1 the pass-through depth value
+ * \param arg2 the shader-computed depth value
+ * \param target forwarded to fire_fb_write()
+ * \param eot forwarded to fire_fb_write()
+ */
+ static void emit_fb_write( struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot)
+ {
+ struct brw_compile *p = &c->func;
+ GLuint nr = 2;
+ GLuint channel;
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ /* I don't really understand how this achieves the color interleave
+ * (ie RGBARGBA) in the result: [Do the saturation here]
+ */
+ {
+ brw_push_insn_state(p);
+
+ for (channel = 0; channel < 4; channel++) {
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(nr + channel),
+ arg0[channel]);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
+
+ /* skip over the regs populated above:
+ */
+ nr += 8;
+
+ brw_pop_insn_state(p);
+ }
+
+ if (c->key.source_depth_to_render_target)
+ {
+ /* Shader-computed depth comes from arg2[2], pass-through from arg1[1]. */
+ if (c->key.computes_depth)
+ brw_MOV(p, brw_message_reg(nr), arg2[2]);
+ else
+ brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg)
+ {
+ /* Same index/offset split of the key value as in emit_aa(). */
+ GLuint comp = c->key.dest_depth_reg / 2;
+ GLuint off = c->key.dest_depth_reg % 2;
+
+ if (off != 0) {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_pop_insn_state(p);
+ }
+ else {
+ brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+ }
+ nr += 2;
+ }
+
+ if (!c->key.runtime_check_aads_emit) {
+ if (c->key.aa_dest_stencil_reg)
+ emit_aa(c, arg1, 2);
+
+ fire_fb_write(c, 0, nr, target, eot);
+ }
+ else {
+ /* Runtime check: test bit 26 of dword 6 of r1 and jump over the
+ * AA variant of the write when the AND result is zero.
+ */
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_instruction *jmp;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p,
+ v1_null_ud,
+ get_element_ud(brw_vec8_grf(1,0), 6),
+ brw_imm_ud(1<<26));
+
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ emit_aa(c, arg1, 2);
+ fire_fb_write(c, 0, nr, target, eot);
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* ELSE: Shuffle up one register to fill in the hole left for AA:
+ */
+ fire_fb_write(c, 1, nr-1, target, eot);
+ }
+ }
+
+
+/**
+ * Move a GPR to scratch memory.
+ */
+static void emit_spill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ GLuint slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /*
+ mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
+ */
+ brw_MOV(p, brw_message_reg(2), reg);
+
+ /*
+ mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
+ send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
+ */
+ brw_dp_WRITE_16(p,
+ retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
+ slot);
+}
+
+
+/**
+ * Load a GPR from scratch memory.
+ */
+static void emit_unspill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ GLuint slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Slot 0 is the undef value.
+ */
+ if (slot == 0) {
+ brw_MOV(p, reg, brw_imm_f(0));
+ return;
+ }
+
+ /*
+ mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
+ send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
+ */
+
+ brw_dp_READ_16(p,
+ retype(vec16(reg), BRW_REGISTER_TYPE_UW),
+ slot);
+}
+
+
+/**
+ * Retrieve up to 4 GEN4 register pairs for the given wm reg:
+ * Args with unspill_reg != 0 will be loaded from scratch memory.
+ */
+static void get_argument_regs( struct brw_wm_compile *c,
+ struct brw_wm_ref *arg[],
+ struct brw_reg *regs )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (arg[i]) {
+ if (arg[i]->unspill_reg)
+ emit_unspill(c,
+ brw_vec8_grf(arg[i]->unspill_reg, 0),
+ arg[i]->value->spill_slot);
+
+ regs[i] = arg[i]->hw_reg;
+ }
+ else {
+ regs[i] = brw_null_reg();
+ }
+ }
+}
+
+
+/**
+ * For values that have a spill_slot!=0, write those regs to scratch memory.
+ */
+static void spill_values( struct brw_wm_compile *c,
+ struct brw_wm_value *values,
+ GLuint nr )
+{
+ GLuint i;
+
+ for (i = 0; i < nr; i++)
+ if (values[i].spill_slot)
+ emit_spill(c, values[i].hw_reg, values[i].spill_slot);
+}
+
+
+ /**
+  * Emit the fragment program instructions here.
+  *
+  * Spills any payload, constant and interpolation values that were given a
+  * spill slot, then walks c->instruction[] and dispatches each entry to
+  * the matching emit_*() helper.  After every instruction, destinations
+  * with a spill slot are written back to scratch memory.  When DEBUG_WM is
+  * set the generated code is disassembled to stderr.
+  *
+  * An opcode that is not handled is reported with debug_printf() and
+  * otherwise skipped.
+  */
+ void brw_wm_emit( struct brw_wm_compile *c )
+ {
+    struct brw_compile *p = &c->func;
+    GLuint insn;
+
+    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+    /* Check if any of the payload regs need to be spilled:
+     */
+    spill_values(c, c->payload.depth, 4);
+    spill_values(c, c->creg, c->nr_creg);
+    spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
+
+
+    for (insn = 0; insn < c->nr_insns; insn++) {
+
+       struct brw_wm_instruction *inst = &c->instruction[insn];
+       struct brw_reg args[3][4], dst[4];
+       GLuint i, dst_flags;
+
+       /* Get argument regs:
+        */
+       for (i = 0; i < 3; i++)
+          get_argument_regs(c, inst->src[i], args[i]);
+
+       /* Get dest regs:
+        */
+       for (i = 0; i < 4; i++)
+          if (inst->dst[i])
+             dst[i] = inst->dst[i]->hw_reg;
+          else
+             dst[i] = brw_null_reg();
+
+       /* Flags
+        */
+       dst_flags = inst->writemask;
+       if (inst->saturate)
+          dst_flags |= SATURATE;
+
+       switch (inst->opcode) {
+          /* Generated instructions for calculating triangle interpolants:
+           */
+       case WM_PIXELXY:
+          emit_pixel_xy(p, dst, dst_flags);
+          break;
+
+       case WM_DELTAXY:
+          emit_delta_xy(p, dst, dst_flags, args[0]);
+          break;
+
+       case WM_WPOSXY:
+          emit_wpos_xy(c, dst, dst_flags, args[0]);
+          break;
+
+       case WM_PIXELW:
+          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case WM_LINTERP:
+          emit_linterp(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case WM_PINTERP:
+          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+          break;
+
+       case WM_CINTERP:
+          emit_cinterp(p, dst, dst_flags, args[0]);
+          break;
+
+       case WM_FB_WRITE:
+          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
+          break;
+
+       case WM_FRONTFACING:
+          emit_frontfacing(p, dst, dst_flags);
+          break;
+
+          /* Straightforward arithmetic:
+           */
+       case TGSI_OPCODE_ADD:
+          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_FRC:
+          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_FLR:
+          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_DDX:
+          emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+          break;
+
+       case TGSI_OPCODE_DDY:
+          emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+          break;
+
+       case TGSI_OPCODE_DP3:
+          emit_dp3(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_DP4:
+          emit_dp4(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_DPH:
+          emit_dph(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_TRUNC:
+          emit_trunc(p, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_LRP:
+          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+          break;
+
+       case TGSI_OPCODE_MAD:
+          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+          break;
+
+       case TGSI_OPCODE_MOV:
+          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_MUL:
+          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_XPD:
+          emit_xpd(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+          /* Higher math functions:
+           */
+       case TGSI_OPCODE_RCP:
+          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_RSQ:
+          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_SIN:
+          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_COS:
+          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_EX2:
+          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_LG2:
+          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+          break;
+
+       case TGSI_OPCODE_SCS:
+          /* There is an scs math function, but it would need some
+           * fixup for 16-element execution.
+           */
+          if (dst_flags & BRW_WRITEMASK_X)
+             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
+          if (dst_flags & BRW_WRITEMASK_Y)
+             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
+          break;
+
+       case TGSI_OPCODE_POW:
+          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+          break;
+
+          /* Comparisons:
+           */
+       case TGSI_OPCODE_CMP:
+          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+          break;
+
+       case TGSI_OPCODE_MAX:
+          emit_max(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_MIN:
+          emit_min(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_SLT:
+          emit_slt(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_SLE:
+          emit_sle(p, dst, dst_flags, args[0], args[1]);
+          break;
+       case TGSI_OPCODE_SGT:
+          emit_sgt(p, dst, dst_flags, args[0], args[1]);
+          break;
+       case TGSI_OPCODE_SGE:
+          emit_sge(p, dst, dst_flags, args[0], args[1]);
+          break;
+       case TGSI_OPCODE_SEQ:
+          emit_seq(p, dst, dst_flags, args[0], args[1]);
+          break;
+       case TGSI_OPCODE_SNE:
+          emit_sne(p, dst, dst_flags, args[0], args[1]);
+          break;
+
+       case TGSI_OPCODE_LIT:
+          emit_lit(p, dst, dst_flags, args[0]);
+          break;
+
+          /* Texturing operations:
+           */
+       case TGSI_OPCODE_TEX:
+          emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler);
+          break;
+
+       case TGSI_OPCODE_TXB:
+          emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler);
+          break;
+
+       case TGSI_OPCODE_KIL:
+          emit_kil(c, args[0]);
+          break;
+
+       case TGSI_OPCODE_KILP:
+          emit_killp(c);
+          break;
+
+       default: {
+          /* Only ask TGSI for a mnemonic when the opcode is inside the
+           * TGSI range; the lookup has no entry (and returns NULL) for
+           * anything else, and this diagnostic must not crash on it.
+           */
+          const char *mnemonic = "unknown";
+
+          if (inst->opcode < TGSI_OPCODE_LAST) {
+             const struct tgsi_opcode_info *info =
+                tgsi_get_opcode_info(inst->opcode);
+
+             if (info && info->mnemonic)
+                mnemonic = info->mnemonic;
+          }
+
+          debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
+                       inst->opcode,
+                       mnemonic);
+          break;
+       }
+       }
+
+       /* Write back any destinations that live in scratch memory. */
+       for (i = 0; i < 4; i++)
+          if (inst->dst[i] && inst->dst[i]->spill_slot)
+             emit_spill(c,
+                        inst->dst[i]->hw_reg,
+                        inst->dst[i]->spill_slot);
+    }
+
+    if (BRW_DEBUG & DEBUG_WM) {
+       debug_printf("wm-native:\n");
+       brw_disasm(stderr, p->store, p->nr_insn);
+    }
+ }
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
new file mode 100644
index 0000000000..9c5b527f89
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -0,0 +1,1224 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_util.h"
+
+#include "brw_wm.h"
+#include "brw_util.h"
+#include "brw_debug.h"
+
+
+/***********************************************************************
+ * Source regs
+ */
+
+ /**
+  * Build a source operand for register \p idx of \p file with the identity
+  * swizzle and the indirect, negate and abs flags cleared.
+  */
+ static struct brw_fp_src src_reg(GLuint file, GLuint idx)
+ {
+    struct brw_fp_src src;
+
+    /* Register identity */
+    src.file = file;
+    src.index = idx;
+
+    /* Modifiers: none */
+    src.abs = 0;
+    src.negate = 0;
+    src.indirect = 0;
+    src.swizzle = BRW_SWIZZLE_XYZW;
+
+    return src;
+ }
+
+ /**
+  * Source operand naming the same file/index as \p dst.  The writemask and
+  * saturate flag of \p dst are not carried over.
+  */
+ static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
+ {
+    struct brw_fp_src src = src_reg(dst.file, dst.index);
+    return src;
+ }
+
+ /** The "no operand" source: register 0 of TGSI_FILE_NULL. */
+ static struct brw_fp_src src_undef( void )
+ {
+    struct brw_fp_src undef = src_reg(TGSI_FILE_NULL, 0);
+    return undef;
+ }
+
+ /** True when \p src lives in TGSI_FILE_NULL, as src_undef() does. */
+ static GLboolean src_is_undef(struct brw_fp_src src)
+ {
+    return TGSI_FILE_NULL == src.file;
+ }
+
+ /**
+  * Apply a swizzle on top of the one already present in \p reg.
+  *
+  * Each of x/y/z/w selects a component of the existing swizzle via
+  * BRW_GET_SWZ; the results are packed two bits per channel, X in the low
+  * bits.
+  */
+ static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
+ {
+    const unsigned old = reg.swizzle;
+    unsigned composed = 0;
+
+    composed |= BRW_GET_SWZ(old, x) << 0;
+    composed |= BRW_GET_SWZ(old, y) << 2;
+    composed |= BRW_GET_SWZ(old, z) << 4;
+    composed |= BRW_GET_SWZ(old, w) << 6;
+
+    reg.swizzle = composed;
+    return reg;
+ }
+
+ /** Replicate component \p x of \p reg across all four channels. */
+ static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
+ {
+    struct brw_fp_src splat = src_swizzle(reg, x, x, x, x);
+    return splat;
+ }
+
+ /**
+  * Return \p src with the abs flag set.  Any negate flag already on the
+  * operand is cleared.
+  */
+ static struct brw_fp_src src_abs( struct brw_fp_src src )
+ {
+    src.abs = 1;
+    src.negate = 0;
+    return src;
+ }
+
+ /**
+  * Return \p src with the negate flag set.  This sets the flag rather than
+  * toggling it, and clears any abs flag already on the operand.
+  */
+ static struct brw_fp_src src_negate( struct brw_fp_src src )
+ {
+    src.abs = 0;
+    src.negate = 1;
+    return src;
+ }
+
+
+ /**
+  * Try to express the \p nr floats in \p v as a swizzle of the immediate
+  * \p v2 (currently holding \p *nr2 values, at most 4).
+  *
+  * Values already present in v2 are reused; missing ones are appended to
+  * v2 and \p *nr2 is bumped.  Two bits per channel of \p *swizzle receive
+  * the chosen slot.
+  *
+  * \return TRUE on success, FALSE if a value had to be appended but v2 was
+  *         already full.  On failure v2/nr2 may already have been extended
+  *         by earlier channels and \p *swizzle is only partially filled.
+  */
+ static int match_or_expand_immediate( const float *v,
+                                       unsigned nr,
+                                       float *v2,
+                                       unsigned *nr2,
+                                       unsigned *swizzle )
+ {
+    unsigned chan;
+
+    *swizzle = 0;
+
+    for (chan = 0; chan < nr; chan++) {
+       unsigned slot;
+
+       /* Look for the first existing slot holding this value. */
+       for (slot = 0; slot < *nr2; slot++) {
+          if (v[chan] == v2[slot])
+             break;
+       }
+
+       if (slot == *nr2) {
+          /* Not present: append, unless the immediate is full. */
+          if (*nr2 >= 4)
+             return FALSE;
+
+          v2[slot] = v[chan];
+          (*nr2)++;
+       }
+
+       *swizzle |= slot << (chan * 2);
+    }
+
+    return TRUE;
+ }
+
+
+
+ /**
+  * Internally generated immediates: overkill...
+  *
+  * Returns a source operand in TGSI_FILE_IMMEDIATE whose swizzle selects
+  * the \p nr floats in \p v.  Existing immediates are tried first (and may
+  * be extended in the process); failing that a new immediate slot is
+  * claimed.  If neither works, c->error is set and an undefined source is
+  * returned.
+  */
+ static struct brw_fp_src src_imm( struct brw_wm_compile *c,
+                                   const GLfloat *v,
+                                   unsigned nr)
+ {
+    unsigned slot, chan;
+    unsigned swizzle;
+
+    /* Could do a first pass where we examine all existing immediates
+     * without expanding.
+     */
+    for (slot = 0; slot < c->nr_immediates; slot++) {
+       if (match_or_expand_immediate( v,
+                                      nr,
+                                      c->immediate[slot].v,
+                                      &c->immediate[slot].nr,
+                                      &swizzle ))
+          break;
+    }
+
+    if (slot == c->nr_immediates) {
+       /* No existing immediate could take the values: open a new one. */
+       int placed = 0;
+
+       if (c->nr_immediates < Elements(c->immediate)) {
+          slot = c->nr_immediates++;
+          placed = match_or_expand_immediate( v,
+                                              nr,
+                                              c->immediate[slot].v,
+                                              &c->immediate[slot].nr,
+                                              &swizzle );
+       }
+
+       if (!placed) {
+          c->error = 1;
+          return src_undef();
+       }
+    }
+
+    /* Make sure that all referenced elements are from this immediate.
+     * Has the effect of making size-one immediates into scalars.
+     */
+    for (chan = nr; chan < 4; chan++)
+       swizzle |= (swizzle & 0x3) << (chan * 2);
+
+    return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, slot ),
+                        BRW_GET_SWZ(swizzle, X),
+                        BRW_GET_SWZ(swizzle, Y),
+                        BRW_GET_SWZ(swizzle, Z),
+                        BRW_GET_SWZ(swizzle, W) );
+ }
+
+
+
+ /** Immediate source for a single float, via src_imm() with nr = 1. */
+ static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
+                                     GLfloat f )
+ {
+    const GLfloat value[1] = { f };
+    return src_imm(c, value, 1);
+ }
+
+ /** Immediate source for four floats, via src_imm() with nr = 4. */
+ static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
+                                     GLfloat x,
+                                     GLfloat y,
+                                     GLfloat z,
+                                     GLfloat w)
+ {
+    GLfloat values[4];
+
+    values[0] = x;
+    values[1] = y;
+    values[2] = z;
+    values[3] = w;
+
+    return src_imm(c, values, 4);
+ }
+
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+ /**
+  * Build a destination operand for register \p idx of \p file with all
+  * four channels enabled and the indirect and saturate flags cleared.
+  */
+ static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
+ {
+    struct brw_fp_dst dst;
+
+    /* Register identity */
+    dst.file = file;
+    dst.index = idx;
+
+    /* Defaults */
+    dst.saturate = 0;
+    dst.indirect = 0;
+    dst.writemask = BRW_WRITEMASK_XYZW;
+
+    return dst;
+ }
+
+ /**
+  * Narrow the writemask of \p reg: channels stay enabled only if they are
+  * also set in \p mask.
+  */
+ static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
+ {
+    reg.writemask = reg.writemask & mask;
+    return reg;
+ }
+
+ /** The "no destination" operand: register 0 of TGSI_FILE_NULL. */
+ static struct brw_fp_dst dst_undef( void )
+ {
+    struct brw_fp_dst undef = dst_reg(TGSI_FILE_NULL, 0);
+    return undef;
+ }
+
+ /** True when \p dst lives in TGSI_FILE_NULL, as dst_undef() does. */
+ static boolean dst_is_undef( struct brw_fp_dst dst )
+ {
+    return TGSI_FILE_NULL == dst.file;
+ }
+
+ /** Return \p reg with its saturate flag set to \p flag. */
+ static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
+ {
+    struct brw_fp_dst result = reg;
+    result.saturate = flag;
+    return result;
+ }
+
+ /**
+  * Claim an internal temporary.
+  *
+  * c->fp_temp is a bitmask of temporaries in use; the lowest clear bit is
+  * taken and mapped to register c->fp_first_internal_temp + bit in
+  * TGSI_FILE_TEMPORARY.
+  *
+  * \return the temporary, or an undefined destination (with c->error set)
+  *         when every bit of c->fp_temp is already taken.
+  */
+ static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
+ {
+    int bit = ffs( ~c->fp_temp );
+
+    if (!bit) {
+       /* Previously this fell through and evaluated 1<<(bit-1) with
+        * bit == 0, i.e. a shift by -1.  Flag the failure the same way
+        * src_imm() does when it runs out of immediates.
+        */
+       debug_printf("%s: out of temporaries\n", __FILE__);
+       c->error = 1;
+       return dst_undef();
+    }
+
+    /* Unsigned constant so that claiming bit 31 is well defined. */
+    c->fp_temp |= 1u << (bit-1);
+    return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
+ }
+
+
+ /**
+  * Return a temporary obtained from get_temp() to the pool by clearing
+  * its bit in c->fp_temp.
+  *
+  * Undefined destinations and registers outside the internal-temporary
+  * range are ignored: shifting by their out-of-range bit number would be
+  * undefined behaviour.
+  */
+ static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
+ {
+    unsigned bit;
+
+    if (dst_is_undef(temp) || temp.index < c->fp_first_internal_temp)
+       return;
+
+    bit = temp.index - c->fp_first_internal_temp;
+    if (bit >= sizeof(c->fp_temp) * 8)
+       return;
+
+    c->fp_temp &= ~(1u << bit);
+ }
+
+
+/***********************************************************************
+ * Instructions
+ */
+
+/* Claim the next unused slot of c->fp_instructions and bump the
+ * instruction count.  No capacity check is performed here.
+ */
+static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+   struct brw_fp_instruction *slot = &c->fp_instructions[c->nr_fp_insns];
+
+   c->nr_fp_insns++;
+   return slot;
+}
+
+/**
+ * Append one instruction to the program and fill in all of its fields,
+ * including the texture-related ones.
+ *
+ * A non-zero tex_unit or target is only accepted for TXP, TXB, TEX and
+ * WM_FB_WRITE (asserted).
+ *
+ * \return pointer to the instruction slot that was filled in
+ */
+static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
+                                               GLuint op,
+                                               struct brw_fp_dst dest,
+                                               GLuint tex_unit,
+                                               GLuint target,
+                                               GLuint sampler,
+                                               struct brw_fp_src src0,
+                                               struct brw_fp_src src1,
+                                               struct brw_fp_src src2 )
+{
+   struct brw_fp_instruction *insn = get_fp_inst(c);
+
+   assert((!tex_unit && !target) ||
+          op == TGSI_OPCODE_TXP ||
+          op == TGSI_OPCODE_TXB ||
+          op == TGSI_OPCODE_TEX ||
+          op == WM_FB_WRITE);
+
+   insn->src[0] = src0;
+   insn->src[1] = src1;
+   insn->src[2] = src2;
+
+   insn->sampler = sampler;
+   insn->target = target;
+   insn->tex_unit = tex_unit;
+
+   insn->dst = dest;
+   insn->opcode = op;
+
+   return insn;
+}
+
+
+/* Emit a non-texture instruction with three source operands. */
+static INLINE void emit_op3(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest,
+                            struct brw_fp_src src0,
+                            struct brw_fp_src src1,
+                            struct brw_fp_src src2 )
+{
+   /* tex_unit, target and sampler are all zero for ALU instructions. */
+   (void) emit_tex_op(c, op, dest,
+                      0, 0, 0,
+                      src0, src1, src2);
+}
+
+
+/* Emit a non-texture instruction with two source operands; the third
+ * source slot is filled with src_undef().
+ */
+static INLINE void emit_op2(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest,
+                            struct brw_fp_src src0,
+                            struct brw_fp_src src1)
+{
+   (void) emit_tex_op(c, op, dest,
+                      0, 0, 0,
+                      src0, src1, src_undef());
+}
+
+/* Emit a non-texture instruction with one source operand; the second
+ * and third source slots are filled with src_undef().
+ */
+static INLINE void emit_op1(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest,
+                            struct brw_fp_src src0)
+{
+   (void) emit_tex_op(c, op, dest,
+                      0, 0, 0,
+                      src0, src_undef(), src_undef());
+}
+
+/* Emit a non-texture instruction that takes no source operands; all
+ * three source slots are filled with src_undef().
+ */
+static INLINE void emit_op0(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest)
+{
+   (void) emit_tex_op(c, op, dest,
+                      0, 0, 0,
+                      src_undef(), src_undef(), src_undef());
+}
+
+
+
+/* Many opcodes produce the same value across all the result channels.
+ * We'd rather not have to support that splatting in the opcode implementations,
+ * and brw_wm_pass*.c wants to optimize them out by shuffling references around
+ * anyway.  We can easily get both by emitting the opcode to one channel, and
+ * then MOVing it to the others, which brw_wm_pass*.c already understands.
+ *
+ * The opcode is emitted to the lowest channel enabled in dst.writemask;
+ * if further channels are enabled, a single MOV replicates that channel
+ * into them.  Nothing is emitted for an empty writemask.
+ */
+static void emit_scalar_insn(struct brw_wm_compile *c,
+                             unsigned opcode,
+                             struct brw_fp_dst dst,
+                             struct brw_fp_src src0,
+                             struct brw_fp_src src1,
+                             struct brw_fp_src src2 )
+{
+   unsigned first_chan;
+   unsigned first_mask;
+
+   /* Test for the empty mask before touching ffs(): ffs(0) is 0, so the
+    * channel number would wrap around and the shift below would be
+    * undefined.
+    */
+   if (dst.writemask == 0)
+      return;
+
+   first_chan = ffs(dst.writemask) - 1;
+   first_mask = 1 << first_chan;
+
+   emit_op3( c, opcode,
+             dst_mask(dst, first_mask),
+             src0, src1, src2 );
+
+   if (dst.writemask != first_mask) {
+      emit_op1(c, TGSI_OPCODE_MOV,
+               dst_mask(dst, ~first_mask),
+               src_scalar(src_reg_from_dst(dst), first_chan));
+   }
+}
+
+
+/***********************************************************************
+ * Special instructions for interpolation and other tasks
+ */
+
+/* Return the register holding the WM_PIXELXY result.  The instruction
+ * is emitted into a fresh temporary on the first call and the result is
+ * cached in c->fp_pixel_xy for later calls.
+ */
+static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
+{
+   struct brw_fp_dst xy_tmp;
+   struct brw_fp_src depth_payload;
+
+   if (!src_is_undef(c->fp_pixel_xy))
+      return c->fp_pixel_xy;
+
+   xy_tmp = get_temp(c);
+   depth_payload = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+
+   /* xy_tmp.xy = PIXELXY payload[PAYLOAD_DEPTH] */
+   emit_op1(c,
+            WM_PIXELXY,
+            dst_mask(xy_tmp, BRW_WRITEMASK_XY),
+            depth_payload);
+
+   c->fp_pixel_xy = src_reg_from_dst(xy_tmp);
+   return c->fp_pixel_xy;
+}
+
+/* Return the register holding the WM_DELTAXY result.  The instruction
+ * is emitted on the first call (pulling in get_pixel_xy() as needed) and
+ * the result is cached in c->fp_delta_xy.
+ */
+static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
+{
+   struct brw_fp_dst delta_tmp;
+   struct brw_fp_src xy;
+   struct brw_fp_src depth_payload;
+
+   if (!src_is_undef(c->fp_delta_xy))
+      return c->fp_delta_xy;
+
+   /* Allocate our temporary before get_pixel_xy() allocates its own. */
+   delta_tmp = get_temp(c);
+   xy = get_pixel_xy(c);
+   depth_payload = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+
+   /* delta_tmp.xy = DELTAXY pixel_xy, payload[PAYLOAD_DEPTH] */
+   emit_op3(c,
+            WM_DELTAXY,
+            dst_mask(delta_tmp, BRW_WRITEMASK_XY),
+            xy,
+            depth_payload,
+            src_undef());
+
+   c->fp_delta_xy = src_reg_from_dst(delta_tmp);
+   return c->fp_delta_xy;
+}
+
+/* Return the register holding the WM_PIXELW result.  The instruction is
+ * emitted on the first call (pulling in get_delta_xy() as needed) and
+ * the result is cached in c->fp_pixel_w.
+ */
+static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
+{
+   struct brw_fp_dst w_tmp;
+   struct brw_fp_src delta;
+   struct brw_fp_src wpos_payload;
+
+   if (!src_is_undef(c->fp_pixel_w))
+      return c->fp_pixel_w;
+
+   /* Allocate our temporary before get_delta_xy() allocates its own. */
+   w_tmp = get_temp(c);
+   delta = get_delta_xy(c);
+
+   /* XXX: assuming position is always first -- valid?
+    */
+   wpos_payload = src_reg(BRW_FILE_PAYLOAD, 0);
+
+   /* w_tmp.w = PIXELW payload[0], deltas */
+   emit_op3(c,
+            WM_PIXELW,
+            dst_mask(w_tmp, BRW_WRITEMASK_W),
+            wpos_payload,
+            delta,
+            src_undef());
+
+   c->fp_pixel_w = src_reg_from_dst(w_tmp);
+   return c->fp_pixel_w;
+}
+
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ *
+ * emit_interp() writes TGSI_FILE_INPUT[idx] from BRW_FILE_PAYLOAD[idx].
+ * The instructions chosen depend on the attribute's semantic and, for
+ * colours and generic attributes, on interp_mode:
+ *
+ * POSITION: .xy from WM_WPOSXY(pixel_xy), .zw from WM_LINTERP
+ * COLOR: WM_CINTERP if key.flat_shade, WM_LINTERP for linear
+ * interpolation, WM_PINTERP otherwise
+ * FOG: .x from WM_PINTERP, .yz = 0.0, .w = 1.0
+ * FACE: .x from WM_FRONTFACING, .yz = 0.0, .w = 1.0
+ * PSIZE: .xy from WM_PINTERP, .z = 0.0, .w = 1.0
+ * other: WM_CINTERP / WM_LINTERP / WM_PINTERP according to
+ * interp_mode; any other mode emits nothing
+ *
+ * get_delta_xy() is called for every attribute, including those (such
+ * as FACE) whose instructions do not read the deltas.
+ */
+
+static void emit_interp( struct brw_wm_compile *c,
+ GLuint idx,
+ GLuint semantic,
+ GLuint interp_mode )
+{
+ struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+ struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
+ struct brw_fp_src deltas = get_delta_xy(c); /* may emit PIXELXY/DELTAXY on first use */
+
+ /* Need to use PINTERP on attributes which have been
+ * multiplied by 1/W in the SF program, and LINTERP on those
+ * which have not:
+ */
+ switch (semantic) {
+ case TGSI_SEMANTIC_POSITION:
+ /* Have to treat wpos.xy specially:
+ */
+ emit_op1(c,
+ WM_WPOSXY,
+ dst_mask(dst, BRW_WRITEMASK_XY),
+ get_pixel_xy(c));
+
+ /* dst.zw = LINTERP payload[idx], deltas
+ */
+ emit_op2(c,
+ WM_LINTERP,
+ dst_mask(dst, BRW_WRITEMASK_ZW),
+ interp,
+ deltas);
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ if (c->key.flat_shade) {
+ emit_op1(c,
+ WM_CINTERP,
+ dst,
+ interp);
+ }
+ else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
+ emit_op2(c,
+ WM_LINTERP,
+ dst,
+ interp,
+ deltas);
+ }
+ else {
+ emit_op3(c,
+ WM_PINTERP,
+ dst,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ }
+
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ /* Interpolate the fog coordinate */
+ emit_op3(c,
+ WM_PINTERP,
+ dst_mask(dst, BRW_WRITEMASK_X),
+ interp,
+ deltas,
+ get_pixel_w(c));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_YZ),
+ src_imm1f(c, 0.0));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src_imm1f(c, 1.0));
+ break;
+
+ case TGSI_SEMANTIC_FACE:
+ /* XXX review/test this case */
+ emit_op0(c,
+ WM_FRONTFACING,
+ dst_mask(dst, BRW_WRITEMASK_X));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_YZ),
+ src_imm1f(c, 0.0));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src_imm1f(c, 1.0));
+ break;
+
+ case TGSI_SEMANTIC_PSIZE:
+ /* XXX review/test this case */
+ emit_op3(c,
+ WM_PINTERP,
+ dst_mask(dst, BRW_WRITEMASK_XY),
+ interp,
+ deltas,
+ get_pixel_w(c));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_Z),
+ src_imm1f(c, 0.0f));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src_imm1f(c, 1.0f));
+ break;
+
+ default:
+ /* Generic attribute: the declared interpolation mode decides. */
+ switch (interp_mode) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ emit_op1(c,
+ WM_CINTERP,
+ dst,
+ interp);
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ emit_op2(c,
+ WM_LINTERP,
+ dst,
+ interp,
+ deltas);
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ emit_op3(c,
+ WM_PINTERP,
+ dst,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ break;
+ } /* NOTE(review): no default case -- an unrecognised mode leaves the input unwritten */
+ break;
+ }
+}
+
+
+/***********************************************************************
+ * Expand various instructions here to simpler forms.
+ */
+/**
+ * Expand a DST instruction (see the TGSI_OPCODE_DST case in emit_insn())
+ * into per-channel MUL/MOV instructions:
+ *
+ *    dst.x = 1.0
+ *    dst.y = src0.y * src1.y
+ *    dst.z = src0.z
+ *    dst.w = src1.w
+ *
+ * Only channels enabled in dst.writemask are emitted.  X and Z are tested
+ * separately: the former combined XZ test emitted a MOV with an empty
+ * writemask (and possibly a needless 1.0 immediate) whenever only one of
+ * the two channels was requested.
+ */
+static void precalc_dst( struct brw_wm_compile *c,
+                         struct brw_fp_dst dst,
+                         struct brw_fp_src src0,
+                         struct brw_fp_src src1 )
+{
+   if (dst.writemask & BRW_WRITEMASK_Y) {
+      /* dst.y = mul src0.y, src1.y
+       */
+      emit_op2(c,
+               TGSI_OPCODE_MUL,
+               dst_mask(dst, BRW_WRITEMASK_Y),
+               src0,
+               src1);
+   }
+
+   if (dst.writemask & BRW_WRITEMASK_Z) {
+      /* dst.z = mov src0.zzzz
+       */
+      emit_op1(c,
+               TGSI_OPCODE_MOV,
+               dst_mask(dst, BRW_WRITEMASK_Z),
+               src_scalar(src0, Z));
+   }
+
+   if (dst.writemask & BRW_WRITEMASK_X) {
+      /* dst.x = imm1f(1.0); the saturate flag is dropped for this MOV
+       */
+      emit_op1(c,
+               TGSI_OPCODE_MOV,
+               dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+               src_imm1f(c, 1.0));
+   }
+
+   if (dst.writemask & BRW_WRITEMASK_W) {
+      /* dst.w = mov src1.w
+       */
+      emit_op1(c,
+               TGSI_OPCODE_MOV,
+               dst_mask(dst, BRW_WRITEMASK_W),
+               src1);
+   }
+}
+
+
+/* Expand LIT: the X and W results are the constant 1.0 and are written
+ * with a MOV (saturate flag dropped); Y and Z are left to a LIT
+ * instruction restricted to those two channels.
+ */
+static void precalc_lit( struct brw_wm_compile *c,
+                         struct brw_fp_dst dst,
+                         struct brw_fp_src src0 )
+{
+   const int wants_xw = dst.writemask & BRW_WRITEMASK_XW;
+   const int wants_yz = dst.writemask & BRW_WRITEMASK_YZ;
+
+   if (wants_xw) {
+      struct brw_fp_dst xw = dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0);
+
+      /* dst.xw = imm(1.0f) */
+      emit_op1(c, TGSI_OPCODE_MOV, xw, src_imm1f(c, 1.0f));
+   }
+
+   if (wants_yz) {
+      emit_op1(c, TGSI_OPCODE_LIT, dst_mask(dst, BRW_WRITEMASK_YZ), src0);
+   }
+}
+
+
+/**
+ * Emit a TEX instruction together with whatever extra code the texture
+ * target or sampler format calls for:
+ *
+ *  - cube maps: the coordinate is divided by its largest absolute
+ *    component before sampling;
+ *  - units flagged in key.yuvtex_mask: the sampled value is converted
+ *    from YCbCr to RGB by hand after the TEX.
+ *
+ * RECT targets currently get no coordinate scaling.
+ */
+static void precalc_tex( struct brw_wm_compile *c,
+                         struct brw_fp_dst dst,
+                         unsigned target,
+                         unsigned unit,
+                         struct brw_fp_src src0,
+                         struct brw_fp_src sampler )
+{
+   struct brw_fp_src coord = src_undef();
+   struct brw_fp_dst cube_tmp = dst_undef();
+
+   assert(unit < BRW_MAX_TEX_UNIT);
+
+   if (target != TGSI_TEXTURE_CUBE) {
+      /* XXX: RECT and SHADOWRECT would need a mechanism for internally
+       * generated constants; until then every non-cube target samples
+       * with the coordinate as given.
+       */
+      coord = src0;
+   }
+   else {
+      /* Cubemap: find longest component of coord vector and normalize
+       * it.
+       */
+      struct brw_fp_src cube_src;
+
+      cube_tmp = get_temp(c);
+      cube_src = src_reg_from_dst(cube_tmp);
+
+      /* cube_tmp = abs(src0) */
+      emit_op1(c, TGSI_OPCODE_MOV, cube_tmp, src_abs(src0));
+
+      /* cube_tmp.x = MAX(cube_tmp.x, cube_tmp.y) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+               dst_mask(cube_tmp, BRW_WRITEMASK_X),
+               src_scalar(cube_src, X),
+               src_scalar(cube_src, Y));
+
+      /* cube_tmp.x = MAX(cube_tmp.x, cube_tmp.z) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+               dst_mask(cube_tmp, BRW_WRITEMASK_X),
+               cube_src,
+               src_scalar(cube_src, Z));
+
+      /* cube_tmp.x = 1 / cube_tmp.x */
+      emit_op1(c, TGSI_OPCODE_RCP,
+               dst_mask(cube_tmp, BRW_WRITEMASK_X),
+               cube_src);
+
+      /* cube_tmp = src0 * cube_tmp.xxxx */
+      emit_op2(c, TGSI_OPCODE_MUL,
+               cube_tmp,
+               src0,
+               src_scalar(cube_src, X));
+
+      coord = cube_src;
+   }
+
+   if (!(c->key.yuvtex_mask & (1 << unit))) {
+      /* ordinary RGBA tex instruction */
+      emit_tex_op(c,
+                  TGSI_OPCODE_TEX,
+                  dst,
+                  unit,
+                  target,
+                  sampler.index,
+                  coord,
+                  src_undef(),
+                  src_undef());
+   }
+   else {
+      /* Need to emit YUV texture conversions by hand.  Probably need to
+       * do this here - the alternative is in brw_wm_emit.c, but the
+       * conversion requires allocating a temporary variable which we
+       * don't have the facility to do that late in the compilation.
+       */
+      GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
+      struct brw_fp_dst yuv = get_temp(c);
+      struct brw_fp_src yuv_src = src_reg_from_dst(yuv);
+      struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
+      struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
+      struct brw_fp_src chroma;
+
+      /* yuv = TEX ... */
+      emit_tex_op(c,
+                  TGSI_OPCODE_TEX,
+                  dst_saturate(yuv, dst.saturate),
+                  unit,
+                  target,
+                  sampler.index,
+                  coord,
+                  src_undef(),
+                  src_undef());
+
+      /* yuv.xyz = ADD yuv, C0 */
+      emit_op2(c, TGSI_OPCODE_ADD,
+               dst_mask(yuv, BRW_WRITEMASK_XYZ),
+               yuv_src,
+               C0);
+
+      /* yuv.y = MUL yuv.y, C0.w */
+      emit_op2(c, TGSI_OPCODE_MUL,
+               dst_mask(yuv, BRW_WRITEMASK_Y),
+               yuv_src,
+               src_scalar(C0, W));
+
+      /* The chroma operand is yuv.zzxx when U and V are swapped and
+       * yuv.xxzz otherwise.
+       */
+      if (swap_uv)
+         chroma = src_swizzle(yuv_src, Z,Z,X,X);
+      else
+         chroma = src_swizzle(yuv_src, X,X,Z,Z);
+
+      /* dst.xyz = MAD chroma, C1, yuv.y */
+      emit_op3(c, TGSI_OPCODE_MAD,
+               dst_mask(dst, BRW_WRITEMASK_XYZ),
+               chroma,
+               C1,
+               src_scalar(yuv_src, Y));
+
+      /* dst.y = MAD yuv.z, C1.w, dst.y */
+      emit_op3(c,
+               TGSI_OPCODE_MAD,
+               dst_mask(dst, BRW_WRITEMASK_Y),
+               src_scalar(yuv_src, Z),
+               src_scalar(C1, W),
+               src_scalar(src_reg_from_dst(dst), Y));
+
+      release_temp(c, yuv);
+   }
+
+   /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
+    * generating shader variants in mesa state tracker.
+    */
+
+   /* Give back the cube-map temporary if one was allocated above. */
+   if (!dst_is_undef(cube_tmp))
+      release_temp(c, cube_tmp);
+}
+
+
+/**
+ * Decide whether a TXP instruction really needs its divide-by-W step.
+ *
+ * Only the simplest cases are recognised.  The divide is skipped for
+ * cube maps, and for shader inputs read with an unswizzled W whose
+ * declared interpolation is not perspective.  Everything else keeps the
+ * divide.
+ *
+ * \return GL_TRUE if the divide must be emitted, GL_FALSE otherwise
+ */
+static GLboolean projtex( struct brw_wm_compile *c,
+                          unsigned target,
+                          struct brw_fp_src src )
+{
+   if (target == TGSI_TEXTURE_CUBE)
+      return GL_FALSE;  /* ut2004 gun rendering !?! */
+
+   /* More complex cases than the one below typically only arise from
+    * user-provided fragment programs anyway.
+    */
+   if (src.file != TGSI_FILE_INPUT)
+      return GL_TRUE;
+
+   if (BRW_GET_SWZ(src.swizzle, W) != W)
+      return GL_TRUE;
+
+   if (c->fp->info.input_interpolate[src.index] == TGSI_INTERPOLATE_PERSPECTIVE)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+
+/**
+ * Emit code for TXP: when projtex() says the divide is needed, scale the
+ * coordinate by 1/w in a temporary and sample with that; otherwise
+ * sample with the coordinate as given.
+ */
+static void precalc_txp( struct brw_wm_compile *c,
+                         struct brw_fp_dst dst,
+                         unsigned target,
+                         unsigned unit,
+                         struct brw_fp_src src0,
+                         struct brw_fp_src sampler )
+{
+   struct brw_fp_dst proj;
+
+   if (!projtex(c, target, src0)) {
+      /* dst = TEX src0 */
+      precalc_tex(c, dst, target, unit, src0, sampler);
+      return;
+   }
+
+   proj = get_temp(c);
+
+   /* proj.w = RCP src0.w */
+   emit_op1(c,
+            TGSI_OPCODE_RCP,
+            dst_mask(proj, BRW_WRITEMASK_W),
+            src_scalar(src0, W));
+
+   /* proj.xyz = MUL src0, proj.wwww */
+   emit_op2(c,
+            TGSI_OPCODE_MUL,
+            dst_mask(proj, BRW_WRITEMASK_XYZ),
+            src0,
+            src_scalar(src_reg_from_dst(proj), W));
+
+   /* dst = TEX proj */
+   precalc_tex(c, dst, target, unit, src_reg_from_dst(proj), sampler);
+
+   release_temp(c, proj);
+}
+
+
+/* Look up the shader output carrying (semantic, index) and return it as
+ * a *source* register in TGSI_FILE_OUTPUT.
+ *
+ * When no output matches, output 0 is returned instead.
+ * XXX: returning an arbitrary immediate would be nicer, but immediates
+ * currently end up generating extra curbe entries, as they would have
+ * in the original driver.
+ */
+static struct brw_fp_src
+find_output_by_semantic( struct brw_wm_compile *c,
+                         unsigned semantic,
+                         unsigned index )
+{
+   const struct tgsi_shader_info *info = &c->fp->info;
+   unsigned slot;
+
+   for (slot = 0; slot < info->num_outputs; slot++) {
+      if (info->output_semantic_name[slot] != semantic)
+         continue;
+
+      if (info->output_semantic_index[slot] != index)
+         continue;
+
+      return src_reg( TGSI_FILE_OUTPUT, slot );
+   }
+
+   return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
+}
+
+
+/* Emit one WM_FB_WRITE per colour buffer (c->key.nr_cbufs of them).
+ * Each write takes the matching COLOR output, the depth payload and the
+ * Z channel of the POSITION output; the write for the last buffer is
+ * flagged as end-of-thread.
+ */
+static void emit_fb_write( struct brw_wm_compile *c )
+{
+   struct brw_fp_src depth_payload = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   struct brw_fp_src out_z =
+      src_scalar(find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0), Z);
+   GLuint buf;
+
+   for (buf = 0 ; buf < c->key.nr_cbufs; buf++) {
+      struct brw_fp_src color =
+         find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, buf);
+
+      /* emit_tex_op is used because it lets us fill in the tex_unit and
+       * target fields, which are abused here to carry the EOT marker and
+       * the FB write target respectively.
+       */
+      emit_tex_op(c, WM_FB_WRITE,
+                  dst_undef(),
+                  (buf == c->key.nr_cbufs - 1),  /* EOT */
+                  buf,                           /* write target */
+                  0,                             /* no sampler */
+                  color,
+                  depth_payload,
+                  out_z);
+   }
+}
+
+
+/* Convert a TGSI destination register into the driver's brw_fp_dst
+ * form.  The saturate flag is set only for TGSI_SAT_ZERO_ONE.  Indirect
+ * destinations must be addressed through ADDR[0] (asserted).
+ */
+static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
+                                        const struct tgsi_full_dst_register *dst,
+                                        unsigned saturate )
+{
+   struct brw_fp_dst result;
+
+   result.saturate = (saturate == TGSI_SAT_ZERO_ONE) ? 1 : 0;
+   result.indirect = dst->Register.Indirect;
+   result.writemask = dst->Register.WriteMask;
+   result.index = dst->Register.Index;
+   result.file = dst->Register.File;
+
+   if (result.indirect) {
+      assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
+      assert(dst->Indirect.Index == 0);
+   }
+
+   return result;
+}
+
+
+/* Convert a TGSI source register into the driver's brw_fp_src form:
+ * file, index and indirect flag are copied, the four swizzle selectors
+ * are packed two bits each (X lowest), and the TGSI sign mode is mapped
+ * onto the abs/negate flags.  Indirect sources must be addressed through
+ * ADDR[0] (asserted).
+ */
+static struct brw_fp_src translate_src( struct brw_wm_compile *c,
+                                        const struct tgsi_full_src_register *src )
+{
+   struct brw_fp_src result;
+
+   result.file = src->Register.File;
+   result.index = src->Register.Index;
+   result.indirect = src->Register.Indirect;
+
+   result.swizzle = ((src->Register.SwizzleW << 6) |
+                     (src->Register.SwizzleZ << 4) |
+                     (src->Register.SwizzleY << 2) |
+                     (src->Register.SwizzleX << 0));
+
+   /* SIGN_KEEP and any unrecognised mode: value is used as is. */
+   result.abs = 0;
+   result.negate = 0;
+
+   switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      result.abs = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      result.abs = 1;
+      result.negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      result.negate = 1;
+      break;
+
+   default:
+      break;
+   }
+
+   if (result.indirect) {
+      assert(src->Indirect.File == TGSI_FILE_ADDRESS);
+      assert(src->Indirect.Index == 0);
+   }
+
+   return result;
+}
+
+
+
+/**
+ * Translate one TGSI instruction into the driver's FP instruction list.
+ *
+ * A handful of opcodes are rewritten on the way (ABS -> MOV |src|,
+ * SUB -> ADD with negated src1, DST/LIT/TEX/TXP/TXB through the
+ * precalc_* helpers, END -> framebuffer writes); the rest are passed
+ * through, using emit_scalar_insn() for scalar-result opcodes when the
+ * shader has no flow control.
+ *
+ * All three source slots start out as src_undef() so that opcodes with
+ * fewer than three operands never hand uninitialised stack contents to
+ * the emit helpers.
+ */
+static void emit_insn( struct brw_wm_compile *c,
+                       const struct tgsi_full_instruction *inst )
+{
+   unsigned opcode = inst->Instruction.Opcode;
+   unsigned nr_src = inst->Instruction.NumSrcRegs;
+   struct brw_fp_dst dst;
+   struct brw_fp_src src[3];
+   unsigned i;
+
+   dst = translate_dst( c, &inst->Dst[0],
+                        inst->Instruction.Saturate );
+
+   for (i = 0; i < 3; i++)
+      src[i] = src_undef();
+
+   /* src[] only has room for three operands. */
+   assert(nr_src <= 3);
+   if (nr_src > 3)
+      nr_src = 3;
+
+   for (i = 0; i < nr_src; i++)
+      src[i] = translate_src( c, &inst->Src[i] );
+
+   switch (opcode) {
+   case TGSI_OPCODE_ABS:
+      emit_op1(c, TGSI_OPCODE_MOV,
+               dst,
+               src_abs(src[0]));
+      break;
+
+   case TGSI_OPCODE_SUB:
+      emit_op2(c, TGSI_OPCODE_ADD,
+               dst,
+               src[0],
+               src_negate(src[1]));
+      break;
+
+   case TGSI_OPCODE_SCS:
+      emit_op1(c, TGSI_OPCODE_SCS,
+               dst_mask(dst, BRW_WRITEMASK_XY),
+               src[0]);
+      break;
+
+   case TGSI_OPCODE_DST:
+      precalc_dst(c, dst, src[0], src[1]);
+      break;
+
+   case TGSI_OPCODE_LIT:
+      precalc_lit(c, dst, src[0]);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      precalc_tex(c, dst,
+                  inst->Texture.Texture,
+                  src[1].index,  /* use sampler unit for tex idx */
+                  src[0],        /* coord */
+                  src[1]);       /* sampler */
+      break;
+
+   case TGSI_OPCODE_TXP:
+      precalc_txp(c, dst,
+                  inst->Texture.Texture,
+                  src[1].index,  /* use sampler unit for tex idx */
+                  src[0],        /* coord */
+                  src[1]);       /* sampler */
+      break;
+
+   case TGSI_OPCODE_TXB:
+      /* XXX: TXB not done
+       */
+      precalc_tex(c, dst,
+                  inst->Texture.Texture,
+                  src[1].index,  /* use sampler unit for tex idx*/
+                  src[0],
+                  src[1]);
+      break;
+
+   case TGSI_OPCODE_XPD:
+      emit_op2(c, TGSI_OPCODE_XPD,
+               dst_mask(dst, BRW_WRITEMASK_XYZ),
+               src[0],
+               src[1]);
+      break;
+
+   case TGSI_OPCODE_KIL:
+      emit_op1(c, TGSI_OPCODE_KIL,
+               dst_mask(dst_undef(), 0),
+               src[0]);
+      break;
+
+   case TGSI_OPCODE_END:
+      emit_fb_write(c);
+      break;
+
+   default:
+      if (!c->key.has_flow_control &&
+          brw_wm_is_scalar_result(opcode))
+         emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
+      else
+         emit_op3(c, opcode, dst, src[0], src[1], src[2]);
+      break;
+   }
+}
+
+/**
+ * Initial pass for fragment program code generation.
+ * This function is used by both the GLSL and non-GLSL paths.
+ *
+ * Walks the shader's TGSI tokens once:
+ *  - input declarations become interpolation instructions (emit_interp),
+ *  - immediates are copied into c->immediate[], in declaration order,
+ *  - instructions are translated by emit_insn().
+ *
+ * \return c->error (zero on success), or PIPE_ERROR_OUT_OF_MEMORY when
+ *         the shader declares more immediates than BRW_WM_MAX_CONST.
+ */
+int brw_wm_pass_fp( struct brw_wm_compile *c )
+{
+   struct brw_fragment_shader *fs = c->fp;
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction *inst;
+   struct tgsi_full_declaration *decl;
+   GLuint size;
+   GLuint i;
+
+   if (BRW_DEBUG & DEBUG_WM) {
+      debug_printf("pre-fp:\n");
+      tgsi_dump(fs->tokens, 0);
+   }
+
+   c->fp_pixel_xy = src_undef();
+   c->fp_delta_xy = src_undef();
+   c->fp_pixel_w = src_undef();
+   c->nr_fp_insns = 0;
+   c->nr_immediates = 0;
+
+
+   /* Loop over all instructions doing assorted simplifications and
+    * transformations.
+    */
+   tgsi_parse_init( &parse, fs->tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* Turn input declarations into special WM_* instructions.
+          *
+          * XXX: For non-branching shaders, consider deferring variable
+          * initialization as late as possible to minimize register
+          * usage.  This is how the original BRW driver worked.
+          *
+          * In a branching shader, the preamble instructions must be
+          * emitted at declaration time, as instruction order in the
+          * shader does not correspond to the order instructions are
+          * executed in the wild.
+          *
+          * This is where special instructions such as WM_CINTERP,
+          * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+          * compute shader inputs from the payload registers and pixel
+          * position.
+          */
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first = decl->Range.First;
+            unsigned last = decl->Range.Last;
+            unsigned attrib;
+
+            for (attrib = first; attrib <= last; attrib++) {
+               emit_interp(c,
+                           attrib,
+                           decl->Semantic.Name,
+                           decl->Declaration.Interpolate );
+            }
+         }
+
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* Unlike VS programs we can probably manage fine encoding
+          * immediate values directly into the emitted EU
+          * instructions, as we probably only need to reference one
+          * float value per instruction.  Just save the data for now
+          * and use directly later.
+          *
+          * The n-th immediate is stored in c->immediate[n], and
+          * nr_immediates is advanced exactly once per token.  The
+          * token's own values are always read starting at u[0].
+          */
+         if (c->nr_immediates >= BRW_WM_MAX_CONST) {
+            tgsi_parse_free( &parse );
+            return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+
+         size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+         if (size > 4)
+            size = 4;   /* v[] holds at most four components */
+
+         for (i = 0; i < size; i++)
+            c->immediate[c->nr_immediates].v[i] =
+               parse.FullToken.FullImmediate.u[i].Float;
+
+         for (; i < 4; i++)
+            c->immediate[c->nr_immediates].v[i] = 0.0;
+
+         c->immediate[c->nr_immediates].nr = size;
+         c->nr_immediates++;
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         inst = &parse.FullToken.FullInstruction;
+         emit_insn(c, inst);
+         break;
+      }
+   }
+
+   tgsi_parse_free( &parse );
+
+   if (BRW_DEBUG & DEBUG_WM) {
+      brw_wm_print_fp_program( c, "pass_fp" );
+      debug_printf("\n");
+   }
+
+   return c->error;
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
new file mode 100644
index 0000000000..3b3afc39d3
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -0,0 +1,2032 @@
+#include "util/u_math.h"
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ GLuint component);
+
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/**
+ * Flag GRF register number \p r as in use so that alloc_grf() will not
+ * hand it out.
+ */
+static void prealloc_grf(struct brw_wm_compile *c, int r)
+{
+   c->used_grf[r] = GL_TRUE;
+}
+
+
+/**
+ * Flag GRF register number \p r as free and pull the allocator's search
+ * start (first_free_grf) back to it if it lies below the current start.
+ */
+static void release_grf(struct brw_wm_compile *c, int r)
+{
+   /*assert(c->used_grf[r]);*/
+   c->used_grf[r] = GL_FALSE;
+
+   if (r < c->first_free_grf)
+      c->first_free_grf = r;
+}
+
+
+/**
+ * Find a free GRF, mark it as used and return its number.
+ *
+ * The search starts at first_free_grf.  If it comes up empty, dead
+ * program temporaries are reclaimed and the search is repeated from
+ * register 0.
+ *
+ * \return the GRF number, or -1 when every register is in use (a
+ *         warning is printed once per compilation)
+ */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+   GLuint r;
+   int pass;
+
+   for (pass = 0; pass < 2; pass++) {
+      if (pass == 1) {
+         /* no free temps, try to reclaim some */
+         reclaim_temps(c);
+         c->first_free_grf = 0;
+      }
+
+      for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+         if (c->used_grf[r])
+            continue;
+
+         c->used_grf[r] = GL_TRUE;
+         c->first_free_grf = r + 1;  /* a guess */
+         return r;
+      }
+   }
+
+   /* Sanity check: at this point every register must be taken. */
+   for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+      assert(c->used_grf[r]);
+   }
+
+   /* really, no free GRF regs found; warn once per compilation */
+   if (!c->out_of_regs) {
+      debug_printf("%s: ran out of registers for fragment program", __FUNCTION__);
+      c->out_of_regs = GL_TRUE;
+   }
+
+   return -1;
+}
+
+
+/**
+ * Return one more than the highest GRF number currently marked as used,
+ * or 0 when no register is in use.
+ */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+   int count = BRW_WM_MAX_GRF;
+
+   /* Trim unused registers off the top end. */
+   while (count > 0 && !c->used_grf[count - 1])
+      count--;
+
+   return count;
+}
+
+
+
+/**
+ * Bind one component of a program register (file, index, component) to
+ * the hardware register \p reg and flag the mapping as initialised.
+ */
+static void set_reg(struct brw_wm_compile *c, int file, int index,
+                    int component, struct brw_reg reg)
+{
+   c->wm_regs[file][index][component].inited = GL_TRUE;
+   c->wm_regs[file][index][component].reg = reg;
+}
+
+/**
+ * Hand out the next scratch register from c->tmp_regs[], growing the
+ * pool with a freshly allocated GRF when all existing entries are in
+ * use.  When no GRF can be allocated, register 50 is used instead.
+ */
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+   struct brw_reg reg;
+
+   /* Pool exhausted?  Append one more GRF to it. */
+   if (c->tmp_index == c->tmp_max) {
+      int grf = alloc_grf(c);
+
+      /*printf("Out of temps in %s\n", __FUNCTION__);*/
+      c->tmp_regs[ c->tmp_max ] = (grf < 0) ? 50 /* XXX random register! */ : grf;
+      c->tmp_max++;
+   }
+
+   /* form the GRF register */
+   reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index ], 0);
+   c->tmp_index++;
+
+   /*printf("alloc_temp %d\n", reg.nr);*/
+   assert(reg.nr < BRW_WM_MAX_GRF);
+   return reg;
+}
+
+/**
+ * Return the current scratch-register position (c->tmp_index).
+ * Passing the value to release_tmps() later frees every scratch
+ * register handed out in between.
+ */
+static int mark_tmps(struct brw_wm_compile *c)
+{
+   const int mark = c->tmp_index;
+
+   return mark;
+}
+
+/* Return the vec8 GRF register recorded in scratch-pool entry `index`. */
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
+{
+   struct brw_reg reg = brw_vec8_grf( c->tmp_regs[ index ], 0 );
+
+   return reg;
+}
+
+/* Rewind the scratch-register position to `mark`, a value previously
+ * obtained from mark_tmps().
+ */
+static void release_tmps(struct brw_wm_compile *c, int mark)
+{
+   c->tmp_index = mark;
+}
+
+/**
+ * Convert one component of a program register to a brw register.
+ *
+ * Since we're running in SOA mode each program register corresponds to
+ * four hardware registers.  A hardware register is allocated the first
+ * time a component is looked up and re-used afterwards.
+ *
+ * \param file  register file (TGSI_FILE_x or BRW_FILE_PAYLOAD)
+ * \param index  register number
+ * \param component  src component (X=0, Y=1, Z=2, W=3)
+ * \param nr  not used
+ * \param neg  per-component negate mask; bit `component` is tested
+ * \param abs  take absolute value?
+ * \return the hardware register, or the null register for
+ *         TGSI_FILE_NULL and for unexpected files
+ */
+static struct brw_reg
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+        int nr, GLuint neg, GLuint abs)
+{
+   struct brw_reg reg;
+
+   if (file == TGSI_FILE_NULL)
+      return brw_null_reg();
+
+   if (file != TGSI_FILE_CONSTANT &&
+       file != TGSI_FILE_TEMPORARY &&
+       file != TGSI_FILE_INPUT &&
+       file != TGSI_FILE_OUTPUT &&
+       file != BRW_FILE_PAYLOAD) {
+      debug_printf("%s: Unexpected file type\n", __FUNCTION__);
+      return brw_null_reg();
+   }
+
+   assert(index < 256);
+   assert(component < 4);
+
+   if (!c->wm_regs[file][index][component].inited) {
+      /* first reference: allocate a new hardware register */
+      int grf = alloc_grf(c);
+
+      /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+      if (grf < 0) {
+         /* totally out of temps */
+         grf = 51; /* XXX random register! */
+      }
+
+      reg = brw_vec8_grf(grf, 0);
+      /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
+
+      set_reg(c, file, index, component, reg);
+   }
+   else {
+      /* already mapped: re-use */
+      reg = c->wm_regs[file][index][component].reg;
+   }
+
+   if (neg & (1 << component)) {
+      reg = negate(reg);
+   }
+
+   if (abs) {
+      reg = brw_abs(reg);
+   }
+
+   return reg;
+}
+
+
+
+
+/**
+ * Find first/last instruction that references each temporary register.
+ *
+ * On return intBegin[t] / intEnd[t] hold the first and last instruction
+ * index at which temporary t is read or written, or -1 if it is never
+ * referenced.  A reference inside a loop extends the interval to the end
+ * of the innermost enclosing loop.
+ *
+ * \return GL_TRUE on success.  GL_FALSE if the intervals could not be
+ *         determined: the program contains a CAL, addresses a temporary
+ *         relatively, nests loops deeper than MAX_LOOP_NESTING, or has
+ *         an ENDLOOP without a matching BGNLOOP.  The arrays are then
+ *         only partially filled and must not be relied on.
+ */
+GLboolean
+_mesa_find_temp_intervals(const struct prog_instruction *instructions,
+                          GLuint numInstructions,
+                          GLint intBegin[MAX_PROGRAM_TEMPS],
+                          GLint intEnd[MAX_PROGRAM_TEMPS])
+{
+   struct loop_info
+   {
+      GLuint Start, End;  /**< Start, end instructions of loop */
+   };
+   struct loop_info loopStack[MAX_LOOP_NESTING];
+   GLuint loopStackDepth = 0;
+   GLuint i;
+
+   for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+      intBegin[i] = intEnd[i] = -1;
+   }
+
+   /* Scan instructions looking for temporary registers */
+   for (i = 0; i < numInstructions; i++) {
+      const struct prog_instruction *inst = instructions + i;
+      if (inst->Opcode == OPCODE_BGNLOOP) {
+         /* Refuse to analyse rather than write past loopStack[]. */
+         if (loopStackDepth >= MAX_LOOP_NESTING)
+            return GL_FALSE;
+         loopStack[loopStackDepth].Start = i;
+         loopStack[loopStackDepth].End = inst->BranchTarget;
+         loopStackDepth++;
+      }
+      else if (inst->Opcode == OPCODE_ENDLOOP) {
+         /* Unbalanced ENDLOOP: the unsigned depth would wrap around. */
+         if (loopStackDepth == 0)
+            return GL_FALSE;
+         loopStackDepth--;
+      }
+      else if (inst->Opcode == OPCODE_CAL) {
+         return GL_FALSE;
+      }
+      else {
+         const GLuint numSrc = 3;
+         GLuint j;
+         for (j = 0; j < numSrc; j++) {
+            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+               const GLuint index = inst->SrcReg[j].Index;
+               if (inst->SrcReg[j].RelAddr)
+                  return GL_FALSE;
+               update_interval(intBegin, intEnd, index, i);
+               if (loopStackDepth > 0) {
+                  /* extend temp register's interval to end of loop */
+                  GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+                  update_interval(intBegin, intEnd, index, loopEnd);
+               }
+            }
+         }
+         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+            const GLuint index = inst->DstReg.Index;
+            if (inst->DstReg.RelAddr)
+               return GL_FALSE;
+            update_interval(intBegin, intEnd, index, i);
+            if (loopStackDepth > 0) {
+               /* extend temp register's interval to end of loop */
+               GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+               update_interval(intBegin, intEnd, index, loopEnd);
+            }
+         }
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * This is called if we run out of GRF registers.  Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ *
+ * A temporary is considered dead when its last reference lies before
+ * c->cur_inst.  For each dead temporary, the GRFs bound to its four
+ * components are released and the bindings are marked uninitialised.
+ *
+ * If the interval analysis fails, nothing is reclaimed: the interval
+ * arrays are then only partially filled, and acting on them could free
+ * registers that are still referenced later in the program.
+ *
+ * NOTE(review): the arrays here are sized BRW_WM_MAX_TEMPS while
+ * _mesa_find_temp_intervals() fills MAX_PROGRAM_TEMPS entries -- confirm
+ * the two constants agree.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+   GLint intBegin[BRW_WM_MAX_TEMPS];
+   GLint intEnd[BRW_WM_MAX_TEMPS];
+   int index;
+
+   /*printf("Reclaim temps:\n");*/
+
+   if (!_mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns,
+                                  intBegin, intEnd))
+      return;
+
+   for (index = 0; index < BRW_WM_MAX_TEMPS; index++) {
+      int component;
+
+      /* Skip temporaries that are never referenced or are still live. */
+      if (intEnd[index] == -1 || intEnd[index] >= c->cur_inst)
+         continue;
+
+      /* program temp[index] can be freed */
+      /*printf(" temp[%d] is dead\n", index);*/
+      for (component = 0; component < 4; component++) {
+         if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) {
+            int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr;
+            release_grf(c, r);
+            /*
+            printf(" Reclaim temp %d, reg %d at inst %d\n",
+                   index, r, c->cur_inst);
+            */
+            c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE;
+         }
+      }
+   }
+}
+
+
+
+
+/**
+ * Preallocate registers. This sets up the Mesa to hardware register
+ * mapping for certain registers, such as constants (uniforms/state vars)
+ * and shader inputs.
+ *
+ * In order, this function:
+ * - clears the GRF usage table;
+ * - binds the four components of the depth payload to GRFs 0, 2, 4, 6
+ * (components beyond key.nr_depth_regs are bound to GRF 0);
+ * - decides whether constants come from a real constant buffer or are
+ * laid out in the GRF, and binds them in the latter case;
+ * - binds each enabled fragment input to its payload GRF;
+ * - records the layout in c->prog_data and reserves the emit-mask and
+ * stack registers;
+ * - marks GRFs [0..reg_index-1], 126 and 127 as used;
+ * - touches the destination of every TEX/TXB so its four channels are
+ * allocated contiguously;
+ * - when a constant buffer is used, allocates three GRFs to hold the
+ * constants referenced by the current instruction.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+ int i, j;
+ struct brw_reg reg;
+ int urb_read_length = 0;
+ GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+ GLuint reg_index = 0; /* next GRF not yet claimed by the fixed layout */
+
+ memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+ c->first_free_grf = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (i < c->key.nr_depth_regs)
+ reg = brw_vec8_grf(i * 2, 0);
+ else
+ reg = brw_vec8_grf(0, 0);
+ set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg); /* NOTE(review): get_reg() in this file accepts BRW_FILE_PAYLOAD, not TGSI_FILE_PAYLOAD -- confirm which name is intended */
+ }
+ reg_index += 2 * c->key.nr_depth_regs;
+
+ /* constants */
+ {
+ const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
+
+ /* use a real constant buffer, or just use a section of the GRF? */
+ /* XXX this heuristic may need adjustment... */
+ if ((nr_params + nr_temps) * 4 + reg_index > 80)
+ c->fp->use_const_buffer = GL_TRUE;
+ else
+ c->fp->use_const_buffer = GL_FALSE;
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+
+ if (c->fp->use_const_buffer) {
+ /* We'll use a real constant buffer and fetch constants from
+ * it with a dataport read message.
+ */
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 0;
+ }
+ else {
+ const struct gl_program_parameter_list *plist =
+ c->fp->program.Base.Parameters;
+ int index = 0;
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 4 * nr_params;
+
+ /* loop over program constants (float[4]) */
+ for (i = 0; i < nr_params; i++) {
+ /* loop over XYZW channels */
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* eight floats are packed per GRF */
+ /* Save pointer to parameter/constant value.
+ * Constants will be copied in prepare_constant_buffer()
+ */
+ c->prog_data.param[index] = &plist->ParameterValues[i][j];
+ set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg);
+ }
+ }
+ /* number of constant regs used (each reg is float[8]) */
+ c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+ reg_index += c->nr_creg;
+ }
+ }
+
+ /* fragment shader inputs */
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ int fp_input;
+
+ if (i >= VERT_RESULT_VAR0)
+ fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+ else if (i <= VERT_RESULT_TEX7)
+ fp_input = i;
+ else
+ fp_input = -1;
+
+ if (fp_input >= 0 && inputs & (1 << fp_input)) {
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg);
+ }
+ if (c->key.nr_vp_outputs > i) {
+ reg_index += 2; /* two GRFs are stepped over per vertex-program output */
+ }
+ }
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = urb_read_length;
+ c->prog_data.curb_read_length = c->nr_creg;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index += 2;
+
+ /* mark GRF regs [0..reg_index-1] as in-use */
+ for (i = 0; i < reg_index; i++)
+ prealloc_grf(c, i);
+
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct brw_fp_instruction *inst = &c->fp_instructions[i];
+ struct brw_reg dst[4];
+
+ switch (inst->Opcode) { /* NOTE(review): brw_wm_fp.c stores the opcode in a lower-case `opcode` field and uses TGSI_OPCODE_* values -- confirm this still matches */
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ /* Allocate the channels of texture results contiguously,
+ * since they are written out that way by the sampler unit.
+ */
+ for (j = 0; j < 4; j++) {
+ dst[j] = get_dst_reg(c, inst, j);
+ if (j != 0)
+ assert(dst[j].nr == dst[j - 1].nr + 1);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* An instruction may reference up to three constants.
+ * They'll be found in these registers.
+ * XXX alloc these on demand!
+ */
+ if (c->fp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); /* NOTE(review): alloc_grf() returns -1 when no GRF is free; that case is not handled here */
+ }
+ }
+#if 0
+ printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
+#endif
+}
+
+
+/**
+ * Check if any of the instruction's src registers are constants, uniforms,
+ * or statevars. If so, fetch any constants that we don't already have in
+ * the three GRF slots.
+ *
+ * As written, the test below matches TGSI_FILE_IMMEDIATE and
+ * TGSI_FILE_CONSTANT sources only, and a matching source is always
+ * re-read: there is no comparison against the index already stored in
+ * current_const[i].
+ *
+ * NOTE(review): get_src_reg() routes STATE_VAR, CONSTANT and UNIFORM
+ * sources to get_src_reg_const(), which asserts that the slot was filled
+ * here -- confirm that the two register-file lists are meant to differ.
+ *
+ * \param c     compile state; current_const[] is updated and a read is
+ *              emitted into c->func
+ * \param inst  instruction whose three SrcReg entries are examined
+ */
+static void fetch_constants(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ /* loop over instruction src regs */
+ for (i = 0; i < 3; i++) {
+ const struct prog_src_register *src = &inst->SrcReg[i];
+ if (src->File == TGSI_FILE_IMMEDIATE ||
+ src->File == TGSI_FILE_CONSTANT) {
+ c->current_const[i].index = src->Index; /* slot i now tracks this constant */
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, i, c->current_const[i].reg.nr);
+#endif
+
+ /* need to fetch the constant now */
+ brw_dp_READ_4(p,
+ c->current_const[i].reg, /* writeback dest */
+ src->RelAddr, /* relative indexing? */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
+ );
+ }
+ }
+}
+
+
+/**
+ * Look up the brw register that backs one channel of an instruction's
+ * destination.
+ *
+ * \param c          compile state handed through to get_reg()
+ * \param inst       instruction whose DstReg is translated
+ * \param component  destination channel to look up
+ * \return the result of get_reg() for (DstReg.File, DstReg.Index,
+ *         component), asking for one register with no negate/abs
+ */
+static struct brw_reg
+get_dst_reg(struct brw_wm_compile *c,
+            const struct brw_fp_instruction *inst,
+            GLuint component)
+{
+   return get_reg(c,
+                  inst->DstReg.File,
+                  inst->DstReg.Index,
+                  component,
+                  1,   /* nr */
+                  0,   /* no negate */
+                  0);  /* no abs */
+}
+
+
+/**
+ * Build the register description for a source operand that is read from
+ * the constant buffer.
+ *
+ * The value is expected to be present in c->current_const[srcRegIndex]
+ * already (fetch_constants() fills those slots).  This routine only
+ * narrows that register to the requested float, replicates it across the
+ * vector, and applies the operand's Negate/Abs modifiers.
+ *
+ * \param c            compile state holding current_const[]
+ * \param inst         instruction owning the source operand
+ * \param srcRegIndex  which of the three source operands (must be < 3)
+ * \param component    which float of the constant (must be < 4)
+ */
+static struct brw_reg
+get_src_reg_const(struct brw_wm_compile *c,
+                  const struct brw_fp_instruction *inst,
+                  GLuint srcRegIndex, GLuint component)
+{
+   const struct prog_src_register *operand = &inst->SrcReg[srcRegIndex];
+   struct brw_reg result;
+
+   assert(component < 4);
+   assert(srcRegIndex < 3);
+   assert(c->current_const[srcRegIndex].index != -1);
+
+   /* pick the wanted float out of the fetched register and smear it */
+   result = stride(c->current_const[srcRegIndex].reg, 0, 1, 0);
+   result.subnr = component * 4;
+
+   if (operand->Negate)
+      result = negate(result);
+   if (operand->Abs)
+      result = brw_abs(result);
+
+#if 0
+   printf(" form const[%d].%d for arg %d, reg %d\n",
+          c->current_const[srcRegIndex].index,
+          component,
+          srcRegIndex,
+          result.nr);
+#endif
+
+   return result;
+}
+
+
+/**
+ * Translate one channel of a source operand into a brw register.
+ *
+ * The channel is first mapped through the operand's swizzle.  The
+ * extended swizzle terms ZERO and ONE become float immediates.  When the
+ * program uses a constant buffer, STATE_VAR / CONSTANT / UNIFORM operands
+ * are served by get_src_reg_const(); everything else goes to get_reg()
+ * together with the operand's Negate and Abs flags.
+ *
+ * \param c            compile state
+ * \param inst         instruction owning the source operand
+ * \param srcRegIndex  index into inst->SrcReg[]
+ * \param channel      channel of the operand, before swizzling
+ */
+static struct brw_reg
+get_src_reg(struct brw_wm_compile *c,
+            const struct brw_fp_instruction *inst,
+            GLuint srcRegIndex, GLuint channel)
+{
+   const struct prog_src_register *operand = &inst->SrcReg[srcRegIndex];
+   const GLuint swizzled = BRW_GET_SWZ(operand->Swizzle, channel);
+
+   /* Extended swizzle terms */
+   if (swizzled == SWIZZLE_ZERO)
+      return brw_imm_f(0.0F);
+   if (swizzled == SWIZZLE_ONE)
+      return brw_imm_f(1.0F);
+
+   if (c->fp->use_const_buffer &&
+       (operand->File == TGSI_FILE_STATE_VAR ||
+        operand->File == TGSI_FILE_CONSTANT ||
+        operand->File == TGSI_FILE_UNIFORM))
+      return get_src_reg_const(c, inst, srcRegIndex, swizzled);
+
+   /* other type of source register */
+   return get_reg(c, operand->File, operand->Index, swizzled,
+                  1,   /* nr */
+                  operand->Negate, operand->Abs);
+}
+
+
+/**
+ * Same as \sa get_src_reg() but if the register is an immediate, emit
+ * a brw_reg encoding the immediate.
+ * Note that a brw instruction only allows one src operand to be an immediate.
+ * For instructions with more than one operand, only the second can be an
+ * immediate.  This means that we treat some immediates as constants
+ * (which is why TGSI_FILE_IMMEDIATE is checked in fetch_constants()).
+ *
+ * The source modifiers are folded into the literal: the absolute value is
+ * taken first and the negation is applied to that result, so an operand
+ * carrying both flags yields -|value|.  Doing it the other way round
+ * would let Abs cancel the negation and disagree with the register paths,
+ * which hand both flags on as register modifiers.
+ *
+ * \param c            compile state
+ * \param inst         instruction owning the source operand
+ * \param srcRegIndex  index into inst->SrcReg[]
+ * \param channel      channel of the operand, before swizzling
+ * \return a float immediate for TGSI_FILE_IMMEDIATE operands, otherwise
+ *         whatever get_src_reg() returns
+ */
+static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
+                                      const struct brw_fp_instruction *inst,
+                                      GLuint srcRegIndex, GLuint channel)
+{
+   const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+
+   if (src->File == TGSI_FILE_IMMEDIATE) {
+      /* an immediate */
+      const int component = BRW_GET_SWZ(src->Swizzle, channel);
+      const GLfloat *param =
+         c->fp->program.Base.Parameters->ParameterValues[src->Index];
+      GLfloat value = param[component];
+
+      /* abs first, then negate: Negate acts on the Abs result */
+      if (src->Abs)
+         value = FABSF(value);
+      if (src->Negate)
+         value = -value;
+#if 0
+      printf(" form immed value %f for chan %d\n", value, channel);
+#endif
+      return brw_imm_f(value);
+   }
+   else {
+      return get_src_reg(c, inst, srcRegIndex, channel);
+   }
+}
+
+
+/**
+ * Subroutines are minimal support for reusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage. This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ *
+ * c->subroutines[subroutine] holds the instruction index at which the
+ * body starts; a zero entry is treated as "not emitted yet".  On first
+ * use the body is emitted in line through emit() and its start index is
+ * recorded.  On later uses only a return-address setup and a jump to the
+ * recorded index are emitted.
+ *
+ * NOTE(review): the reuse path stores its return address in a freshly
+ * allocated temporary, while the already-emitted body returns through the
+ * temporary chosen when it was first emitted -- confirm that both always
+ * resolve to the same register.
+ *
+ * \param c           compile state; c->subroutines[] and c->func are updated
+ * \param subroutine  slot identifying the subroutine
+ *                    (must be < BRW_WM_MAX_SUBROUTINE)
+ * \param emit        callback that emits the subroutine body on first use
+ */
+static void invoke_subroutine( struct brw_wm_compile *c,
+ enum _subroutine subroutine,
+ void (*emit)( struct brw_wm_compile * ) )
+{
+ struct brw_compile *p = &c->func;
+
+ assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+
+ if( c->subroutines[ subroutine ] ) {
+ /* subroutine previously emitted: reuse existing instructions */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ int here = p->nr_insn; /* index of the first ADD emitted below */
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); /* return_address = ip + (2 << 4) */
+
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( c->subroutines[ subroutine ] -
+ here - 1 ) << 4 ) ); /* jump: signed distance to the body, scaled by << 4 */
+ brw_pop_insn_state(p);
+
+ release_tmps( c, mark );
+ } else {
+ /* previously unused subroutine: emit, and mark for later reuse */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ struct brw_instruction *calc;
+ int base = p->nr_insn; /* index of the placeholder ADD emitted below */
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); /* offset patched after the body is emitted */
+ brw_pop_insn_state(p);
+
+ c->subroutines[ subroutine ] = p->nr_insn; /* body starts at the next instruction */
+
+ emit( c );
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV( p, brw_ip_reg(), return_address ); /* return: ip = return_address */
+ brw_pop_insn_state(p);
+
+ brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); /* patch placeholder with the distance past the body */
+
+ release_tmps( c, mark );
+ }
+}
+
+/**
+ * Emit one RNDZ per enabled destination channel, reading source operand 0.
+ * Saturation follows inst->SaturateMode and is switched off again before
+ * returning.
+ */
+static void
+emit_trunc(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, in;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      in = get_src_reg(c, inst, 0, chan);
+      brw_RNDZ(p, out, in);
+   }
+
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit one MOV per enabled destination channel, reading source operand 0.
+ * Saturation follows inst->SaturateMode and is switched off again before
+ * returning.
+ */
+static void
+emit_mov(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, in;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      /* XXX some moves from immediate value don't work reliably!!!
+       * so get_src_reg() is used here rather than get_src_reg_imm().
+       */
+      in = get_src_reg(c, inst, 0, chan);
+      brw_MOV(p, out, in);
+   }
+
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Calculate pixel centers by adding 1 or 0 to each of the micro-tile
+ * coordinates passed in r1.  X goes to destination channel 0 and Y to
+ * channel 1, each only if enabled in the write mask.
+ */
+static void
+emit_pixel_xy(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const struct brw_reg payload_uw =
+      retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_reg dst_x, dst_y;
+
+   /* both destinations are looked up regardless of the write mask */
+   dst_x = get_dst_reg(c, inst, 0);
+   dst_y = get_dst_reg(c, inst, 1);
+
+   if (writemask & WRITEMASK_X) {
+      brw_ADD(p,
+              vec8(retype(dst_x, BRW_REGISTER_TYPE_UW)),
+              stride(suboffset(payload_uw, 4), 2, 4, 0),
+              brw_imm_v(0x10101010));
+   }
+
+   if (writemask & WRITEMASK_Y) {
+      brw_ADD(p,
+              vec8(retype(dst_y, BRW_REGISTER_TYPE_UW)),
+              stride(suboffset(payload_uw, 5), 2, 4, 0),
+              brw_imm_v(0x11001100));
+   }
+}
+
+/**
+ * Calc delta X,Y by subtracting the origin held in r1 from the pixel
+ * centers supplied in channels 0 and 1 of source operand 0.
+ */
+static void
+emit_delta_xy(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const struct brw_reg origin = brw_vec1_grf(1, 0);
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_reg dst_x, dst_y, center_x, center_y;
+
+   /* all four registers are looked up regardless of the write mask */
+   dst_x = get_dst_reg(c, inst, 0);
+   dst_y = get_dst_reg(c, inst, 1);
+   center_x = get_src_reg(c, inst, 0, 0);
+   center_y = get_src_reg(c, inst, 0, 1);
+
+   if (writemask & WRITEMASK_X) {
+      brw_ADD(p,
+              dst_x,
+              retype(center_x, BRW_REGISTER_TYPE_UW),
+              negate(origin));
+   }
+
+   if (writemask & WRITEMASK_Y) {
+      brw_ADD(p,
+              dst_y,
+              retype(center_y, BRW_REGISTER_TYPE_UW),
+              negate(suboffset(origin, 1)));
+   }
+}
+
+/**
+ * Copy r1 into message register base_reg + 1 (the pass-through control
+ * information) and then emit the framebuffer write message.
+ *
+ * \param c         compile state
+ * \param base_reg  first message register of the payload
+ * \param nr        forwarded to brw_fb_WRITE() after the target
+ * \param target    forwarded to brw_fb_WRITE()
+ * \param eot       forwarded to brw_fb_WRITE() as its last argument
+ */
+static void
+fire_fb_write(struct brw_wm_compile *c,
+              GLuint base_reg,
+              GLuint nr,
+              GLuint target,
+              GLuint eot)
+{
+   struct brw_compile *p = &c->func;
+
+   /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+   brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0));
+   brw_pop_insn_state(p);
+
+   /* Send framebuffer write message: */
+   brw_fb_WRITE(p,
+                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+                base_reg,
+                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+                target,
+                nr,
+                0,
+                eot);
+}
+
+/**
+ * Fill the message registers for a framebuffer write and send it.
+ *
+ * The payload is laid out from message register 2 (3 when
+ * key.aa_dest_stencil_reg is set): the four channels of source operand 0,
+ * then optionally the source depth and the destination depth.  The
+ * render target and end-of-thread flag are unpacked from inst->Aux
+ * (target in the upper bits, eot in bit 0).
+ */
+static void
+emit_fb_write(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg value;
+   int msg_reg = 2;
+   int chan;
+
+   /* Reserve a space for AA - may not be needed: */
+   if (c->key.aa_dest_stencil_reg)
+      msg_reg += 1;
+
+   brw_push_insn_state(p);
+   for (chan = 0; chan < 4; chan++) {
+      value = get_src_reg(c, inst, 0, chan);
+      /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+      /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+      brw_MOV(p, brw_message_reg(msg_reg + chan), value);
+   }
+   /* skip over the regs populated above: */
+   msg_reg += 8;
+   brw_pop_insn_state(p);
+
+   if (c->key.source_depth_to_render_target) {
+      if (c->key.computes_depth)
+         value = get_src_reg(c, inst, 2, 2);
+      else
+         value = get_src_reg(c, inst, 1, 1);
+      brw_MOV(p, brw_message_reg(msg_reg), value);
+
+      msg_reg += 2;
+   }
+
+   if (c->key.dest_depth_reg) {
+      const GLuint comp = c->key.dest_depth_reg / 2;
+      const GLuint off = c->key.dest_depth_reg % 2;
+
+      if (off == 0) {
+         struct brw_reg depth = get_src_reg(c, inst, 1, 1);
+         brw_MOV(p, brw_message_reg(msg_reg), depth);
+      }
+      else {
+         /* XXX this code needs review/testing */
+         struct brw_reg first = get_src_reg(c, inst, 1, comp);
+         struct brw_reg second = get_src_reg(c, inst, 1, comp + 1);
+
+         brw_push_insn_state(p);
+         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+         brw_MOV(p, brw_message_reg(msg_reg), offset(first, 1));
+         /* 2nd half? */
+         brw_MOV(p, brw_message_reg(msg_reg + 1), second);
+         brw_pop_insn_state(p);
+      }
+      msg_reg += 2;
+   }
+
+   fire_fb_write(c, 0, msg_reg, inst->Aux >> 1, inst->Aux & 1);
+}
+
+/**
+ * Compute W for destination channel 3 when that channel is enabled.
+ *
+ * 1/w is obtained by linearly interpolating wpos[3], with the result put
+ * straight into message register 2; the math unit's INV function then
+ * turns it into w.  Nothing is emitted when W is not in the write mask.
+ */
+static void
+emit_pixel_w(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg out, attr, dx, dy, coeffs;
+
+   if (!(inst->DstReg.WriteMask & WRITEMASK_W))
+      return;
+
+   out = get_dst_reg(c, inst, 3);
+   attr = get_src_reg(c, inst, 0, 0);
+   dx = get_src_reg(c, inst, 1, 0);
+   dy = get_src_reg(c, inst, 1, 1);
+
+   coeffs = brw_vec1_grf(attr.nr + 1, 4);
+
+   /* Calc 1/w */
+   brw_LINE(p, brw_null_reg(), coeffs, dx);
+   brw_MAC(p, brw_message_reg(2), suboffset(coeffs, 1), dy);
+
+   /* Calc w */
+   brw_math_16(p, out,
+               BRW_MATH_FUNCTION_INV,
+               BRW_MATH_SATURATE_NONE,
+               2, brw_null_reg(),
+               BRW_MATH_PRECISION_FULL);
+}
+
+/**
+ * Linear interpolation: a LINE/MAC pair per enabled channel.
+ *
+ * The per-channel coefficient registers are derived from the register
+ * number of source operand 0: channels 0/1 at sub-registers 0 and 4 of
+ * that register, channels 2/3 at the same positions of the next one.
+ * Source operand 1 supplies the two deltas.
+ */
+static void
+emit_linterp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_reg coeffs[4];
+   struct brw_reg attr, dx, dy;
+   GLuint base, chan;
+
+   attr = get_src_reg(c, inst, 0, 0);
+   dx = get_src_reg(c, inst, 1, 0);
+   dy = get_src_reg(c, inst, 1, 1);
+   base = attr.nr;
+
+   for (chan = 0; chan < 4; chan++)
+      coeffs[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      brw_LINE(p, brw_null_reg(), coeffs[chan], dx);
+      brw_MAC(p, out, suboffset(coeffs[chan], 1), dy);
+   }
+}
+
+/**
+ * Constant interpolation: for each enabled channel, MOV the element at
+ * sub-offset 3 of that channel's coefficient register into the
+ * destination.  Coefficient registers are located from the register
+ * number of source operand 0, two channels per register.
+ */
+static void
+emit_cinterp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_reg coeffs[4];
+   struct brw_reg attr;
+   GLuint base, chan;
+
+   attr = get_src_reg(c, inst, 0, 0);
+   base = attr.nr;
+
+   for (chan = 0; chan < 4; chan++)
+      coeffs[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      brw_MOV(p, out, suboffset(coeffs[chan], 3));
+   }
+}
+
+/**
+ * Interpolation followed by a multiply with W: the same LINE/MAC pair as
+ * emit_linterp(), then MUL by channel 3 of source operand 2.
+ */
+static void
+emit_pinterp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_reg coeffs[4];
+   struct brw_reg attr, dx, dy, w;
+   GLuint base, chan;
+
+   attr = get_src_reg(c, inst, 0, 0);
+   dx = get_src_reg(c, inst, 1, 0);
+   dy = get_src_reg(c, inst, 1, 1);
+   w = get_src_reg(c, inst, 2, 3);
+   base = attr.nr;
+
+   for (chan = 0; chan < 4; chan++)
+      coeffs[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      brw_LINE(p, brw_null_reg(), coeffs[chan], dx);
+      brw_MAC(p, out, suboffset(coeffs[chan], 1), dy);
+      brw_MUL(p, out, out, w);
+   }
+}
+
+/**
+ * Sets the destination channels to 1.0 or 0.0 according to glFrontFacing.
+ *
+ * Every enabled channel is first written with 0.0.  r1.6 is then compared
+ * (as an unsigned dword) against bit 31, and the enabled channels are
+ * written again with 1.0 after that compare.  The predicate flag value is
+ * reset to 0xff at the end.
+ *
+ * The bit-31 constant is built with an unsigned literal: shifting the
+ * signed int 1 left by 31 is undefined behaviour in C.
+ *
+ * \param c     compile state
+ * \param inst  instruction supplying the destination and write mask
+ */
+static void emit_frontfacing(struct brw_wm_compile *c,
+                             const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+   struct brw_reg dst;
+   GLuint mask = inst->DstReg.WriteMask;
+   int i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+         dst = get_dst_reg(c, inst, i);
+         brw_MOV(p, dst, brw_imm_f(0.0));
+      }
+   }
+
+   /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+    * us front face
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1u << 31));
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+         dst = get_dst_reg(c, inst, i);
+         brw_MOV(p, dst, brw_imm_f(1.0));
+      }
+   }
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/**
+ * Cross product.  For each enabled channel i, with i1 = (i+1)%3 and
+ * i2 = (i+2)%3, emit
+ *    MUL null, -src0[i2], src1[i1]
+ *    MAC dst[i], src0[i1], src1[i2]
+ * Only the MAC honours inst->SaturateMode.
+ */
+static void
+emit_xpd(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      const GLuint next2 = (chan + 2) % 3;
+      const GLuint next1 = (chan + 1) % 3;
+      struct brw_reg a, b, out;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+
+      a = negate(get_src_reg(c, inst, 0, next2));
+      b = get_src_reg_imm(c, inst, 1, next1);
+      brw_MUL(p, brw_null_reg(), a, b);
+
+      a = get_src_reg(c, inst, 0, next1);
+      b = get_src_reg_imm(c, inst, 1, next2);
+      brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+      brw_MAC(p, out, a, b);
+      brw_set_saturate(p, 0);
+   }
+
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Three-component dot product of source operands 0 and 1, written to the
+ * single enabled destination channel.  Returns without emitting anything
+ * when no XYZW channel is enabled; asserts that exactly one is.
+ * Saturation is applied to the final MAC only.
+ */
+static void
+emit_dp3(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+   const int out_chan = ffs(enabled) - 1;
+   struct brw_reg a[3], b[3], out;
+   int k;
+
+   if (!enabled)
+      return;
+
+   assert(is_power_of_two(enabled));
+
+   for (k = 0; k < 3; k++) {
+      a[k] = get_src_reg(c, inst, 0, k);
+      b[k] = get_src_reg_imm(c, inst, 1, k);
+   }
+
+   out = get_dst_reg(c, inst, out_chan);
+
+   brw_MUL(p, brw_null_reg(), a[0], b[0]);
+   brw_MAC(p, brw_null_reg(), a[1], b[1]);
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+   brw_MAC(p, out, a[2], b[2]);
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Four-component dot product of source operands 0 and 1, written to the
+ * single enabled destination channel.  Returns without emitting anything
+ * when no XYZW channel is enabled; asserts that exactly one is.
+ * Saturation is applied to the final MAC only.
+ */
+static void
+emit_dp4(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+   const int out_chan = ffs(enabled) - 1;
+   struct brw_reg a[4], b[4], out;
+   int k;
+
+   if (!enabled)
+      return;
+
+   assert(is_power_of_two(enabled));
+
+   for (k = 0; k < 4; k++) {
+      a[k] = get_src_reg(c, inst, 0, k);
+      b[k] = get_src_reg_imm(c, inst, 1, k);
+   }
+
+   out = get_dst_reg(c, inst, out_chan);
+
+   brw_MUL(p, brw_null_reg(), a[0], b[0]);
+   brw_MAC(p, brw_null_reg(), a[1], b[1]);
+   brw_MAC(p, brw_null_reg(), a[2], b[2]);
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+   brw_MAC(p, out, a[3], b[3]);
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Homogeneous dot product: the three-component dot product of source
+ * operands 0 and 1 is accumulated into the destination, then channel 3 of
+ * operand 1 is added.  Written to the single enabled destination channel;
+ * returns early when none is enabled.  Saturation is applied to the final
+ * ADD only.
+ */
+static void
+emit_dph(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+   const int out_chan = ffs(enabled) - 1;
+   struct brw_reg a[4], b[4], out;
+   int k;
+
+   if (!enabled)
+      return;
+
+   assert(is_power_of_two(enabled));
+
+   for (k = 0; k < 4; k++) {
+      a[k] = get_src_reg(c, inst, 0, k);
+      b[k] = get_src_reg_imm(c, inst, 1, k);
+   }
+
+   out = get_dst_reg(c, inst, out_chan);
+
+   brw_MUL(p, brw_null_reg(), a[0], b[0]);
+   brw_MAC(p, brw_null_reg(), a[1], b[1]);
+   brw_MAC(p, out, a[2], b[2]);
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+   brw_ADD(p, out, out, b[3]);
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
+ *
+ * Channel 0 of source operand 0 is moved to message register 2 and the
+ * math function \p func is applied to it.  The result goes to the single
+ * destination channel enabled in the write mask: the function returns
+ * early when no channel is enabled and asserts that exactly one is.
+ *
+ * \param c     compile state
+ * \param inst  instruction supplying operands, write mask, SaturateMode
+ * \param func  BRW_MATH_FUNCTION_* selector passed to brw_math()
+ */
+static void
+emit_math1(struct brw_wm_compile *c,
+           const struct brw_fp_instruction *inst, GLuint func)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+   const int out_chan = ffs(enabled) - 1;
+   struct brw_reg arg, out;
+
+   if (!enabled)
+      return;
+
+   assert(is_power_of_two(enabled));
+
+   out = get_dst_reg(c, inst, out_chan);
+   /* Get first component of source register */
+   arg = get_src_reg(c, inst, 0, 0);
+
+   brw_MOV(p, brw_message_reg(2), arg);
+   brw_math(p,
+            out,
+            func,
+            (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE
+                                                 : BRW_MATH_SATURATE_NONE,
+            2,
+            brw_null_reg(),
+            BRW_MATH_DATA_VECTOR,
+            BRW_MATH_PRECISION_FULL);
+}
+
+/** RCP: emit_math1() with BRW_MATH_FUNCTION_INV. */
+static void
+emit_rcp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint func = BRW_MATH_FUNCTION_INV;
+
+   emit_math1(c, inst, func);
+}
+
+/** RSQ: emit_math1() with BRW_MATH_FUNCTION_RSQ. */
+static void
+emit_rsq(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint func = BRW_MATH_FUNCTION_RSQ;
+
+   emit_math1(c, inst, func);
+}
+
+/** SIN: emit_math1() with BRW_MATH_FUNCTION_SIN. */
+static void
+emit_sin(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint func = BRW_MATH_FUNCTION_SIN;
+
+   emit_math1(c, inst, func);
+}
+
+/** COS: emit_math1() with BRW_MATH_FUNCTION_COS. */
+static void
+emit_cos(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint func = BRW_MATH_FUNCTION_COS;
+
+   emit_math1(c, inst, func);
+}
+
+/** EX2: emit_math1() with BRW_MATH_FUNCTION_EXP. */
+static void
+emit_ex2(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint func = BRW_MATH_FUNCTION_EXP;
+
+   emit_math1(c, inst, func);
+}
+
+/** LG2: emit_math1() with BRW_MATH_FUNCTION_LOG. */
+static void
+emit_lg2(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint func = BRW_MATH_FUNCTION_LOG;
+
+   emit_math1(c, inst, func);
+}
+
+/**
+ * Emit one ADD per enabled destination channel.  Source operand 1 may be
+ * folded into an immediate by get_src_reg_imm().  Saturation follows
+ * inst->SaturateMode and is switched off again before returning.
+ */
+static void
+emit_add(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, lhs, rhs;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      lhs = get_src_reg(c, inst, 0, chan);
+      rhs = get_src_reg_imm(c, inst, 1, chan);
+      brw_ADD(p, out, lhs, rhs);
+   }
+
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Address register load: MOV channel 0 of source operand 0 into the
+ * address register (BRW_ARF_ADDRESS in the architecture register file).
+ * Saturation follows inst->SaturateMode and is switched off afterwards.
+ */
+static void
+emit_arl(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg address, value;
+
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+   address = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                         BRW_ARF_ADDRESS, 0);
+   value = get_src_reg(c, inst, 0, 0); /* channel 0 */
+   brw_MOV(p, address, value);
+
+   brw_set_saturate(p, 0);
+}
+
+
+/**
+ * Emit one MUL per enabled destination channel.  Source operand 1 may be
+ * folded into an immediate by get_src_reg_imm().  Saturation follows
+ * inst->SaturateMode and is switched off again before returning.
+ */
+static void
+emit_mul(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, lhs, rhs;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      lhs = get_src_reg(c, inst, 0, chan);
+      rhs = get_src_reg_imm(c, inst, 1, chan);
+      brw_MUL(p, out, lhs, rhs);
+   }
+
+   brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit one FRC per enabled destination channel, reading source operand 0
+ * (possibly as an immediate).  Saturation is enabled according to
+ * inst->SaturateMode and reset afterwards only if it had been enabled.
+ */
+static void
+emit_frc(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, in;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      in = get_src_reg_imm(c, inst, 0, chan);
+      brw_FRC(p, out, in);
+   }
+
+   if (inst->SaturateMode != SATURATE_OFF)
+      brw_set_saturate(p, 0);
+}
+
+/**
+ * Floor: emit one RNDD per enabled destination channel, reading source
+ * operand 0 (possibly as an immediate).  Saturation follows
+ * inst->SaturateMode and is switched off again before returning.
+ */
+static void
+emit_flr(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, in;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      in = get_src_reg_imm(c, inst, 0, chan);
+      brw_RNDD(p, out, in);
+   }
+
+   brw_set_saturate(p, 0);
+}
+
+
+/**
+ * Emit MIN or MAX (selected by inst->Opcode) for each enabled channel.
+ *
+ * Per channel the sequence is: MOV dst, src0; CMP src1 against src0
+ * (L for MIN, G otherwise); predicated MOV dst, src1.  When the real
+ * destination is the same register as either source, the result is
+ * built in a temporary and copied to the real destination at the end.
+ *
+ * The aliasing test must look at the real destination register: `dst`
+ * has not been assigned yet at that point.
+ *
+ * \param c     compile state
+ * \param inst  instruction supplying opcode, operands, write mask and
+ *              SaturateMode
+ */
+static void emit_min_max(struct brw_wm_compile *c,
+                         const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint mask = inst->DstReg.WriteMask;
+   const int mark = mark_tmps(c);
+   int i;
+   brw_push_insn_state(p);
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+         struct brw_reg real_dst = get_dst_reg(c, inst, i);
+         struct brw_reg src0 = get_src_reg(c, inst, 0, i);
+         struct brw_reg src1 = get_src_reg(c, inst, 1, i);
+         struct brw_reg dst;
+         /* if dst==src0 or dst==src1 we need to use a temp reg */
+         GLboolean use_temp = brw_same_reg(real_dst, src0) ||
+                              brw_same_reg(real_dst, src1);
+         if (use_temp)
+            dst = alloc_tmp(c);
+         else
+            dst = real_dst;
+
+         /*
+         printf(" Min/max: dst %d src0 %d src1 %d\n",
+                dst.nr, src0.nr, src1.nr);
+         */
+         brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+         brw_MOV(p, dst, src0);
+         brw_set_saturate(p, 0);
+
+         if (inst->Opcode == OPCODE_MIN)
+            brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+         else
+            brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
+
+         /* overwrite with src1 only where the compare passed */
+         brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+         brw_MOV(p, dst, src1);
+         brw_set_saturate(p, 0);
+         brw_set_predicate_control_flag_value(p, 0xff);
+         if (use_temp)
+            brw_MOV(p, real_dst, dst);
+      }
+   }
+   brw_pop_insn_state(p);
+   release_tmps(c, mark);
+}
+
+/**
+ * POW: channel 0 of source operands 0 and 1 are moved to message
+ * registers 2 and 3 and the math unit's POW function is invoked.  The
+ * result goes to the single enabled destination channel; the function
+ * returns early when none is enabled and asserts that exactly one is.
+ */
+static void
+emit_pow(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+   const int out_chan = ffs(enabled) - 1;
+   struct brw_reg out, base, exponent;
+
+   if (!enabled)
+      return;
+
+   assert(is_power_of_two(enabled));
+
+   out = get_dst_reg(c, inst, out_chan);
+   base = get_src_reg_imm(c, inst, 0, 0);
+   exponent = get_src_reg_imm(c, inst, 1, 0);
+
+   brw_MOV(p, brw_message_reg(2), base);
+   brw_MOV(p, brw_message_reg(3), exponent);
+
+   brw_math(p,
+            out,
+            BRW_MATH_FUNCTION_POW,
+            (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE
+                                                 : BRW_MATH_SATURATE_NONE,
+            2,
+            brw_null_reg(),
+            BRW_MATH_DATA_VECTOR,
+            BRW_MATH_PRECISION_FULL);
+}
+
+/**
+ * Linear interpolate.  For each enabled channel:
+ *    ADD dst, -src0, 1.0
+ *    MUL null, dst, src2
+ *    MAC dst, src0, src1        (saturated per inst->SaturateMode)
+ * src1 or src2 is first copied to a temporary when its register number
+ * equals the destination's.  Temporaries are released at the end of
+ * every loop iteration.
+ */
+static void
+emit_lrp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_reg out, weight, a, b, a_safe, b_safe;
+   int chan;
+   const int tmp_mark = mark_tmps(c);
+
+   for (chan = 0; chan < 4; chan++) {
+      if (writemask & (1 << chan)) {
+         out = get_dst_reg(c, inst, chan);
+         weight = get_src_reg(c, inst, 0, chan);
+
+         a = get_src_reg_imm(c, inst, 1, chan);
+         if (a.nr != out.nr) {
+            a_safe = a;
+         } else {
+            a_safe = alloc_tmp(c);
+            brw_MOV(p, a_safe, a);
+         }
+
+         b = get_src_reg(c, inst, 2, chan);
+         if (b.nr != out.nr) {
+            b_safe = b;
+         } else {
+            b_safe = alloc_tmp(c);
+            brw_MOV(p, b_safe, b);
+         }
+
+         brw_ADD(p, out, negate(weight), brw_imm_f(1.0));
+         brw_MUL(p, brw_null_reg(), out, b_safe);
+         brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+         brw_MAC(p, out, weight, a_safe);
+         brw_set_saturate(p, 0);
+      }
+      release_tmps(c, tmp_mark);
+   }
+}
+
+/**
+ * For GLSL shaders, this KIL will be unconditional.
+ * It may be contained inside an IF/ENDIF structure of course.
+ *
+ * With masking disabled, c->emit_mask_reg receives the complement of
+ * brw_mask_reg(1) and is then ANDed into the UW view of r0.0.
+ */
+static void
+emit_kil(struct brw_wm_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   const struct brw_reg r0_uw =
+      retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
+   brw_AND(p, r0_uw, c->emit_mask_reg, r0_uw);
+   brw_pop_insn_state(p);
+}
+
+/**
+ * Multiply-add.  For each enabled channel: MUL dst, src0, src1 followed
+ * by ADD dst, dst, src2.  Only the ADD honours inst->SaturateMode.
+ * Source operands 1 and 2 may be folded into immediates.
+ */
+static void
+emit_mad(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, a, b, addend;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      a = get_src_reg(c, inst, 0, chan);
+      b = get_src_reg_imm(c, inst, 1, chan);
+      addend = get_src_reg_imm(c, inst, 2, chan);
+
+      brw_MUL(p, out, a, b);
+
+      brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+      brw_ADD(p, out, out, addend);
+      brw_set_saturate(p, 0);
+   }
+}
+
+/**
+ * Set-on-condition.  For each enabled channel: compare src0 with src1
+ * using \p cond, write 0.0 unpredicated, then write 1.0 under the normal
+ * predicate.  Instruction state is saved and restored around each
+ * channel's sequence.
+ *
+ * \param c     compile state
+ * \param inst  instruction supplying operands and write mask
+ * \param cond  BRW_CONDITIONAL_* code passed to brw_CMP()
+ */
+static void
+emit_sop(struct brw_wm_compile *c,
+         const struct brw_fp_instruction *inst, GLuint cond)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   int chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg out, lhs, rhs;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      out = get_dst_reg(c, inst, chan);
+      lhs = get_src_reg(c, inst, 0, chan);
+      rhs = get_src_reg_imm(c, inst, 1, chan);
+
+      brw_push_insn_state(p);
+      brw_CMP(p, brw_null_reg(), cond, lhs, rhs);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_MOV(p, out, brw_imm_f(0.0));
+      brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+      brw_MOV(p, out, brw_imm_f(1.0));
+      brw_pop_insn_state(p);
+   }
+}
+
+/** SLT: emit_sop() with BRW_CONDITIONAL_L. */
+static void
+emit_slt(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint cond = BRW_CONDITIONAL_L;
+
+   emit_sop(c, inst, cond);
+}
+
+/** SLE: emit_sop() with BRW_CONDITIONAL_LE. */
+static void
+emit_sle(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint cond = BRW_CONDITIONAL_LE;
+
+   emit_sop(c, inst, cond);
+}
+
+/** SGT: emit_sop() with BRW_CONDITIONAL_G. */
+static void
+emit_sgt(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint cond = BRW_CONDITIONAL_G;
+
+   emit_sop(c, inst, cond);
+}
+
+/** SGE: emit_sop() with BRW_CONDITIONAL_GE. */
+static void
+emit_sge(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint cond = BRW_CONDITIONAL_GE;
+
+   emit_sop(c, inst, cond);
+}
+
+/** SEQ: emit_sop() with BRW_CONDITIONAL_EQ. */
+static void
+emit_seq(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint cond = BRW_CONDITIONAL_EQ;
+
+   emit_sop(c, inst, cond);
+}
+
+/** SNE: emit_sop() with BRW_CONDITIONAL_NEQ. */
+static void
+emit_sne(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   const GLuint cond = BRW_CONDITIONAL_NEQ;
+
+   emit_sop(c, inst, cond);
+}
+
+/**
+ * View \p reg as W-typed data, step one element in, and apply a
+ * (0, 8, 2) region stride.
+ */
+static INLINE struct brw_reg high_words( struct brw_reg reg )
+{
+   const struct brw_reg as_words = retype( reg, BRW_REGISTER_TYPE_W );
+   const struct brw_reg from_second = suboffset( as_words, 1 );
+
+   return stride( from_second, 0, 8, 2 );
+}
+
+/** View \p reg as W-typed data with a (0, 8, 2) region stride. */
+static INLINE struct brw_reg low_words( struct brw_reg reg )
+{
+   const struct brw_reg as_words = retype( reg, BRW_REGISTER_TYPE_W );
+
+   return stride( as_words, 0, 8, 2 );
+}
+
+/** View \p reg as B-typed data with a (0, 16, 2) region stride. */
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
+{
+   const struct brw_reg as_bytes = retype( reg, BRW_REGISTER_TYPE_B );
+
+   return stride( as_bytes, 0, 16, 2 );
+}
+
+/**
+ * View \p reg as B-typed data, step one element in, and apply a
+ * (0, 16, 2) region stride.
+ */
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
+{
+   const struct brw_reg as_bytes = retype( reg, BRW_REGISTER_TYPE_B );
+   const struct brw_reg from_second = suboffset( as_bytes, 1 );
+
+   return stride( from_second, 0, 16, 2 );
+}
+
+
+
+/**
+ * Calculate the pixel offset from window bottom left into the
+ * destination X and Y channels:
+ *    X' = X
+ *    Y' = height - 1 - Y
+ * where X and Y are channels 0 and 1 of source operand 0 read as W-typed
+ * data and height is c->key.drawable_height.
+ */
+static void
+emit_wpos_xy(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_reg out_x, out_y, in_x, in_y;
+
+   /* all four registers are looked up regardless of the write mask */
+   out_x = get_dst_reg(c, inst, 0);
+   out_y = get_dst_reg(c, inst, 1);
+   in_x = get_src_reg(c, inst, 0, 0);
+   in_y = get_src_reg(c, inst, 0, 1);
+
+   if (writemask & WRITEMASK_X) {
+      /* X' = X */
+      brw_MOV(p, out_x, retype(in_x, BRW_REGISTER_TYPE_W));
+   }
+
+   if (writemask & WRITEMASK_Y) {
+      /* Y' = height - 1 - Y */
+      brw_ADD(p,
+              out_y,
+              negate(retype(in_y, BRW_REGISTER_TYPE_W)),
+              brw_imm_d(c->key.drawable_height - 1));
+   }
+}
+
+/**
+ * Texture sample with LOD bias.
+ *
+ * TODO: BIAS on SIMD8 not working yet...
+ *
+ * Message registers 2..4 receive the s/t/r coordinates (unused ones are
+ * written as 0 depending on the texture target), register 5 the bias
+ * taken from channel 3 of source operand 0, and register 6 a zero
+ * reference value.  An unknown texture target aborts.
+ */
+static void
+emit_txb(struct brw_wm_compile *c, const struct brw_fp_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg out[4], coord[4], payload, t_coord, r_coord;
+   /* Note: tex_unit was already looked up through SamplerTextures[] */
+   const GLuint unit = inst->tex_unit;
+   GLuint chan;
+   GLuint msg_type;
+
+   assert(unit < BRW_MAX_TEX_UNIT);
+
+   payload = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+   for (chan = 0; chan < 4; chan++)
+      out[chan] = get_dst_reg(c, inst, chan);
+   for (chan = 0; chan < 4; chan++)
+      coord[chan] = get_src_reg(c, inst, 0, chan);
+
+   /* choose what feeds the t and r slots for this target */
+   switch (inst->tex_target) {
+   case TEXTURE_1D_INDEX:
+      t_coord = brw_imm_f(0);
+      r_coord = brw_imm_f(0);
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      t_coord = coord[1];
+      r_coord = brw_imm_f(0);
+      break;
+   case TEXTURE_3D_INDEX:
+   case TEXTURE_CUBE_INDEX:
+      t_coord = coord[1];
+      r_coord = coord[2];
+      break;
+   default:
+      /* invalid target */
+      abort();
+   }
+
+   brw_MOV(p, brw_message_reg(2), coord[0]);     /* s coord */
+   brw_MOV(p, brw_message_reg(3), t_coord);      /* t coord */
+   brw_MOV(p, brw_message_reg(4), r_coord);      /* r coord */
+   brw_MOV(p, brw_message_reg(5), coord[3]);     /* bias */
+   brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
+
+   if (BRW_IS_IGDNG(p->brw)) {
+      msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+   } else {
+      /* Does it work well on SIMD8? */
+      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+   }
+
+   brw_SAMPLE(p,
+              retype(vec8(out[0]), BRW_REGISTER_TYPE_UW), /* dest */
+              1,                                          /* msg_reg_nr */
+              retype(payload, BRW_REGISTER_TYPE_UW),      /* src0 */
+              SURF_INDEX_TEXTURE(unit),
+              unit,                                       /* sampler */
+              inst->DstReg.WriteMask,                     /* writemask */
+              msg_type,                                   /* msg_type */
+              4,                                          /* response_length */
+              4,                                          /* msg_length */
+              0,                                          /* eot */
+              1,
+              BRW_SAMPLER_SIMD_MODE_SIMD8);
+}
+
+
+static void emit_tex(struct brw_wm_compile *c, /* compile state; code is emitted into c->func */
+ const struct brw_fp_instruction *inst) /* the TEX instruction being translated */
+{ /* Emit a SIMD8 texture sample; shadow units also get a zero lod and a ref value. */
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst[4], src[4], payload_reg;
+ /* Note: tex_unit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->tex_unit;
+ GLuint msg_len;
+ GLuint i, nr;
+ GLuint emit;
+ GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
+ payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, inst, 0, i);
+
+ switch (inst->tex_target) { /* nr = number of coords, emit = mask of the same coords */
+ case TEXTURE_1D_INDEX:
+ emit = WRITEMASK_X;
+ nr = 1;
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ emit = WRITEMASK_XY;
+ nr = 2;
+ break;
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
+ emit = WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ default:
+ /* invalid target */
+ abort();
+ }
+ msg_len = 1; /* only used below to pick the next message register (m2, m3, ...) */
+
+ /* move/load S, T, R coords */
+ for (i = 0; i < nr; i++) {
+ static const GLuint swz[4] = {0,1,2,2};
+ if (emit & (1<<i)) /* emit has bits 0..nr-1 set in every case above */
+ brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+ msg_len += 1;
+ }
+
+ if (shadow) { /* NOTE(review): for nr < 3 the registers between the coords and m5 are not written here - confirm */
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
+ brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
+ }
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+ } else {
+ /* Does it work for shadow on SIMD8 ? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ msg_type, /* msg_type */
+ 4, /* response_length */
+ shadow ? 6 : 4, /* msg_length (a constant; the msg_len counter above is not passed) */
+ 0, /* eot */
+ 1, /* NOTE(review): unlabeled brw_SAMPLE argument - confirm meaning */
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
+
+ if (shadow)
+ brw_MOV(p, dst[3], brw_imm_f(1.0)); /* shadow lookups: channel 3 of the result is overwritten with 1.0 */
+}
+
+
+/**
+ * Resolve subroutine calls after code emit is done.
+ */
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+ brw_resolve_cals(&c->func); /* presumably pairs brw_save_call() entries with brw_save_label() labels - confirm */
+}
+
+static void
+get_argument_regs(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ int index, /* which source operand of inst to fetch */
+ struct brw_reg *regs, /* out: regs[0..3], one entry per channel */
+ int mask) /* channels to fetch; bit i selects regs[i] */
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1 << i)) /* entries for channels not in mask are left untouched */
+ regs[i] = get_src_reg(c, inst, index, i);
+ }
+}
+
+static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c)
+{ /* Emit native code for each fp instruction in order, including IF/LOOP/CAL/RET flow control. */
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ GLuint i, if_depth = 0, loop_depth = 0;
+ struct brw_compile *p = &c->func;
+ struct brw_indirect stack_index = brw_indirect(0, 0);
+
+ c->out_of_regs = GL_FALSE;
+
+ prealloc_reg(c);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); /* point the call-stack index at c->stack */
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct brw_fp_instruction *inst = &c->fp_instructions[i];
+ int dst_flags;
+ struct brw_reg args[3][4], dst[4];
+ int j;
+
+ c->cur_inst = i;
+#if 0
+ debug_printf("Inst %d: ", i);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* fetch any constants that this instruction needs */
+ if (c->fp->use_const_buffer)
+ fetch_constants(c, inst);
+
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
+ switch (inst->Opcode) {
+ case WM_PIXELXY:
+ emit_pixel_xy(c, inst);
+ break;
+ case WM_DELTAXY:
+ emit_delta_xy(c, inst);
+ break;
+ case WM_PIXELW:
+ emit_pixel_w(c, inst);
+ break;
+ case WM_LINTERP:
+ emit_linterp(c, inst);
+ break;
+ case WM_PINTERP:
+ emit_pinterp(c, inst);
+ break;
+ case WM_CINTERP:
+ emit_cinterp(c, inst);
+ break;
+ case WM_WPOSXY:
+ emit_wpos_xy(c, inst);
+ break;
+ case WM_FB_WRITE:
+ emit_fb_write(c, inst);
+ break;
+ case WM_FRONTFACING:
+ emit_frontfacing(c, inst);
+ break;
+ case OPCODE_ADD:
+ emit_add(c, inst);
+ break;
+ case OPCODE_ARL:
+ emit_arl(c, inst);
+ break;
+ case OPCODE_FRC:
+ emit_frc(c, inst);
+ break;
+ case OPCODE_FLR:
+ emit_flr(c, inst);
+ break;
+ case OPCODE_LRP:
+ emit_lrp(c, inst);
+ break;
+ case OPCODE_TRUNC:
+ emit_trunc(c, inst);
+ break;
+ case OPCODE_MOV:
+ emit_mov(c, inst);
+ break;
+ case OPCODE_DP3:
+ emit_dp3(c, inst);
+ break;
+ case OPCODE_DP4:
+ emit_dp4(c, inst);
+ break;
+ case OPCODE_XPD:
+ emit_xpd(c, inst);
+ break;
+ case OPCODE_DPH:
+ emit_dph(c, inst);
+ break;
+ case OPCODE_RCP:
+ emit_rcp(c, inst);
+ break;
+ case OPCODE_RSQ:
+ emit_rsq(c, inst);
+ break;
+ case OPCODE_SIN:
+ emit_sin(c, inst);
+ break;
+ case OPCODE_COS:
+ emit_cos(c, inst);
+ break;
+ case OPCODE_EX2:
+ emit_ex2(c, inst);
+ break;
+ case OPCODE_LG2:
+ emit_lg2(c, inst);
+ break;
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ emit_min_max(c, inst);
+ break;
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ for (j = 0; j < 4; j++) { /* channels outside the writemask get the null register */
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+ emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+ args[0]);
+ break;
+ case OPCODE_SLT:
+ emit_slt(c, inst);
+ break;
+ case OPCODE_SLE:
+ emit_sle(c, inst);
+ break;
+ case OPCODE_SGT:
+ emit_sgt(c, inst);
+ break;
+ case OPCODE_SGE:
+ emit_sge(c, inst);
+ break;
+ case OPCODE_SEQ:
+ emit_seq(c, inst);
+ break;
+ case OPCODE_SNE:
+ emit_sne(c, inst);
+ break;
+ case OPCODE_MUL:
+ emit_mul(c, inst);
+ break;
+ case OPCODE_POW:
+ emit_pow(c, inst);
+ break;
+ case OPCODE_MAD:
+ emit_mad(c, inst);
+ break;
+ case OPCODE_TEX:
+ emit_tex(c, inst);
+ break;
+ case OPCODE_TXB:
+ emit_txb(c, inst);
+ break;
+ case OPCODE_KIL_NV:
+ emit_kil(c);
+ break;
+ case OPCODE_IF:
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_ELSE:
+ assert(if_depth > 0); /* ELSE without an open IF would index if_inst[-1] */
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+ break;
+ case OPCODE_ENDIF:
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
+ break;
+ case OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
+ case OPCODE_ENDSUB:
+ /* no-op */
+ break;
+ case OPCODE_CAL:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); /* store ip + 3*16 at the stack slot as the return address */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4)); /* advance the stack index by one 4-byte slot */
+ brw_save_call(&c->func, inst->label, p->nr_insn);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); /* jump; presumably retargeted by brw_resolve_cals() - confirm */
+ brw_pop_insn_state(p);
+ break;
+ case OPCODE_RET:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4)); /* step the stack index back one slot */
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); /* reload ip from the saved return address */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_pop_insn_state(p);
+ break;
+ case OPCODE_BGNLOOP:
+ /* XXX may need to invalidate the current_constant regs */
+ assert(loop_depth < MAX_LOOP_DEPTH); /* loop_inst[] holds MAX_LOOP_DEPTH entries */
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1; /* jump counts are scaled by this; 2 on IGDNG */
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ assert(loop_depth > 0); /* ENDLOOP without BGNLOOP would wrap the unsigned depth */
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+ default:
+ debug_printf("unsupported IR in fragment shader %d\n",
+ inst->Opcode);
+ }
+
+ if (inst->CondUpdate) /* set the predicate mode in effect for the next instruction */
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ else
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ post_wm_emit(c);
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ debug_printf("wm-native:\n");
+ brw_disasm(stderr, p->store, p->nr_insn);
+ }
+}
+
+/**
+ * Do GPU code generation for shaders that use GLSL features such as flow
+ * control. Other shaders are presumably compiled by a different path (TODO confirm).
+ */
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ if (BRW_DEBUG & DEBUG_WM) {
+ debug_printf("%s:\n", __FUNCTION__);
+ }
+
+ /* initial instruction translation/simplification */
+ brw_wm_pass_fp(c);
+
+ /* actual code generation */
+ brw_wm_emit_branching_shader(brw, c);
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "brw_wm_branching_shader_emit done");
+ }
+
+ c->prog_data.total_grf = num_grf_used(c); /* record GRF usage in the program data */
+ c->prog_data.total_scratch = 0; /* this path always reports zero scratch */
+}
diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c
new file mode 100644
index 0000000000..6f1e9fcc3c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_iz.c
@@ -0,0 +1,156 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_wm.h"
+
+
+#undef P /* promoted depth */
+#undef C /* computed */
+#undef N /* non-promoted? */
+
+#define P 0
+#define C 1
+#define N 2
+
+const struct {
+ GLuint mode:2;
+ GLuint sd_present:1;
+ GLuint sd_to_rt:1;
+ GLuint dd_present:1;
+ GLuint ds_present:1;
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 0, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 }
+};
+
+/**
+ * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup bitmask of IZ_* flags; used as the index into wm_iz_table
+ */
+void brw_wm_lookup_iz( GLuint line_aa,
+ GLuint lookup,
+ GLboolean ps_uses_depth, /* requests a source depth register even when the table does not */
+ struct brw_wm_prog_key *key ) /* out: fields are only set, never cleared, here */
+{
+ GLuint reg = 2; /* next register number to hand to a key->*_reg field */
+
+ assert (lookup < IZ_BIT_MAX);
+
+ if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+ key->computes_depth = 1;
+
+ if (wm_iz_table[lookup].sd_present || ps_uses_depth) {
+ key->source_depth_reg = reg;
+ reg += 2;
+ }
+
+ if (wm_iz_table[lookup].sd_to_rt)
+ key->source_depth_to_render_target = 1;
+
+ if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
+ key->aa_dest_stencil_reg = reg;
+ key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ line_aa == AA_SOMETIMES); /* only when the register is there solely because of line AA */
+ reg++;
+ }
+
+ if (wm_iz_table[lookup].dd_present) {
+ key->dest_depth_reg = reg;
+ reg+=2;
+ }
+
+ key->nr_depth_regs = (reg+1)/2; /* reg divided by two, rounded up */
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
new file mode 100644
index 0000000000..0bacad2b0f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -0,0 +1,366 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "brw_debug.h"
+#include "brw_wm.h"
+
+
+
+/***********************************************************************
+ */
+
+static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
+{ /* Hand out the next unused entry of c->refs[]; entries are not initialized here. */
+ assert(c->nr_refs < BRW_WM_MAX_REF);
+ return &c->refs[c->nr_refs++];
+}
+
+static struct brw_wm_value *get_value( struct brw_wm_compile *c)
+{ /* Hand out the next unused entry of c->vreg[]; entries are not initialized here. */
+ assert(c->nr_vreg < BRW_WM_MAX_VREG); /* bound the counter that actually indexes vreg[] */
+ return &c->vreg[c->nr_vreg++];
+}
+
+/** Return a pointer to the next unused entry of c->instruction[] (asserts on overflow) */
+static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
+{
+ assert(c->nr_insns < BRW_WM_MAX_INSN);
+ return &c->instruction[c->nr_insns++];
+}
+
+/***********************************************************************
+ */
+
+/** Init the "undef" register: c->undef_ref, which pass0_get_reg() returns when no ref exists */
+static void pass0_init_undef( struct brw_wm_compile *c)
+{
+ struct brw_wm_ref *ref = &c->undef_ref;
+ ref->value = &c->undef_value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL; /* not linked into any use chain */
+}
+
+/** Set a FP register to a value: wrap it in a fresh ref and store that in pass0_fp_reg */
+static void pass0_set_fpreg_value( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component,
+ struct brw_wm_value *value )
+{
+ struct brw_wm_ref *ref = get_ref(c);
+ ref->value = value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL;
+ c->pass0_fp_reg[file][idx][component] = ref; /* replaces whatever ref the slot held before */
+}
+
+/** Set a FP register to a ref: the slot points at src_ref itself, no copy is made */
+static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component,
+ const struct brw_wm_ref *src_ref )
+{
+ c->pass0_fp_reg[file][idx][component] = src_ref;
+}
+
+static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
+ unsigned idx, /* vec4 index of the parameter */
+ unsigned component) /* channel within that vec4 */
+{ /* Returns a new ref for the scalar, or NULL (and sets prog_data.error) when out of range. */
+ GLuint i = idx * 4 + component; /* flat scalar index */
+
+ if (i >= BRW_WM_MAX_PARAM) {
+ debug_printf("%s: out of params\n", __FUNCTION__);
+ c->prog_data.error = 1;
+ return NULL;
+ }
+ else {
+ struct brw_wm_ref *ref = get_ref(c);
+
+ c->nr_creg = MAX2(c->nr_creg, (i+16)/16); /* each creg[] entry covers 16 scalars */
+
+ /* Push the offsets into hw_reg. These will be added to the
+ * real register numbers once one is allocated in pass2.
+ */
+ ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8); /* offset 0 or 1 within the group, then one of 8 slots */
+ ref->value = &c->creg[i/16];
+ ref->insn = 0;
+ ref->prevuse = NULL;
+
+ return ref;
+ }
+}
+
+
+
+
+/* Lookup our internal registers: return the current ref for (file, idx, component),
+ * creating CONSTANT/IMMEDIATE refs on first use; never returns NULL. */
+static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component )
+{
+ const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
+
+ if (!ref) {
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_OUTPUT:
+ case BRW_FILE_PAYLOAD:
+ /* should already be done?? */
+ break;
+
+ case TGSI_FILE_CONSTANT:
+ ref = get_param_ref(c,
+ c->fp->info.immediate_count + idx, /* constants are indexed after the immediates */
+ component);
+ break;
+
+ case TGSI_FILE_IMMEDIATE:
+ ref = get_param_ref(c,
+ idx,
+ component);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ c->pass0_fp_reg[file][idx][component] = ref; /* cache the result; may still be NULL */
+ }
+
+ if (!ref)
+ ref = &c->undef_ref; /* fall back to the shared "undef" ref */
+
+ return ref;
+}
+
+
+
+/***********************************************************************
+ * Straight translation to internal instruction format
+ */
+
+static void pass0_set_dst( struct brw_wm_compile *c,
+ struct brw_wm_instruction *out, /* translated instruction receiving dst[] and writemask */
+ const struct brw_fp_instruction *inst, /* source instruction; only inst->dst is read */
+ GLuint writemask ) /* channels that get a new value */
+{
+ const struct brw_fp_dst dst = inst->dst;
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ out->dst[i] = get_value(c); /* a fresh value per written channel */
+ pass0_set_fpreg_value(c, dst.file, dst.index, i, out->dst[i]); /* later reads of this register see it */
+ }
+ }
+
+ out->writemask = writemask;
+}
+
+
+static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
+ struct brw_fp_src src,
+ GLuint i ) /* channel of the operand; mapped through src.swizzle */
+{ /* Look up the ref currently bound to the swizzled channel i of src. */
+ return pass0_get_reg(c, src.file, src.index, BRW_GET_SWZ(src.swizzle,i));
+}
+
+
+static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
+ struct brw_fp_src src,
+ GLuint i, /* channel of the operand */
+ struct brw_wm_instruction *insn) /* using instruction, or NULL to skip use-chain linking */
+{ /* Create a new ref to the value behind channel i of src, applying negate/abs. */
+ const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
+ struct brw_wm_ref *newref = get_ref(c);
+
+ newref->value = ref->value;
+ newref->hw_reg = ref->hw_reg;
+
+ if (insn) {
+ newref->insn = insn - c->instruction; /* index of insn within c->instruction[] */
+ newref->prevuse = newref->value->lastuse; /* push onto the value's use chain */
+ newref->value->lastuse = newref;
+ }
+
+ if (src.negate)
+ newref->hw_reg.negate ^= 1; /* toggles, so it composes with a negate inherited from ref */
+
+ if (src.abs) { /* NOTE(review): negate is cleared whenever abs is set, so -|x| cannot be expressed here - confirm intended */
+ newref->hw_reg.negate = 0;
+ newref->hw_reg.abs = 1;
+ }
+
+ return newref;
+}
+
+
+static void
+translate_insn(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{ /* Append one brw_wm_instruction translated from inst: copy fields, then srcs, then dsts. */
+ struct brw_wm_instruction *out = get_instruction(c);
+ GLuint writemask = inst->dst.writemask;
+ GLuint nr_args = brw_wm_nr_args(inst->opcode);
+ GLuint i, j;
+
+ /* Copy some data out of the instruction
+ */
+ out->opcode = inst->opcode;
+ out->saturate = inst->dst.saturate;
+ out->tex_unit = inst->tex_unit;
+ out->target = inst->target;
+
+ /* Nasty hack: a non-zero tex_unit on WM_FB_WRITE is taken as the eot flag
+ */
+ out->eot = (inst->opcode == WM_FB_WRITE &&
+ inst->tex_unit != 0);
+
+
+ /* Args: sources are resolved before the destination is rebound below
+ */
+ for (i = 0; i < nr_args; i++) {
+ for (j = 0; j < 4; j++) {
+ out->src[i][j] = get_new_ref(c, inst->src[i], j, out);
+ }
+ }
+
+ /* Dst:
+ */
+ pass0_set_dst(c, out, inst, writemask);
+}
+
+
+
+/***********************************************************************
+ * Optimize moves and swizzles away:
+ */
+static void pass0_precalc_mov( struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst )
+{ /* No instruction is emitted: the destination slots are rebound to refs of the source. */
+ const struct brw_fp_dst dst = inst->dst;
+ GLuint writemask = dst.writemask;
+ struct brw_wm_ref *refs[4];
+ GLuint i;
+
+ /* Get the effect of a MOV by manipulating our register table:
+ * First get all refs, then assign refs. This ensures that "in-place"
+ * swizzles such as:
+ * MOV t, t.xxyx
+ * are handled correctly. Previously, these two steps were done in
+ * one loop and the above case was incorrectly handled.
+ */
+ for (i = 0; i < 4; i++) {
+ refs[i] = get_new_ref(c, inst->src[0], i, NULL); /* NULL insn: not added to any use chain */
+ }
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1 << i)) {
+ pass0_set_fpreg_ref( c, dst.file, dst.index, i, refs[i]);
+ }
+ }
+}
+
+
+/* Initialize payload "registers": bind the depth and input-interp values to BRW_FILE_PAYLOAD.
+ */
+static void pass0_init_payload( struct brw_wm_compile *c )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ GLuint j = i >= c->key.nr_depth_regs ? 0 : i; /* components at or past nr_depth_regs alias depth[0] */
+ pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, PAYLOAD_DEPTH, i,
+ &c->payload.depth[j] );
+ }
+
+ for (i = 0; i < c->key.nr_inputs; i++) /* NOTE(review): if PAYLOAD_DEPTH < nr_inputs this rebinds component 0 set above - confirm */
+ pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, i, 0,
+ &c->payload.input_interp[i] );
+}
+
+
+/***********************************************************************
+ * PASS 0
+ *
+ * Work forwards to give each calculated value a unique number. Where
+ * an instruction produces duplicate values (eg DP3), all are given
+ * the same number.
+ *
+ * Translate away swizzling and eliminate non-saturating moves.
+ *
+ * Translate instructions from our fp_instruction structs to our
+ * internal brw_wm_instruction representation.
+ */
+void brw_wm_pass0( struct brw_wm_compile *c )
+{
+ GLuint insn;
+
+ c->nr_vreg = 0; /* NOTE(review): nr_refs is not reset here - confirm the caller clears it */
+ c->nr_insns = 0;
+
+ pass0_init_undef(c);
+ pass0_init_payload(c);
+
+ for (insn = 0; insn < c->nr_fp_insns; insn++) {
+ const struct brw_fp_instruction *inst = &c->fp_instructions[insn];
+
+ /* Optimize away moves, otherwise emit translated instruction:
+ */
+ switch (inst->opcode) {
+ case TGSI_OPCODE_MOV:
+ if (!inst->dst.saturate) {
+ pass0_precalc_mov(c, inst); /* handled by rebinding refs, no instruction emitted */
+ }
+ else {
+ translate_insn(c, inst); /* a saturating MOV changes the value, so it is kept */
+ }
+ break;
+ default:
+ translate_insn(c, inst);
+ break;
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass0");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
new file mode 100644
index 0000000000..005747f00b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -0,0 +1,292 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_wm.h"
+#include "brw_debug.h"
+
+
+static GLuint get_tracked_mask(struct brw_wm_compile *c, /* not used in this function */
+ struct brw_wm_instruction *inst) /* modified in place */
+{ /* Strip written channels whose value does not contribute to output; return what is left. */
+ GLuint i;
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ if (!inst->dst[i]->contributes_to_output) {
+ inst->writemask &= ~(1<<i);
+ inst->dst[i] = 0; /* the dst pointer is dropped together with its mask bit */
+ }
+ }
+ }
+
+ return inst->writemask;
+}
+
+/* Remove a reference from a value's usage chain (the list linked from lastuse via prevuse).
+ */
+static void unlink_ref(struct brw_wm_ref *ref)
+{
+ struct brw_wm_value *value = ref->value;
+
+ if (ref == value->lastuse) {
+ value->lastuse = ref->prevuse; /* ref is the chain head: just advance the head */
+ }
+ else {
+ struct brw_wm_ref *i = value->lastuse;
+ while (i->prevuse != ref) i = i->prevuse; /* no NULL check: ref must be on this chain */
+ i->prevuse = ref->prevuse;
+ }
+}
+
+static void track_arg(struct brw_wm_compile *c, /* not used in this function */
+ struct brw_wm_instruction *inst,
+ GLuint arg, /* which source operand of inst */
+ GLuint readmask) /* channels of that operand the instruction really reads */
+{ /* Mark the values read through readmask as live; unlink and clear the other refs. */
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ struct brw_wm_ref *ref = inst->src[arg][i];
+ if (ref) {
+ if (readmask & (1<<i)) {
+ ref->value->contributes_to_output = 1;
+ }
+ else {
+ unlink_ref(ref); /* unread channel: take it off its value's use chain */
+ inst->src[arg][i] = NULL;
+ }
+ }
+ }
+}
+
+static GLuint get_texcoord_mask( GLuint tex_idx ) /* a TGSI_TEXTURE_* target */
+{ /* Return the BRW_WRITEMASK_* channels of the coordinate operand used for this target. */
+ switch (tex_idx) {
+ case TGSI_TEXTURE_1D:
+ return BRW_WRITEMASK_X;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ return BRW_WRITEMASK_XY;
+ case TGSI_TEXTURE_3D:
+ return BRW_WRITEMASK_XYZ;
+ case TGSI_TEXTURE_CUBE:
+ return BRW_WRITEMASK_XYZ;
+
+ case TGSI_TEXTURE_SHADOW1D:
+ return BRW_WRITEMASK_XZ; /* X plus Z; presumably Z carries the compare value - confirm */
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ return BRW_WRITEMASK_XYZ;
+ default:
+ assert(0);
+ return 0; /* unknown target: no channels are reported as read */
+ }
+}
+
+
+/* Step two: Basically this is dead code elimination.
+ *
+ * Iterate backwards over instructions, noting which values
+ * contribute to the final result. Adjust writemasks to only
+ * calculate these values.
+ */
+void brw_wm_pass1( struct brw_wm_compile *c )
+{
+ GLint insn; /* signed so the countdown loop can terminate below zero */
+
+ for (insn = c->nr_insns-1; insn >= 0; insn--) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+ GLuint writemask;
+ GLuint read0, read1, read2; /* channels read from source operands 0, 1 and 2 */
+
+ if (inst->opcode == TGSI_OPCODE_KIL) {
+ track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); /* All args contribute to final */
+ continue;
+ }
+
+ if (inst->opcode == WM_FB_WRITE) {
+ track_arg(c, inst, 0, BRW_WRITEMASK_XYZW);
+ track_arg(c, inst, 1, BRW_WRITEMASK_XYZW);
+ if (c->key.source_depth_to_render_target &&
+ c->key.computes_depth)
+ track_arg(c, inst, 2, BRW_WRITEMASK_Z);
+ else
+ track_arg(c, inst, 2, 0); /* operand 2 unused: its refs are unlinked */
+ continue;
+ }
+
+ /* Lookup all the registers which were written by this
+ * instruction and get a mask of those that contribute to the output:
+ */
+ writemask = get_tracked_mask(c, inst);
+ if (!writemask) { /* nothing it writes is needed: release all of its sources */
+ GLuint arg;
+ for (arg = 0; arg < 3; arg++)
+ track_arg(c, inst, arg, 0);
+ continue;
+ }
+
+ read0 = 0;
+ read1 = 0;
+ read2 = 0;
+
+ /* Mark all inputs which contribute to the marked outputs:
+ */
+ switch (inst->opcode) {
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_TRUNC:
+ read0 = writemask;
+ break;
+
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MUL:
+ read0 = writemask;
+ read1 = writemask;
+ break;
+
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ read0 = writemask;
+ break;
+
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_CMP:
+ case TGSI_OPCODE_LRP:
+ read0 = writemask;
+ read1 = writemask;
+ read2 = writemask;
+ break;
+
+ case TGSI_OPCODE_XPD: /* each output channel reads the other two channels of both operands */
+ if (writemask & BRW_WRITEMASK_X) read0 |= BRW_WRITEMASK_YZ;
+ if (writemask & BRW_WRITEMASK_Y) read0 |= BRW_WRITEMASK_XZ;
+ if (writemask & BRW_WRITEMASK_Z) read0 |= BRW_WRITEMASK_XY;
+ read1 = read0;
+ break;
+
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_SCS:
+ case WM_CINTERP:
+ case WM_PIXELXY:
+ read0 = BRW_WRITEMASK_X;
+ break;
+
+ case TGSI_OPCODE_POW:
+ read0 = BRW_WRITEMASK_X;
+ read1 = BRW_WRITEMASK_X;
+ break;
+
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ read0 = get_texcoord_mask(inst->target);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ read0 = get_texcoord_mask(inst->target) | BRW_WRITEMASK_W; /* W is read in addition to the coords */
+ break;
+
+ case WM_WPOSXY:
+ read0 = writemask & BRW_WRITEMASK_XY;
+ break;
+
+ case WM_DELTAXY:
+ read0 = writemask & BRW_WRITEMASK_XY;
+ read1 = BRW_WRITEMASK_X;
+ break;
+
+ case WM_PIXELW:
+ read0 = BRW_WRITEMASK_X;
+ read1 = BRW_WRITEMASK_XY;
+ break;
+
+ case WM_LINTERP:
+ read0 = BRW_WRITEMASK_X;
+ read1 = BRW_WRITEMASK_XY;
+ break;
+
+ case WM_PINTERP:
+ read0 = BRW_WRITEMASK_X; /* interpolant */
+ read1 = BRW_WRITEMASK_XY; /* deltas */
+ read2 = BRW_WRITEMASK_W; /* pixel w */
+ break;
+
+ case TGSI_OPCODE_DP3:
+ read0 = BRW_WRITEMASK_XYZ;
+ read1 = BRW_WRITEMASK_XYZ;
+ break;
+
+ case TGSI_OPCODE_DPH:
+ read0 = BRW_WRITEMASK_XYZ;
+ read1 = BRW_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_DP4:
+ read0 = BRW_WRITEMASK_XYZW;
+ read1 = BRW_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_LIT:
+ read0 = BRW_WRITEMASK_XYW;
+ break;
+
+ case TGSI_OPCODE_DST:
+ case WM_FRONTFACING:
+ case TGSI_OPCODE_KILP:
+ default: /* read masks stay 0, so every source ref of these opcodes is unlinked below */
+ break;
+ }
+
+ track_arg(c, inst, 0, read0);
+ track_arg(c, inst, 1, read1);
+ track_arg(c, inst, 2, read2);
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass1");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
new file mode 100644
index 0000000000..19248b4519
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -0,0 +1,334 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_debug.h"
+#include "brw_wm.h"
+
+
+/* Use these to force spilling so that that functionality can be
+ * tested with known-good examples rather than having to construct new
+ * tests.
+ */
+#define TEST_PAYLOAD_SPILLS 0 /* nonzero: prealloc_reg() spills each payload value at once */
+#define TEST_DST_SPILLS 0 /* nonzero: brw_wm_pass2() spills all dsts (except for WM_PIXELXY) */
+
+static void spill_value(struct brw_wm_compile *c, /* forward decl: used by prealloc_reg() */
+ struct brw_wm_value *value);
+
+static void prealloc_reg(struct brw_wm_compile *c,
+ struct brw_wm_value *value,
+ GLuint reg)
+{
+ if (value->lastuse) { /* values without a recorded use get no register */
+ /* Bind this payload value to slot 'reg'. nextuse starts at zero and
+ * is corrected later by update_register_usage().
+ */
+ c->pass2_grf[reg].value = value;
+ c->pass2_grf[reg].nextuse = 0;
+
+ value->resident = &c->pass2_grf[reg];
+ value->hw_reg = brw_vec8_grf(reg*2, 0); /* slot n maps to hardware GRF 2*n */
+
+ if (TEST_PAYLOAD_SPILLS)
+ spill_value(c, value);
+ }
+}
+
+
+/* Mark every pass2 slot free, then pre-allocate the payload in order:
+ * depth regs, constants, one skipped slot, inputs; record the layout.
+ */
+static void init_registers( struct brw_wm_compile *c )
+{
+ GLuint reg = 0;
+ GLuint j;
+
+ for (j = 0; j < c->grf_limit; j++)
+ c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; /* same marker update_register_usage() uses for a released slot */
+
+ /* Pre-allocate incoming payload regs:
+ */
+ for (j = 0; j < c->key.nr_depth_regs; j++)
+ prealloc_reg(c, &c->payload.depth[j], reg++);
+
+ for (j = 0; j < c->nr_creg; j++)
+ prealloc_reg(c, &c->creg[j], reg++);
+
+ reg++; /* XXX: skip over position output */
+
+ /* XXX: currently just hope the VS outputs line up with FS inputs:
+ */
+ for (j = 0; j < c->key.nr_inputs; j++)
+ prealloc_reg(c, &c->payload.input_interp[j], reg++);
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2; /* +1: presumably the skipped position slot -- confirm */
+ c->prog_data.curb_read_length = c->nr_creg * 2;
+
+ /* Note this allocation (each slot counts as two hardware GRFs):
+ */
+ c->max_wm_grf = reg * 2;
+}
+
+
+/* Refresh nextuse for each stale slot (nextuse < thisinsn), releasing
+ * those whose value is already past its last use. */
+static void update_register_usage(struct brw_wm_compile *c,
+ GLuint thisinsn)
+{
+ GLuint i;
+
+ for (i = 1; i < c->grf_limit; i++) { /* slot 0 is skipped, as in search_contiguous_regs() */
+ struct brw_wm_grf *grf = &c->pass2_grf[i];
+
+ /* Only search those which can change:
+ */
+ if (grf->nextuse < thisinsn) {
+ const struct brw_wm_ref *ref = grf->value->lastuse;
+
+ /* Has last use of value been passed?
+ */
+ if (ref->insn < thisinsn) {
+ grf->value->resident = 0;
+ grf->value = 0;
+ grf->nextuse = BRW_WM_MAX_INSN;
+ }
+ else {
+ /* Else walk the prevuse chain back to the earliest use that is
+ * not before thisinsn: */
+ while (ref->prevuse && ref->prevuse->insn >= thisinsn)
+ ref = ref->prevuse;
+
+ grf->nextuse = ref->insn;
+ }
+ }
+ }
+}
+
+
+static void spill_value(struct brw_wm_compile *c,
+ struct brw_wm_value *value)
+{ /* Give 'value' a scratch slot if it has none, then release its register. */
+ /* Allocate a spill slot. Note that allocations start from 0x40 -
+ * the first slot is reserved to mean "undef" in brw_wm_emit.c
+ */
+ if (!value->spill_slot) {
+ c->last_scratch += 0x40;
+ value->spill_slot = c->last_scratch;
+ }
+
+ /* The spill will be done in brw_wm_emit.c immediately after the
+ * value is calculated, so we can just take this reg without any
+ * further work.
+ */
+ value->resident->value = NULL; /* value->resident is dereferenced unchecked: must be non-NULL */
+ value->resident->nextuse = BRW_WM_MAX_INSN;
+ value->resident = NULL;
+}
+
+
+
+/* Search for the run of 'nr' contiguous regs whose nearest next use is
+ * most distant (free regs count as very distant), spill whatever still
+ * lives there, and return the index of the first reg in the run.
+ * TODO: implement spill-to-reg so that we can rearrange discontiguous
+ * free regs and then spill the oldest non-free regs in sequence.
+ * This would mean inserting instructions in this pass.
+ */
+static GLuint search_contiguous_regs(struct brw_wm_compile *c,
+                                     GLuint nr,
+                                     GLuint thisinsn)
+{
+   struct brw_wm_grf *grf = c->pass2_grf;
+   GLuint furthest = 0;
+   GLuint reg = 0;
+   GLuint i, j;
+
+   /* Start search at 1: r0 is special and can't be used or spilled.
+    * Only consider runs that fit entirely below grf_limit. */
+   for (i = 1; i + nr <= c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
+      GLuint group_nextuse = BRW_WM_MAX_INSN;
+
+      for (j = 0; j < nr; j++) {
+         if (grf[i+j].nextuse < group_nextuse)
+            group_nextuse = grf[i+j].nextuse;
+      }
+
+      if (group_nextuse > furthest) {
+         furthest = group_nextuse;
+         reg = i;
+      }
+   }
+
+   assert(furthest != thisinsn);
+
+   /* Any non-empty regs will need to be spilled:
+    */
+   for (j = 0; j < nr; j++)
+      if (grf[reg+j].value)
+         spill_value(c, grf[reg+j].value);
+
+   return reg;
+}
+
+
+static void alloc_contiguous_dest(struct brw_wm_compile *c,
+ struct brw_wm_value *dst[],
+ GLuint nr,
+ GLuint thisinsn)
+{ /* Put dst[0..nr-1] in consecutive slots picked by search_contiguous_regs(). */
+ GLuint reg = search_contiguous_regs(c, nr, thisinsn);
+ GLuint i;
+
+ for (i = 0; i < nr; i++) {
+ if (!dst[i]) {
+ /* Need to grab a dummy value in TEX case. Don't introduce
+ * it into the tracking scheme.
+ */
+ dst[i] = &c->vreg[c->nr_vreg++];
+ }
+ else {
+ assert(!dst[i]->resident);
+ assert(c->pass2_grf[reg+i].nextuse != thisinsn);
+
+ c->pass2_grf[reg+i].value = dst[i];
+ c->pass2_grf[reg+i].nextuse = thisinsn;
+
+ dst[i]->resident = &c->pass2_grf[reg+i];
+ }
+
+ dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0); /* slot n maps to hardware GRF 2*n */
+ }
+
+ if ((reg+nr)*2 > c->max_wm_grf) /* grow the high-water mark of GRFs in use */
+ c->max_wm_grf = (reg+nr) * 2;
+}
+
+
+static void load_args(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst)
+{ /* Make every source ref of 'inst' resident and point its hw_reg at that slot. */
+ GLuint thisinsn = inst - c->instruction; /* index of inst within c->instruction[] */
+ GLuint i,j;
+
+ for (i = 0; i < 3; i++) { /* up to three source arguments */
+ for (j = 0; j < 4; j++) { /* four refs per argument */
+ struct brw_wm_ref *ref = inst->src[i][j];
+
+ if (ref) {
+ if (!ref->value->resident) {
+ /* Need to bring the value in from scratch space. The code for
+ * this will be done in brw_wm_emit.c, here we just do the
+ * register allocation and mark the ref as requiring a fill.
+ */
+ GLuint reg = search_contiguous_regs(c, 1, thisinsn);
+
+ c->pass2_grf[reg].value = ref->value;
+ c->pass2_grf[reg].nextuse = thisinsn;
+
+ ref->value->resident = &c->pass2_grf[reg];
+
+ /* Note that a fill is required:
+ */
+ ref->unspill_reg = reg*2;
+ }
+
+ /* Adjust the hw_reg to point at the value's current location:
+ */
+ assert(ref->value == ref->value->resident->value);
+ ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2; /* slot index * 2 = hardware GRF */
+ }
+ }
+ }
+}
+
+
+
+/* Step 3: Work forwards once again. Perform register allocations,
+ * taking into account instructions like TEX which require contiguous
+ * result registers. Where necessary spill registers to scratch space
+ * and reload later.
+ */
+void brw_wm_pass2( struct brw_wm_compile *c )
+{
+ GLuint insn;
+ GLuint i;
+
+ init_registers(c);
+
+ for (insn = 0; insn < c->nr_insns; insn++) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+
+ /* Update registers' nextuse values:
+ */
+ update_register_usage(c, insn);
+
+ /* May need to unspill some args.
+ */
+ load_args(c, inst);
+
+ /* Allocate registers to hold results:
+ */
+ switch (inst->opcode) {
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXP:
+ alloc_contiguous_dest(c, inst->dst, 4, insn); /* all four results in consecutive slots */
+ break;
+
+ default:
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ assert(inst->dst[i]);
+ alloc_contiguous_dest(c, &inst->dst[i], 1, insn); /* each written dst placed independently */
+ }
+ }
+ break;
+ }
+
+ if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { /* test mode only, see TEST_DST_SPILLS */
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i])
+ spill_value(c, inst->dst[i]);
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass2");
+ }
+
+ c->state = PASS2_DONE;
+
+ if (BRW_DEBUG & DEBUG_WM) { /* second dump, taken after the state change above */
+ brw_wm_print_program(c, "pass2/done");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
new file mode 100644
index 0000000000..a8bc31c9ce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -0,0 +1,229 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+#include "util/u_format.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_screen.h"
+
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+
+
+static enum pipe_error
+upload_default_color( struct brw_context *brw,
+ const GLfloat *color,
+ struct brw_winsys_buffer **bo_out )
+{ /* Store 'color' as BRW_SAMPLER_DEFAULT_COLOR cache data; the buffer comes back in *bo_out. */
+ struct brw_sampler_default_color sdc;
+ enum pipe_error ret;
+
+ COPY_4V(sdc.color, color);
+
+ ret = brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
+ NULL, 0, bo_out ); /* NULL, 0: presumably "no relocs" -- confirm against brw_cache_data() */
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+struct wm_sampler_key { /* filled by brw_wm_sampler_populate_key() */
+ int sampler_count; /* MIN2(curr.num_textures, curr.num_samplers) */
+ struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; /* [0, sampler_count) filled in; the rest stay zeroed */
+};
+
+
+/** Sets up the cache key for sampler state for all texture units */
+static void
+brw_wm_sampler_populate_key(struct brw_context *brw,
+ struct wm_sampler_key *key)
+{
+ int i;
+
+ memset(key, 0, sizeof(*key)); /* upload_wm_samplers() passes the whole struct as the cache key */
+
+ key->sampler_count = MIN2(brw->curr.num_textures,
+ brw->curr.num_samplers);
+
+ for (i = 0; i < key->sampler_count; i++) {
+ const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
+ const struct brw_sampler *sampler = brw->curr.sampler[i];
+ struct brw_sampler_state *entry = &key->sampler[i];
+
+ entry->ss0 = sampler->ss0;
+ entry->ss1 = sampler->ss1;
+ entry->ss2.default_color_pointer = 0; /* reloc */
+ entry->ss3 = sampler->ss3;
+
+ /* Cube-maps on 965 and later must use the same wrap mode for all 3
+ * coordinate dimensions. Further, only CUBE and CLAMP are valid.
+ */
+ if (tex->base.target == PIPE_TEXTURE_CUBE) {
+ if (FALSE && /* disabled: the CUBE branch below is never taken */
+ (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST ||
+ sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)) {
+ entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ } else {
+ entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ }
+ } else if (tex->base.target == PIPE_TEXTURE_1D) {
+ /* There's a bug in 1D texture sampling - it actually pays
+ * attention to the wrap_t value, though it should not.
+ * Override the wrap_t value here to GL_REPEAT to keep
+ * any nonexistent border pixels from floating in.
+ */
+ entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ }
+ }
+}
+
+
+static enum pipe_error
+brw_wm_sampler_update_default_colors(struct brw_context *brw)
+{
+   /* Upload one default (border) color per active sampler unit into
+    * brw->wm.sdc_bo[]; stops at and returns the first upload error.
+    */
+   enum pipe_error ret;
+   int nr = MIN2(brw->curr.num_textures,
+                 brw->curr.num_samplers);
+   int i;
+
+   for (i = 0; i < nr; i++) {
+      const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
+      const struct brw_sampler *sampler = brw->curr.sampler[i];
+      /* Declared at loop scope so the pointer handed to
+       * upload_default_color() never outlives the array it targets.
+       */
+      float bordercolor[4];
+      const float *bc = sampler->border_color;
+
+      /* GL specs that border color for depth textures is taken from the
+       * R channel, while the hardware uses A.  Spam R into all the
+       * channels for safety.
+       */
+      if (util_format_is_depth_or_stencil(tex->base.format)) {
+         bordercolor[0] = sampler->border_color[0];
+         bordercolor[1] = sampler->border_color[0];
+         bordercolor[2] = sampler->border_color[0];
+         bordercolor[3] = sampler->border_color[0];
+         bc = bordercolor;
+      }
+
+      ret = upload_default_color(brw, bc,
+                                 &brw->wm.sdc_bo[i]);
+      if (ret)
+         return ret;
+   }
+
+   return PIPE_OK;
+}
+
+
+
+/* All samplers must be uploaded in a single contiguous array.  Returns
+ * PIPE_OK, or the first error reported by one of the uploads. */
+static int upload_wm_samplers( struct brw_context *brw )
+{
+   struct wm_sampler_key key;
+   struct brw_winsys_reloc reloc[BRW_MAX_TEX_UNIT];
+   enum pipe_error ret;
+   int i;
+
+   /* Relocs below target wm.sdc_bo[]; bail out if refreshing them failed. */
+   ret = brw_wm_sampler_update_default_colors(brw);
+   if (ret)
+      return ret;
+   brw_wm_sampler_populate_key(brw, &key);
+
+   if (brw->wm.sampler_count != key.sampler_count) {
+      brw->wm.sampler_count = key.sampler_count;
+      brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+   }
+
+   if (brw->wm.sampler_count == 0) {
+      bo_reference(&brw->wm.sampler_bo, NULL);
+      return PIPE_OK;
+   }
+
+   /* Emit SDC relocations */
+   for (i = 0; i < key.sampler_count; i++) {
+      make_reloc( &reloc[i],
+                  BRW_USAGE_SAMPLER,
+                  0,
+                  i * sizeof(struct brw_sampler_state) +
+                  offsetof(struct brw_sampler_state, ss2),
+                  brw->wm.sdc_bo[i]);
+   }
+
+   if (brw_search_cache(&brw->cache, BRW_SAMPLER,
+                        &key, sizeof(key),
+                        reloc, key.sampler_count,
+                        NULL,
+                        &brw->wm.sampler_bo))
+      return PIPE_OK;
+
+   /* If we didn't find it in the cache, compute the state and put it in the
+    * cache. */
+   ret = brw_upload_cache(&brw->cache, BRW_SAMPLER,
+                          &key, sizeof(key),
+                          reloc, key.sampler_count,
+                          &key.sampler, sizeof(key.sampler),
+                          NULL, NULL,
+                          &brw->wm.sampler_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_wm_samplers = { /* tracked-state entry for upload_wm_samplers() */
+ .dirty = {
+ .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLERS, /* presumably the flags that trigger .prepare -- confirm */
+ .brw = 0,
+ .cache = 0
+ },
+ .prepare = upload_wm_samplers,
+};
+
+
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
new file mode 100644
index 0000000000..ee970ac75b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -0,0 +1,339 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+
+struct brw_wm_unit_key { /* filled by wm_unit_populate_key(); passed whole to the cache as the key */
+ unsigned int total_grf, total_scratch; /* total_scratch is stored rounded up to 1024 */
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+ unsigned int dispatch_grf_start_reg;
+
+ unsigned int curbe_offset;
+ unsigned int urb_size; /* stored in the key but not read by wm_unit_create_from_key() */
+
+ unsigned int max_threads;
+
+ unsigned int nr_surfaces, sampler_count;
+ GLboolean uses_depth, computes_depth, uses_kill, has_flow_control;
+ GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+ GLfloat offset_units, offset_factor;
+};
+
+static void
+wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
+{ /* Gather everything the WM unit state depends on into a zero-initialised key. */
+ const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
+
+ memset(key, 0, sizeof(*key));
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ key->max_threads = 1;
+ else {
+ /* WM maximum threads is number of EUs times number of threads per EU. */
+ if (BRW_IS_IGDNG(brw))
+ key->max_threads = 12 * 6;
+ else if (BRW_IS_G4X(brw))
+ key->max_threads = 10 * 5;
+ else
+ key->max_threads = 8 * 4;
+ }
+
+ /* CACHE_NEW_WM_PROG */
+ key->total_grf = brw->wm.prog_data->total_grf;
+ key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
+ key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+ key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024);
+
+ /* BRW_NEW_URB_FENCE -- NOTE(review): not listed in brw_wm_unit's dirty flags */
+ key->urb_size = brw->urb.vsize;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ key->curbe_offset = brw->curbe.wm_start;
+
+ /* BRW_NEW_NR_WM_SURFACES */
+ key->nr_surfaces = brw->wm.nr_surfaces;
+
+ /* CACHE_NEW_SAMPLER */
+ key->sampler_count = brw->wm.sampler_count;
+
+ /* PIPE_NEW_RAST */
+ key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable;
+
+ /* PIPE_NEW_FRAGMENT_SHADER */
+ key->uses_depth = fp->uses_depth;
+ key->computes_depth = fp->info.writes_z;
+
+ /* PIPE_NEW_DEPTH_BUFFER
+ *
+ * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+ * Depth field.
+ */
+ if (brw->curr.fb.zsbuf == NULL)
+ key->computes_depth = 0;
+
+ /* PIPE_NEW_DEPTH_STENCIL_ALPHA */
+ key->uses_kill = (fp->info.uses_kill ||
+ brw->curr.zstencil->cc3.alpha_test);
+
+ key->has_flow_control = fp->has_flow_control;
+
+ /* temporary sanity check assertion */
+ assert(fp->has_flow_control == 0);
+
+ /* PIPE_NEW_QUERY */
+ key->stats_wm = (brw->query.stats_wm != 0);
+
+ /* PIPE_NEW_RAST */
+ key->line_stipple = brw->curr.rast->templ.line_stipple_enable;
+
+
+ key->offset_enable = (brw->curr.rast->templ.offset_cw ||
+ brw->curr.rast->templ.offset_ccw);
+
+ key->offset_units = brw->curr.rast->templ.offset_units;
+ key->offset_factor = brw->curr.rast->templ.offset_scale;
+}
+
+/**
+ * Setup wm hardware state. See page 225 of Volume 2
+ */
+static enum pipe_error
+wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
+ struct brw_winsys_reloc *reloc,
+ unsigned nr_reloc,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_wm_unit_state wm;
+ enum pipe_error ret;
+
+ memset(&wm, 0, sizeof(wm));
+
+ wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* blocks of 16 GRFs, minus one */
+ wm.thread0.kernel_start_pointer = 0; /* reloc */
+ wm.thread1.depth_coef_urb_read_offset = 1;
+ wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ if (key->total_scratch != 0) {
+ wm.thread2.scratch_space_base_pointer = 0; /* reloc */
+ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; /* total_scratch is 1024-aligned by wm_unit_populate_key() */
+ } else {
+ wm.thread2.scratch_space_base_pointer = 0;
+ wm.thread2.per_thread_scratch_space = 0;
+ }
+
+ wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
+ wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ wm.thread3.urb_entry_read_offset = 0;
+ wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.wm4.sampler_count = 0; /* hardware requirement */
+ else
+ wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+
+ /* reloc */
+ wm.wm4.sampler_state_pointer = 0;
+
+ wm.wm5.program_uses_depth = key->uses_depth;
+ wm.wm5.program_computes_depth = key->computes_depth;
+ wm.wm5.program_uses_killpixel = key->uses_kill;
+
+ if (key->has_flow_control)
+ wm.wm5.enable_8_pix = 1;
+ else
+ wm.wm5.enable_16_pix = 1;
+
+ wm.wm5.max_threads = key->max_threads - 1;
+ wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
+ wm.wm5.legacy_line_rast = 0;
+ wm.wm5.legacy_global_depth_bias = 0;
+ wm.wm5.early_depth_test = 1; /* never need to disable */
+ wm.wm5.line_aa_region_width = 0;
+ wm.wm5.line_endcap_aa_region_width = 1;
+
+ wm.wm5.polygon_stipple = key->polygon_stipple;
+
+ if (key->offset_enable) {
+ wm.wm5.depth_offset = 1;
+ /* Something weird going on with legacy_global_depth_bias,
+ * offset_constant, scaling and MRD. This value passes glean
+ * but gives some odd results elsewhere (eg. the
+ * quad-offset-units test).
+ */
+ wm.global_depth_offset_constant = key->offset_units * 2;
+
+ /* This is the only value that passes glean:
+ */
+ wm.global_depth_offset_scale = key->offset_factor;
+ }
+
+ wm.wm5.line_stipple = key->line_stipple;
+
+ if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm)
+ wm.wm4.stats_enable = 1;
+
+ ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+ key, sizeof(*key),
+ reloc, nr_reloc,
+ &wm, sizeof(wm),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+static enum pipe_error upload_wm_unit( struct brw_context *brw )
+{ /* Build the key and relocs, then reuse a cached WM unit state or create a new one. */
+ struct brw_wm_unit_key key;
+ struct brw_winsys_reloc reloc[3]; /* at most three: program, scratch, sampler state */
+ unsigned nr_reloc = 0;
+ enum pipe_error ret;
+ unsigned grf_reg_count;
+ unsigned per_thread_scratch_space;
+ unsigned stats_enable;
+ unsigned sampler_count;
+
+ wm_unit_populate_key(brw, &key);
+
+
+ /* Allocate the necessary scratch space if we haven't already. Don't
+ * bother reducing the allocation later, since we use scratch so
+ * rarely.
+ */
+ assert(key.total_scratch <= 12 * 1024);
+ if (key.total_scratch) {
+ GLuint total = key.total_scratch * key.max_threads;
+
+ /* Do we need a new buffer:
+ */
+ if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size)
+ bo_reference(&brw->wm.scratch_bo, NULL);
+
+ if (brw->wm.scratch_bo == NULL) {
+ ret = brw->sws->bo_alloc(brw->sws,
+ BRW_BUFFER_TYPE_SHADER_SCRATCH,
+ total,
+ 4096,
+ &brw->wm.scratch_bo);
+ if (ret)
+ return ret;
+ }
+ }
+
+
+ /* XXX: temporary: same formulas as wm_unit_create_from_key(), reused
+ * below as relocation deltas. */
+ grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+ per_thread_scratch_space = key.total_scratch / 1024 - 1; /* wraps when total_scratch == 0; only used when it is nonzero */
+ stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
+ sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4;
+
+ /* Emit WM program relocation */
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_STATE,
+ grf_reg_count << 1,
+ offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_bo);
+
+ /* Emit scratch space relocation */
+ if (key.total_scratch != 0) {
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_SCRATCH,
+ per_thread_scratch_space,
+ offsetof(struct brw_wm_unit_state, thread2),
+ brw->wm.scratch_bo);
+ }
+
+ /* Emit sampler state relocation */
+ if (key.sampler_count != 0) {
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_STATE,
+ stats_enable | (sampler_count << 2),
+ offsetof(struct brw_wm_unit_state, wm4),
+ brw->wm.sampler_bo);
+ }
+
+
+ if (brw_search_cache(&brw->cache, BRW_WM_UNIT,
+ &key, sizeof(key),
+ reloc, nr_reloc,
+ NULL,
+ &brw->wm.state_bo))
+ return PIPE_OK;
+
+ ret = wm_unit_create_from_key(brw, &key,
+ reloc, nr_reloc,
+ &brw->wm.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_wm_unit = { /* tracked-state entry for upload_wm_unit() */
+ .dirty = {
+ .mesa = (PIPE_NEW_FRAGMENT_SHADER |
+ PIPE_NEW_DEPTH_BUFFER |
+ PIPE_NEW_RAST |
+ PIPE_NEW_DEPTH_STENCIL_ALPHA |
+ PIPE_NEW_QUERY),
+
+ .brw = (BRW_NEW_CURBE_OFFSETS | /* NOTE(review): wm_unit_populate_key() also reads brw->urb.vsize */
+ BRW_NEW_NR_WM_SURFACES),
+
+ .cache = (CACHE_NEW_WM_PROG |
+ CACHE_NEW_SAMPLER)
+ },
+ .prepare = upload_wm_unit,
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
new file mode 100644
index 0000000000..f92b8198ed
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -0,0 +1,294 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_format.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_screen.h"
+
+
+
+
+static enum pipe_error
+brw_update_texture_surface( struct brw_context *brw,
+ struct brw_texture *tex,
+ struct brw_winsys_buffer **bo_out)
+{ /* Look up or upload the surface state for 'tex'; the buffer comes back in *bo_out. */
+ struct brw_winsys_reloc reloc[1];
+ enum pipe_error ret;
+
+ /* Emit relocation to surface contents */
+ make_reloc(&reloc[0],
+ BRW_USAGE_SAMPLER,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ tex->bo);
+
+ if (brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &tex->ss, sizeof tex->ss,
+ reloc, Elements(reloc),
+ NULL,
+ bo_out))
+ return PIPE_OK;
+
+ ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+ &tex->ss, sizeof tex->ss, /* tex->ss serves as the cache key ... */
+ reloc, Elements(reloc),
+ &tex->ss, sizeof tex->ss, /* ... and as the uploaded data */
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+
+
+
+
+
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static enum pipe_error
+brw_update_render_surface(struct brw_context *brw,
+ struct brw_surface *surface,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0;
+ struct brw_surface_state ss;
+ struct brw_winsys_reloc reloc[1];
+ enum pipe_error ret;
+
+ /* XXX: we will only be rendering to this surface:
+ */
+ make_reloc(&reloc[0],
+ BRW_USAGE_RENDER_TARGET,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ surface->bo);
+
+ /* Surfaces are potentially shared between contexts, so can't
+ * scribble the in-place ss0 value in the surface.
+ */
+ memcpy(&ss, &surface->ss, sizeof ss);
+
+ ss.ss0.color_blend = blend_ss0.color_blend; /* blend enable and write-disables come from the bound blend state */
+ ss.ss0.writedisable_blue = blend_ss0.writedisable_blue;
+ ss.ss0.writedisable_green = blend_ss0.writedisable_green;
+ ss.ss0.writedisable_red = blend_ss0.writedisable_red;
+ ss.ss0.writedisable_alpha = blend_ss0.writedisable_alpha;
+
+ if (brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &ss, sizeof(ss),
+ reloc, Elements(reloc),
+ NULL,
+ bo_out))
+ return PIPE_OK;
+
+ ret = brw_upload_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &ss, sizeof ss, /* the patched copy is the cache key ... */
+ reloc, Elements(reloc),
+ &ss, sizeof ss, /* ... and the uploaded data */
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+/**
+ * Constructs the binding table for the WM surface state, which maps unit
+ * numbers to surface state objects.
+ */
+static enum pipe_error
+brw_wm_get_binding_table(struct brw_context *brw,
+ struct brw_winsys_buffer **bo_out )
+{
+ enum pipe_error ret;
+ struct brw_winsys_reloc reloc[BRW_WM_MAX_SURF];
+ uint32_t data[BRW_WM_MAX_SURF];
+ GLuint nr_relocs = 0;
+ GLuint data_size = brw->wm.nr_surfaces * sizeof data[0];
+ int i;
+
+ assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+ assert(brw->wm.nr_surfaces > 0);
+
+ /* Emit binding table relocations to surface state
+ */
+ for (i = 0; i < brw->wm.nr_surfaces; i++) {
+ if (brw->wm.surf_bo[i]) { /* unbound entries get no reloc */
+ make_reloc(&reloc[nr_relocs++],
+ BRW_USAGE_STATE,
+ 0,
+ i * sizeof(GLuint),
+ brw->wm.surf_bo[i]);
+ }
+ }
+
+ /* Note there is no key for this search beyond the values in the
+ * relocation array:
+ */
+ if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ reloc, nr_relocs,
+ NULL,
+ bo_out))
+ return PIPE_OK;
+
+ /* Upload zero data, will all be overwritten with relocation
+ * offsets:
+ */
+ for (i = 0; i < brw->wm.nr_surfaces; i++)
+ data[i] = 0;
+
+ ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ reloc, nr_relocs,
+ data, data_size,
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+static enum pipe_error prepare_wm_surfaces(struct brw_context *brw )
+{ /* Refresh brw->wm.surf_bo[] for color buffers and textures, then rebuild the binding table. */
+ enum pipe_error ret;
+ int nr_surfaces = 0; /* highest binding-table index in use, plus one */
+ GLuint i;
+
+ /* PIPE_NEW_COLOR_BUFFERS | PIPE_NEW_BLEND
+ *
+ * Update surfaces for drawing buffers. Mixes in colormask and
+ * blend state.
+ *
+ * XXX: no color buffer case
+ */
+ for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
+ ret = brw_update_render_surface(brw,
+ brw_surface(brw->curr.fb.cbufs[i]),
+ &brw->wm.surf_bo[BTI_COLOR_BUF(i)]);
+ if (ret)
+ return ret;
+
+ nr_surfaces = BTI_COLOR_BUF(i) + 1;
+ }
+
+
+
+ /* PIPE_NEW_FRAGMENT_CONSTANTS (update currently compiled out)
+ */
+#if 0
+ if (brw->curr.fragment_constants) {
+ ret = brw_update_fragment_constant_surface(
+ brw,
+ brw->curr.fragment_constants,
+ &brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS]);
+
+ if (ret)
+ return ret;
+
+ nr_surfaces = BTI_FRAGMENT_CONSTANTS + 1;
+ }
+ else {
+ bo_reference(&brw->wm.surf_bo[SURF_FRAG_CONSTANTS], NULL);
+ }
+#endif
+
+
+ /* PIPE_NEW_BOUND_TEXTURES
+ */
+ for (i = 0; i < brw->curr.num_textures; i++) {
+ ret = brw_update_texture_surface(brw,
+ brw_texture(brw->curr.texture[i]),
+ &brw->wm.surf_bo[BTI_TEXTURE(i)]);
+ if (ret)
+ return ret;
+
+ nr_surfaces = BTI_TEXTURE(i) + 1;
+ }
+
+ /* Clear any inactive entries:
+ */
+ for (i = brw->curr.fb.nr_cbufs; i < BRW_MAX_DRAW_BUFFERS; i++)
+ bo_reference(&brw->wm.surf_bo[BTI_COLOR_BUF(i)], NULL);
+
+ if (!brw->curr.fragment_constants)
+ bo_reference(&brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS], NULL);
+
+ /* XXX: no pipe_max_textures define?? */
+ for (i = brw->curr.num_textures; i < PIPE_MAX_SAMPLERS; i++)
+ bo_reference(&brw->wm.surf_bo[BTI_TEXTURE(i)], NULL);
+
+ if (brw->wm.nr_surfaces != nr_surfaces) {
+ brw->wm.nr_surfaces = nr_surfaces;
+ brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; /* wm_unit_populate_key() reads wm.nr_surfaces */
+ }
+
+ ret = brw_wm_get_binding_table(brw, &brw->wm.bind_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_wm_surfaces = { /* tracked-state entry for prepare_wm_surfaces() */
+ .dirty = {
+ .mesa = (PIPE_NEW_COLOR_BUFFERS |
+ PIPE_NEW_BOUND_TEXTURES |
+ PIPE_NEW_FRAGMENT_CONSTANTS |
+ PIPE_NEW_BLEND),
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_WM_SURFACES),
+ .cache = 0
+ },
+ .prepare = prepare_wm_surfaces,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
new file mode 100644
index 0000000000..3166958bad
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -0,0 +1,1790 @@
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_decode.c
+ * This file contains code to print out batchbuffer contents in a
+ * human-readable format.
+ *
+ * The current version only supports i915 packets, and only pretty-prints a
+ * subset of them. The intention is for it to make just a best attempt to
+ * decode, but never crash in the process.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "intel_decode.h"
+
+/*#include "intel_chipset.h"*/
+#define IS_965(x) 1 /* XXX */
+#define IS_9XX(x) 1 /* XXX */
+ /* BUFFER_FAIL: report that a packet needs more dwords than are available,
+  * bump the failure counter and return from the *calling* function.
+  * The macro is not hygienic: besides its arguments it uses the caller's
+  * `count` and `failures` variables and the file-scope `out` stream, and the
+  * value it returns is `count`.  It can therefore only be used inside the
+  * decode_* functions that declare those names.
+  */
+ #define BUFFER_FAIL(_count, _len, _name) do { \
+ fprintf(out, "Buffer size too small in %s (%d < %d)\n", \
+ (_name), (_count), (_len)); \
+ (*failures)++; \
+ return count; \
+} while (0)
+ /* out: stream every decoder here prints to (it is not assigned within this
+  * part of the file).  saved_s2 / saved_s4: the S2 and S4 dwords last seen by
+  * decode_3d_1d in a 3DSTATE_LOAD_STATE_IMMEDIATE_1 packet; the *_set flags
+  * record whether a value has been captured.  decode_3d_primitive reads them
+  * to interpret inline vertex data.
+  */
+ static FILE *out;
+ static uint32_t saved_s2 = 0, saved_s4 = 0;
+ static char saved_s2_set = 0, saved_s4_set = 0;
+
+ /**
+  * Return the float whose object representation equals the bits of intval.
+  * No numeric conversion takes place; the 32 bits are copied as-is.
+  */
+ static float
+ int_as_float(uint32_t intval)
+ {
+     float result;
+
+     memcpy(&result, &intval, sizeof(result));
+     return result;
+ }
+
+ /**
+  * Print one line of decoder output for dword data[index].
+  *
+  * The line starts with the dword's offset (hw_offset plus four bytes per
+  * index) and its raw value in hex, followed by the caller's printf-style
+  * annotation.  Dwords after the first get one extra padding string so they
+  * appear indented under the packet header.
+  *
+  * @param data       packet being decoded
+  * @param hw_offset  offset printed for data[0]
+  * @param index      dword within the packet to print
+  * @param fmt        printf-style format for the annotation, then its arguments
+  */
+ static void
+ instr_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+           char *fmt, ...)
+ {
+     const char *pad = (index != 0) ? " " : "";
+     uint32_t dword_offset = hw_offset + index * 4;
+     va_list args;
+
+     fprintf(out, "0x%08x: 0x%08x:%s ", dword_offset, data[index], pad);
+
+     va_start(args, fmt);
+     vfprintf(out, fmt, args);
+     va_end(args);
+ }
+
+
+ /**
+  * Decode a single MI_* packet at data[0] and print it, one line per dword.
+  *
+  * The opcode is taken from bits 28:23 of data[0] and looked up in a local
+  * table.  Multi-dword packets carry their length, minus two, in the low bits
+  * selected by len_mask.  A length outside [min_len, max_len] is reported,
+  * but decoding continues with the length the packet claims.
+  *
+  * @param data       first dword of the packet
+  * @param count      number of dwords available starting at data
+  * @param hw_offset  offset printed for data[0]
+  * @param failures   incremented for unknown opcodes and truncated packets
+  * @return number of dwords consumed; an unknown opcode consumes 1 and a
+  *         truncated packet returns count (see BUFFER_FAIL)
+  */
+ static int
+ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+ {
+     static const struct {
+         uint32_t opcode;
+         int len_mask;
+         int min_len;
+         int max_len;
+         char *name;
+     } opcodes_mi[] = {
+         { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
+         { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+         { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
+         { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
+         { 0x04, 0, 1, 1, "MI_FLUSH" },
+         /* With a zero len_mask the computed length of this three-dword
+          * packet was always 2: every MI_LOAD_REGISTER_IMM was flagged as
+          * bad and its last dword was then decoded as a separate packet.
+          * Use the same length field as the other multi-dword packets.
+          */
+         { 0x22, 0x3f, 3, 3, "MI_LOAD_REGISTER_IMM" },
+         { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
+         { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
+         { 0x00, 0, 1, 1, "MI_NOOP" },
+         { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" },
+         { 0x07, 0, 1, 1, "MI_REPORT_HEAD" },
+         { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" },
+         { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" },
+         { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" },
+         { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" },
+         { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" },
+         { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
+     };
+     const uint32_t packet_op = (data[0] & 0x1f800000) >> 23;
+     unsigned int opcode;
+
+     for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]);
+          opcode++) {
+         unsigned int len = 1, i;
+
+         if (packet_op != opcodes_mi[opcode].opcode)
+             continue;
+
+         instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[opcode].name);
+         if (opcodes_mi[opcode].max_len > 1) {
+             len = (data[0] & opcodes_mi[opcode].len_mask) + 2;
+             if (len < opcodes_mi[opcode].min_len ||
+                 len > opcodes_mi[opcode].max_len)
+             {
+                 fprintf(out, "Bad length (%d) in %s, [%d, %d]\n",
+                         len, opcodes_mi[opcode].name,
+                         opcodes_mi[opcode].min_len,
+                         opcodes_mi[opcode].max_len);
+             }
+         }
+
+         /* Dump the remaining dwords raw; stop if the buffer ends first. */
+         for (i = 1; i < len; i++) {
+             if (i >= count)
+                 BUFFER_FAIL(count, len, opcodes_mi[opcode].name);
+             instr_out(data, hw_offset, i, "dword %d\n", i);
+         }
+
+         return len;
+     }
+
+     instr_out(data, hw_offset, 0, "MI UNKNOWN\n");
+     (*failures)++;
+     return 1;
+ }
+
+ /**
+  * Decode a single 2D (blitter) packet at data[0] and print it.
+  *
+  * The opcode is bits 28:22 of data[0].  XY_COLOR_BLT (0x50) and
+  * XY_SRC_COPY_BLT (0x53) are pretty-printed field by field; every other
+  * opcode in the table is printed by name followed by a raw dump of its
+  * dwords.  The packet length is the low byte of data[0] plus two.
+  *
+  * @param data       first dword of the packet
+  * @param count      number of dwords available starting at data
+  * @param hw_offset  offset printed for data[0]
+  * @param failures   incremented for unknown opcodes and truncated packets
+  * @return number of dwords consumed; an unknown opcode consumes 1 and a
+  *         truncated packet returns count (see BUFFER_FAIL)
+  */
+ static int
+ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+ {
+     /* Names for the two-bit field in bits 25:24 of dword 1. */
+     static const char *const depth_names[4] = { "8", "565", "1555", "8888" };
+
+     static const struct {
+         uint32_t opcode;
+         int min_len;
+         int max_len;
+         char *name;
+     } opcodes_2d[] = {
+         { 0x40, 5, 5, "COLOR_BLT" },
+         { 0x43, 6, 6, "SRC_COPY_BLT" },
+         { 0x01, 8, 8, "XY_SETUP_BLT" },
+         { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" },
+         { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" },
+         { 0x24, 2, 2, "XY_PIXEL_BLT" },
+         { 0x25, 3, 3, "XY_SCANLINES_BLT" },
+         /* Was spelled "Y_TEXT_BLT". */
+         { 0x26, 4, 4, "XY_TEXT_BLT" },
+         { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" },
+         { 0x50, 6, 6, "XY_COLOR_BLT" },
+         { 0x51, 6, 6, "XY_PAT_BLT" },
+         { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" },
+         { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" },
+         { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" },
+         { 0x52, 9, 9, "XY_MONO_PAT_BLT" },
+         { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" },
+         { 0x53, 8, 8, "XY_SRC_COPY_BLT" },
+         { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" },
+         { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" },
+         { 0x55, 9, 9, "XY_FULL_BLT" },
+         /* This entry used to repeat opcode 0x55 and so could never match.
+          * NOTE(review): 0x74 is the opcode as recalled from the hardware
+          * documentation -- confirm against the PRM.
+          */
+         { 0x74, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" },
+         { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" },
+         { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" },
+         { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" },
+         { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" },
+     };
+     const uint32_t blt_op = (data[0] & 0x1fc00000) >> 22;
+     unsigned int opcode, len;
+     const char *format;
+
+     switch (blt_op) {
+     case 0x50:
+         instr_out(data, hw_offset, 0,
+                   "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
+                   (data[0] & (1 << 20)) ? "en" : "dis",
+                   (data[0] & (1 << 21)) ? "en" : "dis",
+                   (data[0] >> 11) & 1);
+
+         len = (data[0] & 0x000000ff) + 2;
+         if (len != 6)
+             fprintf(out, "Bad count in XY_COLOR_BLT\n");
+         /* All six dwords are read below regardless of the claimed length. */
+         if (count < 6)
+             BUFFER_FAIL(count, len, "XY_COLOR_BLT");
+
+         format = depth_names[(data[1] >> 24) & 0x3];
+
+         instr_out(data, hw_offset, 1, "format %s, pitch %d, "
+                   "clipping %sabled\n", format,
+                   (short)(data[1] & 0xffff),
+                   data[1] & (1 << 30) ? "en" : "dis");
+         instr_out(data, hw_offset, 2, "(%d,%d)\n",
+                   data[2] & 0xffff, data[2] >> 16);
+         instr_out(data, hw_offset, 3, "(%d,%d)\n",
+                   data[3] & 0xffff, data[3] >> 16);
+         instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]);
+         instr_out(data, hw_offset, 5, "color\n");
+         return len;
+     case 0x53:
+         instr_out(data, hw_offset, 0,
+                   "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
+                   "src tile %d, dst tile %d)\n",
+                   (data[0] & (1 << 20)) ? "en" : "dis",
+                   (data[0] & (1 << 21)) ? "en" : "dis",
+                   (data[0] >> 15) & 1,
+                   (data[0] >> 11) & 1);
+
+         len = (data[0] & 0x000000ff) + 2;
+         if (len != 8)
+             fprintf(out, "Bad count in XY_SRC_COPY_BLT\n");
+         /* All eight dwords are read below regardless of the claimed length. */
+         if (count < 8)
+             BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT");
+
+         format = depth_names[(data[1] >> 24) & 0x3];
+
+         instr_out(data, hw_offset, 1, "format %s, dst pitch %d, "
+                   "clipping %sabled\n", format,
+                   (short)(data[1] & 0xffff),
+                   data[1] & (1 << 30) ? "en" : "dis");
+         instr_out(data, hw_offset, 2, "dst (%d,%d)\n",
+                   data[2] & 0xffff, data[2] >> 16);
+         instr_out(data, hw_offset, 3, "dst (%d,%d)\n",
+                   data[3] & 0xffff, data[3] >> 16);
+         instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]);
+         instr_out(data, hw_offset, 5, "src (%d,%d)\n",
+                   data[5] & 0xffff, data[5] >> 16);
+         instr_out(data, hw_offset, 6, "src pitch %d\n",
+                   (short)(data[6] & 0xffff));
+         instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]);
+         return len;
+     }
+
+     /* Everything else: name from the table plus a raw dword dump. */
+     for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]);
+          opcode++) {
+         unsigned int i;
+
+         if (blt_op != opcodes_2d[opcode].opcode)
+             continue;
+
+         len = 1;
+         instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name);
+         if (opcodes_2d[opcode].max_len > 1) {
+             len = (data[0] & 0x000000ff) + 2;
+             if (len < opcodes_2d[opcode].min_len ||
+                 len > opcodes_2d[opcode].max_len)
+             {
+                 fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name);
+             }
+         }
+
+         for (i = 1; i < len; i++) {
+             if (i >= count)
+                 BUFFER_FAIL(count, len, opcodes_2d[opcode].name);
+             instr_out(data, hw_offset, i, "dword %d\n", i);
+         }
+
+         return len;
+     }
+
+     instr_out(data, hw_offset, 0, "2D UNKNOWN\n");
+     (*failures)++;
+     return 1;
+ }
+
+ /**
+  * Decode a single-dword 3D packet of the 0x1c group and print its name.
+  *
+  * The sub-opcode is bits 23:19 of data[0].  Every recognised packet is one
+  * dword long; an unrecognised one is reported, counted in *failures and
+  * also consumes one dword.  count is not consulted because only data[0] is
+  * read.
+  *
+  * @return always 1
+  */
+ static int
+ decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+ {
+     switch ((data[0] & 0x00f80000) >> 19) {
+     case 0x11:
+         /* Name was previously emitted misspelled as "..._DISALBE". */
+         instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
+         return 1;
+     case 0x10:
+         instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
+         return 1;
+     case 0x01:
+         instr_out(data, hw_offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+         return 1;
+     case 0x0a:
+         instr_out(data, hw_offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+         return 1;
+     case 0x05:
+         instr_out(data, hw_offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+         return 1;
+     }
+
+     instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+     (*failures)++;
+     return 1;
+ }
+
+ /**
+  * Format the destination operand of the pixel shader instruction whose first
+  * dword is data[i] into dstname.
+  *
+  * The register number is bits 17:14 and the register type bits 21:19 of that
+  * dword.  When do_mask is non-zero, a write mask built from bits 13:10 is
+  * appended (omitted when all four channels are set) together with ".sat"
+  * when bit 22 is set; with do_mask zero neither suffix is added.  Register
+  * numbers outside the range accepted for the type are reported on the
+  * output stream but still formatted.
+  */
+ static void
+ i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask)
+ {
+     static const char channels[] = "xyzw";
+     const uint32_t a0 = data[i];
+     const int dst_nr = (a0 >> 14) & 0xf;
+     const uint32_t reg_type = (a0 >> 19) & 0x7;
+     char dstmask[8] = "";
+     char *sat = "";
+
+     if (do_mask) {
+         const uint32_t writemask = (a0 >> 10) & 0xf;
+
+         if (writemask != 0xf) {
+             int pos = 0, ch;
+
+             dstmask[pos++] = '.';
+             for (ch = 0; ch < 4; ch++) {
+                 if (writemask & (1u << ch))
+                     dstmask[pos++] = channels[ch];
+             }
+             dstmask[pos] = 0;
+         }
+
+         if (a0 & (1 << 22))
+             sat = ".sat";
+     }
+
+     if (reg_type == 0) {
+         if (dst_nr > 15)
+             fprintf(out, "bad destination reg R%d\n", dst_nr);
+         sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
+     } else if (reg_type == 4) {
+         if (dst_nr > 0)
+             fprintf(out, "bad destination reg oC%d\n", dst_nr);
+         sprintf(dstname, "oC%s%s", dstmask, sat);
+     } else if (reg_type == 5) {
+         if (dst_nr > 0)
+             fprintf(out, "bad destination reg oD%d\n", dst_nr);
+         sprintf(dstname, "oD%s%s", dstmask, sat);
+     } else if (reg_type == 6) {
+         if (dst_nr > 2)
+             fprintf(out, "bad destination reg U%d\n", dst_nr);
+         sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
+     } else {
+         sprintf(dstname, "RESERVED");
+     }
+ }
+
+ /**
+  * Map a 4-bit source channel selector to its printable name.
+  *
+  * The low three bits choose the channel ("x", "y", "z", "w", the constants
+  * "0" and "1", or "bad" for the two unassigned encodings); bit 3 selects the
+  * negated spelling with a leading '-'.  The returned string is a literal and
+  * must not be modified or freed.
+  */
+ static char *
+ i915_get_channel_swizzle(uint32_t select)
+ {
+     static char *const names[2][8] = {
+         { "x", "y", "z", "w", "0", "1", "bad", "bad" },
+         { "-x", "-y", "-z", "-w", "-0", "-1", "-bad", "-bad" },
+     };
+     const unsigned int negated = (select & 8) ? 1 : 0;
+
+     return names[negated][select & 0x7];
+ }
+
+ /**
+  * Write the printable name of a source register into name.
+  *
+  * src_type selects the register file (0 "R", 1 "T" and the named registers
+  * DIFFUSE/SPECULAR/FOG at numbers 8..10, 2 "C", 4 "oC", 5 "oD", 6 "U");
+  * any other type yields "RESERVED".  Register numbers outside the range
+  * accepted for the type are reported on the output stream.
+  */
+ static void
+ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
+ {
+     static const char *const named_regs[] = { "DIFFUSE", "SPECULAR", "FOG" };
+
+     if (src_type == 0) {
+         sprintf(name, "R%d", src_nr);
+         if (src_nr > 15)
+             fprintf(out, "bad src reg %s\n", name);
+     } else if (src_type == 1) {
+         if (src_nr < 8) {
+             sprintf(name, "T%d", src_nr);
+         } else if (src_nr <= 10) {
+             /* 8, 9 and 10 have names instead of T numbers. */
+             strcpy(name, named_regs[src_nr - 8]);
+         } else {
+             fprintf(out, "bad src reg T%d\n", src_nr);
+             strcpy(name, "RESERVED");
+         }
+     } else if (src_type == 2) {
+         sprintf(name, "C%d", src_nr);
+         if (src_nr > 31)
+             fprintf(out, "bad src reg %s\n", name);
+     } else if (src_type == 4) {
+         strcpy(name, "oC");
+         if (src_nr > 0)
+             fprintf(out, "bad src reg oC%d\n", src_nr);
+     } else if (src_type == 5) {
+         strcpy(name, "oD");
+         if (src_nr > 0)
+             fprintf(out, "bad src reg oD%d\n", src_nr);
+     } else if (src_type == 6) {
+         sprintf(name, "U%d", src_nr);
+         if (src_nr > 2)
+             fprintf(out, "bad src reg %s\n", name);
+     } else {
+         fprintf(out, "bad src reg type %d\n", src_type);
+         strcpy(name, "RESERVED");
+     }
+ }
+
+ /**
+  * Format the first source operand of the instruction starting at data[i]
+  * into srcname.
+  *
+  * Register type (bits 9:7) and number (bits 6:2) come from the first dword;
+  * the four channel selectors are the top four nibbles of the second dword.
+  * The swizzle suffix is appended unless it is the identity ".xyzw".
+  */
+ static void
+ i915_get_instruction_src0(const uint32_t *data, int i, char *srcname)
+ {
+     const uint32_t a0 = data[i];
+     const uint32_t a1 = data[i + 1];
+     char swizzle[100];
+
+     i915_get_instruction_src_name((a0 >> 7) & 0x7, (a0 >> 2) & 0x1f, srcname);
+
+     sprintf(swizzle, ".%s%s%s%s",
+             i915_get_channel_swizzle((a1 >> 28) & 0xf),
+             i915_get_channel_swizzle((a1 >> 24) & 0xf),
+             i915_get_channel_swizzle((a1 >> 20) & 0xf),
+             i915_get_channel_swizzle((a1 >> 16) & 0xf));
+     if (strcmp(swizzle, ".xyzw") == 0)
+         return;
+
+     strcat(srcname, swizzle);
+ }
+
+ /**
+  * Format the second source operand of the instruction starting at data[i]
+  * into srcname.
+  *
+  * Register type (bits 15:13) and number (bits 12:8) come from the second
+  * dword.  The x and y selectors are its two lowest nibbles; z and w are the
+  * two highest nibbles of the third dword.  The swizzle suffix is appended
+  * unless it is the identity ".xyzw".
+  */
+ static void
+ i915_get_instruction_src1(const uint32_t *data, int i, char *srcname)
+ {
+     const uint32_t a1 = data[i + 1];
+     const uint32_t a2 = data[i + 2];
+     char swizzle[100];
+
+     i915_get_instruction_src_name((a1 >> 13) & 0x7, (a1 >> 8) & 0x1f, srcname);
+
+     sprintf(swizzle, ".%s%s%s%s",
+             i915_get_channel_swizzle((a1 >> 4) & 0xf),
+             i915_get_channel_swizzle((a1 >> 0) & 0xf),
+             i915_get_channel_swizzle((a2 >> 28) & 0xf),
+             i915_get_channel_swizzle((a2 >> 24) & 0xf));
+     if (strcmp(swizzle, ".xyzw") == 0)
+         return;
+
+     strcat(srcname, swizzle);
+ }
+
+ /**
+  * Format the third source operand of the instruction starting at data[i]
+  * into srcname.
+  *
+  * Everything comes from the third dword: register type in bits 23:21,
+  * number in bits 20:16 and the four channel selectors in the low four
+  * nibbles.  The swizzle suffix is appended unless it is the identity
+  * ".xyzw".
+  */
+ static void
+ i915_get_instruction_src2(const uint32_t *data, int i, char *srcname)
+ {
+     const uint32_t a2 = data[i + 2];
+     char swizzle[100];
+
+     i915_get_instruction_src_name((a2 >> 21) & 0x7, (a2 >> 16) & 0x1f, srcname);
+
+     sprintf(swizzle, ".%s%s%s%s",
+             i915_get_channel_swizzle((a2 >> 12) & 0xf),
+             i915_get_channel_swizzle((a2 >> 8) & 0xf),
+             i915_get_channel_swizzle((a2 >> 4) & 0xf),
+             i915_get_channel_swizzle((a2 >> 0) & 0xf));
+     if (strcmp(swizzle, ".xyzw") == 0)
+         return;
+
+     strcat(srcname, swizzle);
+ }
+
+ /**
+  * Write the printable name of a texture-instruction address register into
+  * name.
+  *
+  * Accepted types are 0 ("R"), 1 ("T", with DIFFUSE/SPECULAR/FOG at numbers
+  * 8..10), 4 ("oC") and 5 ("oD"); any other type is reported and named
+  * "RESERVED".  Out-of-range register numbers are reported on the output
+  * stream.
+  */
+ static void
+ i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
+ {
+     static const char *const named_regs[] = { "DIFFUSE", "SPECULAR", "FOG" };
+
+     if (src_type == 0) {
+         sprintf(name, "R%d", src_nr);
+         if (src_nr > 15)
+             fprintf(out, "bad src reg %s\n", name);
+         return;
+     }
+
+     if (src_type == 1) {
+         if (src_nr < 8) {
+             sprintf(name, "T%d", src_nr);
+         } else if (src_nr <= 10) {
+             strcpy(name, named_regs[src_nr - 8]);
+         } else {
+             fprintf(out, "bad src reg T%d\n", src_nr);
+             strcpy(name, "RESERVED");
+         }
+         return;
+     }
+
+     if (src_type == 4) {
+         strcpy(name, "oC");
+         if (src_nr > 0)
+             fprintf(out, "bad src reg oC%d\n", src_nr);
+         return;
+     }
+
+     if (src_type == 5) {
+         strcpy(name, "oD");
+         if (src_nr > 0)
+             fprintf(out, "bad src reg oD%d\n", src_nr);
+         return;
+     }
+
+     fprintf(out, "bad src reg type %d\n", src_type);
+     strcpy(name, "RESERVED");
+ }
+
+ /**
+  * Print a one-source ALU instruction occupying data[i..i+2].
+  *
+  * The first line shows "<prefix>: <op> <dst>, <src0>"; the remaining two
+  * dwords are printed with the prefix only.
+  */
+ static void
+ i915_decode_alu1(const uint32_t *data, uint32_t hw_offset,
+                  int i, char *instr_prefix, char *op_name)
+ {
+     char dst[100];
+     char src0[100];
+
+     i915_get_instruction_dst(data, i, dst, 1);
+     i915_get_instruction_src0(data, i, src0);
+
+     instr_out(data, hw_offset, i, "%s: %s %s, %s\n", instr_prefix,
+               op_name, dst, src0);
+     instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+     instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+ }
+
+ /**
+  * Print a two-source ALU instruction occupying data[i..i+2].
+  *
+  * The first line shows "<prefix>: <op> <dst>, <src0>, <src1>"; the
+  * remaining two dwords are printed with the prefix only.
+  */
+ static void
+ i915_decode_alu2(const uint32_t *data, uint32_t hw_offset,
+                  int i, char *instr_prefix, char *op_name)
+ {
+     char dst[100];
+     char src0[100];
+     char src1[100];
+
+     i915_get_instruction_dst(data, i, dst, 1);
+     i915_get_instruction_src0(data, i, src0);
+     i915_get_instruction_src1(data, i, src1);
+
+     instr_out(data, hw_offset, i, "%s: %s %s, %s, %s\n", instr_prefix,
+               op_name, dst, src0, src1);
+     instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+     instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+ }
+
+ /**
+  * Print a three-source ALU instruction occupying data[i..i+2].
+  *
+  * The first line shows "<prefix>: <op> <dst>, <src0>, <src1>, <src2>"; the
+  * remaining two dwords are printed with the prefix only.
+  */
+ static void
+ i915_decode_alu3(const uint32_t *data, uint32_t hw_offset,
+                  int i, char *instr_prefix, char *op_name)
+ {
+     char dst[100];
+     char src0[100];
+     char src1[100];
+     char src2[100];
+
+     i915_get_instruction_dst(data, i, dst, 1);
+     i915_get_instruction_src0(data, i, src0);
+     i915_get_instruction_src1(data, i, src1);
+     i915_get_instruction_src2(data, i, src2);
+
+     instr_out(data, hw_offset, i, "%s: %s %s, %s, %s, %s\n", instr_prefix,
+               op_name, dst, src0, src1, src2);
+     instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+     instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+ }
+
+ /**
+  * Print a texture instruction occupying data[i..i+2].
+  *
+  * The destination is formatted without write mask or saturate suffix.  The
+  * sampler number is the low nibble of the first dword; the address
+  * register type and number are bits 26:24 and 20:17 of the second dword.
+  * The remaining two dwords are printed with the prefix only.
+  */
+ static void
+ i915_decode_tex(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
+                 char *tex_name)
+ {
+     const uint32_t t1 = data[i + 1];
+     const int sampler_nr = data[i] & 0xf;
+     char dst_name[100];
+     char addr_name[100];
+
+     i915_get_instruction_dst(data, i, dst_name, 0);
+     i915_get_instruction_addr((t1 >> 24) & 0x7, (t1 >> 17) & 0xf, addr_name);
+
+     instr_out(data, hw_offset, i, "%s: %s %s, S%d, %s\n", instr_prefix,
+               tex_name, dst_name, sampler_nr, addr_name);
+     instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+     instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+ }
+
+ /**
+  * Print a DCL (declaration) instruction occupying data[i..i+2].
+  *
+  * Bits 20:19 of the first dword select what is declared: 1 is a T register
+  * (or DIFFUSE/SPECULAR/FOG for numbers 8..10) with a channel mask from bits
+  * 13:10, 3 is a sampler whose type comes from bits 23:22, anything else is
+  * printed as reserved.  The register number is bits 17:14.  Masks and
+  * register numbers the decoder considers invalid are reported on the output
+  * stream.  All three dwords are always printed, the last two with the
+  * prefix only.
+  */
+ static void
+ i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
+ {
+     uint32_t d0 = data[i];
+     char *sampletype;
+     int dcl_nr = (d0 >> 14) & 0xf;
+     char *dcl_x = d0 & (1 << 10) ? "x" : "";
+     char *dcl_y = d0 & (1 << 11) ? "y" : "";
+     char *dcl_z = d0 & (1 << 12) ? "z" : "";
+     char *dcl_w = d0 & (1 << 13) ? "w" : "";
+     char dcl_mask[10];
+
+     switch ((d0 >> 19) & 0x3) {
+     case 1:
+         sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
+         if (strcmp(dcl_mask, ".") == 0)
+             fprintf(out, "bad (empty) dcl mask\n");
+
+         if (dcl_nr > 10)
+             fprintf(out, "bad T%d dcl register number\n", dcl_nr);
+         if (dcl_nr < 8) {
+             if (strcmp(dcl_mask, ".x") != 0 &&
+                 strcmp(dcl_mask, ".xy") != 0 &&
+                 strcmp(dcl_mask, ".xz") != 0 &&
+                 strcmp(dcl_mask, ".w") != 0 &&
+                 strcmp(dcl_mask, ".xyzw") != 0) {
+                 /* dcl_mask already starts with '.', so do not add another. */
+                 fprintf(out, "bad T%d%s dcl mask\n", dcl_nr, dcl_mask);
+             }
+             instr_out(data, hw_offset, i++, "%s: DCL T%d%s\n", instr_prefix,
+                       dcl_nr, dcl_mask);
+         } else {
+             if (strcmp(dcl_mask, ".xz") == 0 ||
+                 strcmp(dcl_mask, ".xw") == 0 ||
+                 strcmp(dcl_mask, ".xzw") == 0)
+                 fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+
+             if (dcl_nr == 8) {
+                 instr_out(data, hw_offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
+                           dcl_mask);
+             } else if (dcl_nr == 9) {
+                 instr_out(data, hw_offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
+                           dcl_mask);
+             } else if (dcl_nr == 10) {
+                 instr_out(data, hw_offset, i++, "%s: DCL FOG%s\n", instr_prefix,
+                           dcl_mask);
+             } else {
+                 /* Numbers 11..15 used to print no header line and leave i
+                  * unchanged, so the two lines below showed the wrong
+                  * dwords.  Always consume the first dword here.
+                  */
+                 instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d%s\n",
+                           instr_prefix, dcl_nr, dcl_mask);
+             }
+         }
+         instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+         instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+         break;
+     case 3:
+         switch ((d0 >> 22) & 0x3) {
+         case 0:
+             sampletype = "2D";
+             break;
+         case 1:
+             sampletype = "CUBE";
+             break;
+         case 2:
+             sampletype = "3D";
+             break;
+         default:
+             sampletype = "RESERVED";
+             break;
+         }
+         if (dcl_nr > 15)
+             fprintf(out, "bad S%d dcl register number\n", dcl_nr);
+         instr_out(data, hw_offset, i++, "%s: DCL S%d %s\n", instr_prefix,
+                   dcl_nr, sampletype);
+         instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+         instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+         break;
+     default:
+         instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
+         instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+         instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+         break;
+     }
+ }
+
+ /**
+  * Print the three-dword pixel shader instruction at data[i..i+2].
+  *
+  * The opcode is bits 28:24 of the first dword.  ALU and texture opcodes are
+  * handed to the matching i915_decode_alu1/2/3 or i915_decode_tex helper
+  * together with their mnemonic, 0x19 goes to i915_decode_dcl, and 0x00
+  * (NOP) and any unlisted opcode are printed here.
+  */
+ static void
+ i915_decode_instruction(const uint32_t *data, uint32_t hw_offset,
+                         int i, char *instr_prefix)
+ {
+     typedef void (*op_decoder)(const uint32_t *, uint32_t, int, char *, char *);
+
+     /* Indexed by opcode; entries left out have a NULL decoder. */
+     static const struct {
+         op_decoder decode;
+         char *name;
+     } ops[0x20] = {
+         [0x01] = { i915_decode_alu2, "ADD" },
+         [0x02] = { i915_decode_alu1, "MOV" },
+         [0x03] = { i915_decode_alu2, "MUL" },
+         [0x04] = { i915_decode_alu3, "MAD" },
+         [0x05] = { i915_decode_alu3, "DP2ADD" },
+         [0x06] = { i915_decode_alu2, "DP3" },
+         [0x07] = { i915_decode_alu2, "DP4" },
+         [0x08] = { i915_decode_alu1, "FRC" },
+         [0x09] = { i915_decode_alu1, "RCP" },
+         [0x0a] = { i915_decode_alu1, "RSQ" },
+         [0x0b] = { i915_decode_alu1, "EXP" },
+         [0x0c] = { i915_decode_alu1, "LOG" },
+         [0x0d] = { i915_decode_alu2, "CMP" },
+         [0x0e] = { i915_decode_alu2, "MIN" },
+         [0x0f] = { i915_decode_alu2, "MAX" },
+         [0x10] = { i915_decode_alu1, "FLR" },
+         [0x11] = { i915_decode_alu1, "MOD" },
+         [0x12] = { i915_decode_alu1, "TRC" },
+         [0x13] = { i915_decode_alu2, "SGE" },
+         [0x14] = { i915_decode_alu2, "SLT" },
+         [0x15] = { i915_decode_tex, "TEXLD" },
+         [0x16] = { i915_decode_tex, "TEXLDP" },
+         [0x17] = { i915_decode_tex, "TEXLDB" },
+     };
+     const uint32_t op = (data[i] >> 24) & 0x1f;
+
+     if (ops[op].decode != NULL) {
+         ops[op].decode(data, hw_offset, i, instr_prefix, ops[op].name);
+         return;
+     }
+
+     if (op == 0x19) {
+         i915_decode_dcl(data, hw_offset, i, instr_prefix);
+         return;
+     }
+
+     /* NOP and unrecognised opcodes differ only in the first line. */
+     if (op == 0x0)
+         instr_out(data, hw_offset, i, "%s: NOP\n", instr_prefix);
+     else
+         instr_out(data, hw_offset, i, "%s: unknown\n", instr_prefix);
+     instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+     instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+ }
+
+ /**
+  * Decode a single 3D packet of the 0x1d group at data[0] and print it.
+  *
+  * The sub-opcode is bits 23:16 of data[0].  A handful of packets are
+  * decoded field by field (LOAD_INDIRECT, LOAD_STATE_IMMEDIATE_1, MAP_STATE,
+  * PIXEL_SHADER_CONSTANTS, PIXEL_SHADER_PROGRAM, SAMPLER_STATE,
+  * DEST_BUFFER_VARIABLES); the rest are looked up in a table and dumped as
+  * raw dwords.  While decoding LOAD_STATE_IMMEDIATE_1 the S2 and S4 dwords
+  * are stored in saved_s2/saved_s4 for later vertex decoding.
+  *
+  * Bounds checks compare the index one past the last dword about to be
+  * printed against count, so a packet that ends exactly at the end of the
+  * buffer is accepted.
+  *
+  * @param data       first dword of the packet
+  * @param count      number of dwords available starting at data
+  * @param hw_offset  offset printed for data[0]
+  * @param failures   incremented for unknown opcodes, bad lengths and
+  *                   truncated packets
+  * @param i830       non-zero to accept the table entries marked i830-only
+  *                   (and to treat sub-opcode 0x01 as one of them)
+  * @return number of dwords the packet claims to occupy; an unknown opcode
+  *         consumes 1 and a truncated packet returns count (see BUFFER_FAIL)
+  */
+ static int
+ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+ {
+     unsigned int len, i, c, opcode, word, map, sampler, instr;
+     char *format;
+
+     static const struct {
+         uint32_t opcode;
+         int i830_only;
+         int min_len;
+         int max_len;
+         char *name;
+     } opcodes_3d_1d[] = {
+         { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+         { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
+         { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+         { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+         { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+         { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+         { 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" },
+         { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+         { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
+         { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
+         /* A second, identical 0x8e entry that could never match was
+          * dropped from here.
+          */
+         { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+         { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
+         { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
+         { 0x8f, 0, 2, 16, "3DSTATE_MAP_PALETTE_LOAD_32" },
+         { 0x81, 0, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" },
+         { 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+         { 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" },
+         { 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" },
+         { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
+         { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
+         { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
+     };
+
+     /* Blocks selectable in 3DSTATE_LOAD_INDIRECT, in the order their
+      * dwords follow the header: select bit (before the shift by 8),
+      * number of dwords, and name used in the output.
+      */
+     static const struct {
+         uint32_t select;
+         unsigned int dwords;
+         char *name;
+     } indirect_blocks[] = {
+         { 0x01, 2, "SIS" },
+         { 0x02, 1, "DIS" },
+         { 0x04, 2, "SSB" },
+         { 0x08, 2, "MSB" },
+         { 0x10, 2, "PSP" },
+         { 0x20, 2, "PSC" },
+     };
+
+     switch ((data[0] & 0x00ff0000) >> 16) {
+     case 0x07:
+         /* This instruction is unusual.  A 0 length means just 1 DWORD
+          * instead of 2.  The 0 length is specified in one place to be
+          * unsupported, but stated to be required in another, and 0 length
+          * LOAD_INDIRECTs appear to cause no harm at least.
+          */
+         instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+         len = (data[0] & 0x000000ff) + 1;
+         i = 1;
+         for (word = 0;
+              word < sizeof(indirect_blocks) / sizeof(indirect_blocks[0]);
+              word++) {
+             if (!(data[0] & (indirect_blocks[word].select << 8)))
+                 continue;
+             /* Needs dwords i .. i + dwords - 1 to be present. */
+             if (i + indirect_blocks[word].dwords > count)
+                 BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+             for (c = 0; c < indirect_blocks[word].dwords; c++)
+                 instr_out(data, hw_offset, i++, "%s.%u\n",
+                           indirect_blocks[word].name, c);
+         }
+         if (len != i) {
+             fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+             (*failures)++;
+         }
+         return len;
+     case 0x04:
+         instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+         len = (data[0] & 0x0000000f) + 2;
+         i = 1;
+         for (word = 0; word <= 7; word++) {
+             if (data[0] & (1 << (4 + word))) {
+                 if (i >= count)
+                     BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
+
+                 /* save vertex state for decode */
+                 if (word == 2) {
+                     saved_s2_set = 1;
+                     saved_s2 = data[i];
+                 }
+                 if (word == 4) {
+                     saved_s4_set = 1;
+                     saved_s4 = data[i];
+                 }
+
+                 instr_out(data, hw_offset, i++, "S%d\n", word);
+             }
+         }
+         if (len != i) {
+             /* This message used to name 3DSTATE_LOAD_INDIRECT. */
+             fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+             (*failures)++;
+         }
+         return len;
+     case 0x00:
+         instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n");
+         len = (data[0] & 0x0000003f) + 2;
+         /* The mask dword is read below; make sure it exists. */
+         if (count < 2)
+             BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+         instr_out(data, hw_offset, 1, "mask\n");
+
+         i = 2;
+         for (map = 0; map <= 15; map++) {
+             if (data[1] & (1 << map)) {
+                 if (i + 3 > count)
+                     BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+                 instr_out(data, hw_offset, i++, "map %d MS2\n", map);
+                 instr_out(data, hw_offset, i++, "map %d MS3\n", map);
+                 instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+             }
+         }
+         if (len != i) {
+             fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n");
+             (*failures)++;
+         }
+         return len;
+     case 0x06:
+         instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+         len = (data[0] & 0x000000ff) + 2;
+         /* data[1] holds the constant mask; make sure it exists. */
+         if (count < 2)
+             BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS");
+
+         i = 2;
+         for (c = 0; c <= 31; c++) {
+             /* Unsigned constant: c reaches 31. */
+             if (data[1] & (1u << c)) {
+                 if (i + 4 > count)
+                     BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS");
+                 instr_out(data, hw_offset, i, "C%d.X = %f\n",
+                           c, int_as_float(data[i]));
+                 i++;
+                 instr_out(data, hw_offset, i, "C%d.Y = %f\n",
+                           c, int_as_float(data[i]));
+                 i++;
+                 instr_out(data, hw_offset, i, "C%d.Z = %f\n",
+                           c, int_as_float(data[i]));
+                 i++;
+                 instr_out(data, hw_offset, i, "C%d.W = %f\n",
+                           c, int_as_float(data[i]));
+                 i++;
+             }
+         }
+         if (len != i) {
+             fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+             (*failures)++;
+         }
+         return len;
+     case 0x05:
+         instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
+         len = (data[0] & 0x000000ff) + 2;
+         /* The payload must be a whole number of 3-dword instructions. */
+         if ((len - 1) % 3 != 0 || len > 370) {
+             fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n");
+             (*failures)++;
+         }
+         i = 1;
+         for (instr = 0; instr < (len - 1) / 3; instr++) {
+             char instr_prefix[10];
+
+             if (i + 3 > count)
+                 BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM");
+             sprintf(instr_prefix, "PS%03d", instr);
+             i915_decode_instruction(data, hw_offset, i, instr_prefix);
+             i += 3;
+         }
+         return len;
+     case 0x01:
+         /* With i830 set this sub-opcode is handled by the table below. */
+         if (i830)
+             break;
+         instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
+         len = (data[0] & 0x0000003f) + 2;
+         /* The mask dword is read below; make sure it exists. */
+         if (count < 2)
+             BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE");
+         instr_out(data, hw_offset, 1, "mask\n");
+         i = 2;
+         for (sampler = 0; sampler <= 15; sampler++) {
+             if (data[1] & (1 << sampler)) {
+                 if (i + 3 > count)
+                     BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE");
+                 instr_out(data, hw_offset, i++, "sampler %d SS2\n",
+                           sampler);
+                 instr_out(data, hw_offset, i++, "sampler %d SS3\n",
+                           sampler);
+                 instr_out(data, hw_offset, i++, "sampler %d SS4\n",
+                           sampler);
+             }
+         }
+         if (len != i) {
+             fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n");
+             (*failures)++;
+         }
+         return len;
+     case 0x85:
+         len = (data[0] & 0x0000000f) + 2;
+
+         if (len != 2)
+             fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n");
+         if (count < 2)
+             BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES");
+
+         instr_out(data, hw_offset, 0,
+                   "3DSTATE_DEST_BUFFER_VARIABLES\n");
+
+         switch ((data[1] >> 8) & 0xf) {
+         case 0x0: format = "g8"; break;
+         case 0x1: format = "x1r5g5b5"; break;
+         case 0x2: format = "r5g6b5"; break;
+         case 0x3: format = "a8r8g8b8"; break;
+         case 0x4: format = "ycrcb_swapy"; break;
+         case 0x5: format = "ycrcb_normal"; break;
+         case 0x6: format = "ycrcb_swapuv"; break;
+         case 0x7: format = "ycrcb_swapuvy"; break;
+         case 0x8: format = "a4r4g4b4"; break;
+         case 0x9: format = "a1r5g5b5"; break;
+         case 0xa: format = "a2r10g10b10"; break;
+         default: format = "BAD"; break;
+         }
+         /* 1u: shifting a signed 1 into bit 31 is undefined. */
+         instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n",
+                   format,
+                   (data[1] & (1u << 31)) ? "en" : "dis");
+         return len;
+     }
+
+     /* Everything else: name from the table plus a raw dword dump. */
+     for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
+          opcode++)
+     {
+         if (opcodes_3d_1d[opcode].i830_only && !i830)
+             continue;
+
+         if (((data[0] & 0x00ff0000) >> 16) != opcodes_3d_1d[opcode].opcode)
+             continue;
+
+         len = 1;
+
+         instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
+         if (opcodes_3d_1d[opcode].max_len > 1) {
+             len = (data[0] & 0x0000ffff) + 2;
+             if (len < opcodes_3d_1d[opcode].min_len ||
+                 len > opcodes_3d_1d[opcode].max_len)
+             {
+                 fprintf(out, "Bad count in %s\n",
+                         opcodes_3d_1d[opcode].name);
+                 (*failures)++;
+             }
+         }
+
+         for (i = 1; i < len; i++) {
+             if (i >= count)
+                 BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name);
+             instr_out(data, hw_offset, i, "dword %d\n", i);
+         }
+
+         return len;
+     }
+
+     instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+     (*failures)++;
+     return 1;
+ }
+
+/**
+ * Decodes a 3DPRIMITIVE packet for the pre-965 decoders.
+ *
+ * When bit 23 of the header is clear the vertex data is inline: the packet is
+ * (data[0] & 0x3ffff) + 2 dwords long and, when both saved S2 and S4 values
+ * have been captured, each dword is labelled according to the vertex layout
+ * they describe.  Otherwise the vertices are referenced indirectly, either
+ * through 16-bit indices packed two per dword (bit 17 set) or through a
+ * start vertex held in the second dword.
+ *
+ * \param data pointer to the packet header dword
+ * \param count number of dwords available starting at \p data
+ * \param hw_offset hardware address passed through to instr_out()
+ * \param failures counter incremented when an index list has no terminator
+ * \return the value the caller adds to its dword index
+ */
+static int
+decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
+                    int *failures)
+{
+    char immediate = (data[0] & (1 << 23)) == 0;
+    unsigned int len, i;
+    const char *primtype;
+
+    switch ((data[0] >> 18) & 0xf) {
+    case 0x0: primtype = "TRILIST"; break;
+    case 0x1: primtype = "TRISTRIP"; break;
+    case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+    case 0x3: primtype = "TRIFAN"; break;
+    case 0x4: primtype = "POLYGON"; break;
+    case 0x5: primtype = "LINELIST"; break;
+    case 0x6: primtype = "LINESTRIP"; break;
+    case 0x7: primtype = "RECTLIST"; break;
+    case 0x8: primtype = "POINTLIST"; break;
+    case 0x9: primtype = "DIB"; break;
+    case 0xa: primtype = "CLEAR_RECT"; break;
+    default: primtype = "unknown"; break;
+    }
+
+    /* XXX: 3DPRIM_DIB not supported */
+    if (immediate) {
+        len = (data[0] & 0x0003ffff) + 2;
+        instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+        if (count < len)
+            BUFFER_FAIL(count, len, "3DPRIMITIVE inline");
+        if (!saved_s2_set || !saved_s4_set) {
+            /* Without S2/S4 the layout is unknown: dump raw floats. */
+            fprintf(out, "unknown vertex format\n");
+            for (i = 1; i < len; i++) {
+                instr_out(data, hw_offset, i,
+                          " vertex data (%f float)\n",
+                          int_as_float(data[i]));
+            }
+        } else {
+            unsigned int vertex = 0;
+            for (i = 1; i < len;) {
+                unsigned int tc;
+
+/* Prints one vertex dword and advances i; the arguments are only evaluated
+ * while i is still inside the packet. */
+#define VERTEX_OUT(fmt, ...) do { \
+    if (i < len) \
+        instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+    else \
+        fprintf(out, " missing data in V%d\n", vertex); \
+    i++; \
+} while (0)
+
+                VERTEX_OUT("X = %f", int_as_float(data[i]));
+                VERTEX_OUT("Y = %f", int_as_float(data[i]));
+                switch (saved_s4 >> 6 & 0x7) {
+                case 0x1:
+                    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+                    break;
+                case 0x2:
+                    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+                    VERTEX_OUT("W = %f", int_as_float(data[i]));
+                    break;
+                case 0x3:
+                    break;
+                case 0x4:
+                    VERTEX_OUT("W = %f", int_as_float(data[i]));
+                    break;
+                default:
+                    fprintf(out, "bad S4 position mask\n");
+                }
+
+                if (saved_s4 & (1 << 10)) {
+                    VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, "
+                               "B=0x%02x)",
+                               data[i] >> 24,
+                               (data[i] >> 16) & 0xff,
+                               (data[i] >> 8) & 0xff,
+                               data[i] & 0xff);
+                }
+                if (saved_s4 & (1 << 11)) {
+                    VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, "
+                               "B=0x%02x)",
+                               data[i] >> 24,
+                               (data[i] >> 16) & 0xff,
+                               (data[i] >> 8) & 0xff,
+                               data[i] & 0xff);
+                }
+                if (saved_s4 & (1 << 12))
+                    VERTEX_OUT("width = 0x%08x", data[i]);
+
+                /* S2 holds one 4-bit format code per texture coordinate set. */
+                for (tc = 0; tc <= 7; tc++) {
+                    switch ((saved_s2 >> (tc * 4)) & 0xf) {
+                    case 0x0:
+                        VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+                        VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+                        break;
+                    case 0x1:
+                        VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+                        VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+                        VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+                        break;
+                    case 0x2:
+                        VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+                        VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+                        VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+                        VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i]));
+                        break;
+                    case 0x3:
+                        VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+                        break;
+                    case 0x4:
+                        VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+                        break;
+                    case 0x5:
+                        VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+                        VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]);
+                        break;
+                    case 0xf:
+                        break;
+                    default:
+                        fprintf(out, "bad S2.T%d format\n", tc);
+                    }
+                }
+                vertex++;
+            }
+#undef VERTEX_OUT
+        }
+    } else {
+        /* indirect vertices */
+        len = data[0] & 0x0000ffff; /* index count */
+        if (data[0] & (1 << 17)) {
+            /* random vertex access */
+            if (count < (len + 1) / 2 + 1) {
+                BUFFER_FAIL(count, (len + 1) / 2 + 1,
+                            "3DPRIMITIVE random indirect");
+            }
+            instr_out(data, hw_offset, 0,
+                      "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+            if (len == 0) {
+                /* vertex indices continue until 0xffff is found */
+                for (i = 1; i < count; i++) {
+                    if ((data[i] & 0xffff) == 0xffff) {
+                        instr_out(data, hw_offset, i,
+                                  " indices: (terminator)\n");
+                        return i;
+                    } else if ((data[i] >> 16) == 0xffff) {
+                        instr_out(data, hw_offset, i,
+                                  " indices: 0x%04x, "
+                                  "(terminator)\n",
+                                  data[i] & 0xffff);
+                        return i;
+                    } else {
+                        instr_out(data, hw_offset, i,
+                                  " indices: 0x%04x, 0x%04x\n",
+                                  data[i] & 0xffff, data[i] >> 16);
+                    }
+                }
+                fprintf(out,
+                        "3DPRIMITIVE: no terminator found in index buffer\n");
+                (*failures)++;
+                return count;
+            } else {
+                /* fixed size vertex index buffer: i counts indices, two per
+                 * dword, and the first index dword follows the header */
+                for (i = 0; i < len; i += 2) {
+                    unsigned int dw = i / 2 + 1;
+
+                    if (i == len - 1) {
+                        /* odd index count: last dword holds one index */
+                        instr_out(data, hw_offset, dw,
+                                  " indices: 0x%04x\n",
+                                  data[dw] & 0xffff);
+                    } else {
+                        instr_out(data, hw_offset, dw,
+                                  " indices: 0x%04x, 0x%04x\n",
+                                  data[dw] & 0xffff, data[dw] >> 16);
+                    }
+                }
+            }
+            return (len + 1) / 2 + 1;
+        } else {
+            /* sequential vertex access */
+            if (count < 2)
+                BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect");
+            instr_out(data, hw_offset, 0,
+                      "3DPRIMITIVE sequential indirect %s, %d starting from "
+                      "%d\n", primtype, len, data[1] & 0xffff);
+            instr_out(data, hw_offset, 1, " start\n");
+            return 2;
+        }
+    }
+
+    return len;
+}
+
+/**
+ * Decodes one 3D-client packet for the path intel_decode() takes when
+ * IS_9XX(devid) holds and IS_965(devid) does not.
+ *
+ * Opcodes 0x1f, 0x1d and 0x1c (bits 28:24 of the header) are handed to their
+ * dedicated decoders; everything else is looked up in a small table of
+ * named state packets.
+ *
+ * \return the value the caller adds to its dword index
+ */
+static int
+decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    struct {
+        uint32_t opcode;
+        int min_len;
+        int max_len;
+        char *name;
+    } opcodes_3d[] = {
+        { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
+        { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
+        { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
+        { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
+        { 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+        { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
+        { 0x0d, 1, 1, "3DSTATE_MODES_4" },
+        { 0x0c, 1, 1, "3DSTATE_MODES_5" },
+        { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+    };
+    const uint32_t op = (data[0] & 0x1f000000) >> 24;
+    const unsigned int entries = sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+    unsigned int idx;
+
+    if (op == 0x1f)
+        return decode_3d_primitive(data, count, hw_offset, failures);
+    if (op == 0x1d)
+        return decode_3d_1d(data, count, hw_offset, failures, 0);
+    if (op == 0x1c)
+        return decode_3d_1c(data, count, hw_offset, failures);
+
+    for (idx = 0; idx < entries; idx++) {
+        unsigned int len, dw;
+
+        if (opcodes_3d[idx].opcode != op)
+            continue;
+
+        instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[idx].name);
+
+        /* Only packets that may span several dwords carry a length field. */
+        len = 1;
+        if (opcodes_3d[idx].max_len > 1) {
+            len = (data[0] & 0xff) + 2;
+            if (len < opcodes_3d[idx].min_len ||
+                len > opcodes_3d[idx].max_len)
+            {
+                fprintf(out, "Bad count in %s\n", opcodes_3d[idx].name);
+            }
+        }
+
+        for (dw = 1; dw < len; dw++) {
+            if (dw >= count)
+                BUFFER_FAIL(count, len, opcodes_3d[idx].name);
+            instr_out(data, hw_offset, dw, "dword %d\n", dw);
+        }
+        return len;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/* Maps a surface-type code to a printable name; codes without a name
+ * yield "unknown". */
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    static const char *const names[] = {
+        "1D", "2D", "3D", "CUBE", "BUFFER", "unknown", "unknown", "NULL"
+    };
+
+    if (surfacetype < sizeof(names) / sizeof(names[0]))
+        return names[surfacetype];
+    return "unknown";
+}
+
+/* Maps a depth-format code to a printable name; codes without a name
+ * yield "unknown". */
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+    static const char *const names[] = {
+        "s8_z24float", "z32float", "z24s8", "unknown", "unknown", "z16"
+    };
+
+    if (depthformat < sizeof(names) / sizeof(names[0]))
+        return names[depthformat];
+    return "unknown";
+}
+
+/* Names the 3-bit control code for one component of a vertex element dword.
+ * Component 0 sits at bits 30:28, component 3 at bits 18:16. */
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+    static const char *const axis[] = { "X", "Y", "Z", "W" };
+    const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+    if (control == 0)
+        return "nostore";
+    if (control == 1) {
+        /* code 1 is reported as the component's own axis letter */
+        if (component >= 0 && component <= 3)
+            return axis[component];
+        return "fail";
+    }
+    if (control == 2)
+        return "0.0";
+    if (control == 3)
+        return "1.0";
+    if (control == 4)
+        return "0x1";
+    if (control == 5)
+        return "VID";
+    return "fail";
+}
+
+/* Names the primitive type held in bits 14:10 of a 3DPRIMITIVE header;
+ * codes without a name yield "fail". */
+static const char *
+get_965_prim_type(uint32_t data)
+{
+    static const char *const names[] = {
+        "fail",               /* 0x00 has no name */
+        "point list",
+        "line list",
+        "line strip",
+        "tri list",
+        "tri strip",
+        "tri fan",
+        "quad list",
+        "quad strip",
+        "line list adj",
+        "line strip adj",
+        "tri list adj",
+        "tri strip adj",
+        "tri strip reverse",
+        "polygon",
+        "rect list",
+        "line loop",
+        "point list bf",
+        "line strip cont",
+        "line strip bf",
+        "line strip cont bf",
+        "tri fan no stipple"  /* 0x15 */
+    };
+    const uint32_t code = (data >> 10) & 0x1f;
+
+    if (code < sizeof(names) / sizeof(names[0]))
+        return names[code];
+    return "fail";
+}
+
+/**
+ * Decodes one 3D-client packet for the path intel_decode() takes when
+ * IS_965(devid) holds.
+ *
+ * The upper 16 bits of the header select the packet.  Packets with a
+ * dedicated case below get per-dword annotations; the rest are looked up in
+ * opcodes_3d[] and dumped as numbered dwords.
+ *
+ * \param data pointer to the packet header dword
+ * \param count number of dwords available starting at \p data
+ * \param hw_offset hardware address passed through to instr_out()
+ * \param failures counter incremented for unrecognised packets
+ * \return the value the caller adds to its dword index
+ */
+static int
+decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode, len, i;
+
+    struct {
+        uint32_t opcode;
+        int min_len;
+        int max_len;
+        const char *name;
+    } opcodes_3d[] = {
+        { 0x6000, 3, 3, "URB_FENCE" },
+        { 0x6001, 2, 2, "CS_URB_STATE" },
+        { 0x6002, 2, 2, "CONSTANT_BUFFER" },
+        { 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+        { 0x6102, 2, 2 , "STATE_SIP" },
+        { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+        { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+        { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+        { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+        { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+        { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+        { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+        { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+        { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+        { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+        { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+        { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+        { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+        { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+        { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+        { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+        { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+        { 0x7b00, 6, 6, "3DPRIMITIVE" },
+    };
+
+    len = (data[0] & 0x0000ffff) + 2;
+
+    switch ((data[0] & 0xffff0000) >> 16) {
+    case 0x6101:
+        if (len != 6)
+            fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
+        if (count < 6)
+            BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+
+        instr_out(data, hw_offset, 0,
+                  "STATE_BASE_ADDRESS\n");
+
+        /* Bit 0 of each dword says whether that field is being updated. */
+        if (data[1] & 1) {
+            instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
+                      data[1] & ~1);
+        } else
+            instr_out(data, hw_offset, 1, "General state not updated\n");
+
+        if (data[2] & 1) {
+            instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
+                      data[2] & ~1);
+        } else
+            instr_out(data, hw_offset, 2, "Surface state not updated\n");
+
+        if (data[3] & 1) {
+            instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
+                      data[3] & ~1);
+        } else
+            instr_out(data, hw_offset, 3, "Indirect state not updated\n");
+
+        if (data[4] & 1) {
+            instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
+                      data[4] & ~1);
+        } else
+            instr_out(data, hw_offset, 4,
+                      "General state upper bound not updated\n");
+
+        if (data[5] & 1) {
+            instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
+                      data[5] & ~1);
+        } else
+            instr_out(data, hw_offset, 5,
+                      "Indirect state upper bound not updated\n");
+
+        return len;
+    case 0x7800:
+        if (len != 7)
+            fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n");
+        if (count < 7)
+            BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS");
+
+        instr_out(data, hw_offset, 0,
+                  "3DSTATE_PIPELINED_POINTERS\n");
+        instr_out(data, hw_offset, 1, "VS state\n");
+        instr_out(data, hw_offset, 2, "GS state\n");
+        instr_out(data, hw_offset, 3, "Clip state\n");
+        instr_out(data, hw_offset, 4, "SF state\n");
+        instr_out(data, hw_offset, 5, "WM state\n");
+        instr_out(data, hw_offset, 6, "CC state\n");
+        return len;
+    case 0x7801:
+        if (len != 6)
+            fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
+        if (count < 6)
+            BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+
+        instr_out(data, hw_offset, 0,
+                  "3DSTATE_BINDING_TABLE_POINTERS\n");
+        instr_out(data, hw_offset, 1, "VS binding table\n");
+        instr_out(data, hw_offset, 2, "GS binding table\n");
+        instr_out(data, hw_offset, 3, "Clip binding table\n");
+        instr_out(data, hw_offset, 4, "SF binding table\n");
+        instr_out(data, hw_offset, 5, "WM binding table\n");
+
+        return len;
+
+    case 0x7808:
+        len = (data[0] & 0xff) + 2;
+        if ((len - 1) % 4 != 0)
+            fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n");
+        if (count < len)
+            BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS");
+        instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+        /* Four dwords per buffer.  Only whole groups are decoded so that a
+         * malformed length cannot make us read beyond the packet. */
+        for (i = 1; i + 3 < len; i += 4) {
+            instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n",
+                      data[i] >> 27,
+                      data[i] & (1 << 26) ? "random" : "sequential",
+                      data[i] & 0x07ff);
+            instr_out(data, hw_offset, i + 1, "buffer address\n");
+            instr_out(data, hw_offset, i + 2, "max index\n");
+            instr_out(data, hw_offset, i + 3, "mbz\n");
+        }
+        return len;
+
+    case 0x7809:
+        len = (data[0] & 0xff) + 2;
+        if ((len + 1) % 2 != 0)
+            fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n");
+        if (count < len)
+            BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS");
+        instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+        /* Two dwords per element; a dangling half element is not decoded. */
+        for (i = 1; i + 1 < len; i += 2) {
+            instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, "
+                      "src offset 0x%04x bytes\n",
+                      data[i] >> 27,
+                      data[i] & (1 << 26) ? "" : "in",
+                      (data[i] >> 16) & 0x1ff,
+                      data[i] & 0x07ff);
+            instr_out(data, hw_offset, i + 1, "(%s, %s, %s, %s), "
+                      "dst offset 0x%02x bytes\n",
+                      get_965_element_component(data[i + 1], 0),
+                      get_965_element_component(data[i + 1], 1),
+                      get_965_element_component(data[i + 1], 2),
+                      get_965_element_component(data[i + 1], 3),
+                      (data[i + 1] & 0xff) * 4);
+        }
+        return len;
+
+    case 0x780a:
+        len = (data[0] & 0xff) + 2;
+        if (len != 3)
+            fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n");
+        if (count < len)
+            BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER");
+        instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n");
+        instr_out(data, hw_offset, 1, "beginning buffer address\n");
+        instr_out(data, hw_offset, 2, "ending buffer address\n");
+        return len;
+
+    case 0x7900:
+        if (len != 4)
+            fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
+        if (count < 4)
+            BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE");
+
+        instr_out(data, hw_offset, 0,
+                  "3DSTATE_DRAWING_RECTANGLE\n");
+        instr_out(data, hw_offset, 1, "top left: %d,%d\n",
+                  data[1] & 0xffff,
+                  (data[1] >> 16) & 0xffff);
+        instr_out(data, hw_offset, 2, "bottom right: %d,%d\n",
+                  data[2] & 0xffff,
+                  (data[2] >> 16) & 0xffff);
+        instr_out(data, hw_offset, 3, "origin: %d,%d\n",
+                  (int)data[3] & 0xffff,
+                  ((int)data[3] >> 16) & 0xffff);
+
+        return len;
+
+    case 0x7905:
+        if (len != 5 && len != 6)
+            fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
+        if (count < len)
+            BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
+
+        instr_out(data, hw_offset, 0,
+                  "3DSTATE_DEPTH_BUFFER\n");
+        instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n",
+                  get_965_surfacetype(data[1] >> 29),
+                  get_965_depthformat((data[1] >> 18) & 0x7),
+                  (data[1] & 0x0001ffff) + 1,
+                  data[1] & (1 << 27) ? "" : "not ");
+        instr_out(data, hw_offset, 2, "depth offset\n");
+        instr_out(data, hw_offset, 3, "%dx%d\n",
+                  ((data[3] & 0x0007ffc0) >> 6) + 1,
+                  ((data[3] & 0xfff80000) >> 19) + 1);
+        instr_out(data, hw_offset, 4, "volume depth\n");
+        if (len == 6)
+            instr_out(data, hw_offset, 5, "\n");
+
+        return len;
+
+    case 0x7b00:
+        len = (data[0] & 0xff) + 2;
+        if (len != 6)
+            fprintf(out, "Bad count in 3DPRIMITIVE\n");
+        if (count < len)
+            BUFFER_FAIL(count, len, "3DPRIMITIVE");
+
+        instr_out(data, hw_offset, 0,
+                  "3DPRIMITIVE: %s %s\n",
+                  get_965_prim_type(data[0]),
+                  (data[0] & (1 << 15)) ? "random" : "sequential");
+        instr_out(data, hw_offset, 1, "vertex count\n");
+        instr_out(data, hw_offset, 2, "start vertex\n");
+        instr_out(data, hw_offset, 3, "instance count\n");
+        instr_out(data, hw_offset, 4, "start instance\n");
+        instr_out(data, hw_offset, 5, "index bias\n");
+        return len;
+    }
+
+    /* No dedicated decoder: fall back to the name/length table. */
+    for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+         opcode++) {
+        if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+            len = 1;
+
+            instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+            if (opcodes_3d[opcode].max_len > 1) {
+                len = (data[0] & 0xff) + 2;
+                if (len < opcodes_3d[opcode].min_len ||
+                    len > opcodes_3d[opcode].max_len)
+                {
+                    fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+                }
+            }
+
+            for (i = 1; i < len; i++) {
+                if (i >= count)
+                    BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+                instr_out(data, hw_offset, i, "dword %d\n", i);
+            }
+            return len;
+        }
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decodes one 3D-client packet for the path intel_decode() takes when
+ * neither IS_965(devid) nor IS_9XX(devid) holds.
+ *
+ * Opcodes 0x1f, 0x1d and 0x1c (bits 28:24 of the header) are handed to their
+ * dedicated decoders; everything else is looked up in a table of named
+ * state packets.
+ *
+ * \return the value the caller adds to its dword index
+ */
+static int
+decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    struct {
+        uint32_t opcode;
+        int min_len;
+        int max_len;
+        char *name;
+    } opcodes_3d[] = {
+        { 0x02, 1, 1, "3DSTATE_MODES_3" },
+        { 0x03, 1, 1, "3DSTATE_ENABLES_1"},
+        { 0x04, 1, 1, "3DSTATE_ENABLES_2"},
+        { 0x05, 1, 1, "3DSTATE_VFT0"},
+        { 0x06, 1, 1, "3DSTATE_AA"},
+        { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+        { 0x08, 1, 1, "3DSTATE_MODES_1" },
+        { 0x09, 1, 1, "3DSTATE_STENCIL_TEST" },
+        { 0x0a, 1, 1, "3DSTATE_VFT1"},
+        { 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" },
+        { 0x0c, 1, 1, "3DSTATE_MODES_5" },
+        { 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" },
+        { 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" },
+        { 0x0f, 1, 1, "3DSTATE_MODES_2" },
+        { 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+        { 0x16, 1, 1, "3DSTATE_MODES_4" },
+    };
+    const uint32_t op = (data[0] & 0x1f000000) >> 24;
+    const unsigned int entries = sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+    unsigned int idx;
+
+    if (op == 0x1f)
+        return decode_3d_primitive(data, count, hw_offset, failures);
+    if (op == 0x1d)
+        return decode_3d_1d(data, count, hw_offset, failures, 1);
+    if (op == 0x1c)
+        return decode_3d_1c(data, count, hw_offset, failures);
+
+    for (idx = 0; idx < entries; idx++) {
+        unsigned int len, dw;
+
+        if (opcodes_3d[idx].opcode != op)
+            continue;
+
+        instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[idx].name);
+
+        /* Only packets that may span several dwords carry a length field. */
+        len = 1;
+        if (opcodes_3d[idx].max_len > 1) {
+            len = (data[0] & 0xff) + 2;
+            if (len < opcodes_3d[idx].min_len ||
+                len > opcodes_3d[idx].max_len)
+            {
+                fprintf(out, "Bad count in %s\n", opcodes_3d[idx].name);
+            }
+        }
+
+        for (dw = 1; dw < len; dw++) {
+            if (dw >= count)
+                BUFFER_FAIL(count, len, opcodes_3d[idx].name);
+            instr_out(data, hw_offset, dw, "dword %d\n", dw);
+        }
+        return len;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decodes a batch buffer, writing the output to stderr.
+ *
+ * Each packet is dispatched on the top three bits of its header dword; 3D
+ * packets are further routed by device generation (965, other 9xx, or the
+ * i830 fallback).
+ *
+ * \param data batch buffer contents
+ * \param count number of DWORDs to decode in the batch buffer
+ * \param hw_offset hardware address for the buffer
+ * \param devid device id tested with IS_965() / IS_9XX() to pick the 3D decoder
+ * \return number of failures recorded while decoding
+ */
+int
+intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+{
+ int index = 0;
+ int failures = 0;
+
+ out = stderr; /* every decoder prints through the file-level 'out' stream */
+
+ while (index < count) {
+ switch ((data[index] & 0xe0000000) >> 29) { /* bits 31:29 pick the decoder */
+ case 0x0:
+ index += decode_mi(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ break;
+ case 0x2:
+ index += decode_2d(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ break;
+ case 0x3:
+ if (IS_965(devid)) {
+ index += decode_3d_965(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ } else if (IS_9XX(devid)) {
+ index += decode_3d(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ } else {
+ index += decode_3d_i830(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ }
+ break;
+ default:
+ instr_out(data, hw_offset, index, "UNKNOWN\n");
+ failures++;
+ index++; /* skip a single dword and try to resynchronise */
+ break;
+ }
+ fflush(out);
+ }
+
+ return failures;
+}
+
+/**
+ * Forgets the saved S2/S4 state.
+ *
+ * decode_3d_primitive() only interprets inline vertices when both
+ * saved_s2_set and saved_s4_set are non-zero, so both flags are cleared
+ * here; after a reset neither saved value has been captured yet.
+ */
+void intel_decode_context_reset(void)
+{
+    saved_s2_set = 0;
+    saved_s4_set = 0;
+}
+
diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h
new file mode 100644
index 0000000000..7683097b86
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_decode.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+void intel_decode_context_reset(void);
diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h
new file mode 100644
index 0000000000..522e3bd92c
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+struct br0 { /* bitfields total 32 bits; first member of the xy_*_blit packets that embed it */
+ GLuint length:8;
+ GLuint pad0:3;
+ GLuint dst_tiled:1;
+ GLuint pad1:8;
+ GLuint write_rgb:1;
+ GLuint write_alpha:1;
+ GLuint opcode:7; /* presumably one of the OPCODE_XY_* values in this header -- confirm */
+ GLuint client:3; /* presumably CLIENT_2D for blits -- confirm */
+};
+
+
+struct br13 { /* bitfields total 32 bits; second member of the xy_*_blit packets that embed it */
+ GLint dest_pitch:16; /* the only signed field in this word */
+ GLuint rop:8;
+ GLuint color_depth:2;
+ GLuint pad1:3;
+ GLuint mono_source_transparency:1;
+ GLuint clipping_enable:1;
+ GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.
+ */
+struct xy_color_blit { /* br0, br13, two packed coordinate words, then destination address and color */
+ struct br0 br0;
+ struct br13 br13;
+
+ struct { /* two 16-bit destination coordinates (x1, y1) */
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw2;
+
+ struct { /* two 16-bit destination coordinates (x2, y2) */
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+ GLuint color;
+};
+
+struct xy_src_copy_blit { /* same leading layout as xy_color_blit, followed by source position, pitch and address */
+ struct br0 br0;
+ struct br13 br13;
+
+ struct { /* two 16-bit destination coordinates (x1, y1) */
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw2;
+
+ struct { /* two 16-bit destination coordinates (x2, y2) */
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+
+ struct { /* two 16-bit source coordinates (x1, y1) */
+ GLuint src_x1:16;
+ GLuint src_y1:16;
+ } dw5;
+
+ struct { /* signed 16-bit source pitch; upper 16 bits are padding */
+ GLint src_pitch:16;
+ GLuint pad:16;
+ } dw6;
+
+ GLuint src_base_addr;
+};
+
+struct xy_setup_blit { /* br0, br13, a clip rectangle, then destination address, two colors and a pattern address */
+ struct br0 br0;
+ struct br13 br13;
+
+ struct { /* two 16-bit clip coordinates (x1, y1) */
+ GLuint clip_x1:16;
+ GLuint clip_y1:16;
+ } dw2;
+
+ struct { /* two 16-bit clip coordinates (x2, y2) */
+ GLuint clip_x2:16;
+ GLuint clip_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+ GLuint background_color;
+ GLuint foreground_color;
+ GLuint pattern_base_addr;
+};
+
+
+struct xy_text_immediate_blit { /* three fixed words; unlike the other packets it does not embed br0/br13 */
+ struct { /* header bitfields, 32 bits in total */
+ GLuint length:8;
+ GLuint pad2:3;
+ GLuint dst_tiled:1;
+ GLuint pad1:4;
+ GLuint byte_packed:1;
+ GLuint pad0:5;
+ GLuint opcode:7; /* presumably OPCODE_XY_TEXT_IMMEDIATE_BLT -- confirm */
+ GLuint client:3;
+ } dw0;
+
+ struct { /* two 16-bit destination coordinates (x1, y1) */
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw1;
+
+ struct { /* two 16-bit destination coordinates (x2, y2) */
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw2;
+
+ /* Src bitmap data follows as inline dwords.
+ */
+};
+
+
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index bedab56f59..9f5b4e6323 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -46,17 +46,6 @@ identity_destroy(struct pipe_context *_pipe)
}
static void
-identity_set_edgeflags(struct pipe_context *_pipe,
- const unsigned *bitfield)
-{
- struct identity_context *id_pipe = identity_context(_pipe);
- struct pipe_context *pipe = id_pipe->pipe;
-
- pipe->set_edgeflags(pipe,
- bitfield);
-}
-
-static boolean
identity_draw_arrays(struct pipe_context *_pipe,
unsigned prim,
unsigned start,
@@ -65,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe,
struct identity_context *id_pipe = identity_context(_pipe);
struct pipe_context *pipe = id_pipe->pipe;
- return pipe->draw_arrays(pipe,
- prim,
- start,
- count);
+ pipe->draw_arrays(pipe,
+ prim,
+ start,
+ count);
}
-static boolean
+static void
identity_draw_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -84,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe,
struct pipe_context *pipe = id_pipe->pipe;
struct pipe_buffer *indexBuffer = id_buffer->buffer;
- return pipe->draw_elements(pipe,
- indexBuffer,
- indexSize,
- prim,
- start,
- count);
+ pipe->draw_elements(pipe,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count);
}
-static boolean
+static void
identity_draw_range_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -107,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe,
struct pipe_context *pipe = id_pipe->pipe;
struct pipe_buffer *indexBuffer = id_buffer->buffer;
- return pipe->draw_range_elements(pipe,
- indexBuffer,
- indexSize,
- minIndex,
- maxIndex,
- mode,
- start,
- count);
+ pipe->draw_range_elements(pipe,
+ indexBuffer,
+ indexSize,
+ minIndex,
+ maxIndex,
+ mode,
+ start,
+ count);
}
static struct pipe_query *
@@ -707,7 +696,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.draw = NULL;
id_pipe->base.destroy = identity_destroy;
- id_pipe->base.set_edgeflags = identity_set_edgeflags;
id_pipe->base.draw_arrays = identity_draw_arrays;
id_pipe->base.draw_elements = identity_draw_elements;
id_pipe->base.draw_range_elements = identity_draw_range_elements;
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 3bd2e70013..3ca676647c 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'):
env.Tool('udis86')
+env.Append(CPPPATH = ['.'])
+
env.CodeGenerate(
target = 'lp_tile_soa.c',
script = 'lp_tile_soa.py',
@@ -74,21 +76,19 @@ llvmpipe = env.ConvenienceLibrary(
env = env.Clone()
-env.Prepend(LIBS = [llvmpipe] + auxiliaries)
+env.Prepend(LIBS = [llvmpipe] + gallium)
-env.Program(
- target = 'lp_test_format',
- source = ['lp_test_format.c', 'lp_test_main.c'],
-)
+tests = [
+ 'format',
+ 'blend',
+ 'conv',
+]
-env.Program(
- target = 'lp_test_blend',
- source = ['lp_test_blend.c', 'lp_test_main.c'],
-)
-
-env.Program(
- target = 'lp_test_conv',
- source = ['lp_test_conv.c', 'lp_test_main.c'],
-)
+for test in tests:
+ target = env.Program(
+ target = 'lp_test_' + test,
+ source = ['lp_test_' + test + '.c', 'lp_test_main.c'],
+ )
+ env.InstallProgram(target)
Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index d14f468ba9..ced7b9c11d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
enum lp_build_blend_swizzle {
LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
- LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
+ LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index dcc25fbff8..25c10af29f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -47,7 +47,7 @@
*/
enum lp_build_flow_construct_kind {
lP_BUILD_FLOW_SCOPE,
- LP_BUILD_FLOW_SKIP,
+ LP_BUILD_FLOW_SKIP
};
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index 5836e0173f..10e82f120b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -130,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
- // UIToFP can't be expressed in SSE2
+ /* UIToFP can't be expressed in SSE2 */
casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
if (normalized)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index a67c70ff25..61b033c9fc 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -321,7 +321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
{
const uint unit = inst->Src[1].Register.Index;
LLVMValueRef lodbias;
- LLVMValueRef oow;
+ LLVMValueRef oow = NULL;
LLVMValueRef coords[3];
unsigned num_coords;
unsigned i;
@@ -446,7 +446,12 @@ emit_instruction(
{
unsigned chan_index;
LLVMValueRef src0, src1, src2;
- LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ LLVMValueRef tmp0, tmp1, tmp2;
+ LLVMValueRef tmp3 = NULL;
+ LLVMValueRef tmp4 = NULL;
+ LLVMValueRef tmp5 = NULL;
+ LLVMValueRef tmp6 = NULL;
+ LLVMValueRef tmp7 = NULL;
LLVMValueRef res;
LLVMValueRef dst0[NUM_CHANNELS];
@@ -1310,7 +1315,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
/* deprecated? */
assert(0);
return 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 679e244274..37587d4f79 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -140,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
unsigned i;
+ /* check if any of the bound drawing surfaces are this texture */
if(llvmpipe->dirty_render_cache) {
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
if(llvmpipe->framebuffer.cbufs[i] &&
@@ -150,6 +151,13 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
llvmpipe->framebuffer.zsbuf->texture == texture)
return PIPE_REFERENCED_FOR_WRITE;
}
+
+ /* check if any of the tex_cache textures are this texture */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ if (llvmpipe->tex_cache[i] &&
+ llvmpipe->tex_cache[i]->texture == texture)
+ return PIPE_REFERENCED_FOR_READ;
+ }
for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
if (llvmpipe->vertex_tex_cache[i] &&
llvmpipe->vertex_tex_cache[i]->texture == texture)
@@ -226,8 +234,6 @@ llvmpipe_create( struct pipe_screen *screen )
llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements;
- llvmpipe->pipe.set_edgeflags = llvmpipe_set_edgeflags;
-
llvmpipe->pipe.clear = llvmpipe_clear;
llvmpipe->pipe.flush = llvmpipe_flush;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 0aa13a1fc6..c152b4413f 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -45,11 +45,11 @@
-boolean
+void
llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+ llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
}
@@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
* Basically, map the vertex buffers (and drawing surfaces), then hand off
* the drawing to the 'draw' module.
*/
-boolean
+void
llvmpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_arrays(draw, mode, start, count);
/*
- * unmap vertex/index buffers - will cause draw module to flush
+ * unmap vertex/index buffers
*/
for (i = 0; i < lp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
@@ -112,31 +112,28 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
+ /*
+ * TODO: Flush only when a user vertex/index buffer is present
+ * (or even better, modify draw module to do this
+ * internally when this condition is seen?)
+ */
+ draw_flush(draw);
/* Note: leave drawing surfaces mapped */
lp->dirty_render_cache = TRUE;
-
- return TRUE;
}
-boolean
+void
llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return llvmpipe_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
+ llvmpipe_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
-
-void
-llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
- struct llvmpipe_context *lp = llvmpipe_context(pipe);
- draw_set_edgeflags(lp->draw, edgeflags);
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
index 4abff4eccc..e8e2e2524a 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
@@ -128,6 +128,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr,
{
struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+ (void) cvbr;
/* do nothing */
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index d1c74ab07b..3e482cb904 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -197,14 +197,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp);
void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe );
-boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count);
-boolean llvmpipe_draw_elements(struct pipe_context *pipe,
+void llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count);
-boolean
+void
llvmpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -213,10 +213,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count);
void
-llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
-
-
-void
llvmpipe_map_transfers(struct llvmpipe_context *lp);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index b2e75d3b14..a94cd05ef2 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -34,6 +34,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_debug_dump.h"
+#include "draw/draw_context.h"
#include "lp_screen.h"
#include "lp_context.h"
#include "lp_state.h"
@@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe,
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ if (llvmpipe->blend == blend)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
llvmpipe->blend = blend;
llvmpipe->dirty |= LP_NEW_BLEND;
@@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe,
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
unsigned i, j;
+ if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color);
if(!llvmpipe->jit_context.blend_color)
@@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe,
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+ if (llvmpipe->depth_stencil == depth_stencil)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->depth_stencil = depth_stencil;
if(llvmpipe->depth_stencil)
llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index e703964aaa..acfd7be5f7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
/* compute vertex layout now */
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
- const uint num = draw_num_vs_outputs(llvmpipe->draw);
+ const uint num = draw_current_shader_outputs(llvmpipe->draw);
uint i;
/* Tell draw_vbuf to simply emit the whole post-xform vertex
@@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
}
/* this includes texcoords and varying vars */
- src = draw_find_vs_output(llvmpipe->draw,
+ src = draw_find_shader_output(llvmpipe->draw,
lpfs->info.input_semantic_name[i],
lpfs->info.input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
}
- llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw,
+ llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw,
TGSI_SEMANTIC_PSIZE, 0);
if (llvmpipe->psize_slot > 0) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 22683ff8b4..f2b8c36264 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -673,7 +673,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- llvmpipe->fs = (struct lp_fragment_shader *) fs;
+ if (llvmpipe->fs == fs)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->fs = fs;
llvmpipe->dirty |= LP_NEW_FS;
}
@@ -688,6 +693,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
struct lp_fragment_shader_variant *variant;
assert(fs != llvmpipe->fs);
+ (void) llvmpipe;
variant = shader->variants;
while(variant) {
@@ -723,8 +729,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
- if(shader == PIPE_SHADER_VERTEX)
- draw_flush(llvmpipe->draw);
+ draw_flush(llvmpipe->draw);
/* note: reference counting */
pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
@@ -734,7 +739,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
}
if(shader == PIPE_SHADER_VERTEX) {
- draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+ draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX,
+ data, size);
}
llvmpipe->dirty |= LP_NEW_CONSTANTS;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
index 4561c6b845..aa3b5a3f91 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
}
void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe,
- void *setup)
+ void *rasterizer)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ if (llvmpipe->rasterizer == rasterizer)
+ return;
+
/* pass-through to draw module */
- draw_set_rasterizer_state(llvmpipe->draw, setup);
+ draw_set_rasterizer_state(llvmpipe->draw, rasterizer);
- llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+ llvmpipe->rasterizer = rasterizer;
llvmpipe->dirty |= LP_NEW_RASTERIZER;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index ba970cac98..e37ff04f3d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -51,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
struct llvmpipe_context *lp = llvmpipe_context(pipe);
uint i;
+ draw_flush(lp->draw);
+
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
/* check if changing cbuf */
if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 8a761648e7..884e3878e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -70,14 +70,18 @@ fail:
void
-llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs;
- llvmpipe->vs = (const struct lp_vertex_shader *)vs;
+ if (llvmpipe->vs == vs)
+ return;
- draw_bind_vertex_shader(llvmpipe->draw,
- (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL));
+ draw_bind_vertex_shader(llvmpipe->draw,
+ vs ? vs->draw_data : NULL);
+
+ llvmpipe->vs = vs;
llvmpipe->dirty |= LP_NEW_VS;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 968c7a2d4a..faddfb9677 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -330,7 +330,7 @@ test_one(unsigned verbose,
fprintf(stderr, "conv.bc written\n");
fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
firsttime = FALSE;
- //abort();
+ /* abort(); */
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
index 9fa6c36812..05fded78e1 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
@@ -115,7 +115,7 @@ extern const struct llvmpipe_cached_tex_tile *
lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
union tex_tile_address addr );
-static INLINE const union tex_tile_address
+static INLINE union tex_tile_address
tex_tile_address( unsigned x,
unsigned y,
unsigned z,
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index 0d01c07fb5..68520fa4f0 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1085,7 +1085,7 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
const struct pipe_sampler_state *sampler = samp->sampler;
unsigned level0, level1, j, imgFilter;
int width, height;
- float levelBlend;
+ float levelBlend = 0.0f;
choose_mipmap_levels(tgsi_sampler, s, t, p,
lodbias,
@@ -1241,7 +1241,7 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
/* get/map pipe_surfaces corresponding to 3D tex slices */
unsigned level0, level1, j, imgFilter;
int width, height, depth;
- float levelBlend;
+ float levelBlend = 0.0f;
const uint face = 0;
choose_mipmap_levels(tgsi_sampler, s, t, p,
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 040b01865d..19d00b58d3 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -29,7 +29,7 @@
#define LP_TILE_SOA_H
#include "pipe/p_compiler.h"
-#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS
+#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */
#ifdef __cplusplus
diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h
index 595481c2cb..74b472b653 100644
--- a/src/gallium/drivers/llvmpipe/lp_winsys.h
+++ b/src/gallium/drivers/llvmpipe/lp_winsys.h
@@ -35,7 +35,7 @@
#define LP_WINSYS_H
-#include "pipe/p_compiler.h" // for boolean
+#include "pipe/p_compiler.h" /* for boolean */
#include "pipe/p_format.h"
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index e4cf91c005..0437af3725 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo,
unsigned alignment, unsigned usage, unsigned size)
{
struct pipe_buffer *pb;
-
+
pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *));
if (!pb) {
nouveau_bo_ref(NULL, &bo);
@@ -239,5 +239,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
void
nouveau_screen_fini(struct nouveau_screen *screen)
{
+ nouveau_channel_free(&screen->channel);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 42c77e5e77..4c3e08a43f 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -23,6 +23,9 @@
#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17)
#define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18)
+/* use along with GPU_WRITE for 2D-only writes */
+#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19)
+
extern struct pipe_screen *
nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
index 10d984ace9..770733a4a1 100644
--- a/src/gallium/drivers/nv04/nv04_context.c
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -27,35 +27,30 @@ nv04_destroy(struct pipe_context *pipe)
FREE(nv04);
}
-static void
-nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
static boolean
nv04_init_hwctx(struct nv04_context *nv04)
{
// requires a valid handle
-// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1);
+// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1);
// OUT_RING(0);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1);
OUT_RING(0);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
OUT_RING(0x40182800);
// OUT_RING(1<<20/*no cull*/);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
// OUT_RING(0x24|(1<<6)|(1<<8));
OUT_RING(0x120001a4);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1);
OUT_RING(0x332213a1);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1);
OUT_RING(0x11001010);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1);
OUT_RING(0x0);
-// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1);
+// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1);
// OUT_RING(SCREEN_OFFSET);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1);
OUT_RING(0xff000000);
@@ -83,7 +78,6 @@ nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv04->pipe.winsys = ws;
nv04->pipe.screen = pscreen;
nv04->pipe.destroy = nv04_destroy;
- nv04->pipe.set_edgeflags = nv04_set_edgeflags;
nv04->pipe.draw_arrays = nv04_draw_arrays;
nv04->pipe.draw_elements = nv04_draw_elements;
nv04->pipe.clear = nv04_clear;
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
index 55326c787a..5951115293 100644
--- a/src/gallium/drivers/nv04/nv04_context.h
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -141,9 +141,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04);
extern void nv04_state_tex_update(struct nv04_context *nv04);
/* nv04_vbo.c */
-extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv04_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv04_draw_elements( struct pipe_context *pipe,
+extern void nv04_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
index 0cce71ad1d..c152b52119 100644
--- a/src/gallium/drivers/nv04/nv04_fragtex.c
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -4,7 +4,7 @@
#define _(m,tf) \
{ \
PIPE_FORMAT_##m, \
- NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
+ NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
}
struct nv04_texture_format {
@@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit)
return;
}
- nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
+ nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
| nv04_fragtex_format(pt->format)
- | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
- | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
- | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
+ | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
+ | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+ | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
;
}
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
index f6458232ae..25395edfd7 100644
--- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -93,7 +93,7 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render,
static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
+ BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
@@ -105,7 +105,7 @@ static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buf
static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
+ BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
@@ -114,7 +114,7 @@ static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buff
static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
+ BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
@@ -166,11 +166,11 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
if (numvert<3)
break;
- BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+ BEGIN_RING( fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
for(j = 0; j<numvert; j++)
OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 );
- BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 );
+ BEGIN_RING_NI( fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 );
for(j = 0; j<numtri/2; j++ )
OUT_RING(striptbl[j]);
if (numtri%2)
@@ -185,7 +185,7 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
struct nv04_context* nv04 = render->nv04;
int i,j;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
for(i = 1; i<nr_indices; i+=14)
@@ -195,12 +195,12 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
if (numvert < 3)
break;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
for(j=0;j<numvert;j++)
OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
- BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2);
+ BEGIN_RING_NI(fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2);
for(j = 0; j<numtri/2; j++)
OUT_RING(fantbl[j]);
if (numtri%2)
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
index 170ce3eb7e..7c5b6e8229 100644
--- a/src/gallium/drivers/nv04/nv04_screen.c
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen)
nouveau_grobj_free(&screen->fahrenheit);
nv04_surface_2d_takedown(&screen->eng2d);
+ nouveau_screen_fini(&screen->base);
+
FREE(pscreen);
}
@@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
fahrenheit_class = 0;
sub3d_class = 0;
} else if (dev->chipset >= 0x10) {
- fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE;
+ fahrenheit_class = NV10_TEXTURED_TRIANGLE;
sub3d_class = NV10_CONTEXT_SURFACES_3D;
} else {
- fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE;
+ fahrenheit_class=NV04_TEXTURED_TRIANGLE;
sub3d_class = NV04_CONTEXT_SURFACES_3D;
}
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
index ef3005db5f..e3dc4c5bf4 100644
--- a/src/gallium/drivers/nv04/nv04_state.c
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) {
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
break;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
break;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
break;
case PIPE_TEX_WRAP_CLAMP:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_CLAMP:
default:
NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
}
- return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
+ return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
}
static void *
@@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe,
ss = MALLOC(sizeof(struct nv04_sampler_state));
- ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
- (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
+ ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
if (cso->max_anisotropy > 1.0) {
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
}
switch (cso->mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
break;
}
@@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
case PIPE_TEX_FILTER_LINEAR:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
break;
}
break;
@@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
default:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
break;
}
break;
@@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe,
*/
rs = MALLOC(sizeof(struct nv04_rasterizer_state));
- rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+ rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
return (void *)rs;
}
@@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe,
hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state));
hw->control = float_to_ubyte(cso->alpha.ref_value);
- hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
- hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0;
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
- hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0;
- hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
- hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
- hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0;
- hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
+ hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
+ hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0;
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+ hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0;
+ hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
+ hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
+ hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0;
+ hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
return (void *)hw;
}
@@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe,
/* struct nv04_context *nv04 = nv04_context(pipe);
// XXX
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
}
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
index eb2c1c57c6..bd98ae091f 100644
--- a/src/gallium/drivers/nv04/nv04_state_emit.c
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -58,7 +58,7 @@ static void nv04_emit_control(struct nv04_context* nv04)
{
uint32_t control = nv04->dsa->control;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
OUT_RING(control);
}
@@ -75,7 +75,7 @@ static void nv04_emit_blend(struct nv04_context* nv04)
blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
OUT_RING(blend);
}
@@ -84,7 +84,7 @@ static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
struct pipe_texture *pt = &nv04mt->base;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3);
OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
OUT_RING(nv04->sampler[unit]->filter);
@@ -163,7 +163,7 @@ nv04_emit_hw_state(struct nv04_context *nv04)
if (nv04->dirty & NV04_NEW_CONTROL) {
nv04->dirty &= ~NV04_NEW_CONTROL;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
OUT_RING(nv04->dsa->control);
}
@@ -218,7 +218,7 @@ nv04_emit_hw_state(struct nv04_context *nv04)
if (!(nv04->fp_samplers & (1 << i)))
continue;
struct nv04_miptree *nv04mt = nv04->tex_miptree[i];
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2);
OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
}
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 12df7fd199..b24a9cee5a 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -77,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format)
}
static INLINE unsigned
-nv04_swizzle_bits(unsigned x, unsigned y)
+nv04_swizzle_bits_square(unsigned x, unsigned y)
{
unsigned u = (x & 0x001) << 0 |
(x & 0x002) << 1 |
@@ -107,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y)
return v | u;
}
+/* rectangular swizzled textures are linear concatenations of swizzled square tiles */
+static INLINE unsigned
+nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ unsigned s = MIN2(w, h);
+ unsigned m = s - 1;
+ return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
+}
+
static int
nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
struct pipe_surface *dst, int dx, int dy,
@@ -158,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
for (x = 0; x < w; x += sub_w) {
sub_w = MIN2(sub_w, w - x);
- /* Must be 64-byte aligned */
- assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63));
+ assert(!(dst->offset & 63));
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format),
+ OUT_RELOCl(chan, dst_bo, dst->offset,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
OUT_RING (chan, nv04_scaled_image_format(src->format));
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
- OUT_RING (chan, 0);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
- OUT_RING (chan, 0);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
OUT_RING (chan, 1 << 20);
OUT_RING (chan, 1 << 20);
@@ -491,3 +499,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen)
ctx->fill = nv04_surface_fill;
return ctx;
}
+/*
+ * Wrap surface ns in a newly created temporary texture surface and return
+ * that temporary; the temporary records ns in its ->backing field.
+ *
+ * pscreen is used to create the temporary texture and to get its surface,
+ * eng2d performs the copy at the end.  ns->base.usage is rewritten as a
+ * side effect.
+ *
+ * NOTE(review): the results of texture_create() and get_tex_surface() are
+ * dereferenced without a NULL check, and the local temp_tex reference is
+ * not released in this function -- confirm the intended ownership.
+ */
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns)
+{
+ int temp_flags;
+
+ // printf("creating temp, flags is %i!\n", flags);
+ /* temp_flags becomes the usage passed for the temporary surface, while
+  * ns->base.usage is replaced by a value that carries
+  * NOUVEAU_BUFFER_USAGE_NO_RENDER. */
+ if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD)
+ {
+ temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD;
+ }
+ else
+ {
+ temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ;
+ }
+ /* NOTE(review): screen is never used below (and is cast to an nv40 type
+  * in nv04 code), and the next assignment overwrites the usage chosen in
+  * both branches above, which also makes the GPU_READ test before the copy
+  * always true -- confirm this is intended. */
+ struct nv40_screen* screen = (struct nv40_screen*)pscreen;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+ /* describe a single-level 2D texture with the format and size of ns */
+ struct pipe_texture templ;
+ memset(&templ, 0, sizeof(templ));
+ templ.format = ns->base.texture->format;
+ templ.target = PIPE_TEXTURE_2D;
+ templ.width0 = ns->base.width;
+ templ.height0 = ns->base.height;
+ templ.depth0 = 1;
+ templ.last_level = 0;
+
+ // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented
+ templ.nr_samples = ns->base.texture->nr_samples;
+
+ templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+ /* create the temporary texture and take its face 0 / level 0 / zslice 0 surface */
+ struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ);
+ struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags);
+ temp_ns->backing = ns;
+ /* NOTE(review): temp_ns->backing was just set to ns, so the destination
+  * and the source of this copy are the same surface; the destination was
+  * presumably meant to be &temp_ns->base -- confirm. */
+ if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ)
+ eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height);
+
+ return temp_ns;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h
index 02b3f56ba8..ce696a11a3 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.h
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.h
@@ -4,6 +4,7 @@
struct nv04_surface {
struct pipe_surface base;
unsigned pitch;
+ struct nv04_surface* backing; /* surface wrapped by this one; set by nv04_surface_wrap_for_render() */
};
struct nv04_surface_2d {
@@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen);
void
nv04_surface_2d_takedown(struct nv04_surface_2d **);
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns);
+
#endif
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index 8446073ae8..2dd2e146a8 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -16,14 +16,14 @@ struct nv04_transfer {
};
static void
-nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv04_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -130,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx)
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
index e3167814f2..3484771814 100644
--- a/src/gallium/drivers/nv04/nv04_vbo.c
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv04_draw_elements( struct pipe_context *pipe,
+void nv04_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
nv04->constbuf[PIPE_SHADER_VERTEX],
nv04->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
pipe_buffer_unmap(pscreen, indexBuffer);
draw_set_mapped_element_buffer(draw, 0, NULL);
}
-
- return TRUE;
}
-boolean nv04_draw_arrays( struct pipe_context *pipe,
- unsigned prim, unsigned start, unsigned count)
+void nv04_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
{
printf("coucou in draw arrays\n");
- return nv04_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv04_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
index 65a22b175e..0dadeb03dd 100644
--- a/src/gallium/drivers/nv10/nv10_context.c
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -252,11 +252,6 @@ static void nv10_init_hwctx(struct nv10_context *nv10)
FIRE_RING (NULL);
}
-static void
-nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
struct pipe_context *
nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -276,7 +271,6 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv10->pipe.winsys = ws;
nv10->pipe.screen = pscreen;
nv10->pipe.destroy = nv10_destroy;
- nv10->pipe.set_edgeflags = nv10_set_edgeflags;
nv10->pipe.draw_arrays = nv10_draw_arrays;
nv10->pipe.draw_elements = nv10_draw_elements;
nv10->pipe.clear = nv10_clear;
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
index 36a6aa7a74..3f829fd106 100644
--- a/src/gallium/drivers/nv10/nv10_context.h
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -144,9 +144,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10);
extern void nv10_state_tex_update(struct nv10_context *nv10);
/* nv10_vbo.c */
-extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv10_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv10_draw_elements( struct pipe_context *pipe,
+extern void nv10_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
index ee5901e743..6a39ddeaac 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->celsius);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index c664973e90..eb04af9782 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -16,14 +16,14 @@ struct nv10_transfer {
};
static void
-nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv10_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -130,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx)
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
index 441a4f75f3..9180c72c9b 100644
--- a/src/gallium/drivers/nv10/nv10_vbo.c
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv10_draw_elements( struct pipe_context *pipe,
+void nv10_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
}
draw_set_mapped_constant_buffer(draw,
+ PIPE_SHADER_VERTEX,
nv10->constbuf[PIPE_SHADER_VERTEX],
nv10->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -64,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
pipe_buffer_unmap(pscreen, indexBuffer);
draw_set_mapped_element_buffer(draw, 0, NULL);
}
-
- return TRUE;
}
-boolean nv10_draw_arrays( struct pipe_context *pipe,
- unsigned prim, unsigned start, unsigned count)
+void nv10_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
{
- return nv10_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv10_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index 276db8b57b..6a147a4159 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -375,11 +375,6 @@ static void nv20_init_hwctx(struct nv20_context *nv20)
FIRE_RING (NULL);
}
-static void
-nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
struct pipe_context *
nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -399,7 +394,6 @@ nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv20->pipe.winsys = ws;
nv20->pipe.screen = pscreen;
nv20->pipe.destroy = nv20_destroy;
- nv20->pipe.set_edgeflags = nv20_set_edgeflags;
nv20->pipe.draw_arrays = nv20_draw_arrays;
nv20->pipe.draw_elements = nv20_draw_elements;
nv20->pipe.clear = nv20_clear;
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
index a4eaa95660..c88a1bd9bd 100644
--- a/src/gallium/drivers/nv20/nv20_context.h
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -143,9 +143,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20);
extern void nv20_state_tex_update(struct nv20_context *nv20);
/* nv20_vbo.c */
-extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv20_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv20_draw_elements( struct pipe_context *pipe,
+extern void nv20_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index d1291a92e0..8f7538e7f5 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -6,6 +6,7 @@
#include "nv20_context.h"
#include "nv20_screen.h"
+#include "../nv04/nv04_surface_2d.h"
static void
nv20_miptree_layout(struct nv20_miptree *nv20mt)
@@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv20_miptree_layout(mt);
mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size);
@@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
ns->base.offset = nv20mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv20_miptree_surface_destroy(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing) /* ps is a temporary made by nv04_surface_wrap_for_render() */
+ {
+ struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) /* copy the temporary (ps) back into the backing surface */
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv20_miptree_surface_destroy(&ns->backing->base); /* the backing surface is destroyed together with its wrapper */
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
index 4eeacd1afd..a0973f1ebd 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->kelvin);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
index 0122b1c2cd..63cba1f412 100644
--- a/src/gallium/drivers/nv20/nv20_state_emit.c
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -228,7 +228,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
}
/* always do position */ {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo->hwfmt[0] |= (1 << 0);
}
@@ -237,19 +237,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 4; i < 6; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i - 3));
}
if (colors[0]) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
vinfo->hwfmt[0] |= (1 << 3);
}
if (colors[1]) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
vinfo->hwfmt[0] |= (1 << 4);
}
@@ -258,7 +258,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 6; i < 10; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i - 1));
}
@@ -267,7 +267,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 0; i < 4; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i + 9));
}
@@ -276,13 +276,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 10; i < 12; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i + 3));
}
if (fog) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << 15);
}
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 69b79c809f..699773e8e6 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -16,14 +16,14 @@ struct nv20_transfer {
};
static void
-nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv20_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -126,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
index 84d7db6c5e..52991a0d85 100644
--- a/src/gallium/drivers/nv20/nv20_vbo.c
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv20_draw_elements( struct pipe_context *pipe,
+void nv20_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
nv20->constbuf[PIPE_SHADER_VERTEX],
nv20->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
}
draw_flush(nv20->draw);
- return TRUE;
}
-boolean nv20_draw_arrays( struct pipe_context *pipe,
+void nv20_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return nv20_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv20_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index 9e8aab9754..7886c2af7e 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -530,6 +530,9 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
return FALSE;
}
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ NOUVEAU_ERR("cannot handle edgeflag output\n");
+ return FALSE;
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index 46a821a48b..38b39159f1 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -25,6 +25,12 @@ static void
nv30_destroy(struct pipe_context *pipe)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (nv30->state.hw[i])
+ so_ref(NULL, &nv30->state.hw[i]);
+ }
if (nv30->draw)
draw_destroy(nv30->draw);
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 6f44b1c7fe..e175dfa0c4 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -144,7 +144,6 @@ struct nv30_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- const unsigned *edgeflags;
};
static INLINE struct nv30_context *
@@ -199,9 +198,9 @@ extern struct nv30_state_entry nv30_state_fragtex;
extern struct nv30_state_entry nv30_state_vbo;
/* nv30_vbo.c */
-extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv30_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv30_draw_elements(struct pipe_context *pipe,
+extern void nv30_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 40965a9772..d1ff18e2df 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
- tmp = temp(fpc);
- arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp = nv30_sr(NV30SR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV30_VP_INST_COND_GE;
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV30_VP_INST_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
@@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
break;
case TGSI_OPCODE_SCS:
- if (mask & MASK_X) {
- arith(fpc, sat, COS, dst, MASK_X,
- swz(src[0], X, X, X, X), none, none);
+ /* avoid overwriting the source */
+ if(src[0].swz[SWZ_X] != SWZ_X)
+ {
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
- if (mask & MASK_Y) {
- arith(fpc, sat, SIN, dst, MASK_Y,
- swz(src[0], X, X, X, X), none, none);
+ else
+ {
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
break;
case TGSI_OPCODE_SIN:
@@ -870,6 +886,12 @@ void
nv30_fragprog_destroy(struct nv30_context *nv30,
struct nv30_fragment_program *fp)
{
+ if (fp->buffer)
+ pipe_buffer_reference(&fp->buffer, NULL);
+
+ if (fp->so)
+ so_ref(NULL, &fp->so);
+
if (fp->insn_len)
FREE(fp->insn);
}
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index ce95d9700f..8fbba38e78 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -5,6 +5,7 @@
#include "util/u_math.h"
#include "nv30_context.h"
+#include "../nv04/nv04_surface_2d.h"
static void
nv30_miptree_layout(struct nv30_miptree *nv30mt)
@@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv30_miptree_layout(mt);
mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage,
@@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ns->base.offset = nv30mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv30_miptree_surface_del(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing) /* ps is a temporary made by nv04_surface_wrap_for_render() */
+ {
+ struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) /* copy the temporary (ps) back into the backing surface */
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv30_miptree_surface_del(&ns->backing->base); /* the backing surface is destroyed together with its wrapper */
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 7cd36902eb..760467f736 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -156,6 +156,12 @@ static void
nv30_screen_destroy(struct pipe_screen *pscreen)
{
struct nv30_screen *screen = nv30_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (screen->state[i])
+ so_ref(NULL, &screen->state[i]);
+ }
nouveau_resource_free(&screen->vp_exec_heap);
nouveau_resource_free(&screen->vp_data_heap);
@@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->rankine);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index 3f802d9241..e6321b480f 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -672,16 +672,6 @@ nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
}
-static void
-nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
- struct nv30_context *nv30 = nv30_context(pipe);
-
- nv30->edgeflags = bitfield;
- nv30->dirty |= NV30_NEW_ARRAYS;
- /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
-}
-
void
nv30_init_state_functions(struct nv30_context *nv30)
{
@@ -721,7 +711,6 @@ nv30_init_state_functions(struct nv30_context *nv30)
nv30->pipe.set_scissor_state = nv30_set_scissor_state;
nv30->pipe.set_viewport_state = nv30_set_viewport_state;
- nv30->pipe.set_edgeflags = nv30_set_edgeflags;
nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 2255a02cae..65598991c6 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -16,14 +16,14 @@ struct nv30_transfer {
};
static void
-nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv30_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -126,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index 189656ec81..0e620b67a2 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -163,7 +163,7 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
return TRUE;
}
-boolean
+void
nv30_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
@@ -175,7 +175,7 @@ nv30_draw_arrays(struct pipe_context *pipe,
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
- return FALSE;
+ return;
}
while (count) {
@@ -362,7 +362,7 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
}
}
-static boolean
+static void
nv30_draw_elements_inline(struct pipe_context *pipe,
struct pipe_buffer *ib, unsigned ib_size,
unsigned mode, unsigned start, unsigned count)
@@ -393,10 +393,9 @@ nv30_draw_elements_inline(struct pipe_context *pipe,
}
pipe_buffer_unmap(pscreen, ib);
- return TRUE;
}
-static boolean
+static void
nv30_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
@@ -445,11 +444,9 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
count -= vc;
start = restart;
}
-
- return TRUE;
}
-boolean
+void
nv30_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -461,7 +458,7 @@ nv30_draw_elements(struct pipe_context *pipe,
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
- return FALSE;
+ return;
}
if (idxbuf) {
@@ -472,7 +469,6 @@ nv30_draw_elements(struct pipe_context *pipe,
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static boolean
@@ -485,11 +481,6 @@ nv30_vbo_validate(struct nv30_context *nv30)
unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
int hw;
- if (nv30->edgeflags) {
- /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
- return FALSE;
- }
-
vtxbuf = so_new(20, 18);
so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
vtxfmt = so_new(17, 0);
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 36ac8299f0..5d60984622 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -530,6 +530,9 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
return FALSE;
}
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ NOUVEAU_ERR("cannot handle edgeflag output\n");
+ return FALSE;
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index eb9cce4c78..d56c7a6b49 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -25,6 +25,12 @@ static void
nv40_destroy(struct pipe_context *pipe)
{
struct nv40_context *nv40 = nv40_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (nv40->state.hw[i])
+ so_ref(NULL, &nv40->state.hw[i]);
+ }
if (nv40->draw)
draw_destroy(nv40->draw);
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index cf33b64a86..112f017e8a 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -159,7 +159,6 @@ struct nv40_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- const unsigned *edgeflags;
};
static INLINE struct nv40_context *
@@ -184,7 +183,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
/* nv40_draw.c */
extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
-extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+extern void nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf,
unsigned ib_size, unsigned mode,
unsigned start, unsigned count);
@@ -220,9 +219,9 @@ extern struct nv40_state_entry nv40_state_vbo;
extern struct nv40_state_entry nv40_state_vtxfmt;
/* nv40_vbo.c */
-extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv40_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv40_draw_elements(struct pipe_context *pipe,
+extern void nv40_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index b2f19ecb69..e5b9f4a5c8 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -226,7 +226,7 @@ nv40_draw_render_stage(struct nv40_context *nv40)
return &render->stage;
}
-boolean
+void
nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf, unsigned idxbuf_size,
unsigned mode, unsigned start, unsigned count)
@@ -237,7 +237,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
void *map;
if (!nv40_state_validate_swtnl(nv40))
- return FALSE;
+ return;
nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
nv40_state_emit(nv40);
@@ -261,7 +261,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
map = pipe_buffer_map(pscreen,
nv40->constbuf[PIPE_SHADER_VERTEX],
PIPE_BUFFER_USAGE_CPU_READ);
- draw_set_mapped_constant_buffer(nv40->draw, map, nr);
+ draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX,
+ map, nr);
}
draw_arrays(nv40->draw, mode, start, count);
@@ -277,15 +278,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
draw_flush(nv40->draw);
pipe->flush(pipe, 0, NULL);
-
- return TRUE;
}
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
unsigned semantic, unsigned index)
{
- unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
+ unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index);
unsigned a = nv40->swtnl.nr_attribs++;
nv40->swtnl.hw[a] = hw;
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 1bf16726d1..bb9c85cc43 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
sizeof(uint32_t) * 4);
}
- sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+ sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
break;
case NV40SR_NONE:
sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
@@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
- tmp = temp(fpc);
- arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp = nv40_sr(NV40SR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV40_VP_INST_COND_GE;
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV40_VP_INST_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
@@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
neg(swz(tmp, X, X, X, X)), none, none);
break;
case TGSI_OPCODE_SCS:
- if (mask & MASK_X) {
- arith(fpc, sat, COS, dst, MASK_X,
- swz(src[0], X, X, X, X), none, none);
+ /* avoid overwriting the source */
+ if(src[0].swz[SWZ_X] != SWZ_X)
+ {
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
- if (mask & MASK_Y) {
- arith(fpc, sat, SIN, dst, MASK_Y,
- swz(src[0], X, X, X, X), none, none);
+ else
+ {
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
break;
case TGSI_OPCODE_SEQ:
@@ -752,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
{
struct tgsi_full_immediate *imm;
float vals[4];
-
+
imm = &p.FullToken.FullImmediate;
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
@@ -836,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40,
fp->insn[fpc->inst_offset + 1] = 0x00000000;
fp->insn[fpc->inst_offset + 2] = 0x00000000;
fp->insn[fpc->inst_offset + 3] = 0x00000000;
-
+
fp->translated = TRUE;
out_err:
tgsi_parse_free(&parse);
@@ -917,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40)
update_constants:
if (fp->nr_consts) {
float *map;
-
+
map = pipe_buffer_map(pscreen, constbuf,
PIPE_BUFFER_USAGE_CPU_READ);
for (i = 0; i < fp->nr_consts; i++) {
@@ -948,6 +964,12 @@ void
nv40_fragprog_destroy(struct nv40_context *nv40,
struct nv40_fragment_program *fp)
{
+ if (fp->buffer)
+ pipe_buffer_reference(&fp->buffer, NULL);
+
+ if (fp->so)
+ so_ref(NULL, &fp->so);
+
if (fp->insn_len)
FREE(fp->insn);
}
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index b974e68a07..89bd155ff4 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -5,6 +5,7 @@
#include "util/u_math.h"
#include "nv40_context.h"
+#include "../nv04/nv04_surface_2d.h"
@@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv40_miptree_layout(mt);
mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size);
@@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ns->base.offset = mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv40_miptree_surface_del(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv40_miptree_surface_del(&ns->backing->base);
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index bd13dfddd1..d01e712805 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -140,6 +140,12 @@ static void
nv40_screen_destroy(struct pipe_screen *pscreen)
{
struct nv40_screen *screen = nv40_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (screen->state[i])
+ so_ref(NULL, &screen->state[i]);
+ }
nouveau_resource_free(&screen->vp_exec_heap);
nouveau_resource_free(&screen->vp_data_heap);
@@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->curie);
+ nv04_surface_2d_takedown(&screen->eng2d);
nouveau_screen_fini(&screen->base);
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index bc34e32a4b..ed55d29aff 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -687,16 +687,6 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
nv40->draw_dirty |= NV40_NEW_ARRAYS;
}
-static void
-nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
- struct nv40_context *nv40 = nv40_context(pipe);
-
- nv40->edgeflags = bitfield;
- nv40->dirty |= NV40_NEW_ARRAYS;
- nv40->draw_dirty |= NV40_NEW_ARRAYS;
-}
-
void
nv40_init_state_functions(struct nv40_context *nv40)
{
@@ -736,7 +726,6 @@ nv40_init_state_functions(struct nv40_context *nv40)
nv40->pipe.set_scissor_state = nv40_set_scissor_state;
nv40->pipe.set_viewport_state = nv40_set_viewport_state;
- nv40->pipe.set_edgeflags = nv40_set_edgeflags;
nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index ba0fbcb26a..789ed16126 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -168,7 +168,6 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
draw_set_viewport_state(draw, &nv40->viewport);
if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
- draw_set_edgeflags(draw, nv40->edgeflags);
draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);
}
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index b084a38b48..791ee6823d 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -16,14 +16,14 @@ struct nv40_transfer {
};
static void
-nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv40_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -126,13 +128,13 @@ nv40_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index b2753b8e2e..d45de15000 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -164,7 +164,7 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
return TRUE;
}
-boolean
+void
nv40_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
@@ -174,8 +174,9 @@ nv40_draw_arrays(struct pipe_context *pipe,
nv40_vbo_set_idxbuf(nv40, NULL, 0);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
- return nv40_draw_elements_swtnl(pipe, NULL, 0,
- mode, start, count);
+ nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ return;
}
while (count) {
@@ -221,7 +222,6 @@ nv40_draw_arrays(struct pipe_context *pipe,
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static INLINE void
@@ -362,7 +362,7 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
}
}
-static boolean
+static void
nv40_draw_elements_inline(struct pipe_context *pipe,
struct pipe_buffer *ib, unsigned ib_size,
unsigned mode, unsigned start, unsigned count)
@@ -393,10 +393,9 @@ nv40_draw_elements_inline(struct pipe_context *pipe,
}
pipe_buffer_unmap(pscreen, ib);
- return TRUE;
}
-static boolean
+static void
nv40_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
@@ -445,11 +444,9 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
count -= vc;
start = restart;
}
-
- return TRUE;
}
-boolean
+void
nv40_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -459,8 +456,9 @@ nv40_draw_elements(struct pipe_context *pipe,
idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
- return nv40_draw_elements_swtnl(pipe, NULL, 0,
- mode, start, count);
+ nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ return;
}
if (idxbuf) {
@@ -471,7 +469,6 @@ nv40_draw_elements(struct pipe_context *pipe,
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static boolean
@@ -484,11 +481,6 @@ nv40_vbo_validate(struct nv40_context *nv40)
unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
int hw;
- if (nv40->edgeflags) {
- nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
- return FALSE;
- }
-
vtxbuf = so_new(20, 18);
so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
vtxfmt = so_new(17, 0);
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 55835ee644..d9fc31006f 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -621,6 +621,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
return FALSE;
}
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ /* not really an error just a fallback */
+ NOUVEAU_ERR("cannot handle edgeflag output\n");
+ return FALSE;
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 219e7a7862..5997456e4c 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -43,16 +43,44 @@ nv50_destroy(struct pipe_context *pipe)
{
struct nv50_context *nv50 = nv50_context(pipe);
+ if (nv50->state.fb)
+ so_ref(NULL, &nv50->state.fb);
+ if (nv50->state.blend)
+ so_ref(NULL, &nv50->state.blend);
+ if (nv50->state.blend_colour)
+ so_ref(NULL, &nv50->state.blend_colour);
+ if (nv50->state.zsa)
+ so_ref(NULL, &nv50->state.zsa);
+ if (nv50->state.rast)
+ so_ref(NULL, &nv50->state.rast);
+ if (nv50->state.stipple)
+ so_ref(NULL, &nv50->state.stipple);
+ if (nv50->state.scissor)
+ so_ref(NULL, &nv50->state.scissor);
+ if (nv50->state.viewport)
+ so_ref(NULL, &nv50->state.viewport);
+ if (nv50->state.tsc_upload)
+ so_ref(NULL, &nv50->state.tsc_upload);
+ if (nv50->state.tic_upload)
+ so_ref(NULL, &nv50->state.tic_upload);
+ if (nv50->state.vertprog)
+ so_ref(NULL, &nv50->state.vertprog);
+ if (nv50->state.fragprog)
+ so_ref(NULL, &nv50->state.fragprog);
+ if (nv50->state.programs)
+ so_ref(NULL, &nv50->state.programs);
+ if (nv50->state.vtxfmt)
+ so_ref(NULL, &nv50->state.vtxfmt);
+ if (nv50->state.vtxbuf)
+ so_ref(NULL, &nv50->state.vtxbuf);
+ if (nv50->state.vtxattr)
+ so_ref(NULL, &nv50->state.vtxattr);
+
draw_destroy(nv50->draw);
FREE(nv50);
}
-static void
-nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -71,7 +99,6 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv50->pipe.destroy = nv50_destroy;
- nv50->pipe.set_edgeflags = nv50_set_edgeflags;
nv50->pipe.draw_arrays = nv50_draw_arrays;
nv50->pipe.draw_elements = nv50_draw_elements;
nv50->pipe.clear = nv50_clear;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 5578a5838f..cbd4c3ff86 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
/* nv50_vbo.c */
-extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv50_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv50_draw_elements(struct pipe_context *pipe,
+extern void nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index e496cf4cad..2d0b1818ef 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -98,9 +98,17 @@ struct nv50_reg {
#define NV50_MOD_ABS 2
#define NV50_MOD_SAT 4
-/* arbitrary limits */
-#define MAX_IF_DEPTH 4
-#define MAX_LOOP_DEPTH 4
+/* STACK: Conditionals and loops have to use the (per warp) stack.
+ * Stack entries consist of an entry type (divergent path, join at),
+ * a mask indicating the active threads of the warp, and an address.
+ * MPs can store 12 stack entries internally, if we need more (and
+ * we probably do), we have to create a stack buffer in VRAM.
+ */
+/* impose low limits for now */
+#define NV50_MAX_COND_NESTING 4
+#define NV50_MAX_LOOP_NESTING 3
+
+#define JOIN_ON(e) e; pc->p->exec_tail->inst[1] |= 2
struct nv50_pc {
struct nv50_program *p;
@@ -123,6 +131,7 @@ struct nv50_pc {
int immd_nr;
struct nv50_reg **addr;
int addr_nr;
+ uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */
struct nv50_reg *temp_temp[16];
unsigned temp_temp_nr;
@@ -139,33 +148,23 @@ struct nv50_pc {
struct nv50_reg *iv_p;
struct nv50_reg *iv_c;
- struct nv50_program_exec *if_cond;
- struct nv50_program_exec *if_insn[MAX_IF_DEPTH];
- struct nv50_program_exec *br_join[MAX_IF_DEPTH];
- struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */
+ struct nv50_program_exec *if_insn[NV50_MAX_COND_NESTING];
+ struct nv50_program_exec *if_join[NV50_MAX_COND_NESTING];
+ struct nv50_program_exec *loop_brka[NV50_MAX_LOOP_NESTING];
int if_lvl, loop_lvl;
- unsigned loop_pos[MAX_LOOP_DEPTH];
+ unsigned loop_pos[NV50_MAX_LOOP_NESTING];
+
+ unsigned *insn_pos; /* actual program offset of each TGSI insn */
+ boolean in_subroutine;
/* current instruction and total number of insns */
unsigned insn_cur;
unsigned insn_nr;
boolean allow32;
-};
-
-static INLINE struct nv50_reg *
-reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
-{
- struct nv50_reg *ri;
- assert(pc->reg_instance_nr < 16);
- ri = &pc->reg_instances[pc->reg_instance_nr++];
- if (reg) {
- *ri = *reg;
- reg->mod = 0;
- }
- return ri;
-}
+ uint8_t edgeflag_out;
+};
static INLINE void
ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
@@ -193,8 +192,7 @@ terminate_mbb(struct nv50_pc *pc)
/* remove records of temporary address register values */
for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
- if (pc->r_addr[i].index < 0)
- pc->r_addr[i].rhw = -1;
+ pc->r_addr[i].rhw = -1;
}
static void
@@ -246,6 +244,21 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
assert(0);
}
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ struct nv50_reg *ri;
+
+ assert(pc->reg_instance_nr < 16);
+ ri = &pc->reg_instances[pc->reg_instance_nr++];
+ if (reg) {
+ alloc_reg(pc, reg);
+ *ri = *reg;
+ reg->mod = 0;
+ }
+ return ri;
+}
+
/* XXX: For shaders that aren't executed linearly (e.g. shaders that
* contain loops), we need to assign all hw regs to TGSI TEMPs early,
* lest we risk temp_temps overwriting regs alloc'd "later".
@@ -272,22 +285,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
return NULL;
}
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- assert(src->index == -1 && src->hw != -1);
-
- if (dst->hw != -1)
- pc->r_temp[dst->hw] = NULL;
- pc->r_temp[src->hw] = dst;
- dst->hw = src->hw;
-
- FREE(src);
-}
-
/* release the hardware resource held by r */
static void
release_hw(struct nv50_pc *pc, struct nv50_reg *r)
@@ -444,10 +441,19 @@ is_immd(struct nv50_program_exec *e)
return FALSE;
}
+static boolean
+is_join(struct nv50_program_exec *e)
+{
+ if (is_long(e) && (e->inst[1] & 3) == 2)
+ return TRUE;
+ return FALSE;
+}
+
static INLINE void
set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
struct nv50_program_exec *e)
{
+ assert(!is_immd(e));
set_long(pc, e);
e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
e->inst[1] |= (pred << 7) | (idx << 12);
@@ -490,15 +496,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- union {
- float f;
- uint32_t ui;
- } u;
- u.ui = pc->immd_buf[imm->hw];
-
- u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
- u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
-
set_long(pc, e);
/* XXX: can't be predicated - bits overlap; cases where both
* are required should be avoided by using pc->allow32 */
@@ -506,8 +503,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
set_pred_wr(pc, 0, 0, e);
e->inst[1] |= 0x00000002 | 0x00000001;
- e->inst[0] |= (u.ui & 0x3f) << 16;
- e->inst[1] |= (u.ui >> 6) << 2;
+ e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16;
+ e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2;
}
static INLINE void
@@ -539,21 +536,24 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
static struct nv50_reg *
alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
{
- int i;
struct nv50_reg *a_tgsi = NULL, *a = NULL;
+ int i;
+ uint8_t avail = ~pc->addr_alloc;
if (!ref) {
- /* allocate for TGSI address reg */
- for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
- if (pc->r_addr[i].index >= 0)
- continue;
- if (pc->r_addr[i].rhw >= 0 &&
- pc->r_addr[i].acc == pc->insn_cur)
- continue;
+ /* allocate for TGSI_FILE_ADDRESS */
+ while (avail) {
+ i = ffs(avail) - 1;
- pc->r_addr[i].rhw = -1;
- pc->r_addr[i].index = i;
- return &pc->r_addr[i];
+ if (pc->r_addr[i].rhw < 0 ||
+ pc->r_addr[i].acc != pc->insn_cur) {
+ pc->addr_alloc |= (1 << i);
+
+ pc->r_addr[i].rhw = -1;
+ pc->r_addr[i].index = i;
+ return &pc->r_addr[i];
+ }
+ avail &= ~(1 << i);
}
assert(0);
return NULL;
@@ -561,15 +561,16 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
/* Allocate and set an address reg so we can access 'ref'.
*
- * If and r_addr has index < 0, it is not reserved for TGSI,
- * and index will be the negative of the TGSI addr index the
- * value in rhw is relative to, or -256 if rhw is an offset
- * from 0. If rhw < 0, the reg has not been initialized.
+ * r_addr->index will be -1 or the hw index that the value
+ * in rhw is relative to. If rhw < 0, the reg has not
+ * been initialized or is in use for TGSI_FILE_ADDRESS.
*/
- for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) {
- if (pc->r_addr[i].index >= 0) /* occupied for TGSI */
- continue;
- if (pc->r_addr[i].rhw < 0) { /* unused */
+ while (avail) { /* only consider regs that are not TGSI */
+ i = ffs(avail) - 1;
+ avail &= ~(1 << i);
+
+ if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) {
+ /* prefer an unused reg with low hw index */
a = &pc->r_addr[i];
continue;
}
@@ -579,8 +580,8 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
if (ref->hw - pc->r_addr[i].rhw >= 128)
continue;
- if ((ref->acc >= 0 && pc->r_addr[i].index == -256) ||
- (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) {
+ if ((ref->acc >= 0 && pc->r_addr[i].index < 0) ||
+ (ref->acc < 0 && pc->r_addr[i].index == ref->index)) {
pc->r_addr[i].acc = pc->insn_cur;
return &pc->r_addr[i];
}
@@ -594,7 +595,7 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
a->rhw = ref->hw & ~0x7f;
a->acc = pc->insn_cur;
- a->index = a_tgsi ? -ref->index : -256;
+ a->index = a_tgsi ? ref->index : -1;
return a;
}
@@ -652,6 +653,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
}
+/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
static void
emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
@@ -704,6 +706,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
FREE(imm);
}
+/* Assign the hw of the discarded temporary register src
+ * to the tgsi register dst and free src.
+ */
+static void
+assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ assert(src->index == -1 && src->hw != -1);
+
+ if (pc->if_lvl || pc->loop_lvl ||
+ (dst->type != P_TEMP) ||
+ (src->hw < pc->result_nr * 4 &&
+ pc->p->type == PIPE_SHADER_FRAGMENT) ||
+ pc->p->info.opcode_count[TGSI_OPCODE_CAL] ||
+ pc->p->info.opcode_count[TGSI_OPCODE_BRA]) {
+
+ emit_mov(pc, dst, src);
+ free_temp(pc, src);
+ return;
+ }
+
+ if (dst->hw != -1)
+ pc->r_temp[dst->hw] = NULL;
+ pc->r_temp[src->hw] = dst;
+ dst->hw = src->hw;
+
+ FREE(src);
+}
+
static void
emit_nop(struct nv50_pc *pc)
{
@@ -875,7 +905,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
if (src1->type == P_IMMD && !is_long(e)) {
- if (src0->mod & NV50_MOD_NEG)
+ if (src0->mod ^ src1->mod)
e->inst[0] |= 0x00008000;
set_immd(pc, src1, e);
} else {
@@ -986,6 +1016,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
op != TGSI_OPCODE_XOR)
assert(!"invalid bit op");
+ assert(!(src0->mod | src1->mod));
+
if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) {
set_immd(pc, src1, e);
if (op == TGSI_OPCODE_OR)
@@ -1037,6 +1069,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
src2->mod ^= NV50_MOD_NEG;
}
+#define NV50_FLOP_RCP 0
+#define NV50_FLOP_RSQ 2
+#define NV50_FLOP_LG2 3
+#define NV50_FLOP_SIN 4
+#define NV50_FLOP_COS 5
+#define NV50_FLOP_EX2 6
+
+/* rcp, rsqrt, lg2 support neg and abs */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
struct nv50_reg *dst, struct nv50_reg *src)
@@ -1044,17 +1084,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub,
struct nv50_program_exec *e = exec(pc);
e->inst[0] |= 0x90000000;
- if (sub) {
+ if (sub || src->mod) {
set_long(pc, e);
e->inst[1] |= (sub << 29);
}
set_dst(pc, dst, e);
+ set_src_0_restricted(pc, src, e);
- if (sub == 0 || sub == 2)
- set_src_0_restricted(pc, src, e);
- else
- set_src_0(pc, src, e);
+ assert(!src->mod || sub < 4);
+
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
emit(pc, e);
}
@@ -1071,6 +1114,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
set_long(pc, e);
e->inst[1] |= (6 << 29) | 0x00004000;
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
+
emit(pc, e);
}
@@ -1086,6 +1134,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
set_long(pc, e);
e->inst[1] |= (6 << 29);
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
+
emit(pc, e);
}
@@ -1184,7 +1237,6 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
set_src_1(pc, src1, e);
emit(pc, e);
- pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */
/* cvt.f32.u32/s32 (?) if we didn't only write the predicate */
if (rdst)
@@ -1221,10 +1273,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
{
struct nv50_reg *temp = alloc_temp(pc, NULL);
- emit_flop(pc, 3, temp, v);
+ emit_flop(pc, NV50_FLOP_LG2, temp, v);
emit_mul(pc, temp, temp, e);
emit_preex2(pc, temp, temp);
- emit_flop(pc, 6, dst, temp);
+ emit_flop(pc, NV50_FLOP_EX2, dst, temp);
free_temp(pc, temp);
}
@@ -1306,41 +1358,73 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
{
struct nv50_program_exec *e;
const int r_pred = 1;
- unsigned cvn = CVT_F32_F32;
- if (src->mod & NV50_MOD_NEG)
- cvn |= CVT_NEG;
- /* write predicate reg */
- emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
-
- /* conditional discard */
e = exec(pc);
- e->inst[0] = 0x00000002;
- set_long(pc, e);
- set_pred(pc, 0x1 /* LT */, r_pred, e);
+ e->inst[0] = 0x00000002; /* discard */
+ set_long(pc, e); /* sets cond code to ALWAYS */
+
+ if (src) {
+ unsigned cvn = CVT_F32_F32;
+
+ set_pred(pc, 0x1 /* cc = LT */, r_pred, e);
+
+ if (src->mod & NV50_MOD_NEG)
+ cvn |= CVT_NEG;
+ /* write predicate reg */
+ emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
+ }
+
emit(pc, e);
}
static struct nv50_program_exec *
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
- struct nv50_program_exec **join)
+emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc)
{
struct nv50_program_exec *e = exec(pc);
- if (join) {
- set_long(pc, e);
- e->inst[0] |= 0xa0000002;
- emit(pc, e);
- *join = e;
- e = exec(pc);
- }
-
+ e->inst[0] = (op << 28) | 2;
set_long(pc, e);
- e->inst[0] |= 0x10000002;
if (pred >= 0)
set_pred(pc, cc, pred, e);
+
emit(pc, e);
- return pc->p->exec_tail;
+ return e;
+}
+
+static INLINE struct nv50_program_exec *
+emit_breakaddr(struct nv50_pc *pc)
+{
+ return emit_control_flow(pc, 0x4, -1, 0);
+}
+
+static INLINE void
+emit_break(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ emit_control_flow(pc, 0x5, pred, cc);
+}
+
+static INLINE struct nv50_program_exec *
+emit_joinat(struct nv50_pc *pc)
+{
+ return emit_control_flow(pc, 0xa, -1, 0);
+}
+
+static INLINE struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ return emit_control_flow(pc, 0x1, pred, cc);
+}
+
+static INLINE struct nv50_program_exec *
+emit_call(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ return emit_control_flow(pc, 0x2, pred, cc);
+}
+
+static INLINE void
+emit_ret(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ emit_control_flow(pc, 0x3, pred, cc);
}
#define QOP_ADD 0
@@ -1403,7 +1487,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
emit_mov(pc, t[3], src[3]);
- emit_flop(pc, 0, t[2], t[2]);
+ emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]);
emit_mul(pc, t[0], src[0], t[2]);
emit_mul(pc, t[1], src[1], t[2]);
@@ -1421,7 +1505,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
t[3]->rhw = src[3]->rhw;
emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
- emit_flop(pc, 0, t[3], t[3]);
+ emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]);
for (c = 0; c < dim; ++c) {
t[c]->rhw = src[c]->rhw;
@@ -1435,7 +1519,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
/* XXX: for some reason the blob sometimes uses MAD
* (mad f32 $rX $rY $rZ neg $r63)
*/
- emit_flop(pc, 0, t[3], src[3]);
+ emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]);
for (c = 0; c < dim; ++c)
emit_mul(pc, t[c], src[c], t[3]);
if (arg != dim) /* depth reference value */
@@ -1482,25 +1566,31 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
struct nv50_reg *src, struct nv50_program_exec *tex)
{
struct nv50_program_exec *join_at;
- unsigned i, target = pc->p->exec_size + 7 * 2;
+ unsigned i, target = pc->p->exec_size + 9 * 2;
+
+ if (pc->p->type != PIPE_SHADER_FRAGMENT) {
+ emit(pc, tex);
+ return;
+ }
+ pc->allow32 = FALSE;
/* Subtract lod of each pixel from lod of top left pixel, jump
* texlod insn if result is 0, then repeat for 2 other pixels.
*/
+ join_at = emit_joinat(pc);
emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
- emit_branch(pc, 0, 2, &join_at)->param.index = target;
+ emit_branch(pc, 0, 2)->param.index = target;
for (i = 1; i < 4; ++i) {
emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
- emit_branch(pc, 0, 2, NULL)->param.index = target;
+ emit_branch(pc, 0, 2)->param.index = target;
}
emit_mov(pc, tlod, src); /* target */
emit(pc, tex); /* texlod */
join_at->param.index = target + 2 * 2;
- emit_nop(pc);
- pc->p->exec_tail->inst[1] |= 2; /* join _after_ tex */
+ JOIN_ON(emit_nop(pc)); /* join _after_ tex */
}
static void
@@ -1608,6 +1698,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
emit(pc, e);
} else
if (bias_lod < 0) {
+ assert(pc->p->type == PIPE_SHADER_FRAGMENT);
e->inst[0] |= arg << 22;
e->inst[1] |= 0x20000000; /* texbias */
emit_mov(pc, t[arg], src[3]);
@@ -1647,8 +1738,8 @@ emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
assert(src->type == P_TEMP);
- e->inst[0] = 0xc0140000;
- e->inst[1] = 0x89800000;
+ e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0240000 : 0xc0140000;
+ e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x86400000 : 0x89800000;
set_long(pc, e);
set_dst(pc, dst, e);
set_src_0(pc, src, e);
@@ -1660,25 +1751,16 @@ emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
static void
emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- struct nv50_reg *r = src;
struct nv50_program_exec *e = exec(pc);
assert(src->type == P_TEMP);
- if (!(src->mod & NV50_MOD_NEG)) { /* ! double negation */
- r = alloc_temp(pc, NULL);
- emit_neg(pc, r, src);
- }
-
- e->inst[0] = 0xc0150000;
- e->inst[1] = 0x8a400000;
+ e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0250000 : 0xc0150000;
+ e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x85800000 : 0x8a400000;
set_long(pc, e);
set_dst(pc, dst, e);
- set_src_0(pc, r, e);
- set_src_2(pc, r, e);
-
- if (r != src)
- free_temp(pc, r);
+ set_src_0(pc, src, e);
+ set_src_2(pc, src, e);
emit(pc, e);
}
@@ -1736,19 +1818,24 @@ static boolean
negate_supported(const struct tgsi_full_instruction *insn, int i)
{
switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_DDX:
case TGSI_OPCODE_DDY:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_EX2:
case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_LG2:
case TGSI_OPCODE_MAD:
- return TRUE;
+ case TGSI_OPCODE_MUL:
case TGSI_OPCODE_POW:
- if (i == 1)
- return TRUE;
- return FALSE;
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */
+ case TGSI_OPCODE_SCS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_SUB:
+ return TRUE;
default:
return FALSE;
}
@@ -1773,7 +1860,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
case TGSI_OPCODE_DST:
return mask & (c ? 0xa : 0x6);
case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_EXP:
case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_LOG:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
@@ -1995,6 +2084,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
assert(0);
return 0x0;
}
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LOG:
case TGSI_OPCODE_LIT:
case TGSI_OPCODE_SCS:
case TGSI_OPCODE_TEX:
@@ -2035,34 +2126,51 @@ nv50_kill_branch(struct nv50_pc *pc)
if (pc->if_insn[lvl]->next != pc->p->exec_tail)
return FALSE;
+ if (is_immd(pc->p->exec_tail))
+ return FALSE;
/* if ccode == 'true', the BRA is from an ELSE and the predicate
* reg may no longer be valid, since we currently always use $p0
*/
if (has_pred(pc->if_insn[lvl], 0xf))
return FALSE;
- assert(pc->if_insn[lvl] && pc->br_join[lvl]);
+ assert(pc->if_insn[lvl] && pc->if_join[lvl]);
- /* We'll use the exec allocated for JOIN_AT (as we can't easily
- * update prev's next); if exec_tail is BRK, update the pointer.
+ /* We'll use the exec allocated for JOIN_AT (we can't easily
+ * access nv50_program_exec's prev).
*/
- if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail)
- pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl];
-
pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */
- *pc->br_join[lvl] = *pc->p->exec_tail;
+ *pc->if_join[lvl] = *pc->p->exec_tail;
FREE(pc->if_insn[lvl]);
FREE(pc->p->exec_tail);
- pc->p->exec_tail = pc->br_join[lvl];
+ pc->p->exec_tail = pc->if_join[lvl];
pc->p->exec_tail->next = NULL;
set_pred(pc, 0xd, 0, pc->p->exec_tail);
return TRUE;
}
+static void
+nv50_fp_move_results(struct nv50_pc *pc)
+{
+ struct nv50_reg reg;
+ unsigned i;
+
+ ctor_reg(&reg, P_TEMP, -1, -1);
+
+ for (i = 0; i < pc->result_nr * 4; ++i) {
+ if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
+ continue;
+ if (pc->result[i].rhw != pc->result[i].hw) {
+ reg.hw = pc->result[i].rhw;
+ emit_mov(pc, &reg, &pc->result[i]);
+ }
+ }
+}
+
static boolean
nv50_program_tx_insn(struct nv50_pc *pc,
const struct tgsi_full_instruction *inst)
@@ -2149,13 +2257,25 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_arl(pc, dst[0], temp, 4);
break;
case TGSI_OPCODE_BGNLOOP:
+ pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc);
pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_BGNSUB:
+ assert(!pc->in_subroutine);
+ pc->in_subroutine = TRUE;
+ /* probably not necessary, but align to 8 byte boundary */
+ if (!is_long(pc->p->exec_tail))
+ convert_to_long(pc, pc->p->exec_tail);
+ break;
case TGSI_OPCODE_BRK:
- emit_branch(pc, -1, 0, NULL);
assert(pc->loop_lvl > 0);
- pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail;
+ emit_break(pc, -1, 0);
+ break;
+ case TGSI_OPCODE_CAL:
+ assert(inst->Label.Label < pc->insn_nr);
+ emit_call(pc, -1, 0)->param.index = inst->Label.Label;
+ /* replaced by actual offset in nv50_program_fixup_insns */
break;
case TGSI_OPCODE_CEIL:
for (c = 0; c < 4; c++) {
@@ -2177,17 +2297,22 @@ nv50_program_tx_insn(struct nv50_pc *pc,
set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */
}
break;
+ case TGSI_OPCODE_CONT:
+ assert(pc->loop_lvl > 0);
+ emit_branch(pc, -1, 0)->param.index =
+ pc->loop_pos[pc->loop_lvl - 1];
+ break;
case TGSI_OPCODE_COS:
if (mask & 8) {
emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, 5, dst[3], temp);
+ emit_flop(pc, NV50_FLOP_COS, dst[3], temp);
if (!(mask &= 7))
break;
if (temp == dst[3])
temp = brdc = temp_temp(pc);
}
emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 5, brdc, temp);
+ emit_flop(pc, NV50_FLOP_COS, brdc, temp);
break;
case TGSI_OPCODE_DDX:
for (c = 0; c < 4; c++) {
@@ -2231,7 +2356,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_mov_immdval(pc, dst[0], 1.0f);
break;
case TGSI_OPCODE_ELSE:
- emit_branch(pc, -1, 0, NULL);
+ emit_branch(pc, -1, 0);
pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
terminate_mbb(pc);
@@ -2243,26 +2368,56 @@ nv50_program_tx_insn(struct nv50_pc *pc,
if (nv50_kill_branch(pc) == TRUE)
break;
- if (pc->br_join[pc->if_lvl]) {
- pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
- pc->br_join[pc->if_lvl] = NULL;
+ if (pc->if_join[pc->if_lvl]) {
+ pc->if_join[pc->if_lvl]->param.index = pc->p->exec_size;
+ pc->if_join[pc->if_lvl] = NULL;
}
terminate_mbb(pc);
/* emit a NOP as join point, we could set it on the next
* one, but would have to make sure it is long and !immd
*/
- emit_nop(pc);
- pc->p->exec_tail->inst[1] |= 2;
+ JOIN_ON(emit_nop(pc));
break;
case TGSI_OPCODE_ENDLOOP:
- emit_branch(pc, -1, 0, NULL);
- pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
- pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
+ emit_branch(pc, -1, 0)->param.index =
+ pc->loop_pos[--pc->loop_lvl];
+ pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_ENDSUB:
+ assert(pc->in_subroutine);
+ pc->in_subroutine = FALSE;
+ break;
case TGSI_OPCODE_EX2:
emit_preex2(pc, temp, src[0][0]);
- emit_flop(pc, 6, brdc, temp);
+ emit_flop(pc, NV50_FLOP_EX2, brdc, temp);
+ break;
+ case TGSI_OPCODE_EXP:
+ {
+ struct nv50_reg *t[2];
+
+ assert(!temp);
+ t[0] = temp_temp(pc);
+ t[1] = temp_temp(pc);
+
+ if (mask & 0x6)
+ emit_mov(pc, t[0], src[0][0]);
+ if (mask & 0x3)
+ emit_flr(pc, t[1], src[0][0]);
+
+ if (mask & (1 << 1))
+ emit_sub(pc, dst[1], t[0], t[1]);
+ if (mask & (1 << 0)) {
+ emit_preex2(pc, t[1], t[1]);
+ emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]);
+ }
+ if (mask & (1 << 2)) {
+ emit_preex2(pc, t[0], t[0]);
+ emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]);
+ }
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0f);
+ }
break;
case TGSI_OPCODE_FLR:
for (c = 0; c < 4; c++) {
@@ -2281,26 +2436,56 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
break;
case TGSI_OPCODE_IF:
- /* emitting a join_at may not be necessary */
- assert(pc->if_lvl < MAX_IF_DEPTH);
- /* set_pred_wr(pc, 1, 0, pc->if_cond); */
+ assert(pc->if_lvl < NV50_MAX_COND_NESTING);
emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN,
CVT_F32_F32);
- emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
- pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ pc->if_join[pc->if_lvl] = emit_joinat(pc);
+ pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);;
terminate_mbb(pc);
break;
case TGSI_OPCODE_KIL:
+ assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]);
emit_kil(pc, src[0][0]);
emit_kil(pc, src[0][1]);
emit_kil(pc, src[0][2]);
emit_kil(pc, src[0][3]);
break;
+ case TGSI_OPCODE_KILP:
+ emit_kil(pc, NULL);
+ break;
case TGSI_OPCODE_LIT:
emit_lit(pc, &dst[0], mask, &src[0][0]);
break;
case TGSI_OPCODE_LG2:
- emit_flop(pc, 3, brdc, src[0][0]);
+ emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ {
+ struct nv50_reg *t[2];
+
+ t[0] = temp_temp(pc);
+ if (mask & (1 << 1))
+ t[1] = temp_temp(pc);
+ else
+ t[1] = t[0];
+
+ emit_abs(pc, t[0], src[0][0]);
+ emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]);
+ if (mask & (1 << 2))
+ emit_mov(pc, dst[2], t[1]);
+ emit_flr(pc, t[1], t[1]);
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], t[1]);
+ if (mask & (1 << 1)) {
+ t[1]->mod = NV50_MOD_NEG;
+ emit_preex2(pc, t[1], t[1]);
+ t[1]->mod = 0;
+ emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]);
+ emit_mul(pc, dst[1], t[0], t[1]);
+ }
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0f);
+ }
break;
case TGSI_OPCODE_LRP:
temp = temp_temp(pc);
@@ -2350,19 +2535,25 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_pow(pc, brdc, src[0][0], src[1][0]);
break;
case TGSI_OPCODE_RCP:
- emit_flop(pc, 0, brdc, src[0][0]);
+ emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]);
+ break;
+ case TGSI_OPCODE_RET:
+ if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine)
+ nv50_fp_move_results(pc);
+ emit_ret(pc, -1, 0);
break;
case TGSI_OPCODE_RSQ:
- emit_flop(pc, 2, brdc, src[0][0]);
+ src[0][0]->mod |= NV50_MOD_ABS;
+ emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]);
break;
case TGSI_OPCODE_SCS:
temp = temp_temp(pc);
if (mask & 3)
emit_precossin(pc, temp, src[0][0]);
if (mask & (1 << 0))
- emit_flop(pc, 5, dst[0], temp);
+ emit_flop(pc, NV50_FLOP_COS, dst[0], temp);
if (mask & (1 << 1))
- emit_flop(pc, 4, dst[1], temp);
+ emit_flop(pc, NV50_FLOP_SIN, dst[1], temp);
if (mask & (1 << 2))
emit_mov_immdval(pc, dst[2], 0.0);
if (mask & (1 << 3))
@@ -2371,14 +2562,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
case TGSI_OPCODE_SIN:
if (mask & 8) {
emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, 4, dst[3], temp);
+ emit_flop(pc, NV50_FLOP_SIN, dst[3], temp);
if (!(mask &= 7))
break;
if (temp == dst[3])
temp = brdc = temp_temp(pc);
}
emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 4, brdc, temp);
+ emit_flop(pc, NV50_FLOP_SIN, brdc, temp);
break;
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_SGE:
@@ -2442,6 +2633,17 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_mov_immdval(pc, dst[3], 1.0);
break;
case TGSI_OPCODE_END:
+ if (pc->p->type == PIPE_SHADER_FRAGMENT)
+ nv50_fp_move_results(pc);
+
+ /* last insn must be long so it can have the exit bit set */
+ if (!is_long(pc->p->exec_tail))
+ convert_to_long(pc, pc->p->exec_tail);
+ else
+ if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail))
+ emit_nop(pc);
+
+ pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
@@ -2486,10 +2688,16 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
mask = dst->WriteMask;
if (dst->File == TGSI_FILE_TEMPORARY)
- reg = pc->temp;
+ reg = pc->temp;
else
- if (dst->File == TGSI_FILE_OUTPUT)
- reg = pc->result;
+ if (dst->File == TGSI_FILE_OUTPUT) {
+ reg = pc->result;
+
+ if (insn->Instruction.Opcode == TGSI_OPCODE_MOV &&
+ dst->Index == pc->edgeflag_out &&
+ insn->Src[0].Register.File == TGSI_FILE_INPUT)
+ pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index;
+ }
if (reg) {
for (c = 0; c < 4; c++) {
@@ -2656,7 +2864,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
pc->r_brdc = NULL;
- if (!deqs)
+ if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3]))
return nv50_program_tx_insn(pc, &insn);
deqs = nv50_revdep_reorder(m, rdep);
@@ -2707,7 +2915,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1;
emit_interp(pc, iv, NULL, mode & INTERP_CENTROID);
- emit_flop(pc, 0, iv, iv);
+ emit_flop(pc, NV50_FLOP_RCP, iv, iv);
/* XXX: when loading interpolants dynamically, move these
* to the program head, or make sure it can't be skipped.
@@ -2788,6 +2996,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (p->cfg.io_nr > first)
p->cfg.io_nr = first;
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ pc->edgeflag_out = first;
+ break;
/*
case TGSI_SEMANTIC_CLIP_DISTANCE:
p->cfg.clpd = MIN2(p->cfg.clpd, first);
@@ -3036,6 +3247,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
p->cfg.two_side[0].hw = 0x40;
p->cfg.two_side[1].hw = 0x40;
+ p->cfg.edgeflag_in = pc->edgeflag_out = 0xff;
+
switch (p->type) {
case PIPE_SHADER_VERTEX:
p->cfg.psiz = 0x40;
@@ -3110,24 +3323,6 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
}
static void
-nv50_fp_move_results(struct nv50_pc *pc)
-{
- struct nv50_reg reg;
- unsigned i;
-
- ctor_reg(&reg, P_TEMP, -1, -1);
-
- for (i = 0; i < pc->result_nr * 4; ++i) {
- if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
- continue;
- if (pc->result[i].rhw != pc->result[i].hw) {
- reg.hw = pc->result[i].rhw;
- emit_mov(pc, &reg, &pc->result[i]);
- }
- }
-}
-
-static void
nv50_program_fixup_insns(struct nv50_pc *pc)
{
struct nv50_program_exec *e, **bra_list;
@@ -3142,16 +3337,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
if (e->param.index >= 0 && !e->param.mask)
bra_list[n++] = e;
- /* last instruction must be long so it can have the exit bit set */
- if (!is_long(pc->p->exec_tail))
- convert_to_long(pc, pc->p->exec_tail);
- /* set exit bit */
- pc->p->exec_tail->inst[1] |= 1;
-
- /* !immd on exit insn simultaneously means !join */
- assert(!is_immd(pc->p->exec_head));
- assert(!is_immd(pc->p->exec_tail));
-
/* Make sure we don't have any single 32 bit instructions. */
for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
pos += is_long(e) ? 2 : 1;
@@ -3160,12 +3345,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
for (i = 0; i < n; ++i)
if (bra_list[i]->param.index >= pos)
bra_list[i]->param.index += 1;
+ for (i = 0; i < pc->insn_nr; ++i)
+ if (pc->insn_pos[i] >= pos)
+ pc->insn_pos[i] += 1;
convert_to_long(pc, e);
++pos;
}
}
FREE(bra_list);
+
+ if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL])
+ return;
+
+ /* fill in CALL offsets */
+ for (e = pc->p->exec_head; e; e = e->next) {
+ if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2)
+ e->param.index = pc->insn_pos[e->param.index];
+ }
}
static boolean
@@ -3187,19 +3384,20 @@ nv50_program_tx(struct nv50_program *p)
if (ret == FALSE)
goto out_cleanup;
+ pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned));
+
tgsi_parse_init(&parse, pc->p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
const union tgsi_full_token *tok = &parse.FullToken;
- /* don't allow half insn/immd on first and last instruction */
+ /* previously allow32 was FALSE for first & last instruction */
pc->allow32 = TRUE;
- if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr)
- pc->allow32 = FALSE;
tgsi_parse_token(&parse);
switch (tok->Token.Type) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ pc->insn_pos[pc->insn_cur] = pc->p->exec_size;
++pc->insn_cur;
ret = nv50_tgsi_insn(pc, tok);
if (ret == FALSE)
@@ -3210,9 +3408,6 @@ nv50_program_tx(struct nv50_program *p)
}
}
- if (pc->p->type == PIPE_SHADER_FRAGMENT)
- nv50_fp_move_results(pc);
-
nv50_program_fixup_insns(pc);
p->param_nr = pc->param_nr * 4;
@@ -3430,7 +3625,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1);
+ so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
so_data (so, p->cfg.regs[2]);
so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
so_data (so, p->cfg.regs[3]);
@@ -3602,7 +3797,7 @@ nv50_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
so_data (so, reg[4]);
- so_method(so, tesla, 0x1540, 4);
+ so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
so_datap (so, lin, 4);
if (nv50->rasterizer->pipe.point_sprite) {
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 4a90c372ce..461fec1d89 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -58,6 +58,7 @@ struct nv50_program {
/* VP only */
uint8_t clpd, clpd_nr;
uint8_t psiz;
+ uint8_t edgeflag_in;
} cfg;
};
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 268c9823f7..5d9e18218a 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
- BEGIN_RING(chan, tesla, 0x1530, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1);
OUT_RING (chan, 1);
- BEGIN_RING(chan, tesla, 0x1514, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1);
OUT_RING (chan, 1);
q->ready = FALSE;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index d443ca3ad0..7e039ea82e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
case PIPE_CAP_TGSI_CONT_SUPPORTED:
- return 0;
+ return 1;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case NOUVEAU_CAP_HW_VTXBUF:
@@ -165,6 +165,21 @@ static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
struct nv50_screen *screen = nv50_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < 2; i++) {
+ if (screen->constbuf_parm[i])
+ nouveau_bo_ref(NULL, &screen->constbuf_parm[i]);
+ }
+
+ if (screen->constbuf_misc[0])
+ nouveau_bo_ref(NULL, &screen->constbuf_misc[0]);
+ if (screen->tic)
+ nouveau_bo_ref(NULL, &screen->tic);
+ if (screen->tsc)
+ nouveau_bo_ref(NULL, &screen->tsc);
+ if (screen->static_init)
+ so_ref(NULL, &screen->static_init);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->tesla);
@@ -231,8 +246,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
break;
case 0x80:
case 0x90:
- /* this stupid name should be corrected. */
- tesla_class = NV54TCL;
+ tesla_class = NV84TCL;
break;
case 0xa0:
switch (chipset) {
@@ -242,7 +256,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
tesla_class = NVA0TCL;
break;
default:
- tesla_class = 0x8597;
+ tesla_class = NVA8TCL;
break;
}
break;
@@ -287,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, chan->vram->handle);
so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
so_data (so, NV50_2D_OPERATION_SRCCOPY);
- so_method(so, screen->eng2d, 0x0290, 1);
+ so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
so_data (so, 0);
so_method(so, screen->eng2d, 0x0888, 1);
so_data (so, 1);
@@ -297,34 +311,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* Static tesla init */
so = so_new(256, 20);
- so_method(so, screen->tesla, 0x1558, 1);
- so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
+ so_data (so, NV50TCL_COND_MODE_ALWAYS);
so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
- NV50TCL_DMA_UNK0__SIZE);
- for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+ so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11);
+ for (i = 0; i < 11; i++)
so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
- NV50TCL_DMA_UNK1__SIZE);
- for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+ so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0),
+ NV50TCL_DMA_COLOR__SIZE);
+ for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++)
so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, 0x121c, 1);
+ so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1);
so_data (so, 1);
/* activate all 32 lanes (threads) in a warp */
- so_method(so, screen->tesla, 0x19a0, 1);
+ so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1);
so_data (so, 0x2);
so_method(so, screen->tesla, 0x1400, 1);
so_data (so, 0xf);
/* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
- so_method(so, screen->tesla, 0x13b4, 1);
+ so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1);
so_data (so, 0x54);
- so_method(so, screen->tesla, 0x13bc, 1);
+ so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1);
so_data (so, 0x54);
/* origin is top left (set to 1 for bottom left) */
- so_method(so, screen->tesla, 0x13ac, 1);
+ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
so_data (so, 0);
so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, 8);
@@ -360,7 +373,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
// B = buffer ID (maybe more than 1 byte)
// N = CB index used in shader instruction
// P = program type (0 = VP, 2 = GP, 3 = FP)
- so_method(so, screen->tesla, 0x1694, 1);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x000BBNP1);
*/
@@ -424,23 +437,26 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
- so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2);
+ so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
so_data (so, 0x000000ff);
so_data (so, 0xffffffff);
}
- so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+ so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
so_data (so, fui(0.0));
so_data (so, fui(1.0));
/* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
- so_method(so, screen->tesla, 0x1234, 1);
+ so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
so_data (so, 1);
/* activate first scissor rectangle */
- so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
+ so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ so_data (so, 1); /* default edgeflag to TRUE */
+
so_emit(chan, so);
so_ref (so, &screen->static_init);
so_ref (NULL, &so);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 88aef52d08..30b2b0f91b 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -295,7 +295,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
NV50TCL_SHADE_MODEL_SMOOTH);
- so_method(so, tesla, 0x1684, 1);
+ so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1);
so_data (so, cso->flatshade_first ? 0 : 1);
so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1);
@@ -392,7 +392,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
so_data (so, fui(cso->offset_scale));
so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
- so_data (so, fui(cso->offset_units));
+ so_data (so, fui(cso->offset_units * 2.0f));
}
rso->pipe = *cso;
@@ -439,9 +439,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, 0);
}
- /* XXX: keep hex values until header is updated (names reversed) */
if (cso->stencil[0].enabled) {
- so_method(so, tesla, 0x1380, 8);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
@@ -451,23 +450,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, cso->stencil[0].writemask);
so_data (so, cso->stencil[0].valuemask);
} else {
- so_method(so, tesla, 0x1380, 1);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
so_data (so, 0);
}
if (cso->stencil[1].enabled) {
- so_method(so, tesla, 0x1594, 5);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
- so_method(so, tesla, 0x0f54, 3);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
so_data (so, cso->stencil[1].ref_value);
so_data (so, cso->stencil[1].writemask);
so_data (so, cso->stencil[1].valuemask);
} else {
- so_method(so, tesla, 0x1594, 1);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
so_data (so, 0);
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 871e8097b6..c8bdf9dc27 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
* FP result 0 always goes to RT[0], bits 4 - 6 are ignored.
* Ambiguous assignment results in no rendering (no DATA_ERROR).
*/
- so_method(so, tesla, 0x121c, 1);
+ so_method(so, tesla, NV50TCL_RT_CONTROL, 1);
so_data (so, fb->nr_cbufs |
(0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) |
(4 << 16) | (5 << 19) | (6 << 22) | (7 << 25));
@@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
level[fb->cbufs[i]->level].tile_mode << 4);
so_data(so, 0x00000000);
- so_method(so, tesla, 0x1224, 1);
+ so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
so_data (so, 1);
}
@@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50)
level[fb->zsbuf->level].tile_mode << 4);
so_data(so, 0x00000000);
- so_method(so, tesla, 0x1538, 1);
+ so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
so_data (so, 1);
so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3);
so_data (so, fb->zsbuf->width);
so_data (so, fb->zsbuf->height);
so_data (so, 0x00010001);
} else {
- so_method(so, tesla, 0x1538, 1);
+ so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
so_data (so, 0);
}
- so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
+ so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2);
so_data (so, w << 16);
so_data (so, h << 16);
/* set window lower left corner */
- so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2);
+ so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2);
so_data (so, 0);
so_data (so, 0);
/* set screen scissor rectangle */
@@ -325,7 +325,7 @@ nv50_state_validate(struct nv50_context *nv50)
nv50->state.scissor_enabled = rast->scissor;
so = so_new(3, 0);
- so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2);
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
if (nv50->state.scissor_enabled) {
so_data(so, (s->maxx << 16) | s->minx);
so_data(so, (s->maxy << 16) | s->miny);
@@ -355,11 +355,11 @@ scissor_uptodate:
so = so_new(14, 0);
if (!bypass) {
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
so_data (so, fui(nv50->viewport.translate[0]));
so_data (so, fui(nv50->viewport.translate[1]));
so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
so_data (so, fui(nv50->viewport.scale[0]));
so_data (so, fui(nv50->viewport.scale[1]));
so_data (so, fui(nv50->viewport.scale[2]));
@@ -440,7 +440,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
so_data (so, 1);
so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
- so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
+ so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
so_data (so, 0);
so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 79655fc08d..6378132979 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
if (ret)
return;
- BEGIN_RING(chan, eng2d, 0x0580, 3);
- OUT_RING (chan, 4);
+ BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3);
+ OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES);
OUT_RING (chan, format);
OUT_RING (chan, value);
- BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4);
+ BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4);
OUT_RING (chan, destx);
OUT_RING (chan, desty);
OUT_RING (chan, width);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 4d9afa6fed..a2f1db2914 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
+ NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
OUT_RING (chan, src_pitch);
src_offset += (sy * src_pitch) + (sx * cpp);
} else {
@@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
+ NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
OUT_RING (chan, dst_pitch);
dst_offset += (dy * dst_pitch) + (dx * cpp);
} else {
@@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
+ NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
if (src_bo->tile_flags) {
@@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
dst_offset += (line_count * dst_pitch);
}
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
+ NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
OUT_RING (chan, width * cpp);
OUT_RING (chan, line_count);
OUT_RING (chan, 0x00000101);
@@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
/* NV50_2D_OPERATION_SRCCOPY assumed already set */
- BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2);
+ BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
OUT_RING (chan, 0);
OUT_RING (chan, src_format);
BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
@@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50,
src += src_pitch;
}
- BEGIN_RING(chan, tesla, 0x1440, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
OUT_RING (chan, 0);
}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f7fa0659e8..df18c2dd20 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
{
static const uint32_t hw_values[] = {
0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 };
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
/* we'd also have R11G11B10 and R10G10B10A2 */
@@ -152,7 +152,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
return (hw_type | hw_size);
}
-boolean
+void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
{
@@ -182,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
- return ret;
+ /* XXX: not sure what to do if ret != TRUE: flush and retry?
+ */
+ assert(ret);
}
static INLINE boolean
@@ -198,7 +200,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
return nv50_push_elements_u08(nv50, map, count);
if (count & 1) {
- BEGIN_RING(chan, tesla, 0x15e8, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
count--;
@@ -208,7 +210,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -231,7 +233,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
return nv50_push_elements_u16(nv50, map, count);
if (count & 1) {
- BEGIN_RING(chan, tesla, 0x15e8, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
count--;
@@ -241,7 +243,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -266,7 +268,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
while (count) {
unsigned nr = count > 2047 ? 2047 : count;
- BEGIN_RING(chan, tesla, 0x400015e8, nr);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr);
OUT_RINGp (chan, map, nr);
count -= nr;
@@ -275,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
return TRUE;
}
-boolean
+void
nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -317,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe,
OUT_RING (chan, 0);
pipe_buffer_unmap(pscreen, indexBuffer);
-
- return ret;
+
+ /* XXX: what to do if ret != TRUE? Flush and retry?
+ */
+ assert(ret);
}
static INLINE boolean
@@ -372,6 +376,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
so_data (so, fui(v[1]));
break;
case 1:
+ if (attrib == nv50->vertprog->cfg.edgeflag_in) {
+ so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ so_data (so, v[0] ? 1 : 0);
+ }
so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
so_data (so, fui(v[0]));
break;
@@ -401,6 +409,9 @@ nv50_vbo_validate(struct nv50_context *nv50)
!(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
nv50->vbo_fifo = 0xffff;
+ if (nv50->vertprog->cfg.edgeflag_in < 16)
+ nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
+
n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
vtxattr = NULL;
@@ -445,7 +456,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
/* vertex array limits */
- so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2);
+ so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
so_reloc (vtxbuf, bo, vb->buffer->size - 1,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_HIGH, 0, 0);
@@ -479,6 +490,9 @@ struct nv50_vbo_emitctx
unsigned nr_ve;
unsigned vtx_dwords;
unsigned vtx_max;
+
+ float edgeflag;
+ unsigned ve_edgeflag;
};
static INLINE void
@@ -622,6 +636,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
if (nv50_map_vbufs(nv50) == FALSE)
return FALSE;
+ emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
+
+ emit->edgeflag = 0.5f;
emit->nr_ve = 0;
emit->vtx_dwords = 0;
@@ -644,7 +661,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
desc = util_format_description(ve->src_format);
assert(desc);
- size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
+ size = util_format_get_component_bits(
+ ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
assert(ve->nr_components > 0 && ve->nr_components <= 4);
@@ -686,10 +704,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
}
emit->vtx_max = 512 / emit->vtx_dwords;
+ if (emit->ve_edgeflag < 16)
+ emit->vtx_max = 1;
return TRUE;
}
+/**
+ * Update the edge flag state before a vertex is pushed through the FIFO.
+ *
+ * When the vertex program has an edge flag input (emit->ve_edgeflag < 16),
+ * the value of that vertex element for vertex `index` is read as a float
+ * from the mapped vertex buffer.  If it differs from the value cached in
+ * emit->edgeflag, the cache is updated and EDGEFLAG_ENABLE is emitted with
+ * 1 for a non-zero value and 0 otherwise.  Nothing is emitted when the
+ * value is unchanged or when there is no edge flag input.
+ *
+ * \param chan   channel handed to BEGIN_RING / OUT_RING
+ * \param tesla  object the EDGEFLAG_ENABLE method is addressed to
+ * \param emit   push context; supplies map[], stride[] and the cached flag
+ * \param index  vertex index used to locate the edge flag element
+ */
+static INLINE void
+set_edgeflag(struct nouveau_channel *chan,
+	     struct nouveau_grobj *tesla,
+	     struct nv50_vbo_emitctx *emit, uint32_t index)
+{
+	unsigned i = emit->ve_edgeflag;
+
+	if (i < 16) {
+		float f = *((float *)(emit->map[i] + index * emit->stride[i]));
+
+		/* only touch the ring when the flag actually changes */
+		if (emit->edgeflag != f) {
+			emit->edgeflag = f;
+
+			/* named method rather than the raw 0x15e4 offset */
+			BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+			OUT_RING  (chan, f ? 1 : 0);
+		}
+	}
+}
+
static boolean
nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
{
@@ -704,6 +743,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx_next(chan, &emit);
@@ -729,6 +770,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, *map);
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
@@ -754,6 +797,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, *map);
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
@@ -779,6 +824,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, *map);
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 0d2de17be9..183aa17f9b 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -4,7 +4,12 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
env = env.Clone()
# add the paths for r300compiler
-env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa'])
+env.Append(CPPPATH = [
+ '#/src/mesa/drivers/dri/r300/compiler',
+ '#/src/gallium/winsys/drm/radeon/core',
+ '#/include',
+ '#/src/mesa',
+])
r300 = env.ConvenienceLibrary(
target = 'r300',
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 199ce3a945..1dc9216a7b 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -997,7 +997,7 @@ validate:
goto validate;
}
} else {
- // debug_printf("No VBO while emitting dirty state!\n");
+ /* debug_printf("No VBO while emitting dirty state!\n"); */
}
if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
@@ -1129,7 +1129,7 @@ validate:
*/
/* Finally, emit the VBO. */
- //r300_emit_vertex_buffer(r300);
+ /* r300_emit_vertex_buffer(r300); */
r300->dirty_hw++;
}
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index d8d08fbe26..0aa1da07f8 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2638,7 +2638,7 @@ enum {
VE_COND_MUX_GTE = 25,
VE_SET_GREATER_THAN = 26,
VE_SET_EQUAL = 27,
- VE_SET_NOT_EQUAL = 28,
+ VE_SET_NOT_EQUAL = 28
};
enum {
@@ -2672,20 +2672,20 @@ enum {
ME_PRED_SET_CLR = 25,
ME_PRED_SET_INV = 26,
ME_PRED_SET_POP = 27,
- ME_PRED_SET_RESTORE = 28,
+ ME_PRED_SET_RESTORE = 28
};
enum {
/* R3XX */
PVS_MACRO_OP_2CLK_MADD = 0,
- PVS_MACRO_OP_2CLK_M2X_ADD = 1,
+ PVS_MACRO_OP_2CLK_M2X_ADD = 1
};
enum {
PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
- PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */
+ PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */
};
enum {
@@ -2694,7 +2694,7 @@ enum {
PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
- PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */
+ PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */
};
enum {
@@ -2703,7 +2703,7 @@ enum {
PVS_SRC_SELECT_Z = 2, /* Select Z Component */
PVS_SRC_SELECT_W = 3, /* Select W Component */
PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
- PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */
+ PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */
};
/* PVS Opcode & Destination Operand Description */
@@ -2742,7 +2742,7 @@ enum {
PVS_DST_ADDR_SEL_MASK = 0x3,
PVS_DST_ADDR_SEL_SHIFT = 29,
PVS_DST_ADDR_MODE_0_MASK = 0x1,
- PVS_DST_ADDR_MODE_0_SHIFT = 31,
+ PVS_DST_ADDR_MODE_0_SHIFT = 31
};
/* PVS Source Operand Description */
@@ -2777,7 +2777,7 @@ enum {
PVS_SRC_ADDR_SEL_MASK = 0x3,
PVS_SRC_ADDR_SEL_SHIFT = 29,
PVS_SRC_ADDR_MODE_1_MASK = 0x0,
- PVS_SRC_ADDR_MODE_1_SHIFT = 32,
+ PVS_SRC_ADDR_MODE_1_SHIFT = 32
};
/*\}*/
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 2d70ec2ac9..a4ac9ad9a7 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -213,7 +213,7 @@ validate:
}
/* This is the fast-path drawing & emission for HW TCL. */
-boolean r300_draw_range_elements(struct pipe_context* pipe,
+void r300_draw_range_elements(struct pipe_context* pipe,
struct pipe_buffer* indexBuffer,
unsigned indexSize,
unsigned minIndex,
@@ -225,30 +225,33 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
+ return;
}
if (count > 65535) {
- return FALSE;
+ /* XXX: use aux/indices functions to split this into smaller
+ * primitives.
+ */
+ return;
}
if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ return;
}
r300_update_derived_state(r300);
if (!r300_setup_vertex_buffers(r300)) {
- return FALSE;
+ return;
}
if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
RADEON_GEM_DOMAIN_GTT, 0)) {
- return FALSE;
+ return;
}
if (!r300->winsys->validate(r300->winsys)) {
- return FALSE;
+ return;
}
r300_emit_dirty_state(r300);
@@ -257,41 +260,42 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex,
mode, start, count);
-
- return TRUE;
}
/* Simple helpers for context setup. Should probably be moved to util. */
-boolean r300_draw_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize, unsigned mode,
- unsigned start, unsigned count)
+void r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize, unsigned mode,
+ unsigned start, unsigned count)
{
- return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
- mode, start, count);
+ pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
+ mode, start, count);
}
-boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
unsigned start, unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
+ return;
}
if (count > 65535) {
- return FALSE;
+ /* XXX: driver needs to handle this -- use the functions in
+ * aux/indices to split this into several smaller primitives.
+ */
+ return;
}
if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ return;
}
r300_update_derived_state(r300);
if (!r300_setup_vertex_buffers(r300)) {
- return FALSE;
+ return;
}
r300_emit_dirty_state(r300);
@@ -299,8 +303,6 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
r300_emit_aos(r300, start);
r300_emit_draw_arrays(r300, mode, count);
-
- return TRUE;
}
/****************************************************************************
@@ -309,7 +311,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
***************************************************************************/
/* SW TCL arrays, using Draw. */
-boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
unsigned mode,
unsigned start,
unsigned count)
@@ -318,11 +320,11 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
int i;
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
+ return;
}
if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ return;
}
for (i = 0; i < r300->vertex_buffer_count; i++) {
@@ -335,8 +337,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
draw_set_mapped_element_buffer(r300->draw, 0, NULL);
draw_set_mapped_constant_buffer(r300->draw,
- r300->shader_constants[PIPE_SHADER_VERTEX].constants,
- r300->shader_constants[PIPE_SHADER_VERTEX].count *
+ PIPE_SHADER_VERTEX,
+ r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+ r300->shader_constants[PIPE_SHADER_VERTEX].count *
(sizeof(float) * 4));
draw_arrays(r300->draw, mode, start, count);
@@ -345,12 +348,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer);
draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
}
-
- return TRUE;
}
/* SW TCL elements, using Draw. */
-boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
struct pipe_buffer* indexBuffer,
unsigned indexSize,
unsigned minIndex,
@@ -361,13 +362,14 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
int i;
+ void* indices;
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
+ return;
}
if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ return;
}
for (i = 0; i < r300->vertex_buffer_count; i++) {
@@ -377,12 +379,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
draw_set_mapped_vertex_buffer(r300->draw, i, buf);
}
- void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
- PIPE_BUFFER_USAGE_CPU_READ);
+ indices = pipe_buffer_map(pipe->screen, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer_range(r300->draw, indexSize,
minIndex, maxIndex, indices);
draw_set_mapped_constant_buffer(r300->draw,
+ PIPE_SHADER_VERTEX,
r300->shader_constants[PIPE_SHADER_VERTEX].constants,
r300->shader_constants[PIPE_SHADER_VERTEX].count *
(sizeof(float) * 4));
@@ -397,8 +400,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
pipe_buffer_unmap(pipe->screen, indexBuffer);
draw_set_mapped_element_buffer_range(r300->draw, 0, start,
start + count - 1, NULL);
-
- return TRUE;
}
/* Object for rendering using Draw. */
@@ -474,7 +475,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo,
PIPE_BUFFER_USAGE_CPU_WRITE);
- return (r300render->vbo_ptr + r300render->vbo_offset);
+ return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset);
}
static void r300_render_unmap_vertices(struct vbuf_render* render,
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
index da83069083..27b5e6a963 100644
--- a/src/gallium/drivers/r300/r300_render.h
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -25,35 +25,35 @@
uint32_t r300_translate_primitive(unsigned prim);
-boolean r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-boolean r300_draw_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize, unsigned mode,
- unsigned start, unsigned count);
-
-boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count);
-
-boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
+void r300_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
+
+void r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize, unsigned mode,
+ unsigned start, unsigned count);
+
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count);
+
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
+
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
#endif /* R300_RENDER_H */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 8bcd6c5060..49072462ec 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -283,13 +283,6 @@ static void r300_delete_dsa_state(struct pipe_context* pipe,
FREE(state);
}
-static void r300_set_edgeflags(struct pipe_context* pipe,
- const unsigned* bitfield)
-{
- /* XXX you know it's bad when i915 has this blank too */
- /* XXX and even worse, I have no idea WTF the bitfield is */
-}
-
static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
struct r300_scissor_regs *scissor,
boolean is_r500)
@@ -850,8 +843,6 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
- r300->context.set_edgeflags = r300_set_edgeflags;
-
r300->context.set_framebuffer_state = r300_set_framebuffer_state;
r300->context.create_fs_state = r300_create_fs_state;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 29bc701a86..727ae7ade6 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -71,9 +71,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300,
struct tgsi_shader_info* info = &r300->vs->info;
int output;
- output = draw_find_vs_output(r300->draw,
- info->output_semantic_name[index],
- info->output_semantic_index[index]);
+ output = draw_find_shader_output(r300->draw,
+ info->output_semantic_name[index],
+ info->output_semantic_index[index]);
draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
}
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 096cdb20bb..a792c2cf98 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
/* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */
/* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
- /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */
+ /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
/* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */
/* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */
/* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index fa207c939c..c4ed0d712f 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -77,6 +77,11 @@ static void r300_shader_read_vs_outputs(
vs_outputs->fog = i;
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ assert(index == 0);
+ fprintf(stderr, "r300 VP: cannot handle edgeflag output\n");
+ assert(0);
+ break;
default:
assert(0);
}
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index f98087deb8..5f130453c3 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -36,6 +36,7 @@
#include "util/u_pack_color.h"
#include "sp_clear.h"
#include "sp_context.h"
+#include "sp_query.h"
#include "sp_tile_cache.h"
@@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
if (softpipe->no_rast)
return;
+ if (!softpipe_check_render_cond(softpipe))
+ return;
+
#if 0
softpipe_update_derived(softpipe); /* not needed?? */
#endif
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index f8bf3e9974..f3ac6760db 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe,
}
+/**
+ * Store the conditional-rendering query and mode on the softpipe context.
+ *
+ * Only records the two values; nothing is evaluated here.
+ * NOTE(review): presumably consumed by softpipe_check_render_cond() --
+ * confirm against sp_query.
+ */
+static void
+softpipe_render_condition( struct pipe_context *pipe,
+                           struct pipe_query *query,
+                           uint mode )
+{
+   struct softpipe_context *sp = softpipe_context( pipe );
+
+   sp->render_cond_mode = mode;
+   sp->render_cond_query = query;
+}
+
+
+
struct pipe_context *
softpipe_create( struct pipe_screen *screen )
{
@@ -191,6 +204,7 @@ softpipe_create( struct pipe_screen *screen )
#endif
softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
+ softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
softpipe->pipe.winsys = screen->winsys;
softpipe->pipe.screen = screen;
@@ -222,6 +236,10 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.bind_vs_state = softpipe_bind_vs_state;
softpipe->pipe.delete_vs_state = softpipe_delete_vs_state;
+ softpipe->pipe.create_gs_state = softpipe_create_gs_state;
+ softpipe->pipe.bind_gs_state = softpipe_bind_gs_state;
+ softpipe->pipe.delete_gs_state = softpipe_delete_gs_state;
+
softpipe->pipe.set_blend_color = softpipe_set_blend_color;
softpipe->pipe.set_clip_state = softpipe_set_clip_state;
softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer;
@@ -238,8 +256,6 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
- softpipe->pipe.set_edgeflags = softpipe_set_edgeflags;
-
softpipe->pipe.clear = softpipe_clear;
softpipe->pipe.flush = softpipe_flush;
@@ -249,6 +265,8 @@ softpipe_create( struct pipe_screen *screen )
softpipe_init_query_funcs( softpipe );
+ softpipe->pipe.render_condition = softpipe_render_condition;
+
/*
* Alloc caches for accessing drawing surfaces and textures.
* Must be before quad stage setup!
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 8ce20c5744..73fa744f9d 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -58,6 +58,7 @@ struct softpipe_context {
struct pipe_rasterizer_state *rasterizer;
struct sp_fragment_shader *fs;
struct sp_vertex_shader *vs;
+ struct sp_geometry_shader *gs;
/** Other rendering state */
struct pipe_blend_color blend_color;
@@ -115,6 +116,10 @@ struct softpipe_context {
unsigned line_stipple_counter;
+ /** Conditional query object and mode */
+ struct pipe_query *render_cond_query;
+ uint render_cond_mode;
+
/** Software quad rendering pipeline */
struct {
struct quad_stage *shade;
@@ -147,6 +152,7 @@ struct softpipe_context {
unsigned use_sse : 1;
unsigned dump_fs : 1;
+ unsigned dump_gs : 1;
unsigned no_rast : 1;
};
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index d4045816d0..3826a9e41a 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -38,6 +38,7 @@
#include "util/u_prim.h"
#include "sp_context.h"
+#include "sp_query.h"
#include "sp_state.h"
#include "draw/draw_context.h"
@@ -48,7 +49,7 @@ static void
softpipe_map_constant_buffers(struct softpipe_context *sp)
{
struct pipe_winsys *ws = sp->pipe.winsys;
- uint i, size;
+ uint i, vssize, gssize;
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
if (sp->constants[i].buffer && sp->constants[i].buffer->size)
@@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp)
}
if (sp->constants[PIPE_SHADER_VERTEX].buffer)
- size = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
+ vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
else
- size = 0;
+ vssize = 0;
- draw_set_mapped_constant_buffer(sp->draw,
+ if (sp->constants[PIPE_SHADER_GEOMETRY].buffer)
+ gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size;
+ else
+ gssize = 0;
+
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
sp->mapped_constants[PIPE_SHADER_VERTEX],
- size);
+ vssize);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY,
+ sp->mapped_constants[PIPE_SHADER_GEOMETRY],
+ gssize);
}
@@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
*/
draw_flush(sp->draw);
- draw_set_mapped_constant_buffer(sp->draw, NULL, 0);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0);
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < PIPE_SHADER_TYPES; i++) {
if (sp->constants[i].buffer && sp->constants[i].buffer->size)
ws->buffer_unmap(ws, sp->constants[i].buffer);
sp->mapped_constants[i] = NULL;
@@ -88,11 +98,11 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
}
-boolean
+void
softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+ softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
}
@@ -101,7 +111,7 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
* Basically, map the vertex buffers (and drawing surfaces), then hand off
* the drawing to the 'draw' module.
*/
-boolean
+void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -113,6 +123,9 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
struct draw_context *draw = sp->draw;
unsigned i;
+   /* Conditional rendering: skip the draw when the query predicate says so.
+    * This function now returns void, so the early-out must be a bare
+    * return (the previous "return TRUE" returned a value from a void
+    * function).
+    */
+   if (!softpipe_check_render_cond(sp))
+      return;
+
sp->reduced_api_prim = u_reduced_prim(mode);
if (sp->dirty)
@@ -168,27 +181,17 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
softpipe_unmap_constant_buffers(sp);
sp->dirty_render_cache = TRUE;
-
- return TRUE;
}
-boolean
+void
softpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return softpipe_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
-}
-
-
-void
-softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
- struct softpipe_context *sp = softpipe_context(pipe);
- draw_set_edgeflags(sp->draw, edgeflags);
+ softpipe_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 5fbac06a53..7f573aef3c 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr,
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+ (void) cvbr;
/* do nothing */
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index fe6b6cec35..d9babe81da 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs,
static const float zero[4] = { 0, 0, 0, 0 };
static const float one[4] = { 1, 1, 1, 1 };
struct softpipe_context *softpipe = qs->softpipe;
- float source[4][QUAD_SIZE];
+ float source[4][QUAD_SIZE] = { { 0 } };
/*
* Compute src/first term RGB
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 379cf4ad06..4ef5d9f7b1 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe,
}
+/**
+ * Evaluate the conditional-rendering predicate, if one has been set.
+ *
+ * Called by the drawing code before it does any work.  When the render
+ * condition mode is one of the two WAIT modes, TRUE is passed as the
+ * "wait" argument of get_query_result(); otherwise FALSE is passed.
+ * If no result could be obtained we err on the side of drawing.
+ *
+ * \return TRUE if we should render, FALSE if we should skip rendering
+ */
+boolean
+softpipe_check_render_cond(struct softpipe_context *sp)
+{
+   struct pipe_context *pipe = &sp->pipe;
+   const uint mode = sp->render_cond_mode;
+   const boolean wait = (mode == PIPE_RENDER_COND_WAIT ||
+                         mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+   uint64_t result;
+
+   /* no query predicate, draw normally */
+   if (!sp->render_cond_query)
+      return TRUE;
+
+   /* query result not available: draw anyway */
+   if (!pipe->get_query_result(pipe, sp->render_cond_query, wait, &result))
+      return TRUE;
+
+   return result > 0;
+}
+
+
void softpipe_init_query_funcs(struct softpipe_context *softpipe )
{
softpipe->pipe.create_query = softpipe_create_query;
diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h
index 05060a4575..736c033897 100644
--- a/src/gallium/drivers/softpipe/sp_query.h
+++ b/src/gallium/drivers/softpipe/sp_query.h
@@ -32,6 +32,10 @@
#ifndef SP_QUERY_H
#define SP_QUERY_H
+/* Forward declaration must precede the prototype below: a struct tag that
+ * is first seen inside a parameter list only has prototype scope, which
+ * would make this header depend on sp_context.h being included first.
+ */
+struct softpipe_context;
+
+/**
+ * Check the conditional-rendering predicate stored in the context.
+ * \return TRUE if we should render, FALSE if we should skip rendering
+ */
+extern boolean
+softpipe_check_render_cond(struct softpipe_context *sp);
+
+
struct softpipe_context;
extern void softpipe_init_query_funcs(struct softpipe_context * );
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 615581b95f..3da75364c5 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup )
}
/* Note: nr_attrs is only used for debugging (vertex printing) */
- setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw);
+ setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
sp->quad.first->begin( sp->quad.first );
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index d488fb8710..9b18dac67b 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -50,6 +50,7 @@
#define SP_NEW_VERTEX 0x1000
#define SP_NEW_VS 0x2000
#define SP_NEW_QUERY 0x4000
+#define SP_NEW_GS 0x8000
struct tgsi_sampler;
@@ -90,6 +91,11 @@ struct sp_vertex_shader {
int max_sampler; /* -1 if no samplers */
};
+/** Subclass of pipe_shader_state */
+struct sp_geometry_shader {
+ struct pipe_shader_state shader; /* softpipe_create_gs_state() stores its own tgsi_dup_tokens() copy in shader.tokens */
+ struct draw_geometry_shader *draw_data; /* result of draw_create_geometry_shader(); handed to the draw module on bind/delete */
+};
void *
@@ -143,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *,
const struct pipe_shader_state *);
void softpipe_bind_vs_state(struct pipe_context *, void *);
void softpipe_delete_vs_state(struct pipe_context *, void *);
+void *softpipe_create_gs_state(struct pipe_context *,
+ const struct pipe_shader_state *);
+void softpipe_bind_gs_state(struct pipe_context *, void *);
+void softpipe_delete_gs_state(struct pipe_context *, void *);
void softpipe_set_polygon_stipple( struct pipe_context *,
const struct pipe_poly_stipple * );
@@ -174,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *,
void softpipe_update_derived( struct softpipe_context *softpipe );
-boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count);
+void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count);
-boolean softpipe_draw_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count);
-boolean
+void softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count);
+void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -190,10 +200,6 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count);
void
-softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
-
-
-void
softpipe_map_transfers(struct softpipe_context *sp);
void
diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c
index efed082f82..95ab323433 100644
--- a/src/gallium/drivers/softpipe/sp_state_blend.c
+++ b/src/gallium/drivers/softpipe/sp_state_blend.c
@@ -29,6 +29,7 @@
*/
#include "util/u_memory.h"
+#include "draw/draw_context.h"
#include "sp_context.h"
#include "sp_state.h"
@@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ draw_flush(softpipe->draw);
+
softpipe->blend = (struct pipe_blend_state *)blend;
softpipe->dirty |= SP_NEW_BLEND;
@@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ draw_flush(softpipe->draw);
+
softpipe->blend_color = *blend_color;
softpipe->dirty |= SP_NEW_BLEND;
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index c24a737d07..f6856a5f69 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
/* compute vertex layout now */
const struct sp_fragment_shader *spfs = softpipe->fs;
struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
- const uint num = draw_num_vs_outputs(softpipe->draw);
+ const uint num = draw_current_shader_outputs(softpipe->draw);
uint i;
/* Tell draw_vbuf to simply emit the whole post-xform vertex
@@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
}
/* this includes texcoords and varying vars */
- src = draw_find_vs_output(softpipe->draw,
- spfs->info.input_semantic_name[i],
- spfs->info.input_semantic_index[i]);
+ src = draw_find_shader_output(softpipe->draw,
+ spfs->info.input_semantic_name[i],
+ spfs->info.input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
}
- softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
+ softpipe->psize_slot = draw_find_shader_output(softpipe->draw,
TGSI_SEMANTIC_PSIZE, 0);
if (softpipe->psize_slot > 0) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index b41f7e8ab7..aa12bb215a 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- softpipe->fs = (struct sp_fragment_shader *) fs;
+ draw_flush(softpipe->draw);
+
+ if (softpipe->fs == fs)
+ return;
+
+ draw_flush(softpipe->draw);
+
+ softpipe->fs = fs;
softpipe->dirty |= SP_NEW_FS;
}
@@ -159,9 +166,75 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ draw_flush(softpipe->draw);
+
/* note: reference counting */
pipe_buffer_reference(&softpipe->constants[shader].buffer,
buf ? buf->buffer : NULL);
softpipe->dirty |= SP_NEW_CONSTANTS;
}
+
+/**
+ * Create a geometry shader state object from the given template.
+ *
+ * The template's tokens are duplicated because the ones passed in will
+ * go away; the draw module's geometry shader is then created from the
+ * template.  Relies on CALLOC_STRUCT handing back a zeroed struct so
+ * that draw_data is NULL until it is assigned.
+ *
+ * \return the new sp_geometry_shader, or NULL if any step failed
+ */
+void *
+softpipe_create_gs_state(struct pipe_context *pipe,
+                         const struct pipe_shader_state *templ)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_geometry_shader *state = CALLOC_STRUCT(sp_geometry_shader);
+
+   if (!state)
+      return NULL;
+
+   /* debug */
+   if (softpipe->dump_gs)
+      tgsi_dump(templ->tokens, 0);
+
+   /* copy shader tokens, the ones passed in will go away.
+    */
+   state->shader.tokens = tgsi_dup_tokens(templ->tokens);
+
+   /* only build the draw shader once the token copy succeeded */
+   if (state->shader.tokens != NULL)
+      state->draw_data = draw_create_geometry_shader(softpipe->draw, templ);
+
+   if (state->draw_data != NULL)
+      return state;
+
+   /* failure: release whatever was allocated so far
+    * (draw_data is NULL on every path that gets here)
+    */
+   FREE( (void *)state->shader.tokens );
+   FREE( state->draw_data );
+   FREE( state );
+   return NULL;
+}
+
+
+/**
+ * Make \p gs the current geometry shader (NULL unbinds).
+ *
+ * Records the shader in the context, forwards the matching draw-module
+ * shader (or NULL) to the draw module and flags SP_NEW_GS as dirty.
+ */
+void
+softpipe_bind_gs_state(struct pipe_context *pipe, void *gs)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_geometry_shader *spgs = (struct sp_geometry_shader *)gs;
+   struct draw_geometry_shader *dgs = NULL;
+
+   if (spgs)
+      dgs = spgs->draw_data;
+
+   softpipe->gs = spgs;
+
+   draw_bind_geometry_shader(softpipe->draw, dgs);
+
+   softpipe->dirty |= SP_NEW_GS;
+}
+
+
+/**
+ * Destroy a geometry shader state object made by softpipe_create_gs_state().
+ *
+ * Releases the draw module's shader, the token copy that
+ * softpipe_create_gs_state() made with tgsi_dup_tokens() (previously
+ * leaked here) and the state object itself.  When \p gs is NULL a null
+ * shader is passed to the draw module and nothing else is dereferenced.
+ */
+void
+softpipe_delete_gs_state(struct pipe_context *pipe, void *gs)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+
+   struct sp_geometry_shader *state =
+      (struct sp_geometry_shader *)gs;
+
+   draw_delete_geometry_shader(softpipe->draw,
+                               (state) ? state->draw_data : 0);
+
+   if (state) {
+      /* same cast as the cleanup path in softpipe_create_gs_state() */
+      FREE( (void *)state->shader.tokens );
+   }
+   FREE(state);
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
index 87b7219683..a5b00336d4 100644
--- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c
+++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
@@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe,
}
void softpipe_bind_rasterizer_state(struct pipe_context *pipe,
- void *setup)
+ void *rasterizer)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ if (softpipe->rasterizer == rasterizer)
+ return;
+
/* pass-through to draw module */
- draw_set_rasterizer_state(softpipe->draw, setup);
+ draw_set_rasterizer_state(softpipe->draw, rasterizer);
- softpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+ softpipe->rasterizer = rasterizer;
softpipe->dirty |= SP_NEW_RASTERIZER;
}
diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
index a518248bb1..f6154109ea 100644
--- a/src/gallium/drivers/softpipe/sp_state_surface.c
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -51,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
struct softpipe_context *sp = softpipe_context(pipe);
uint i;
+ draw_flush(sp->draw);
+
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
/* check if changing cbuf */
if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) {
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index c3de12b4a3..af99c9de37 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -29,6 +29,7 @@
#include "pipe/p_inlines.h"
#include "pipe/p_screen.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "util/u_upload_mgr.h"
#include "svga_context.h"
@@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe )
u_upload_destroy( svga->upload_vb );
u_upload_destroy( svga->upload_ib );
+ util_bitmask_destroy( svga->vs_bm );
+ util_bitmask_destroy( svga->fs_bm );
+
for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader)
pipe_buffer_reference( &svga->curr.cb[shader], NULL );
@@ -130,7 +134,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
svga = CALLOC_STRUCT(svga_context);
if (svga == NULL)
- goto error1;
+ goto no_svga;
svga->pipe.winsys = screen->winsys;
svga->pipe.screen = screen;
@@ -142,7 +146,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
svga->swc = svgascreen->sws->context_create(svgascreen->sws);
if(!svga->swc)
- goto error2;
+ goto no_swc;
svga_init_blend_functions(svga);
svga_init_blit_functions(svga);
@@ -165,32 +169,40 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
if (!svga_init_swtnl(svga))
- goto error3;
+ goto no_swtnl;
+
+ svga->fs_bm = util_bitmask_create();
+ if (svga->fs_bm == NULL)
+ goto no_fs_bm;
+
+ svga->vs_bm = util_bitmask_create();
+ if (svga->vs_bm == NULL)
+ goto no_vs_bm;
svga->upload_ib = u_upload_create( svga->pipe.screen,
32 * 1024,
16,
PIPE_BUFFER_USAGE_INDEX );
if (svga->upload_ib == NULL)
- goto error4;
+ goto no_upload_ib;
svga->upload_vb = u_upload_create( svga->pipe.screen,
128 * 1024,
16,
PIPE_BUFFER_USAGE_VERTEX );
if (svga->upload_vb == NULL)
- goto error5;
+ goto no_upload_vb;
svga->hwtnl = svga_hwtnl_create( svga,
svga->upload_ib,
svga->swc );
if (svga->hwtnl == NULL)
- goto error6;
+ goto no_hwtnl;
ret = svga_emit_initial_state( svga );
if (ret)
- goto error7;
+ goto no_state;
/* Avoid shortcircuiting state with initial value of zero.
*/
@@ -209,19 +221,23 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
return &svga->pipe;
-error7:
+no_state:
svga_hwtnl_destroy( svga->hwtnl );
-error6:
+no_hwtnl:
u_upload_destroy( svga->upload_vb );
-error5:
+no_upload_vb:
u_upload_destroy( svga->upload_ib );
-error4:
+no_upload_ib:
+ util_bitmask_destroy( svga->vs_bm );
+no_vs_bm:
+ util_bitmask_destroy( svga->fs_bm );
+no_fs_bm:
svga_destroy_swtnl(svga);
-error3:
+no_swtnl:
svga->swc->destroy(svga->swc);
-error2:
+no_swc:
FREE(svga);
-error1:
+no_svga:
return NULL;
}
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index e650a251d1..fa7f6cb3bb 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -41,6 +41,7 @@
struct draw_vertex_shader;
struct svga_shader_result;
struct SVGACmdMemory;
+struct util_bitmask;
struct u_upload_mgr;
@@ -202,8 +203,6 @@ struct svga_state
struct pipe_clip_state clip;
struct pipe_viewport_state viewport;
- const unsigned *edgeflags;
-
unsigned num_samplers;
unsigned num_textures;
unsigned num_vertex_elements;
@@ -321,12 +320,14 @@ struct svga_context
boolean new_vdecl;
} swtnl;
+ /* Bitmask of used shader IDs */
+ struct util_bitmask *fs_bm;
+ struct util_bitmask *vs_bm;
+
struct {
unsigned dirty[4];
unsigned texture_timestamp;
- unsigned next_fs_id;
- unsigned next_vs_id;
/* Internally generated shaders:
*/
@@ -369,7 +370,7 @@ struct svga_context
#define SVGA_NEW_FRAME_BUFFER 0x800
#define SVGA_NEW_STIPPLE 0x1000
#define SVGA_NEW_SCISSOR 0x2000
-#define SVGA_NEW_BLEND_COLOR 0x5000
+#define SVGA_NEW_BLEND_COLOR 0x4000
#define SVGA_NEW_CLIP 0x8000
#define SVGA_NEW_VIEWPORT 0x10000
#define SVGA_NEW_PRESCALE 0x20000
@@ -380,9 +381,8 @@ struct svga_context
#define SVGA_NEW_NEED_SWTNL 0x400000
#define SVGA_NEW_FS_RESULT 0x800000
#define SVGA_NEW_VS_RESULT 0x1000000
-#define SVGA_NEW_EDGEFLAGS 0x2000000
-#define SVGA_NEW_ZERO_STRIDE 0x4000000
-#define SVGA_NEW_TEXTURE_FLAGS 0x8000000
+#define SVGA_NEW_ZERO_STRIDE 0x2000000
+#define SVGA_NEW_TEXTURE_FLAGS 0x4000000
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 8db40d0fd5..ca73cf9d5a 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
}
SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
- svga_surface(svga->curr.framebuffer.cbufs[0])->handle,
+ svga->curr.framebuffer.cbufs[0] ?
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
hwtnl->cmd.prim_count);
ret = SVGA3D_BeginDrawPrimitives(swc,
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 71a552862e..0f24ef4ee8 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -149,7 +149,7 @@ retry:
-static boolean
+static void
svga_draw_range_elements( struct pipe_context *pipe,
struct pipe_buffer *index_buffer,
unsigned index_size,
@@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe,
enum pipe_error ret = 0;
if (!u_trim_pipe_prim( prim, &count ))
- return TRUE;
+ return;
/*
* Mark currently bound target surfaces as dirty
@@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe,
#ifdef DEBUG
if (svga->curr.vs->base.id == svga->debug.disable_shader ||
svga->curr.fs->base.id == svga->debug.disable_shader)
- return 0;
+ return;
#endif
if (svga->state.sw.need_swtnl)
@@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe,
svga_hwtnl_flush_retry( svga );
svga_context_flush(svga, NULL);
}
-
- return ret == PIPE_OK;
}
-static boolean
+static void
svga_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *index_buffer,
unsigned index_size,
unsigned prim, unsigned start, unsigned count)
{
- return svga_draw_range_elements( pipe, index_buffer,
- index_size,
- 0, 0xffffffff,
- prim, start, count );
+ svga_draw_range_elements( pipe, index_buffer,
+ index_size,
+ 0, 0xffffffff,
+ prim, start, count );
}
-static boolean
+static void
svga_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return svga_draw_range_elements(pipe, NULL, 0,
- start, start + count - 1,
- prim,
- start, count);
+ svga_draw_range_elements(pipe, NULL, 0,
+ start, start + count - 1,
+ prim,
+ start, count);
}
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
index e3be840d92..a461a86dd3 100644
--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -26,6 +26,7 @@
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
@@ -107,6 +108,8 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader)
assert(ret == PIPE_OK);
}
+ util_bitmask_clear( svga->fs_bm, result->id );
+
svga_destroy_shader_result( result );
}
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index 28e2787e0d..42f290d162 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -84,18 +84,6 @@ static void svga_set_vertex_elements(struct pipe_context *pipe,
}
-static void svga_set_edgeflags(struct pipe_context *pipe,
- const unsigned *bitfield)
-{
- struct svga_context *svga = svga_context(pipe);
-
- if (bitfield != NULL || svga->curr.edgeflags != NULL) {
- svga->curr.edgeflags = bitfield;
- svga->dirty |= SVGA_NEW_EDGEFLAGS;
- }
-}
-
-
void svga_cleanup_vertex_state( struct svga_context *svga )
{
unsigned i;
@@ -109,7 +97,6 @@ void svga_init_vertex_functions( struct svga_context *svga )
{
svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
svga->pipe.set_vertex_elements = svga_set_vertex_elements;
- svga->pipe.set_edgeflags = svga_set_edgeflags;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
index c104c41f5f..e82d10c259 100644
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -27,6 +27,7 @@
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
@@ -172,6 +173,8 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader)
assert(ret == PIPE_OK);
}
+ util_bitmask_clear( svga->vs_bm, result->id );
+
svga_destroy_shader_result( result );
}
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 6ec38ed3e4..1902b0106b 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -26,6 +26,7 @@
#include "pipe/p_inlines.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
+#include "util/u_bitmask.h"
#include "svga_context.h"
#include "svga_state.h"
@@ -74,9 +75,12 @@ static enum pipe_error compile_fs( struct svga_context *svga,
goto fail;
}
+ result->id = util_bitmask_add(svga->fs_bm);
+ if(result->id == UTIL_BITMASK_INVALID_INDEX)
+ goto fail;
ret = SVGA3D_DefineShader(svga->swc,
- svga->state.next_fs_id,
+ result->id,
SVGA3D_SHADERTYPE_PS,
result->tokens,
result->nr_tokens * sizeof result->tokens[0]);
@@ -84,14 +88,16 @@ static enum pipe_error compile_fs( struct svga_context *svga,
goto fail;
*out_result = result;
- result->id = svga->state.next_fs_id++;
result->next = fs->base.results;
fs->base.results = result;
return PIPE_OK;
fail:
- if (result)
+ if (result) {
+ if (result->id != UTIL_BITMASK_INVALID_INDEX)
+ util_bitmask_clear( svga->fs_bm, result->id );
svga_destroy_shader_result( result );
+ }
return ret;
}
@@ -116,7 +122,7 @@ fail:
*/
static int emit_white_fs( struct svga_context *svga )
{
- int ret;
+ int ret = PIPE_ERROR;
/* ps_3_0
* def c0, 1.000000, 0.000000, 0.000000, 1.000000
@@ -137,16 +143,26 @@ static int emit_white_fs( struct svga_context *svga )
0x0000ffff,
};
+ assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX);
+ svga->state.white_fs_id = util_bitmask_add(svga->fs_bm);
+ if(svga->state.white_fs_id == SVGA3D_INVALID_ID)
+ goto no_fs_id;
+
ret = SVGA3D_DefineShader(svga->swc,
- svga->state.next_fs_id,
+ svga->state.white_fs_id,
SVGA3D_SHADERTYPE_PS,
white_tokens,
sizeof(white_tokens));
if (ret)
- return ret;
+ goto no_definition;
- svga->state.white_fs_id = svga->state.next_fs_id++;
return 0;
+
+no_definition:
+ util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id);
+ svga->state.white_fs_id = SVGA3D_INVALID_ID;
+no_fs_id:
+ return ret;
}
@@ -251,12 +267,14 @@ static int emit_hw_fs( struct svga_context *svga,
assert(id != SVGA3D_INVALID_ID);
- if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) {
- ret = SVGA3D_SetShader(svga->swc,
- SVGA3D_SHADERTYPE_PS,
- id );
- if (ret)
- return ret;
+ if (result != svga->state.hw_draw.fs) {
+ if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) {
+ ret = SVGA3D_SetShader(svga->swc,
+ SVGA3D_SHADERTYPE_PS,
+ id );
+ if (ret)
+ return ret;
+ }
svga->dirty |= SVGA_NEW_FS_RESULT;
svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id;
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index 00201b8091..3c35a8579f 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -108,6 +108,7 @@ static int update_need_pipeline( struct svga_context *svga,
{
boolean need_pipeline = FALSE;
+ struct svga_vertex_shader *vs = svga->curr.vs;
/* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
*/
@@ -119,11 +120,9 @@ static int update_need_pipeline( struct svga_context *svga,
need_pipeline = TRUE;
}
- /* SVGA_NEW_EDGEFLAGS
+ /* EDGEFLAGS
*/
- if (svga->curr.rast->hw_unfilled != PIPE_POLYGON_MODE_FILL &&
- svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES &&
- svga->curr.edgeflags != NULL) {
+ if (vs->base.info.writes_edgeflag) {
SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__);
need_pipeline = TRUE;
}
@@ -150,6 +149,7 @@ struct svga_tracked_state svga_update_need_pipeline =
"need pipeline",
(SVGA_NEW_RAST |
SVGA_NEW_CLIP |
+ SVGA_NEW_VS |
SVGA_NEW_REDUCED_PRIMITIVE),
update_need_pipeline
};
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 44b7ceb4fa..7e34c0a980 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -27,6 +27,7 @@
#include "pipe/p_defines.h"
#include "util/u_format.h"
#include "util/u_math.h"
+#include "util/u_bitmask.h"
#include "translate/translate.h"
#include "svga_context.h"
@@ -78,8 +79,12 @@ static enum pipe_error compile_vs( struct svga_context *svga,
goto fail;
}
+ result->id = util_bitmask_add(svga->vs_bm);
+ if(result->id == UTIL_BITMASK_INVALID_INDEX)
+ goto fail;
+
ret = SVGA3D_DefineShader(svga->swc,
- svga->state.next_vs_id,
+ result->id,
SVGA3D_SHADERTYPE_VS,
result->tokens,
result->nr_tokens * sizeof result->tokens[0]);
@@ -87,14 +92,16 @@ static enum pipe_error compile_vs( struct svga_context *svga,
goto fail;
*out_result = result;
- result->id = svga->state.next_vs_id++;
result->next = vs->base.results;
vs->base.results = result;
return PIPE_OK;
fail:
- if (result)
+ if (result) {
+ if (result->id != UTIL_BITMASK_INVALID_INDEX)
+ util_bitmask_clear( svga->vs_bm, result->id );
svga_destroy_shader_result( result );
+ }
return ret;
}
@@ -142,12 +149,14 @@ static int emit_hw_vs( struct svga_context *svga,
id = result->id;
}
- if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) {
- ret = SVGA3D_SetShader(svga->swc,
- SVGA3D_SHADERTYPE_VS,
- id );
- if (ret)
- return ret;
+ if (result != svga->state.hw_draw.vs) {
+ if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) {
+ ret = SVGA3D_SetShader(svga->swc,
+ SVGA3D_SHADERTYPE_VS,
+ id );
+ if (ret)
+ return ret;
+ }
svga->dirty |= SVGA_NEW_VS_RESULT;
svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id;
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 8b14c913f7..7655121bec 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga,
PIPE_BUFFER_USAGE_CPU_READ);
assert(map);
draw_set_mapped_constant_buffer(
- draw,
+ draw, PIPE_SHADER_VERTEX,
map,
svga->curr.cb[PIPE_SHADER_VERTEX]->size);
}
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 1616312113..94b6ccc62d 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -120,10 +120,6 @@ static int update_swtnl_draw( struct svga_context *svga,
draw_set_mrd(svga->swtnl.draw,
svga->curr.depthscale);
- if (dirty & SVGA_NEW_EDGEFLAGS)
- draw_set_edgeflags( svga->swtnl.draw,
- svga->curr.edgeflags );
-
return 0;
}
@@ -138,8 +134,7 @@ struct svga_tracked_state svga_update_swtnl_draw =
SVGA_NEW_VIEWPORT |
SVGA_NEW_RAST |
SVGA_NEW_FRAME_BUFFER |
- SVGA_NEW_REDUCED_PRIMITIVE |
- SVGA_NEW_EDGEFLAGS),
+ SVGA_NEW_REDUCED_PRIMITIVE),
update_swtnl_draw
};
@@ -161,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga )
memset(vdecl, 0, sizeof(vdecl));
/* always add position */
- src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo->attrib[0].emit = EMIT_4F;
vdecl[0].array.offset = offset;
@@ -174,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga )
for (i = 0; i < fs->base.info.num_inputs; i++) {
unsigned name = fs->base.info.input_semantic_name[i];
unsigned index = fs->base.info.input_semantic_index[i];
- src = draw_find_vs_output(draw, name, index);
+ src = draw_find_shader_output(draw, name, index);
vdecl[nr_decls].array.offset = offset;
vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i];
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index b8ef137c01..0cd620189b 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -31,6 +31,7 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "svgadump/svga_shader_dump.h"
@@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader,
result->tokens = (const unsigned *)emit.buf;
result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
memcpy(&result->key, &key, sizeof key);
+ result->id = UTIL_BITMASK_INVALID_INDEX;
if (SVGA_DEBUG & DEBUG_TGSI)
{
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 1670da8bfa..dc5eb8fc60 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_NOT:
case TGSI_OPCODE_SHL:
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
case TGSI_OPCODE_XOR:
return FALSE;
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index e6d4a74e86..d59fb89a58 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -1444,6 +1444,312 @@ dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd)
+/**
+ * Dump one SVGA3D command in human-readable form through _debug_printf().
+ *
+ * @param cmd_id  command identifier; selects which decoder is used.
+ * @param data    start of the command body (svga_dump_commands() passes the
+ *                bytes that follow the SVGA3dCmdHeader).
+ * @param size    length of the command body in bytes.
+ *
+ * The fixed-size command struct is dumped first, followed by any trailing
+ * array elements that fit before the end of the body.  Whatever is left
+ * over (and the whole body of an unknown command) is printed as raw hex.
+ */
void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+   const uint8_t *body = (const uint8_t *)data;
+   const uint8_t *next = body + size;   /* one past the end of this command */
+
+   switch(cmd_id) {
+   case SVGA_3D_CMD_SURFACE_DEFINE:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
+      {
+         const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
+         dump_SVGA3dCmdDefineSurface(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dSize) <= next) {
+            dump_SVGA3dSize((const SVGA3dSize *)body);
+            body += sizeof(SVGA3dSize);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_DESTROY:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
+      {
+         const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
+         dump_SVGA3dCmdDestroySurface(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_COPY:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
+      {
+         const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
+         dump_SVGA3dCmdSurfaceCopy(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dCopyBox) <= next) {
+            dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+            body += sizeof(SVGA3dCopyBox);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_STRETCHBLT:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
+      {
+         const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
+         dump_SVGA3dCmdSurfaceStretchBlt(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_DMA:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
+      {
+         const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
+         dump_SVGA3dCmdSurfaceDMA(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dCopyBox) <= next) {
+            dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+            body += sizeof(SVGA3dCopyBox);
+         }
+         while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
+            dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
+            body += sizeof(SVGA3dCmdSurfaceDMASuffix);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_CONTEXT_DEFINE:
+      _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
+      {
+         const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
+         dump_SVGA3dCmdDefineContext(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_CONTEXT_DESTROY:
+      _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
+      {
+         const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
+         dump_SVGA3dCmdDestroyContext(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETTRANSFORM:
+      _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
+      {
+         const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
+         dump_SVGA3dCmdSetTransform(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETZRANGE:
+      _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
+      {
+         const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
+         dump_SVGA3dCmdSetZRange(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETRENDERSTATE:
+      _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
+      {
+         const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
+         dump_SVGA3dCmdSetRenderState(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dRenderState) <= next) {
+            dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
+            body += sizeof(SVGA3dRenderState);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SETRENDERTARGET:
+      _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
+      {
+         const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
+         dump_SVGA3dCmdSetRenderTarget(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETTEXTURESTATE:
+      _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
+      {
+         const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
+         dump_SVGA3dCmdSetTextureState(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dTextureState) <= next) {
+            dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
+            body += sizeof(SVGA3dTextureState);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SETMATERIAL:
+      _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
+      {
+         const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
+         dump_SVGA3dCmdSetMaterial(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETLIGHTDATA:
+      _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
+      {
+         const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
+         dump_SVGA3dCmdSetLightData(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETLIGHTENABLED:
+      _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
+      {
+         const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
+         dump_SVGA3dCmdSetLightEnabled(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETVIEWPORT:
+      _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
+      {
+         const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
+         dump_SVGA3dCmdSetViewport(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETCLIPPLANE:
+      _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
+      {
+         const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
+         dump_SVGA3dCmdSetClipPlane(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_CLEAR:
+      _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
+      {
+         const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
+         dump_SVGA3dCmdClear(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dRect) <= next) {
+            dump_SVGA3dRect((const SVGA3dRect *)body);
+            body += sizeof(SVGA3dRect);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_PRESENT:
+      _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
+      {
+         const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
+         dump_SVGA3dCmdPresent(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dCopyRect) <= next) {
+            dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
+            body += sizeof(SVGA3dCopyRect);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SHADER_DEFINE:
+      _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
+      {
+         const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
+         dump_SVGA3dCmdDefineShader(cmd);
+         body = (const uint8_t *)&cmd[1];
+         /* Everything after the header is handed to the shader dumper. */
+         svga_shader_dump((const uint32_t *)body,
+                          (unsigned)(next - body)/sizeof(uint32_t),
+                          FALSE );
+         body = next;
+      }
+      break;
+   case SVGA_3D_CMD_SHADER_DESTROY:
+      _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
+      {
+         const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
+         dump_SVGA3dCmdDestroyShader(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SET_SHADER:
+      _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
+      {
+         const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
+         dump_SVGA3dCmdSetShader(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SET_SHADER_CONST:
+      _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
+      {
+         const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
+         dump_SVGA3dCmdSetShaderConst(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_DRAW_PRIMITIVES:
+      _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
+      {
+         const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
+         unsigned i, j;
+         dump_SVGA3dCmdDrawPrimitives(cmd);
+         body = (const uint8_t *)&cmd[1];
+         /* The counts come from the command itself; also stop at the end of
+          * the body so a bogus count cannot make us read past it. */
+         for(i = 0; i < cmd->numVertexDecls &&
+                    body + sizeof(SVGA3dVertexDecl) <= next; ++i) {
+            dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
+            body += sizeof(SVGA3dVertexDecl);
+         }
+         for(j = 0; j < cmd->numRanges &&
+                    body + sizeof(SVGA3dPrimitiveRange) <= next; ++j) {
+            dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
+            body += sizeof(SVGA3dPrimitiveRange);
+         }
+         while(body + sizeof(SVGA3dVertexDivisor) <= next) {
+            dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
+            body += sizeof(SVGA3dVertexDivisor);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SETSCISSORRECT:
+      _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
+      {
+         const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
+         dump_SVGA3dCmdSetScissorRect(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_BEGIN_QUERY:
+      _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
+      {
+         const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
+         dump_SVGA3dCmdBeginQuery(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_END_QUERY:
+      _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
+      {
+         const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
+         dump_SVGA3dCmdEndQuery(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_WAIT_FOR_QUERY:
+      _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
+      {
+         const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
+         dump_SVGA3dCmdWaitForQuery(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
+      _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
+      {
+         const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
+         dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGASignedRect) <= next) {
+            dump_SVGASignedRect((const SVGASignedRect *)body);
+            body += sizeof(SVGASignedRect);
+         }
+      }
+      break;
+   default:
+      /* Unknown command: print its id; the body is hex-dumped below. */
+      _debug_printf("\t0x%08x\n", cmd_id);
+      break;
+   }
+
+   /* Hex-dump whatever the decoder above did not consume, word by word. */
+   while(body + sizeof(uint32_t) <= next) {
+      _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+      body += sizeof(uint32_t);
+   }
+   /* Then the 1-3 bytes that do not fill a word.  This loop used to repeat
+    * the uint32_t condition above and therefore never ran.
+    * NOTE(review): svga_dump.py emits the same loop; fix it there as well. */
+   while(body + sizeof(uint8_t) <= next)
+      _debug_printf("\t\t0x%02x\n", *body++);
+}
+
+
+void
svga_dump_commands(const void *commands, uint32_t size)
{
const uint8_t *next = commands;
@@ -1458,307 +1764,11 @@ svga_dump_commands(const void *commands, uint32_t size)
const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
const uint8_t *body = (const uint8_t *)&header[1];
- next = (const uint8_t *)body + header->size;
+ next = body + header->size;
if(next > last)
break;
- switch(cmd_id) {
- case SVGA_3D_CMD_SURFACE_DEFINE:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
- {
- const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
- dump_SVGA3dCmdDefineSurface(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dSize) <= next) {
- dump_SVGA3dSize((const SVGA3dSize *)body);
- body += sizeof(SVGA3dSize);
- }
- }
- break;
- case SVGA_3D_CMD_SURFACE_DESTROY:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
- {
- const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
- dump_SVGA3dCmdDestroySurface(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SURFACE_COPY:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
- {
- const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
- dump_SVGA3dCmdSurfaceCopy(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dCopyBox) <= next) {
- dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
- body += sizeof(SVGA3dCopyBox);
- }
- }
- break;
- case SVGA_3D_CMD_SURFACE_STRETCHBLT:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
- {
- const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
- dump_SVGA3dCmdSurfaceStretchBlt(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SURFACE_DMA:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
- {
- const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
- dump_SVGA3dCmdSurfaceDMA(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dCopyBox) <= next) {
- dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
- body += sizeof(SVGA3dCopyBox);
- }
- while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
- dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
- body += sizeof(SVGA3dCmdSurfaceDMASuffix);
- }
- }
- break;
- case SVGA_3D_CMD_CONTEXT_DEFINE:
- _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
- {
- const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
- dump_SVGA3dCmdDefineContext(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_CONTEXT_DESTROY:
- _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
- {
- const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
- dump_SVGA3dCmdDestroyContext(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETTRANSFORM:
- _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
- {
- const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
- dump_SVGA3dCmdSetTransform(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETZRANGE:
- _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
- {
- const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
- dump_SVGA3dCmdSetZRange(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETRENDERSTATE:
- _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
- {
- const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
- dump_SVGA3dCmdSetRenderState(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dRenderState) <= next) {
- dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
- body += sizeof(SVGA3dRenderState);
- }
- }
- break;
- case SVGA_3D_CMD_SETRENDERTARGET:
- _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
- {
- const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
- dump_SVGA3dCmdSetRenderTarget(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETTEXTURESTATE:
- _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
- {
- const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
- dump_SVGA3dCmdSetTextureState(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dTextureState) <= next) {
- dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
- body += sizeof(SVGA3dTextureState);
- }
- }
- break;
- case SVGA_3D_CMD_SETMATERIAL:
- _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
- {
- const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
- dump_SVGA3dCmdSetMaterial(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETLIGHTDATA:
- _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
- {
- const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
- dump_SVGA3dCmdSetLightData(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETLIGHTENABLED:
- _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
- {
- const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
- dump_SVGA3dCmdSetLightEnabled(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETVIEWPORT:
- _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
- {
- const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
- dump_SVGA3dCmdSetViewport(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETCLIPPLANE:
- _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
- {
- const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
- dump_SVGA3dCmdSetClipPlane(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_CLEAR:
- _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
- {
- const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
- dump_SVGA3dCmdClear(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dRect) <= next) {
- dump_SVGA3dRect((const SVGA3dRect *)body);
- body += sizeof(SVGA3dRect);
- }
- }
- break;
- case SVGA_3D_CMD_PRESENT:
- _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
- {
- const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
- dump_SVGA3dCmdPresent(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dCopyRect) <= next) {
- dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
- body += sizeof(SVGA3dCopyRect);
- }
- }
- break;
- case SVGA_3D_CMD_SHADER_DEFINE:
- _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
- {
- const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
- dump_SVGA3dCmdDefineShader(cmd);
- body = (const uint8_t *)&cmd[1];
- svga_shader_dump((const uint32_t *)body,
- (unsigned)(next - body)/sizeof(uint32_t),
- FALSE );
- body = next;
- }
- break;
- case SVGA_3D_CMD_SHADER_DESTROY:
- _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
- {
- const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
- dump_SVGA3dCmdDestroyShader(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SET_SHADER:
- _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
- {
- const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
- dump_SVGA3dCmdSetShader(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SET_SHADER_CONST:
- _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
- {
- const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
- dump_SVGA3dCmdSetShaderConst(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_DRAW_PRIMITIVES:
- _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
- {
- const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
- unsigned i, j;
- dump_SVGA3dCmdDrawPrimitives(cmd);
- body = (const uint8_t *)&cmd[1];
- for(i = 0; i < cmd->numVertexDecls; ++i) {
- dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
- body += sizeof(SVGA3dVertexDecl);
- }
- for(j = 0; j < cmd->numRanges; ++j) {
- dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
- body += sizeof(SVGA3dPrimitiveRange);
- }
- while(body + sizeof(SVGA3dVertexDivisor) <= next) {
- dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
- body += sizeof(SVGA3dVertexDivisor);
- }
- }
- break;
- case SVGA_3D_CMD_SETSCISSORRECT:
- _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
- {
- const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
- dump_SVGA3dCmdSetScissorRect(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_BEGIN_QUERY:
- _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
- {
- const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
- dump_SVGA3dCmdBeginQuery(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_END_QUERY:
- _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
- {
- const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
- dump_SVGA3dCmdEndQuery(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_WAIT_FOR_QUERY:
- _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
- {
- const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
- dump_SVGA3dCmdWaitForQuery(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
- _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
- {
- const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
- dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGASignedRect) <= next) {
- dump_SVGASignedRect((const SVGASignedRect *)body);
- body += sizeof(SVGASignedRect);
- }
- }
- break;
- default:
- _debug_printf("\t0x%08x\n", cmd_id);
- break;
- }
-
- while(body + sizeof(uint32_t) <= next) {
- _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
- body += sizeof(uint32_t);
- }
- while(body + sizeof(uint32_t) <= next)
- _debug_printf("\t\t0x%02x\n", *body++);
+ svga_dump_command(cmd_id, body, header->size);
}
else if(cmd_id == SVGA_CMD_FENCE) {
_debug_printf("\tSVGA_CMD_FENCE\n");
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h
index 69a8702087..ca0154361c 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.h
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.h
@@ -28,6 +28,9 @@
#include "pipe/p_compiler.h"
+void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size);
+
void
svga_dump_commands(const void *commands, uint32_t size);
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
index a1ada29ef8..0bc0b3ae31 100755
--- a/src/gallium/drivers/svga/svgadump/svga_dump.py
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -208,6 +208,56 @@ cmds = [
def dump_cmds():
print r'''
void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+ const uint8_t *body = (const uint8_t *)data;
+ const uint8_t *next = body + size;
+'''
+ print ' switch(cmd_id) {'
+ indexes = 'ijklmn'
+ for id, header, body, footer in cmds:
+ print ' case %s:' % id
+ print ' _debug_printf("\\t%s\\n");' % id
+ print ' {'
+ print ' const %s *cmd = (const %s *)body;' % (header, header)
+ if len(body):
+ print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';'
+ print ' dump_%s(cmd);' % header
+ print ' body = (const uint8_t *)&cmd[1];'
+ for i in range(len(body)):
+ struct, count = body[i]
+ idx = indexes[i]
+ print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
+ print ' dump_%s((const %s *)body);' % (struct, struct)
+ print ' body += sizeof(%s);' % struct
+ print ' }'
+ if footer is not None:
+ print ' while(body + sizeof(%s) <= next) {' % footer
+ print ' dump_%s((const %s *)body);' % (footer, footer)
+ print ' body += sizeof(%s);' % footer
+ print ' }'
+ if id == 'SVGA_3D_CMD_SHADER_DEFINE':
+ print ' svga_shader_dump((const uint32_t *)body,'
+ print ' (unsigned)(next - body)/sizeof(uint32_t),'
+ print ' FALSE);'
+ print ' body = next;'
+ print ' }'
+ print ' break;'
+ print ' default:'
+ print ' _debug_printf("\\t0x%08x\\n", cmd_id);'
+ print ' break;'
+ print ' }'
+ print r'''
+ while(body + sizeof(uint32_t) <= next) {
+ _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+ body += sizeof(uint32_t);
+ }
+ while(body + sizeof(uint32_t) <= next)
+ _debug_printf("\t\t0x%02x\n", *body++);
+}
+'''
+ print r'''
+void
svga_dump_commands(const void *commands, uint32_t size)
{
const uint8_t *next = commands;
@@ -222,51 +272,11 @@ svga_dump_commands(const void *commands, uint32_t size)
const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
const uint8_t *body = (const uint8_t *)&header[1];
- next = (const uint8_t *)body + header->size;
+ next = body + header->size;
if(next > last)
break;
-'''
- print ' switch(cmd_id) {'
- indexes = 'ijklmn'
- for id, header, body, footer in cmds:
- print ' case %s:' % id
- print ' _debug_printf("\\t%s\\n");' % id
- print ' {'
- print ' const %s *cmd = (const %s *)body;' % (header, header)
- if len(body):
- print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';'
- print ' dump_%s(cmd);' % header
- print ' body = (const uint8_t *)&cmd[1];'
- for i in range(len(body)):
- struct, count = body[i]
- idx = indexes[i]
- print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
- print ' dump_%s((const %s *)body);' % (struct, struct)
- print ' body += sizeof(%s);' % struct
- print ' }'
- if footer is not None:
- print ' while(body + sizeof(%s) <= next) {' % footer
- print ' dump_%s((const %s *)body);' % (footer, footer)
- print ' body += sizeof(%s);' % footer
- print ' }'
- if id == 'SVGA_3D_CMD_SHADER_DEFINE':
- print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));'
- print ' body = next;'
- print ' }'
- print ' break;'
- print ' default:'
- print ' _debug_printf("\\t0x%08x\\n", cmd_id);'
- print ' break;'
- print ' }'
-
- print r'''
- while(body + sizeof(uint32_t) <= next) {
- _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
- body += sizeof(uint32_t);
- }
- while(body + sizeof(uint32_t) <= next)
- _debug_printf("\t\t0x%02x\n", *body++);
+ svga_dump_command(cmd_id, body, header->size);
}
else if(cmd_id == SVGA_CMD_FENCE) {
_debug_printf("\tSVGA_CMD_FENCE\n");
diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README
index 1000c31e49..203c3851bc 100644
--- a/src/gallium/drivers/trace/README
+++ b/src/gallium/drivers/trace/README
@@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing
ldd progs/trivial/tri
-== Traceing ==
+== Tracing ==
-For traceing then do
+To capture a trace, run
- export XMESA_TRACE=y
GALLIUM_TRACE=tri.trace progs/trivial/tri
which should create a tri.trace file, which is an XML file. You can view copying
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 2f0f063d2d..075e4f9a0b 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx,
assert(tr_buf->buffer);
assert(tr_buf->buffer->screen == tr_scr->screen);
+ (void) tr_scr;
return tr_buf->buffer;
}
@@ -90,30 +91,12 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
assert(tr_surf->surface);
assert(tr_surf->surface->texture->screen == tr_scr->screen);
+ (void) tr_scr;
return tr_surf->surface;
}
static INLINE void
-trace_context_set_edgeflags(struct pipe_context *_pipe,
- const unsigned *bitfield)
-{
- struct trace_context *tr_ctx = trace_context(_pipe);
- struct pipe_context *pipe = tr_ctx->pipe;
-
- trace_dump_call_begin("pipe_context", "set_edgeflags");
-
- trace_dump_arg(ptr, pipe);
- /* FIXME: we don't know how big this array is */
- trace_dump_arg(ptr, bitfield);
-
- pipe->set_edgeflags(pipe, bitfield);
-
- trace_dump_call_end();
-}
-
-
-static INLINE void
trace_context_draw_block(struct trace_context *tr_ctx, int flag)
{
int k;
@@ -178,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag)
pipe_mutex_unlock(tr_ctx->draw_mutex);
}
-static INLINE boolean
+static INLINE void
trace_context_draw_arrays(struct pipe_context *_pipe,
unsigned mode, unsigned start, unsigned count)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -198,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_arrays(pipe, mode, start, count);
-
- trace_dump_ret(bool, result);
+ pipe->draw_arrays(pipe, mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
-static INLINE boolean
+static INLINE void
trace_context_draw_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -220,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe,
struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
struct pipe_context *pipe = tr_ctx->pipe;
struct pipe_buffer *indexBuffer = tr_buf->buffer;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -238,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
-
- trace_dump_ret(bool, result);
+ pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
-static INLINE boolean
+static INLINE void
trace_context_draw_range_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -264,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
struct pipe_context *pipe = tr_ctx->pipe;
struct pipe_buffer *indexBuffer = tr_buf->buffer;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -284,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_range_elements(pipe,
- indexBuffer,
- indexSize, minIndex, maxIndex,
- mode, start, count);
-
- trace_dump_ret(bool, result);
+ pipe->draw_range_elements(pipe,
+ indexBuffer,
+ indexSize, minIndex, maxIndex,
+ mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
@@ -1298,7 +1266,6 @@ trace_context_create(struct pipe_screen *_screen,
tr_ctx->base.winsys = _screen->winsys;
tr_ctx->base.screen = _screen;
tr_ctx->base.destroy = trace_context_destroy;
- tr_ctx->base.set_edgeflags = trace_context_set_edgeflags;
tr_ctx->base.draw_arrays = trace_context_draw_arrays;
tr_ctx->base.draw_elements = trace_context_draw_elements;
tr_ctx->base.draw_range_elements = trace_context_draw_range_elements;
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 7e2ccbcfdc..0f45e211a3 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -40,7 +40,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#include <stdlib.h>
#endif
@@ -258,7 +258,7 @@ boolean trace_dump_trace_begin()
trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n");
trace_dump_writes("<trace version='0.1'>\n");
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
/* Linux applications rarely cleanup GL / Gallium resources so catch
* application exit here */
atexit(trace_dump_trace_close);
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index c31b1d8698..0546aad9b5 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -45,7 +45,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# define sleep Sleep
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE)
void usleep(int);
# define sleep usleep
#else
@@ -180,7 +180,7 @@ static int
trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial)
{
struct trace_screen *tr_scr = tr_rbug->tr_scr;
- struct trace_texture *tr_tex;
+ struct trace_texture *tr_tex = NULL;
struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header;
struct tr_list *ptr;
struct pipe_texture *t;
@@ -223,7 +223,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header;
struct trace_screen *tr_scr = tr_rbug->tr_scr;
- struct trace_texture *tr_tex;
+ struct trace_texture *tr_tex = NULL;
struct tr_list *ptr;
struct pipe_screen *screen = tr_scr->screen;
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index ac20a47af1..117503aaff 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -426,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen,
struct pipe_transfer *transfer = tr_trans->transfer;
if(tr_trans->map) {
- size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride;
+ size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride;
trace_dump_call_begin("pipe_screen", "transfer_write");
diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h
index 1c16042ee5..e2f981d051 100644
--- a/src/gallium/drivers/trace/tr_state.h
+++ b/src/gallium/drivers/trace/tr_state.h
@@ -32,7 +32,7 @@ struct tgsi_token;
enum trace_shader_type {
TRACE_SHADER_FRAGMENT = 0,
TRACE_SHADER_VERTEX = 1,
- TRACE_SHADER_GEOMETRY = 2,
+ TRACE_SHADER_GEOMETRY = 2
};
struct trace_shader
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index f896001eb1..d2f8085b42 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -57,41 +57,41 @@ struct pipe_context {
void (*destroy)( struct pipe_context * );
-
- /* Possible interface for setting edgeflags. These aren't really
- * vertex elements, so don't fit there.
- */
- void (*set_edgeflags)( struct pipe_context *,
- const unsigned *bitfield );
-
-
/**
* VBO drawing (return false on fallbacks (temporary??))
*/
/*@{*/
- boolean (*draw_arrays)( struct pipe_context *pipe,
- unsigned mode, unsigned start, unsigned count);
+ void (*draw_arrays)( struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count);
- boolean (*draw_elements)( struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count);
+ void (*draw_elements)( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count);
/* XXX: this is (probably) a temporary entrypoint, as the range
* information should be available from the vertex_buffer state.
* Using this to quickly evaluate a specialized path in the draw
* module.
*/
- boolean (*draw_range_elements)( struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
+ void (*draw_range_elements)( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
/*@}*/
+ /**
+ * Predicate subsequent rendering on occlusion query result
+ * \param query the query predicate, or NULL if no predicate
+ * \param mode one of PIPE_RENDER_COND_x
+ */
+ void (*render_condition)( struct pipe_context *pipe,
+ struct pipe_query *query,
+ uint mode );
/**
* Query objects
@@ -150,6 +150,12 @@ struct pipe_context {
const struct pipe_shader_state *);
void (*bind_vs_state)(struct pipe_context *, void *);
void (*delete_vs_state)(struct pipe_context *, void *);
+
+ void * (*create_gs_state)(struct pipe_context *,
+ const struct pipe_shader_state *);
+ void (*bind_gs_state)(struct pipe_context *, void *);
+ void (*delete_gs_state)(struct pipe_context *, void *);
+
/*@}*/
/**
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index fe1390d765..c3b1e634ff 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -321,23 +321,28 @@ enum pipe_transfer_usage {
*/
#define PIPE_SHADER_VERTEX 0
#define PIPE_SHADER_FRAGMENT 1
-#define PIPE_SHADER_TYPES 2
+#define PIPE_SHADER_GEOMETRY 2
+#define PIPE_SHADER_TYPES 3
/**
* Primitive types:
*/
-#define PIPE_PRIM_POINTS 0
-#define PIPE_PRIM_LINES 1
-#define PIPE_PRIM_LINE_LOOP 2
-#define PIPE_PRIM_LINE_STRIP 3
-#define PIPE_PRIM_TRIANGLES 4
-#define PIPE_PRIM_TRIANGLE_STRIP 5
-#define PIPE_PRIM_TRIANGLE_FAN 6
-#define PIPE_PRIM_QUADS 7
-#define PIPE_PRIM_QUAD_STRIP 8
-#define PIPE_PRIM_POLYGON 9
-#define PIPE_PRIM_MAX 10
+#define PIPE_PRIM_POINTS 0
+#define PIPE_PRIM_LINES 1
+#define PIPE_PRIM_LINE_LOOP 2
+#define PIPE_PRIM_LINE_STRIP 3
+#define PIPE_PRIM_TRIANGLES 4
+#define PIPE_PRIM_TRIANGLE_STRIP 5
+#define PIPE_PRIM_TRIANGLE_FAN 6
+#define PIPE_PRIM_QUADS 7
+#define PIPE_PRIM_QUAD_STRIP 8
+#define PIPE_PRIM_POLYGON 9
+#define PIPE_PRIM_LINES_ADJACENCY 10
+#define PIPE_PRIM_LINE_STRIP_ADJACENCY 11
+#define PIPE_PRIM_TRIANGLES_ADJACENCY 12
+#define PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY 13
+#define PIPE_PRIM_MAX 14
/**
@@ -350,6 +355,15 @@ enum pipe_transfer_usage {
/**
+ * Conditional rendering modes
+ */
+#define PIPE_RENDER_COND_WAIT 0
+#define PIPE_RENDER_COND_NO_WAIT 1
+#define PIPE_RENDER_COND_BY_REGION_WAIT 2
+#define PIPE_RENDER_COND_BY_REGION_NO_WAIT 3
+
+
+/**
* Point sprite coord modes
*/
#define PIPE_SPRITE_COORD_NONE 0
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 79f3d3f056..550e2abc32 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -121,15 +121,17 @@ struct tgsi_declaration_range
unsigned Last : 16; /**< UINT */
};
-#define TGSI_SEMANTIC_POSITION 0
-#define TGSI_SEMANTIC_COLOR 1
-#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */
-#define TGSI_SEMANTIC_FOG 3
-#define TGSI_SEMANTIC_PSIZE 4
-#define TGSI_SEMANTIC_GENERIC 5
-#define TGSI_SEMANTIC_NORMAL 6
-#define TGSI_SEMANTIC_FACE 7
-#define TGSI_SEMANTIC_COUNT 8 /**< number of semantic values */
+#define TGSI_SEMANTIC_POSITION 0
+#define TGSI_SEMANTIC_COLOR 1
+#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */
+#define TGSI_SEMANTIC_FOG 3
+#define TGSI_SEMANTIC_PSIZE 4
+#define TGSI_SEMANTIC_GENERIC 5
+#define TGSI_SEMANTIC_NORMAL 6
+#define TGSI_SEMANTIC_FACE 7
+#define TGSI_SEMANTIC_EDGEFLAG 8
+#define TGSI_SEMANTIC_PRIMID 9
+#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */
struct tgsi_declaration_semantic
{
@@ -139,6 +141,8 @@ struct tgsi_declaration_semantic
};
#define TGSI_IMM_FLOAT32 0
+#define TGSI_IMM_UINT32 1
+#define TGSI_IMM_INT32 2
struct tgsi_immediate
{
@@ -151,6 +155,8 @@ struct tgsi_immediate
union tgsi_immediate_data
{
float Float;
+ unsigned Uint;
+ int Int;
};
#define TGSI_PROPERTY_GS_INPUT_PRIM 0
@@ -262,7 +268,7 @@ struct tgsi_property_data {
#define TGSI_OPCODE_NOT 85
#define TGSI_OPCODE_TRUNC 86
#define TGSI_OPCODE_SHL 87
-#define TGSI_OPCODE_SHR 88
+ /* gap */
#define TGSI_OPCODE_AND 89
#define TGSI_OPCODE_OR 90
#define TGSI_OPCODE_MOD 91
@@ -287,7 +293,33 @@ struct tgsi_property_data {
#define TGSI_OPCODE_KIL 116 /* conditional kill */
#define TGSI_OPCODE_END 117 /* aka HALT */
/* gap */
-#define TGSI_OPCODE_LAST 119
+#define TGSI_OPCODE_F2I 119
+#define TGSI_OPCODE_IDIV 120
+#define TGSI_OPCODE_IMAX 121
+#define TGSI_OPCODE_IMIN 122
+#define TGSI_OPCODE_INEG 123
+#define TGSI_OPCODE_ISGE 124
+#define TGSI_OPCODE_ISHR 125
+#define TGSI_OPCODE_ISLT 126
+#define TGSI_OPCODE_F2U 127
+#define TGSI_OPCODE_U2F 128
+#define TGSI_OPCODE_UADD 129
+#define TGSI_OPCODE_UDIV 130
+#define TGSI_OPCODE_UMAD 131
+#define TGSI_OPCODE_UMAX 132
+#define TGSI_OPCODE_UMIN 133
+#define TGSI_OPCODE_UMOD 134
+#define TGSI_OPCODE_UMUL 135
+#define TGSI_OPCODE_USEQ 136
+#define TGSI_OPCODE_USGE 137
+#define TGSI_OPCODE_USHR 138
+#define TGSI_OPCODE_USLT 139
+#define TGSI_OPCODE_USNE 140
+#define TGSI_OPCODE_SWITCH 141
+#define TGSI_OPCODE_CASE 142
+#define TGSI_OPCODE_DEFAULT 143
+#define TGSI_OPCODE_ENDSWITCH 144
+#define TGSI_OPCODE_LAST 145
#define TGSI_SAT_NONE 0 /* do not saturate */
#define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */
diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c
index 8819936fca..f2e5f3fb23 100644
--- a/src/gallium/state_trackers/dri/dri_context.c
+++ b/src/gallium/state_trackers/dri/dri_context.c
@@ -44,9 +44,9 @@
GLboolean
dri_create_context(const __GLcontextModes * visual,
- __DRIcontextPrivate * cPriv, void *sharedContextPrivate)
+ __DRIcontext * cPriv, void *sharedContextPrivate)
{
- __DRIscreenPrivate *sPriv = cPriv->driScreenPriv;
+ __DRIscreen *sPriv = cPriv->driScreenPriv;
struct dri_screen *screen = dri_screen(sPriv);
struct dri_context *ctx = NULL;
struct st_context *st_share = NULL;
@@ -97,7 +97,7 @@ dri_create_context(const __GLcontextModes * visual,
}
void
-dri_destroy_context(__DRIcontextPrivate * cPriv)
+dri_destroy_context(__DRIcontext * cPriv)
{
struct dri_context *ctx = dri_context(cPriv);
@@ -116,7 +116,7 @@ dri_destroy_context(__DRIcontextPrivate * cPriv)
}
GLboolean
-dri_unbind_context(__DRIcontextPrivate * cPriv)
+dri_unbind_context(__DRIcontext * cPriv)
{
if (cPriv) {
struct dri_context *ctx = dri_context(cPriv);
@@ -133,9 +133,9 @@ dri_unbind_context(__DRIcontextPrivate * cPriv)
}
GLboolean
-dri_make_current(__DRIcontextPrivate * cPriv,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv)
+dri_make_current(__DRIcontext * cPriv,
+ __DRIdrawable * driDrawPriv,
+ __DRIdrawable * driReadPriv)
{
if (cPriv) {
struct dri_context *ctx = dri_context(cPriv);
diff --git a/src/gallium/state_trackers/dri/dri_context.h b/src/gallium/state_trackers/dri/dri_context.h
index 4650178734..13f497462f 100644
--- a/src/gallium/state_trackers/dri/dri_context.h
+++ b/src/gallium/state_trackers/dri/dri_context.h
@@ -44,10 +44,10 @@ struct dri_drawable;
struct dri_context
{
/* dri */
- __DRIscreenPrivate *sPriv;
- __DRIcontextPrivate *cPriv;
- __DRIdrawablePrivate *dPriv;
- __DRIdrawablePrivate *rPriv;
+ __DRIscreen *sPriv;
+ __DRIcontext *cPriv;
+ __DRIdrawable *dPriv;
+ __DRIdrawable *rPriv;
driOptionCache optionCache;
@@ -67,7 +67,7 @@ struct dri_context
};
static INLINE struct dri_context *
-dri_context(__DRIcontextPrivate * driContextPriv)
+dri_context(__DRIcontext * driContextPriv)
{
return (struct dri_context *)driContextPriv->driverPrivate;
}
@@ -99,18 +99,18 @@ dri_unlock(struct dri_context *ctx)
*/
extern struct dri1_api_lock_funcs dri1_lf;
-void dri_destroy_context(__DRIcontextPrivate * driContextPriv);
+void dri_destroy_context(__DRIcontext * driContextPriv);
-boolean dri_unbind_context(__DRIcontextPrivate * driContextPriv);
+boolean dri_unbind_context(__DRIcontext * driContextPriv);
boolean
-dri_make_current(__DRIcontextPrivate * driContextPriv,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv);
+dri_make_current(__DRIcontext * driContextPriv,
+ __DRIdrawable * driDrawPriv,
+ __DRIdrawable * driReadPriv);
boolean
dri_create_context(const __GLcontextModes * visual,
- __DRIcontextPrivate * driContextPriv,
+ __DRIcontext * driContextPriv,
void *sharedContextPrivate);
/***********************************************************************
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index 4b12243ddf..1058dd38c2 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -118,7 +118,7 @@ dri2_check_if_pixmap(__DRIbuffer *buffers, int count)
* This will be called a drawable is known to have been resized.
*/
void
-dri_get_buffers(__DRIdrawablePrivate * dPriv)
+dri_get_buffers(__DRIdrawable * dPriv)
{
struct dri_drawable *drawable = dri_drawable(dPriv);
@@ -299,8 +299,8 @@ dri_flush_frontbuffer(struct pipe_screen *screen,
* This is called when we need to set up GL rendering to a new X window.
*/
boolean
-dri_create_buffer(__DRIscreenPrivate * sPriv,
- __DRIdrawablePrivate * dPriv,
+dri_create_buffer(__DRIscreen * sPriv,
+ __DRIdrawable * dPriv,
const __GLcontextModes * visual, boolean isPixmap)
{
struct dri_screen *screen = sPriv->private;
@@ -416,7 +416,7 @@ dri_swap_fences_push_back(struct dri_drawable *draw,
}
void
-dri_destroy_buffer(__DRIdrawablePrivate * dPriv)
+dri_destroy_buffer(__DRIdrawable * dPriv)
{
struct dri_drawable *drawable = dri_drawable(dPriv);
struct pipe_fence_handle *fence;
@@ -434,8 +434,8 @@ dri_destroy_buffer(__DRIdrawablePrivate * dPriv)
static void
dri1_update_drawables_locked(struct dri_context *ctx,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv)
+ __DRIdrawable * driDrawPriv,
+ __DRIdrawable * driReadPriv)
{
if (ctx->stLostLock) {
ctx->stLostLock = FALSE;
@@ -458,8 +458,8 @@ dri1_update_drawables_locked(struct dri_context *ctx,
static void
dri1_propagate_drawable_change(struct dri_context *ctx)
{
- __DRIdrawablePrivate *dPriv = ctx->dPriv;
- __DRIdrawablePrivate *rPriv = ctx->rPriv;
+ __DRIdrawable *dPriv = ctx->dPriv;
+ __DRIdrawable *rPriv = ctx->rPriv;
boolean flushed = FALSE;
if (dPriv && ctx->d_stamp != dPriv->lastStamp) {
@@ -532,7 +532,7 @@ static void
dri1_swap_copy(struct dri_context *ctx,
struct pipe_surface *dst,
struct pipe_surface *src,
- __DRIdrawablePrivate * dPriv, const struct drm_clip_rect *bbox)
+ __DRIdrawable * dPriv, const struct drm_clip_rect *bbox)
{
struct pipe_context *pipe = ctx->pipe;
struct drm_clip_rect clip;
@@ -563,7 +563,7 @@ dri1_swap_copy(struct dri_context *ctx,
static void
dri1_copy_to_front(struct dri_context *ctx,
struct pipe_surface *surf,
- __DRIdrawablePrivate * dPriv,
+ __DRIdrawable * dPriv,
const struct drm_clip_rect *sub_box,
struct pipe_fence_handle **fence)
{
@@ -636,7 +636,7 @@ dri1_flush_frontbuffer(struct pipe_screen *screen,
}
void
-dri_swap_buffers(__DRIdrawablePrivate * dPriv)
+dri_swap_buffers(__DRIdrawable * dPriv)
{
struct dri_context *ctx;
struct pipe_surface *back_surf;
@@ -668,7 +668,7 @@ dri_swap_buffers(__DRIdrawablePrivate * dPriv)
}
void
-dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h)
+dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h)
{
struct pipe_screen *screen = dri_screen(dPriv->driScreenPriv)->pipe_screen;
struct drm_clip_rect sub_bbox;
diff --git a/src/gallium/state_trackers/dri/dri_drawable.h b/src/gallium/state_trackers/dri/dri_drawable.h
index b910930db4..80bb5d7f61 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.h
+++ b/src/gallium/state_trackers/dri/dri_drawable.h
@@ -41,8 +41,8 @@ struct dri_context;
struct dri_drawable
{
/* dri */
- __DRIdrawablePrivate *dPriv;
- __DRIscreenPrivate *sPriv;
+ __DRIdrawable *dPriv;
+ __DRIscreen *sPriv;
unsigned attachments[8];
unsigned num_attachments;
@@ -67,7 +67,7 @@ struct dri_drawable
};
static INLINE struct dri_drawable *
-dri_drawable(__DRIdrawablePrivate * driDrawPriv)
+dri_drawable(__DRIdrawable * driDrawPriv)
{
return (struct dri_drawable *)driDrawPriv->driverPrivate;
}
@@ -76,22 +76,22 @@ dri_drawable(__DRIdrawablePrivate * driDrawPriv)
* dri_drawable.c
*/
boolean
-dri_create_buffer(__DRIscreenPrivate * sPriv,
- __DRIdrawablePrivate * dPriv,
+dri_create_buffer(__DRIscreen * sPriv,
+ __DRIdrawable * dPriv,
const __GLcontextModes * visual, boolean isPixmap);
void
dri_flush_frontbuffer(struct pipe_screen *screen,
struct pipe_surface *surf, void *context_private);
-void dri_swap_buffers(__DRIdrawablePrivate * dPriv);
+void dri_swap_buffers(__DRIdrawable * dPriv);
void
-dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h);
+dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h);
-void dri_get_buffers(__DRIdrawablePrivate * dPriv);
+void dri_get_buffers(__DRIdrawable * dPriv);
-void dri_destroy_buffer(__DRIdrawablePrivate * dPriv);
+void dri_destroy_buffer(__DRIdrawable * dPriv);
void dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
GLint glx_texture_format, __DRIdrawable *dPriv);
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index cb864d45d5..bb12baf281 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -202,7 +202,7 @@ dri_fill_in_modes(struct dri_screen *screen,
* Get information about previous buffer swaps.
*/
static int
-dri_get_swap_info(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo)
+dri_get_swap_info(__DRIdrawable * dPriv, __DRIswapInfo * sInfo)
{
if (dPriv == NULL || dPriv->driverPrivate == NULL || sInfo == NULL)
return -1;
@@ -220,7 +220,7 @@ dri_copy_version(struct dri1_api_version *dst,
}
static const __DRIconfig **
-dri_init_screen(__DRIscreenPrivate * sPriv)
+dri_init_screen(__DRIscreen * sPriv)
{
struct dri_screen *screen;
const __DRIconfig **configs;
@@ -285,7 +285,7 @@ dri_init_screen(__DRIscreenPrivate * sPriv)
* Returns the __GLcontextModes supported by this driver.
*/
static const __DRIconfig **
-dri_init_screen2(__DRIscreenPrivate * sPriv)
+dri_init_screen2(__DRIscreen * sPriv)
{
struct dri_screen *screen;
struct drm_create_screen_arg arg;
@@ -319,7 +319,7 @@ dri_init_screen2(__DRIscreenPrivate * sPriv)
}
static void
-dri_destroy_screen(__DRIscreenPrivate * sPriv)
+dri_destroy_screen(__DRIscreen * sPriv)
{
struct dri_screen *screen = dri_screen(sPriv);
@@ -346,4 +346,12 @@ PUBLIC const struct __DriverAPIRec driDriverAPI = {
.InitScreen2 = dri_init_screen2,
};
+/* This is the table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+ &driCoreExtension.base,
+ &driLegacyExtension.base,
+ &driDRI2Extension.base,
+ NULL
+};
+
/* vim: set sw=3 ts=8 sts=3 expandtab: */
diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h
index f6c56d0f0c..03387a0e81 100644
--- a/src/gallium/state_trackers/dri/dri_screen.h
+++ b/src/gallium/state_trackers/dri/dri_screen.h
@@ -42,7 +42,7 @@
struct dri_screen
{
/* dri */
- __DRIscreenPrivate *sPriv;
+ __DRIscreen *sPriv;
/**
* Configuration cache with default values for all contexts
@@ -63,7 +63,7 @@ struct dri_screen
/** cast wrapper */
static INLINE struct dri_screen *
-dri_screen(__DRIscreenPrivate * sPriv)
+dri_screen(__DRIscreen * sPriv)
{
return (struct dri_screen *)sPriv->private;
}
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index 0b60b5be05..d55aa51b82 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -171,9 +171,9 @@ drm_takedown_shown_screen(_EGLDisplay *dpy, struct drm_screen *screen)
drmModeSetCrtc(
dev->drmFD,
screen->crtcID,
- 0, // FD
+ 0, /* FD */
0, 0,
- NULL, 0, // List of output ids
+ NULL, 0, /* List of output ids */
NULL);
drmModeRmFB(dev->drmFD, screen->fbID);
diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c
index 745803c7eb..9345b0f490 100644
--- a/src/gallium/state_trackers/egl/egl_tracker.c
+++ b/src/gallium/state_trackers/egl/egl_tracker.c
@@ -152,6 +152,7 @@ drm_initialize(_EGLDriver *drv, _EGLDisplay *disp, EGLint *major, EGLint *minor)
int num_screens = 0;
EGLint i;
int fd;
+ _EGLConfig *config;
dev = (struct drm_device *) calloc(1, sizeof(struct drm_device));
if (!dev)
@@ -206,7 +207,7 @@ drm_initialize(_EGLDriver *drv, _EGLDisplay *disp, EGLint *major, EGLint *minor)
disp->DriverData = dev;
/* for now we only have one config */
- _EGLConfig *config = calloc(1, sizeof(*config));
+ config = calloc(1, sizeof(*config));
memset(config, 1, sizeof(*config));
_eglInitConfig(config, 1);
_eglSetConfigAttrib(config, EGL_RED_SIZE, 8);
diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c
index f2881b9a31..228ac9a20e 100644
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@@ -644,6 +644,7 @@ register_with_display(Display *dpy)
XExtCodes *c = XAddExtension(dpy);
ext = dpy->ext_procs; /* new extension is at head of list */
assert(c->extension == ext->codes.extension);
+ (void) c;
ext->name = _mesa_strdup(extName);
ext->close_display = close_display_callback;
}
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index c76dfb31d2..1783bc504d 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -67,6 +67,10 @@
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
+#include "trace/tr_screen.h"
+#include "trace/tr_context.h"
+#include "trace/tr_texture.h"
+
#include "xm_winsys.h"
#include <GL/glx.h>
@@ -87,6 +91,8 @@ void xmesa_set_driver( const struct xm_driver *templ )
*/
pipe_mutex _xmesa_lock;
+static struct pipe_screen *_screen = NULL;
+static struct pipe_screen *screen = NULL;
/**********************************************************************/
@@ -754,7 +760,7 @@ PUBLIC
XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
{
static GLboolean firstTime = GL_TRUE;
- static struct pipe_screen *screen = NULL;
+ struct pipe_context *_pipe = NULL;
struct pipe_context *pipe = NULL;
XMesaContext c;
GLcontext *mesaCtx;
@@ -762,7 +768,8 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
if (firstTime) {
pipe_mutex_init(_xmesa_lock);
- screen = driver.create_pipe_screen();
+ _screen = driver.create_pipe_screen();
+ screen = trace_screen_create( _screen );
firstTime = GL_FALSE;
}
@@ -781,9 +788,11 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
if (screen == NULL)
goto fail;
- pipe = driver.create_pipe_context(screen, (void *) c);
- if (pipe == NULL)
+ _pipe = driver.create_pipe_context(_screen, (void *) c);
+ if (_pipe == NULL)
goto fail;
+ pipe = trace_context_create(screen, _pipe);
+ pipe->priv = c;
c->st = st_create_context(pipe,
&v->mesa_visual,
@@ -1110,6 +1119,12 @@ void XMesaSwapBuffers( XMesaBuffer b )
st_swapbuffers(b->stfb, &frontLeftSurf, NULL);
if (frontLeftSurf) {
+ if (_screen != screen) {
+ struct trace_surface *tr_surf = trace_surface( frontLeftSurf );
+ struct pipe_surface *surf = tr_surf->surface;
+ frontLeftSurf = surf;
+ }
+
driver.display_surface(b, frontLeftSurf);
}
diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript
index ec385e7c44..d4fdd43688 100644
--- a/src/gallium/state_trackers/python/SConscript
+++ b/src/gallium/state_trackers/python/SConscript
@@ -38,10 +38,12 @@ if 'python' in env['statetrackers']:
],
)
+ env['no_import_lib'] = 1
+
env.SharedLibrary(
target = '_gallium',
source = [
'st_hardpipe_winsys.c',
],
- LIBS = [pyst, softpipe, trace] + auxiliaries + env['LIBS'],
+ LIBS = [pyst, softpipe, trace] + gallium + env['LIBS'],
)
diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i
index 8e323f4896..96b13c2258 100644
--- a/src/gallium/state_trackers/python/gallium.i
+++ b/src/gallium/state_trackers/python/gallium.i
@@ -46,6 +46,7 @@
#include "util/u_draw_quad.h"
#include "util/u_tile.h"
#include "util/u_math.h"
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_text.h"
#include "tgsi/tgsi_dump.h"
diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i
index 9728207d9c..84ce1a41e6 100644
--- a/src/gallium/state_trackers/python/p_context.i
+++ b/src/gallium/state_trackers/python/p_context.i
@@ -52,11 +52,16 @@ struct st_context {
cso_set_blend($self->cso, state);
}
- void set_sampler( unsigned index, const struct pipe_sampler_state *state ) {
+ void set_fragment_sampler( unsigned index, const struct pipe_sampler_state *state ) {
cso_single_sampler($self->cso, index, state);
cso_single_sampler_done($self->cso);
}
+ void set_vertex_sampler( unsigned index, const struct pipe_sampler_state *state ) {
+ cso_single_vertex_sampler($self->cso, index, state);
+ cso_single_vertex_sampler_done($self->cso);
+ }
+
void set_rasterizer( const struct pipe_rasterizer_state *state ) {
cso_set_rasterizer($self->cso, state);
}
@@ -103,6 +108,25 @@ struct st_context {
$self->vs = vs;
}
+ void set_geometry_shader( const struct pipe_shader_state *state ) {
+ void *gs;
+
+ if(!state) {
+ cso_set_geometry_shader_handle($self->cso, NULL);
+ return;
+ }
+
+ gs = $self->pipe->create_gs_state($self->pipe, state);
+ if(!gs)
+ return;
+
+ if(cso_set_geometry_shader_handle($self->cso, gs) != PIPE_OK)
+ return;
+
+ cso_delete_geometry_shader($self->cso, $self->gs);
+ $self->gs = gs;
+ }
+
/*
* Parameter-like state (or properties)
*/
@@ -142,14 +166,24 @@ struct st_context {
cso_set_viewport($self->cso, state);
}
- void set_sampler_texture(unsigned index,
- struct pipe_texture *texture) {
+ void set_fragment_sampler_texture(unsigned index,
+ struct pipe_texture *texture) {
if(!texture)
texture = $self->default_texture;
- pipe_texture_reference(&$self->sampler_textures[index], texture);
- $self->pipe->set_fragment_sampler_textures($self->pipe,
+ pipe_texture_reference(&$self->fragment_sampler_textures[index], texture);
+ $self->pipe->set_fragment_sampler_textures($self->pipe,
PIPE_MAX_SAMPLERS,
- $self->sampler_textures);
+ $self->fragment_sampler_textures);
+ }
+
+ void set_vertex_sampler_texture(unsigned index,
+ struct pipe_texture *texture) {
+ if(!texture)
+ texture = $self->default_texture;
+ pipe_texture_reference(&$self->vertex_sampler_textures[index], texture);
+ $self->pipe->set_vertex_sampler_textures($self->pipe,
+ PIPE_MAX_VERTEX_SAMPLERS,
+ $self->vertex_sampler_textures);
}
void set_vertex_buffer(unsigned index,
diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i
index fb793d5cbd..2dc995adb0 100644
--- a/src/gallium/state_trackers/python/p_device.i
+++ b/src/gallium/state_trackers/python/p_device.i
@@ -112,10 +112,6 @@ struct st_device {
struct pipe_texture templat;
memset(&templat, 0, sizeof(templat));
templat.format = format;
-<<<<<<< HEAD
- util_format_get_block(templat.format, &templat.block);
-=======
->>>>>>> master
templat.width0 = width;
templat.height0 = height;
templat.depth0 = depth;
diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i
index 1de7f86a3c..761587dc53 100644
--- a/src/gallium/state_trackers/python/p_texture.i
+++ b/src/gallium/state_trackers/python/p_texture.i
@@ -132,8 +132,8 @@ struct st_surface
struct pipe_transfer *transfer;
unsigned stride;
- stride = pf_get_stride(texture->format, w);
- *LENGTH = pf_get_nblocksy(texture->format, h) * stride;
+ stride = util_format_get_stride(texture->format, w);
+ *LENGTH = util_format_get_nblocksy(texture->format, h) * stride;
*STRING = (char *) malloc(*LENGTH);
if(!*STRING)
return;
@@ -159,9 +159,9 @@ struct st_surface
struct pipe_transfer *transfer;
if(stride == 0)
- stride = pf_get_stride(texture->format, w);
+ stride = util_format_get_stride(texture->format, w);
- if(LENGTH < pf_get_nblocksy(texture->format, h) * stride)
+ if(LENGTH < util_format_get_nblocksy(texture->format, h) * stride)
SWIG_exception(SWIG_ValueError, "offset must be smaller than buffer size");
transfer = screen->get_tex_transfer(screen,
diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py
index b32eafe23f..a68709f5cf 100755
--- a/src/gallium/state_trackers/python/retrace/interpreter.py
+++ b/src/gallium/state_trackers/python/retrace/interpreter.py
@@ -52,10 +52,10 @@ def make_image(surface, x=None, y=None, w=None, h=None):
w = surface.width - x
if h is None:
h = surface.height - y
- data = surface.get_tile_rgba8(0, 0, surface.width, surface.height)
+ data = surface.get_tile_rgba8(x, y, surface.width, surface.height)
import Image
- outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1)
+ outimage = Image.fromstring('RGBA', (w, h), data, "raw", 'RGBA', 0, 1)
return outimage
def save_image(filename, surface, x=None, y=None, w=None, h=None):
@@ -278,9 +278,9 @@ class Screen(Object):
def texture_create(self, templat):
return self.real.texture_create(
format = templat.format,
- width = templat.width0,
- height = templat.height0,
- depth = templat.depth0,
+ width = templat.width,
+ height = templat.height,
+ depth = templat.depth,
last_level = templat.last_level,
target = templat.target,
tex_usage = templat.tex_usage,
@@ -387,9 +387,13 @@ class Context(Object):
def delete_sampler_state(self, state):
pass
+ def bind_vertex_sampler_states(self, num_states, states):
+ for i in range(num_states):
+ self.real.set_vertex_sampler(i, states[i])
+
def bind_fragment_sampler_states(self, num_states, states):
for i in range(num_states):
- self.real.set_sampler(i, states[i])
+ self.real.set_fragment_sampler(i, states[i])
def create_rasterizer_state(self, state):
return state
@@ -487,7 +491,11 @@ class Context(Object):
def set_fragment_sampler_textures(self, num_textures, textures):
for i in range(num_textures):
- self.real.set_sampler_texture(i, textures[i])
+ self.real.set_fragment_sampler_texture(i, textures[i])
+
+ def set_vertex_sampler_textures(self, num_textures, textures):
+ for i in range(num_textures):
+ self.real.set_vertex_sampler_texture(i, textures[i])
def set_vertex_buffers(self, num_buffers, buffers):
self.vbufs = buffers[0:num_buffers]
@@ -507,10 +515,6 @@ class Context(Object):
self.real.set_vertex_element(i, elements[i])
self.real.set_vertex_elements(num_elements)
- def set_edgeflags(self, bitfield):
- # FIXME
- pass
-
def dump_vertices(self, start, count):
if not self.interpreter.verbosity(2):
return
diff --git a/src/gallium/state_trackers/python/samples/gs.py b/src/gallium/state_trackers/python/samples/gs.py
new file mode 100644
index 0000000000..1ceead5f17
--- /dev/null
+++ b/src/gallium/state_trackers/python/samples/gs.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+##########################################################################
+#
+# Copyright 2009 VMware
+# All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+##########################################################################
+
+
+from gallium import *
+
+
+def make_image(surface):
+ data = surface.get_tile_rgba8(0, 0, surface.width, surface.height)
+
+ import Image
+ outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1)
+ return outimage
+
+def save_image(filename, surface):
+ outimage = make_image(surface)
+ outimage.save(filename, "PNG")
+
+def show_image(surface):
+ outimage = make_image(surface)
+
+ import Tkinter as tk
+ from PIL import Image, ImageTk
+ root = tk.Tk()
+
+ root.title('background image')
+
+ image1 = ImageTk.PhotoImage(outimage)
+ w = image1.width()
+ h = image1.height()
+ x = 100
+ y = 100
+ root.geometry("%dx%d+%d+%d" % (w, h, x, y))
+ panel1 = tk.Label(root, image=image1)
+ panel1.pack(side='top', fill='both', expand='yes')
+ panel1.image = image1
+ root.mainloop()
+
+
+def test(dev):
+ ctx = dev.context_create()
+
+ width = 255
+ height = 255
+ minz = 0.0
+ maxz = 1.0
+
+ # disabled blending/masking
+ blend = Blend()
+ blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE
+ blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE
+ blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO
+ blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO
+ blend.colormask = PIPE_MASK_RGBA
+ ctx.set_blend(blend)
+
+ # depth/stencil/alpha
+ depth_stencil_alpha = DepthStencilAlpha()
+ depth_stencil_alpha.depth.enabled = 1
+ depth_stencil_alpha.depth.writemask = 1
+ depth_stencil_alpha.depth.func = PIPE_FUNC_LESS
+ ctx.set_depth_stencil_alpha(depth_stencil_alpha)
+
+ # rasterizer
+ rasterizer = Rasterizer()
+ rasterizer.front_winding = PIPE_WINDING_CW
+ rasterizer.cull_mode = PIPE_WINDING_NONE
+ rasterizer.scissor = 1
+ ctx.set_rasterizer(rasterizer)
+
+ # viewport
+ viewport = Viewport()
+ scale = FloatArray(4)
+ scale[0] = width / 2.0
+ scale[1] = -height / 2.0
+ scale[2] = (maxz - minz) / 2.0
+ scale[3] = 1.0
+ viewport.scale = scale
+ translate = FloatArray(4)
+ translate[0] = width / 2.0
+ translate[1] = height / 2.0
+ translate[2] = (maxz - minz) / 2.0
+ translate[3] = 0.0
+ viewport.translate = translate
+ ctx.set_viewport(viewport)
+
+ # samplers
+ sampler = Sampler()
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE
+ sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
+ sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
+ sampler.normalized_coords = 1
+ ctx.set_sampler(0, sampler)
+
+ # scissor
+ scissor = Scissor()
+ scissor.minx = 0
+ scissor.miny = 0
+ scissor.maxx = width
+ scissor.maxy = height
+ ctx.set_scissor(scissor)
+
+ clip = Clip()
+ clip.nr = 0
+ ctx.set_clip(clip)
+
+ # framebuffer
+ cbuf = dev.texture_create(
+ PIPE_FORMAT_X8R8G8B8_UNORM,
+ width, height,
+ tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET,
+ ).get_surface()
+ zbuf = dev.texture_create(
+ PIPE_FORMAT_Z16_UNORM,
+ width, height,
+ tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL,
+ ).get_surface()
+ fb = Framebuffer()
+ fb.width = width
+ fb.height = height
+ fb.nr_cbufs = 1
+ fb.set_cbuf(0, cbuf)
+ fb.set_zsbuf(zbuf)
+ ctx.set_framebuffer(fb)
+ rgba = FloatArray(4);
+ rgba[0] = 0.0
+ rgba[1] = 0.0
+ rgba[2] = 0.0
+ rgba[3] = 0.0
+ ctx.clear(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, rgba, 1.0, 0xff)
+
+ # vertex shader
+ vs = Shader('''
+ VERT
+ DCL IN[0], POSITION, CONSTANT
+ DCL IN[1], COLOR, CONSTANT
+ DCL OUT[0], POSITION, CONSTANT
+ DCL OUT[1], COLOR, CONSTANT
+ 0:MOV OUT[0], IN[0]
+ 1:MOV OUT[1], IN[1]
+ 2:END
+ ''')
+ ctx.set_vertex_shader(vs)
+
+ gs = Shader('''
+ GEOM
+ PROPERTY GS_INPUT_PRIMITIVE TRIANGLES
+ PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP
+ DCL IN[][0], POSITION, CONSTANT
+ DCL IN[][1], COLOR, CONSTANT
+ DCL OUT[0], POSITION, CONSTANT
+ DCL OUT[1], COLOR, CONSTANT
+ 0:MOV OUT[0], IN[0][0]
+ 1:MOV OUT[1], IN[0][1]
+ 2:EMIT
+ 3:MOV OUT[0], IN[1][0]
+ 4:MOV OUT[1], IN[1][1]
+ 5:EMIT
+ 6:MOV OUT[0], IN[2][0]
+ 7:MOV OUT[1], IN[2][1]
+ 8:EMIT
+ 9:ENDPRIM
+ 10:END
+ ''')
+ ctx.set_geometry_shader(gs)
+
+ # fragment shader
+ fs = Shader('''
+ FRAG
+ DCL IN[0], COLOR, LINEAR
+ DCL OUT[0], COLOR, CONSTANT
+ 0:MOV OUT[0], IN[0]
+ 1:END
+ ''')
+ ctx.set_fragment_shader(fs)
+
+ nverts = 3
+ nattrs = 2
+ verts = FloatArray(nverts * nattrs * 4)
+
+ verts[ 0] = 0.0 # x1
+ verts[ 1] = 0.8 # y1
+ verts[ 2] = 0.2 # z1
+ verts[ 3] = 1.0 # w1
+ verts[ 4] = 1.0 # r1
+ verts[ 5] = 0.0 # g1
+ verts[ 6] = 0.0 # b1
+ verts[ 7] = 1.0 # a1
+ verts[ 8] = -0.8 # x2
+ verts[ 9] = -0.8 # y2
+ verts[10] = 0.5 # z2
+ verts[11] = 1.0 # w2
+ verts[12] = 0.0 # r2
+ verts[13] = 1.0 # g2
+ verts[14] = 0.0 # b2
+ verts[15] = 1.0 # a2
+ verts[16] = 0.8 # x3
+ verts[17] = -0.8 # y3
+ verts[18] = 0.8 # z3
+ verts[19] = 1.0 # w3
+ verts[20] = 0.0 # r3
+ verts[21] = 0.0 # g3
+ verts[22] = 1.0 # b3
+ verts[23] = 1.0 # a3
+
+ ctx.draw_vertices(PIPE_PRIM_TRIANGLES,
+ nverts,
+ nattrs,
+ verts)
+
+ ctx.flush()
+
+ show_image(cbuf)
+ #show_image(zbuf)
+ #save_image('cbuf.png', cbuf)
+ #save_image('zbuf.png', zbuf)
+
+
+
+def main():
+ dev = Device()
+ test(dev)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py
index 87acf60366..af80426dc6 100644
--- a/src/gallium/state_trackers/python/samples/tri.py
+++ b/src/gallium/state_trackers/python/samples/tri.py
@@ -118,7 +118,7 @@ def test(dev):
sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
sampler.normalized_coords = 1
- ctx.set_sampler(0, sampler)
+ ctx.set_fragment_sampler(0, sampler)
# scissor
scissor = Scissor()
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index 10c7ecbd78..d144af2447 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -135,7 +135,9 @@ st_context_destroy(struct st_context *st_ctx)
st_ctx->pipe->destroy(st_ctx->pipe);
for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
- pipe_texture_reference(&st_ctx->sampler_textures[i], NULL);
+ pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], NULL);
+ for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], NULL);
pipe_texture_reference(&st_ctx->default_texture, NULL);
FREE(st_ctx);
@@ -276,9 +278,12 @@ st_context_create(struct st_device *st_dev)
}
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
- pipe_texture_reference(&st_ctx->sampler_textures[i], st_ctx->default_texture);
+ pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], st_ctx->default_texture);
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++)
+ pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], st_ctx->default_texture);
- cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->sampler_textures);
+ cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->fragment_sampler_textures);
+ cso_set_vertex_sampler_textures(st_ctx->cso, PIPE_MAX_VERTEX_SAMPLERS, st_ctx->vertex_sampler_textures);
}
/* vertex shader */
diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h
index a246b6a1f2..f786e13411 100644
--- a/src/gallium/state_trackers/python/st_device.h
+++ b/src/gallium/state_trackers/python/st_device.h
@@ -57,9 +57,11 @@ struct st_context {
void *vs;
void *fs;
+ void *gs;
struct pipe_texture *default_texture;
- struct pipe_texture *sampler_textures[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *fragment_sampler_textures[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *vertex_sampler_textures[PIPE_MAX_VERTEX_SAMPLERS];
unsigned num_vertex_buffers;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py
index 35673b3ec9..eed6cdd1e6 100644
--- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py
+++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py
@@ -96,7 +96,7 @@ def test(dev, name):
sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
sampler.normalized_coords = 1
- ctx.set_sampler(0, sampler)
+ ctx.set_fragment_sampler(0, sampler)
# scissor
scissor = Scissor()
diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py
index 5be1ca80f3..41bebd0604 100644
--- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py
+++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py
@@ -96,7 +96,7 @@ def test(dev, name):
sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
sampler.normalized_coords = 1
- ctx.set_sampler(0, sampler)
+ ctx.set_fragment_sampler(0, sampler)
# scissor
scissor = Scissor()
diff --git a/src/gallium/state_trackers/python/tests/texture_render.py b/src/gallium/state_trackers/python/tests/texture_render.py
index 8a2db9dbcf..79287f2cac 100755
--- a/src/gallium/state_trackers/python/tests/texture_render.py
+++ b/src/gallium/state_trackers/python/tests/texture_render.py
@@ -144,8 +144,8 @@ class TextureTest(TestCase):
sampler.normalized_coords = 1
sampler.min_lod = 0
sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1
- ctx.set_sampler(0, sampler)
- ctx.set_sampler_texture(0, src_texture)
+ ctx.set_fragment_sampler(0, sampler)
+ ctx.set_fragment_sampler_texture(0, src_texture)
# framebuffer
cbuf_tex = dev.texture_create(
diff --git a/src/gallium/state_trackers/python/tests/texture_sample.py b/src/gallium/state_trackers/python/tests/texture_sample.py
index 92a6c4dfb9..520961c805 100755
--- a/src/gallium/state_trackers/python/tests/texture_sample.py
+++ b/src/gallium/state_trackers/python/tests/texture_sample.py
@@ -169,7 +169,7 @@ class TextureColorSampleTest(TestCase):
sampler.normalized_coords = 1
sampler.min_lod = 0
sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1
- ctx.set_sampler(0, sampler)
+ ctx.set_fragment_sampler(0, sampler)
# texture
texture = dev.texture_create(
@@ -189,7 +189,7 @@ class TextureColorSampleTest(TestCase):
zslice = zslice,
).sample_rgba(expected_rgba)
- ctx.set_sampler_texture(0, texture)
+ ctx.set_fragment_sampler_texture(0, texture)
# framebuffer
cbuf_tex = dev.texture_create(
@@ -359,7 +359,7 @@ class TextureDepthSampleTest(TestCase):
sampler.normalized_coords = 1
sampler.min_lod = 0
sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1
- ctx.set_sampler(0, sampler)
+ ctx.set_fragment_sampler(0, sampler)
# texture
texture = dev.texture_create(
@@ -379,7 +379,7 @@ class TextureDepthSampleTest(TestCase):
zslice = zslice,
).sample_rgba(expected_rgba)
- ctx.set_sampler_texture(0, texture)
+ ctx.set_fragment_sampler_texture(0, texture)
# framebuffer
cbuf_tex = dev.texture_create(
diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile
index b8c805b06c..fc97bf51f8 100644
--- a/src/gallium/state_trackers/vega/Makefile
+++ b/src/gallium/state_trackers/vega/Makefile
@@ -61,14 +61,7 @@ VG_MINOR = 0
VG_TINY = 0
GALLIUM_LIBS = \
- $(GALLIUM)/src/gallium/auxiliary/pipebuffer/libpipebuffer.a \
- $(GALLIUM)/src/gallium/auxiliary/sct/libsct.a \
- $(GALLIUM)/src/gallium/auxiliary/draw/libdraw.a \
- $(GALLIUM)/src/gallium/auxiliary/rtasm/librtasm.a \
- $(GALLIUM)/src/gallium/auxiliary/translate/libtranslate.a \
- $(GALLIUM)/src/gallium/auxiliary/cso_cache/libcso_cache.a \
- $(GALLIUM)/src/gallium/auxiliary/util/libutil.a \
- $(GALLIUM)/src/gallium/auxiliary/tgsi/libtgsi.a
+ $(GALLIUM)/src/gallium/auxiliary/libgallium.a
.SUFFIXES : .cpp
diff --git a/src/gallium/state_trackers/vega/api_path.c b/src/gallium/state_trackers/vega/api_path.c
index a6b7a2bb93..15ac1900f4 100644
--- a/src/gallium/state_trackers/vega/api_path.c
+++ b/src/gallium/state_trackers/vega/api_path.c
@@ -164,8 +164,7 @@ void vgAppendPathData(VGPath dstPath,
return;
}
for (i = 0; i < numSegments; ++i) {
- if (pathSegments[i] < VG_CLOSE_PATH ||
- pathSegments[i] > VG_LCWARC_TO_REL) {
+ if (pathSegments[i] > VG_LCWARC_TO_REL) {
vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
return;
}
diff --git a/src/gallium/state_trackers/vega/arc.c b/src/gallium/state_trackers/vega/arc.c
index 8b04d21ea7..2d12340870 100644
--- a/src/gallium/state_trackers/vega/arc.c
+++ b/src/gallium/state_trackers/vega/arc.c
@@ -528,7 +528,6 @@ static INLINE int num_beziers_needed(struct arc *arc)
double threshold = 0.05;
VGboolean found = VG_FALSE;
int n = 1;
- int i;
double min_eta, max_eta;
min_eta = MIN2(arc->eta1, arc->eta2);
@@ -538,6 +537,7 @@ static INLINE int num_beziers_needed(struct arc *arc)
double d_eta = (max_eta - min_eta) / n;
if (d_eta <= 0.5 * M_PI) {
double eta_b = min_eta;
+ int i;
found = VG_TRUE;
for (i = 0; found && (i < n); ++i) {
double etaA = eta_b;
diff --git a/src/gallium/state_trackers/vega/bezier.c b/src/gallium/state_trackers/vega/bezier.c
index 0d5504004c..5769e8ea86 100644
--- a/src/gallium/state_trackers/vega/bezier.c
+++ b/src/gallium/state_trackers/vega/bezier.c
@@ -256,7 +256,6 @@ static enum shift_result good_offset(const struct bezier *b1,
const float max_dist_normal = threshold*offset;
const float spacing = 0.25;
float i;
-
for (i = spacing; i < 0.99; i += spacing) {
float p1[2],p2[2], d, l;
float normal[2];
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 271abafbef..64e3a7c545 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -57,7 +57,7 @@ static void setup_shaders(struct renderer *ctx)
{
struct pipe_context *pipe = ctx->pipe;
/* fragment shader */
- ctx->fs = util_make_fragment_tex_shader(pipe);
+ ctx->fs = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
}
static struct pipe_buffer *
diff --git a/src/gallium/state_trackers/vega/stroker.c b/src/gallium/state_trackers/vega/stroker.c
index 1b92d2b5c6..68a52029db 100644
--- a/src/gallium/state_trackers/vega/stroker.c
+++ b/src/gallium/state_trackers/vega/stroker.c
@@ -476,7 +476,7 @@ static enum intersection_type line_intersect(const VGfloat *l1,
const VGfloat *l2,
float *intersection_point)
{
- VGfloat isect[2];
+ VGfloat isect[2] = { 0 };
enum intersection_type type;
VGboolean dx_zero, ldx_zero;
@@ -649,7 +649,7 @@ static void create_joins(struct stroker *stroker,
VGfloat prev_line[] = {stroker->back2_x, stroker->back2_y,
stroker->back1_x, stroker->back1_y};
- VGfloat isect[2];
+ VGfloat isect[2] = { 0 };
enum intersection_type type = line_intersect(prev_line, next_line, isect);
if (join == SquareJoin) {
diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript
index b05944a33b..352c087475 100644
--- a/src/gallium/state_trackers/wgl/SConscript
+++ b/src/gallium/state_trackers/wgl/SConscript
@@ -11,10 +11,11 @@ if env['platform'] in ['windows']:
'.',
])
- env.Append(CPPDEFINES = [
+ env.AppendUnique(CPPDEFINES = [
'_GDI32_', # prevent wgl* being declared __declspec(dllimport)
'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers
'WIN32_THREADS', # use Win32 thread API
+ 'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx
])
sources = [
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index c776faa53f..650d2c0d1d 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -71,6 +71,8 @@ struct crtc_private
static void
crtc_dpms(xf86CrtcPtr crtc, int mode)
{
+ /* ScrnInfoPtr pScrn = crtc->scrn; */
+
switch (mode) {
case DPMSModeOn:
case DPMSModeStandby:
@@ -121,7 +123,8 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
drm_mode.vrefresh = mode->VRefresh;
if (!mode->name)
xf86SetModeDefaultName(mode);
- strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN);
+ strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN - 1);
+ drm_mode.name[DRM_DISPLAY_MODE_LEN - 1] = '\0';
ret = drmModeSetCrtc(ms->fd, drm_crtc->crtc_id, ms->fb_id, x, y,
&drm_connector->connector_id, 1, &drm_mode);
@@ -147,18 +150,23 @@ crtc_gamma_set(xf86CrtcPtr crtc, CARD16 * red, CARD16 * green, CARD16 * blue,
static void *
crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height)
{
+ /* ScrnInfoPtr pScrn = crtc->scrn; */
+
return NULL;
}
static PixmapPtr
crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height)
{
+ /* ScrnInfoPtr pScrn = crtc->scrn; */
+
return NULL;
}
static void
crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr rotate_pixmap, void *data)
{
+ /* ScrnInfoPtr pScrn = crtc->scrn; */
}
/*
diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c
index 5391595891..b02fe68f31 100644
--- a/src/gallium/state_trackers/xorg/xorg_driver.c
+++ b/src/gallium/state_trackers/xorg/xorg_driver.c
@@ -181,8 +181,7 @@ drv_crtc_resize(ScrnInfoPtr pScrn, int width, int height)
if (!pScreen->ModifyPixmapHeader(rootPixmap, width, height, -1, -1, -1, NULL))
return FALSE;
- /* HW dependent - FIXME */
- pScrn->displayWidth = pScrn->virtualX;
+ pScrn->displayWidth = rootPixmap->devKind / (rootPixmap->drawable.bitsPerPixel / 8);
/* now create new frontbuffer */
return ms->create_front_buffer(pScrn) && ms->bind_front_buffer(pScrn);
@@ -220,6 +219,12 @@ static Bool
drv_init_resource_management(ScrnInfoPtr pScrn)
{
modesettingPtr ms = modesettingPTR(pScrn);
+ /*
+ ScreenPtr pScreen = pScrn->pScreen;
+ PixmapPtr rootPixmap = pScreen->GetScreenPixmap(pScreen);
+ Bool fbAccessDisabled;
+ CARD8 *fbstart;
+ */
if (ms->screen || ms->kms)
return TRUE;
@@ -249,9 +254,19 @@ static Bool
drv_close_resource_management(ScrnInfoPtr pScrn)
{
modesettingPtr ms = modesettingPTR(pScrn);
+ int i;
- if (ms->screen)
+ if (ms->screen) {
+ assert(ms->ctx == NULL);
+
+ for (i = 0; i < XORG_NR_FENCES; i++) {
+ if (ms->fence[i]) {
+ ms->screen->fence_finish(ms->screen, ms->fence[i], 0);
+ ms->screen->fence_reference(ms->screen, &ms->fence[i], NULL);
+ }
+ }
ms->screen->destroy(ms->screen);
+ }
ms->screen = NULL;
if (ms->api && ms->api->destroy)
@@ -461,7 +476,7 @@ static void drv_block_handler(int i, pointer blockData, pointer pTimeout,
* quite small. Let us get a fair way ahead of hardware before
* throttling.
*/
- for (j = 0; j < XORG_NR_FENCES; j++)
+ for (j = 0; j < XORG_NR_FENCES - 1; j++)
ms->screen->fence_reference(ms->screen,
&ms->fence[j],
ms->fence[j+1]);
@@ -480,7 +495,7 @@ static void drv_block_handler(int i, pointer blockData, pointer pTimeout,
if (num_cliprects) {
drmModeClip *clip = alloca(num_cliprects * sizeof(drmModeClip));
BoxPtr rect = REGION_RECTS(dirty);
- int i;
+ int i, ret;
/* XXX no need for copy? */
for (i = 0; i < num_cliprects; i++, rect++) {
@@ -491,7 +506,11 @@ static void drv_block_handler(int i, pointer blockData, pointer pTimeout,
}
/* TODO query connector property to see if this is needed */
- drmModeDirtyFB(ms->fd, ms->fb_id, clip, num_cliprects);
+ ret = drmModeDirtyFB(ms->fd, ms->fb_id, clip, num_cliprects);
+ if (ret) {
+ debug_printf("%s: failed to send dirty (%i, %s)\n",
+ __func__, ret, strerror(-ret));
+ }
DamageEmpty(ms->damage);
}
@@ -837,6 +856,7 @@ drv_create_front_buffer_ga3d(ScrnInfoPtr pScrn)
modesettingPtr ms = modesettingPTR(pScrn);
unsigned handle, stride;
struct pipe_texture *tex;
+ int ret;
ms->noEvict = TRUE;
@@ -850,16 +870,21 @@ drv_create_front_buffer_ga3d(ScrnInfoPtr pScrn)
tex,
&stride,
&handle))
- return FALSE;
+ goto err_destroy;
- drmModeAddFB(ms->fd,
- pScrn->virtualX,
- pScrn->virtualY,
- pScrn->depth,
- pScrn->bitsPerPixel,
- stride,
- handle,
- &ms->fb_id);
+ ret = drmModeAddFB(ms->fd,
+ pScrn->virtualX,
+ pScrn->virtualY,
+ pScrn->depth,
+ pScrn->bitsPerPixel,
+ stride,
+ handle,
+ &ms->fb_id);
+ if (ret) {
+ debug_printf("%s: failed to create framebuffer (%i, %s)",
+ __func__, ret, strerror(-ret));
+ goto err_destroy;
+ }
pScrn->frameX0 = 0;
pScrn->frameY0 = 0;
@@ -869,6 +894,10 @@ drv_create_front_buffer_ga3d(ScrnInfoPtr pScrn)
pipe_texture_reference(&tex, NULL);
return TRUE;
+
+err_destroy:
+ pipe_texture_reference(&tex, NULL);
+ return FALSE;
}
static Bool
@@ -898,6 +927,14 @@ static Bool
drv_destroy_front_buffer_kms(ScrnInfoPtr pScrn)
{
modesettingPtr ms = modesettingPTR(pScrn);
+ ScreenPtr pScreen = pScrn->pScreen;
+ PixmapPtr rootPixmap = pScreen->GetScreenPixmap(pScreen);
+
+ /* XXX Do something with the rootPixmap.
+ * This currently works fine but if we are getting crashes in
+ * the fb functions after VT switches maybe look more into it.
+ */
+ (void)rootPixmap;
if (!ms->root_bo)
return TRUE;
@@ -914,6 +951,7 @@ drv_create_front_buffer_kms(ScrnInfoPtr pScrn)
unsigned handle, stride;
struct kms_bo *bo;
unsigned attr[8];
+ int ret;
attr[0] = KMS_BO_TYPE;
attr[1] = KMS_BO_TYPE_SCANOUT;
@@ -932,14 +970,19 @@ drv_create_front_buffer_kms(ScrnInfoPtr pScrn)
if (kms_bo_get_prop(bo, KMS_HANDLE, &handle))
goto err_destroy;
- drmModeAddFB(ms->fd,
- pScrn->virtualX,
- pScrn->virtualY,
- pScrn->depth,
- pScrn->bitsPerPixel,
- stride,
- handle,
- &ms->fb_id);
+ ret = drmModeAddFB(ms->fd,
+ pScrn->virtualX,
+ pScrn->virtualY,
+ pScrn->depth,
+ pScrn->bitsPerPixel,
+ stride,
+ handle,
+ &ms->fb_id);
+ if (ret) {
+ debug_printf("%s: failed to create framebuffer (%i, %s)",
+ __func__, ret, strerror(-ret));
+ goto err_destroy;
+ }
pScrn->frameX0 = 0;
pScrn->frameY0 = 0;
@@ -966,7 +1009,7 @@ drv_bind_front_buffer_kms(ScrnInfoPtr pScrn)
return FALSE;
if (kms_bo_map(ms->root_bo, &ptr))
- return FALSE;
+ goto err_destroy;
pScreen->ModifyPixmapHeader(rootPixmap,
pScreen->width,
@@ -976,6 +1019,10 @@ drv_bind_front_buffer_kms(ScrnInfoPtr pScrn)
stride,
ptr);
return TRUE;
+
+err_destroy:
+ kms_bo_destroy(&ms->root_bo);
+ return FALSE;
}
#endif /* HAVE_LIBKMS */
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index 1769c12e80..aa68570b9c 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -344,6 +344,9 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg)
#if DEBUG_PRINT
debug_printf("ExaPrepareSolid(0x%x)\n", fg);
#endif
+ if (!exa->accel)
+ return FALSE;
+
if (!exa->pipe)
XORG_FALLBACK("accle not enabled");
@@ -362,7 +365,7 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg)
XORG_FALLBACK("format %s", pf_name(priv->tex->format));
}
- return exa->accel && xorg_solid_bind_state(exa, priv, fg);
+ return xorg_solid_bind_state(exa, priv, fg);
}
static void
@@ -418,6 +421,10 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
#if DEBUG_PRINT
debug_printf("ExaPrepareCopy\n");
#endif
+
+ if (!exa->accel)
+ return FALSE;
+
if (!exa->pipe)
XORG_FALLBACK("accle not enabled");
@@ -491,7 +498,7 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
}
- return exa->accel;
+ return TRUE;
}
static void
@@ -509,6 +516,7 @@ ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY,
#endif
debug_assert(priv == exa->copy.dst);
+ (void) priv;
if (exa->copy.use_surface_copy) {
/* XXX: consider exposing >1 box in surface_copy interface.
@@ -599,15 +607,19 @@ ExaCheckComposite(int op,
ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
modesettingPtr ms = modesettingPTR(pScrn);
struct exa_context *exa = ms->exa;
- boolean accelerated = xorg_composite_accelerated(op,
- pSrcPicture,
- pMaskPicture,
- pDstPicture);
+
#if DEBUG_PRINT
debug_printf("ExaCheckComposite(%d, %p, %p, %p) = %d\n",
op, pSrcPicture, pMaskPicture, pDstPicture, accelerated);
#endif
- return exa->accel && accelerated;
+
+ if (!exa->accel)
+ return FALSE;
+
+ return xorg_composite_accelerated(op,
+ pSrcPicture,
+ pMaskPicture,
+ pDstPicture);
}
@@ -621,6 +633,9 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture,
struct exa_context *exa = ms->exa;
struct exa_pixmap_priv *priv;
+ if (!exa->accel)
+ return FALSE;
+
#if DEBUG_PRINT
debug_printf("ExaPrepareComposite(%d, src=0x%p, mask=0x%p, dst=0x%p)\n",
op, pSrcPicture, pMaskPicture, pDstPicture);
@@ -679,8 +694,7 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture,
render_format_name(pMaskPicture->format));
}
- return exa->accel &&
- xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture,
+ return xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture,
pDstPicture,
pSrc ? exaGetPixmapDriverPrivate(pSrc) : NULL,
pMask ? exaGetPixmapDriverPrivate(pMask) : NULL,
@@ -1006,6 +1020,9 @@ xorg_exa_close(ScrnInfoPtr pScrn)
if (exa->pipe)
exa->pipe->destroy(exa->pipe);
+ exa->pipe = NULL;
+ /* Since this was shared be proper with the pointer */
+ ms->ctx = NULL;
exaDriverFini(pScrn->pScreen);
xfree(exa);
diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
index 89b794a09a..bed17caab7 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
@@ -47,22 +47,22 @@ static void
print_fs_traits(int fs_traits)
{
const char *strings[] = {
- "FS_COMPOSITE", /* = 1 << 0 */
- "FS_MASK", /* = 1 << 1 */
- "FS_SOLID_FILL", /* = 1 << 2 */
- "FS_LINGRAD_FILL", /* = 1 << 3 */
- "FS_RADGRAD_FILL", /* = 1 << 4 */
- "FS_CA_FULL", /* = 1 << 5 - src.rgba * mask.rgba */
- "FS_CA_SRCALPHA", /* = 1 << 6 - src.aaaa * mask.rgba */
- "FS_YUV", /* = 1 << 7 */
- "FS_SRC_REPEAT_NONE", /* = 1 << 8 */
- "FS_MASK_REPEAT_NONE",/* = 1 << 9 */
- "FS_SRC_SWIZZLE_RGB", /* = 1 << 10 */
- "FS_MASK_SWIZZLE_RGB",/* = 1 << 11 */
- "FS_SRC_SET_ALPHA", /* = 1 << 12 */
- "FS_MASK_SET_ALPHA", /* = 1 << 13 */
- "FS_SRC_LUMINANCE", /* = 1 << 14 */
- "FS_MASK_LUMINANCE", /* = 1 << 15 */
+ "FS_COMPOSITE", /* = 1 << 0, */
+ "FS_MASK", /* = 1 << 1, */
+ "FS_SOLID_FILL", /* = 1 << 2, */
+ "FS_LINGRAD_FILL", /* = 1 << 3, */
+ "FS_RADGRAD_FILL", /* = 1 << 4, */
+ "FS_CA_FULL", /* = 1 << 5, */ /* src.rgba * mask.rgba */
+ "FS_CA_SRCALPHA", /* = 1 << 6, */ /* src.aaaa * mask.rgba */
+ "FS_YUV", /* = 1 << 7, */
+ "FS_SRC_REPEAT_NONE", /* = 1 << 8, */
+ "FS_MASK_REPEAT_NONE",/* = 1 << 9, */
+ "FS_SRC_SWIZZLE_RGB", /* = 1 << 10, */
+ "FS_MASK_SWIZZLE_RGB",/* = 1 << 11, */
+ "FS_SRC_SET_ALPHA", /* = 1 << 12, */
+ "FS_MASK_SET_ALPHA", /* = 1 << 13, */
+ "FS_SRC_LUMINANCE", /* = 1 << 14, */
+ "FS_MASK_LUMINANCE", /* = 1 << 15, */
};
int i, k;
debug_printf("%s: ", __func__);
@@ -492,6 +492,7 @@ create_fs(struct pipe_context *pipe,
/* it has to be either a fill, a composite op or a yuv conversion */
debug_assert((is_fill ^ is_composite) ^ is_yuv);
+ (void) is_yuv;
out = ureg_DECL_output(ureg,
TGSI_SEMANTIC_COLOR,
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index ba15f8a784..d80f341e6c 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -438,6 +438,7 @@ void renderer_copy_prepare(struct xorg_renderer *r,
PIPE_TEXTURE_2D,
PIPE_TEXTURE_USAGE_RENDER_TARGET,
0));
+ (void) screen;
/* set misc state we care about */
diff --git a/src/gallium/winsys/drm/SConscript b/src/gallium/winsys/drm/SConscript
index 9f7b383d2d..66b73a8bf9 100644
--- a/src/gallium/winsys/drm/SConscript
+++ b/src/gallium/winsys/drm/SConscript
@@ -58,6 +58,11 @@ if env['dri']:
'intel/SConscript',
])
+ if 'i965' in env['winsys']:
+ SConscript([
+ 'i965/SConscript',
+ ])
+
if 'radeon' in env['winsys']:
SConscript([
'radeon/SConscript',
diff --git a/src/gallium/winsys/drm/i965/Makefile b/src/gallium/winsys/drm/i965/Makefile
new file mode 100644
index 0000000000..d8feef6824
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/Makefile
@@ -0,0 +1,12 @@
+# src/gallium/winsys/drm/intel/Makefile
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+SUBDIRS = gem $(GALLIUM_STATE_TRACKERS_DIRS)
+
+default install clean:
+ @for dir in $(SUBDIRS) ; do \
+ if [ -d $$dir ] ; then \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
+ fi \
+ done
diff --git a/src/gallium/winsys/drm/i965/SConscript b/src/gallium/winsys/drm/i965/SConscript
new file mode 100644
index 0000000000..50d7b75ed6
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/SConscript
@@ -0,0 +1,7 @@
+Import('*')
+
+SConscript(['gem/SConscript',])
+
+if 'mesa' in env['statetrackers']:
+
+ SConscript(['dri/SConscript'])
diff --git a/src/gallium/winsys/drm/i965/dri/Makefile b/src/gallium/winsys/drm/i965/dri/Makefile
new file mode 100644
index 0000000000..f7e81eed87
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/dri/Makefile
@@ -0,0 +1,26 @@
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965_dri.so
+
+PIPE_DRIVERS = \
+ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \
+ $(TOP)/src/gallium/winsys/drm/i965/gem/libi965drm.a \
+ $(TOP)/src/gallium/drivers/trace/libtrace.a \
+ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+ $(TOP)/src/gallium/drivers/identity/libidentity.a \
+ $(TOP)/src/gallium/drivers/i965/libi965.a
+
+
+DRIVER_SOURCES =
+
+C_SOURCES = \
+ $(COMMON_GALLIUM_SOURCES) \
+ $(DRIVER_SOURCES)
+
+include ../../Makefile.template
+
+DRI_LIB_DEPS += -ldrm_intel
+
+symlinks: $(TOP)/$(LIB_DIR)/gallium
+ @rm -f $(TOP)/$(LIB_DIR)/gallium/i965_dri.so
diff --git a/src/gallium/winsys/drm/i965/dri/SConscript b/src/gallium/winsys/drm/i965/dri/SConscript
new file mode 100644
index 0000000000..a99533fd24
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/dri/SConscript
@@ -0,0 +1,19 @@
+Import('*')
+
+env = drienv.Clone()
+
+env.ParseConfig('pkg-config --cflags --libs libdrm_intel')
+
+drivers = [
+ st_dri,
+ i965drm,
+ i965,
+ trace,
+]
+
+env.LoadableModule(
+ target ='i965_dri.so',
+ source = COMMON_GALLIUM_SOURCES,
+ LIBS = drivers + mesa + gallium + env['LIBS'],
+ SHLIBPREFIX = '',
+)
diff --git a/src/gallium/winsys/drm/i965/egl/Makefile b/src/gallium/winsys/drm/i965/egl/Makefile
new file mode 100644
index 0000000000..a1b32eb2a7
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/egl/Makefile
@@ -0,0 +1,29 @@
+TOP = ../../../../../..
+GALLIUMDIR = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = EGL_i965.so
+
+PIPE_DRIVERS = \
+ $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \
+ $(GALLIUMDIR)/winsys/drm/i965/gem/libi965drm.a \
+ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+ $(TOP)/src/gallium/drivers/trace/libtrace.a \
+ $(TOP)/src/gallium/drivers/i965/libi965.a
+
+DRIVER_SOURCES =
+
+C_SOURCES = \
+ $(COMMON_GALLIUM_SOURCES) \
+ $(DRIVER_SOURCES)
+
+DRIVER_EXTRAS = -ldrm_intel
+
+ASM_SOURCES =
+
+DRIVER_DEFINES = -I../gem $(shell pkg-config libdrm --atleast-version=2.3.1 \
+ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
+
+include ../../Makefile.template
+
+symlinks:
diff --git a/src/gallium/winsys/drm/i965/gem/Makefile b/src/gallium/winsys/drm/i965/gem/Makefile
new file mode 100644
index 0000000000..6a7497b6be
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/gem/Makefile
@@ -0,0 +1,14 @@
+# Makefile for the i965 GEM winsys library (LIBNAME i965drm).
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965drm
+
+C_SOURCES = \
+ i965_drm_buffer.c \
+ i965_drm_api.c
+
+# Include paths and remaining compiler flags for libdrm come from pkg-config.
+LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I)
+
+LIBRARY_DEFINES = $(shell pkg-config libdrm --cflags-only-other)
+
+include ../../../../Makefile.template
diff --git a/src/gallium/winsys/drm/i965/gem/SConscript b/src/gallium/winsys/drm/i965/gem/SConscript
new file mode 100644
index 0000000000..6256ec6eaf
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/gem/SConscript
@@ -0,0 +1,15 @@
+# SCons build script for the i965 GEM winsys convenience library.
+Import('*')
+
+env = drienv.Clone()
+
+i965drm_sources = [
+ 'i965_drm_api.c',
+ 'i965_drm_buffer.c',
+]
+
+i965drm = env.ConvenienceLibrary(
+ target ='i965drm',
+ source = i965drm_sources,
+)
+
+# Exported so other SConscripts can link against the library by name.
+Export('i965drm')
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
new file mode 100644
index 0000000000..fc9678d2b6
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
@@ -0,0 +1,243 @@
+
+#include <stdio.h>
+#include "state_tracker/drm_api.h"
+
+#include "i965_drm_winsys.h"
+#include "util/u_memory.h"
+
+#include "i965/brw_context.h" /* XXX: shouldn't be doing this */
+#include "i965/brw_screen.h" /* XXX: shouldn't be doing this */
+
+#include "trace/tr_drm.h"
+
+/*
+ * Helper functions
+ */
+
+
+/**
+ * Read the PCI device id of DRM card 0 from sysfs.
+ *
+ * \param device_id  receives the value parsed as hexadecimal from
+ *                   /sys/class/drm/card0/device/device, or 0 when the file
+ *                   cannot be opened, read or parsed.
+ */
+static void
+i965_libdrm_get_device_id(unsigned int *device_id)
+{
+   char path[512];
+   FILE *file;
+
+   /* Never hand an uninitialized value back to the caller. */
+   *device_id = 0;
+
+   /*
+    * FIXME: Fix this up to use a drm ioctl or whatever.
+    */
+
+   snprintf(path, sizeof(path), "/sys/class/drm/card0/device/device");
+   file = fopen(path, "r");
+   if (!file) {
+      return;
+   }
+
+   /* Only parse the buffer when a line was actually read into it;
+    * otherwise it still holds the sysfs path.
+    */
+   if (fgets(path, sizeof(path), file)) {
+      if (sscanf(path, "%x", device_id) != 1)
+         *device_id = 0;
+   }
+
+   fclose(file);
+}
+
+/**
+ * Wrap an existing, globally named GEM buffer in a winsys buffer.
+ *
+ * \param idws    winsys whose buffer manager opens the object
+ * \param name    debug name passed to libdrm
+ * \param handle  global (flink) name of the buffer
+ * \return the new buffer with one reference, or NULL on allocation failure
+ *         or if libdrm cannot open the name.
+ */
+static struct i965_libdrm_buffer *
+i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws,
+                               const char* name, unsigned handle)
+{
+   struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer);
+   uint32_t swizzle = 0;
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
+   if (!buf)
+      return NULL;
+
+   /* Open the buffer first: everything below reads fields of buf->bo, so
+    * the NULL check has to come before any of them.
+    */
+   buf->bo = drm_intel_bo_gem_create_from_name(idws->gem, name, handle);
+   if (!buf->bo)
+      goto err;
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.size = buf->bo->size;
+   buf->base.sws = &idws->base;
+
+   /* The buffer already has a global name; remember it. */
+   buf->flinked = TRUE;
+   buf->flink = handle;
+
+   /* Any non-zero tiling mode makes later maps go through the GTT. */
+   drm_intel_bo_get_tiling(buf->bo, &buf->tiling, &swizzle);
+   if (buf->tiling != 0)
+      buf->map_gtt = TRUE;
+
+   return buf;
+
+err:
+   FREE(buf);
+   return NULL;
+}
+
+
+/*
+ * Exported functions
+ */
+
+
+/**
+ * drm_api hook: build a texture on top of a buffer shared by global name.
+ *
+ * The buffer is opened through the winsys attached to \p screen and passed,
+ * together with the caller's pitch and the buffer's tiling mode, to
+ * brw_texture_blanket_winsys_buffer().  Returns NULL when the buffer cannot
+ * be opened.
+ */
+static struct pipe_texture *
+i965_libdrm_texture_from_shared_handle(struct drm_api *api,
+                                       struct pipe_screen *screen,
+                                       struct pipe_texture *template,
+                                       const char* name,
+                                       unsigned pitch,
+                                       unsigned handle)
+{
+   /* XXX: this is silly -- there should be a way to get directly from
+    * the "drm_api" struct to ourselves, without peering into
+    * unrelated code:
+    */
+   struct i965_libdrm_winsys *winsys =
+      i965_libdrm_winsys(brw_screen(screen)->sws);
+   struct i965_libdrm_buffer *shared;
+   struct pipe_texture *tex;
+
+   if (BRW_DUMP)
+      debug_printf("%s %s pitch %d handle 0x%x\n", __FUNCTION__,
+                   name, pitch, handle);
+
+   shared = i965_libdrm_buffer_from_handle(winsys, name, handle);
+   if (shared == NULL)
+      return NULL;
+
+   tex = brw_texture_blanket_winsys_buffer(screen, template, pitch,
+                                           shared->tiling,
+                                           &shared->base);
+   return tex;
+}
+
+
+/**
+ * drm_api hook: report the pitch and global (flink) name of a texture's
+ * backing buffer.
+ *
+ * Returns FALSE if the texture has no winsys buffer or if libdrm fails to
+ * create the global name; otherwise fills \p pitch and \p handle and
+ * returns TRUE.
+ */
+static boolean
+i965_libdrm_shared_handle_from_texture(struct drm_api *api,
+                                       struct pipe_screen *screen,
+                                       struct pipe_texture *texture,
+                                       unsigned *pitch,
+                                       unsigned *handle)
+{
+   struct brw_winsys_buffer *wsbuf = NULL;
+   struct i965_libdrm_buffer *drmbuf = NULL;
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
+   if (!brw_texture_get_winsys_buffer(texture, &wsbuf, pitch))
+      return FALSE;
+
+   drmbuf = i965_libdrm_buffer(wsbuf);
+
+   /* The global name is created once and cached in the buffer. */
+   if (!drmbuf->flinked) {
+      int failed = drm_intel_bo_flink(drmbuf->bo, &drmbuf->flink);
+
+      if (failed)
+         return FALSE;
+
+      drmbuf->flinked = TRUE;
+   }
+
+   *handle = drmbuf->flink;
+
+   if (BRW_DUMP)
+      debug_printf("   -> pitch %d handle 0x%x\n", *pitch, *handle);
+
+   return TRUE;
+}
+
+/**
+ * drm_api hook: report the pitch and the buffer object's own handle for a
+ * texture's backing buffer.
+ *
+ * Returns FALSE if the texture has no winsys buffer, TRUE otherwise.
+ */
+static boolean
+i965_libdrm_local_handle_from_texture(struct drm_api *api,
+                                      struct pipe_screen *screen,
+                                      struct pipe_texture *texture,
+                                      unsigned *pitch,
+                                      unsigned *handle)
+{
+   struct brw_winsys_buffer *wsbuf = NULL;
+   struct i965_libdrm_buffer *drmbuf;
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
+   if (!brw_texture_get_winsys_buffer(texture, &wsbuf, pitch))
+      return FALSE;
+
+   drmbuf = i965_libdrm_buffer(wsbuf);
+   *handle = drmbuf->bo->handle;
+
+   if (BRW_DUMP)
+      debug_printf("   -> pitch %d handle 0x%x\n", *pitch, *handle);
+
+   return TRUE;
+}
+
+/**
+ * Winsys destroy hook: destroy the buffer manager and free the winsys.
+ */
+static void
+i965_libdrm_winsys_destroy(struct brw_winsys_screen *iws)
+{
+ struct i965_libdrm_winsys *idws = i965_libdrm_winsys(iws);
+
+ if (BRW_DUMP)
+ debug_printf("%s\n", __FUNCTION__);
+
+ drm_intel_bufmgr_destroy(idws->gem);
+
+ FREE(idws);
+}
+
+/**
+ * drm_api hook: create the winsys for an open DRM fd and the i965 screen
+ * on top of it.
+ *
+ * \param api    unused
+ * \param drmFD  open DRM file descriptor handed to the GEM buffer manager
+ * \param arg    optional creation arguments; only DRM_CREATE_NORMAL is
+ *               accepted
+ * \return the new screen, or NULL on unsupported mode, allocation failure,
+ *         buffer manager failure or screen creation failure.
+ */
+static struct pipe_screen *
+i965_libdrm_create_screen(struct drm_api *api, int drmFD,
+                          struct drm_create_screen_arg *arg)
+{
+   struct i965_libdrm_winsys *idws;
+   struct pipe_screen *screen;
+   unsigned int deviceID = 0;   /* stays 0 if the id cannot be read */
+
+   debug_printf("%s\n", __FUNCTION__);
+
+   if (arg != NULL) {
+      switch(arg->mode) {
+      case DRM_CREATE_NORMAL:
+         break;
+      default:
+         return NULL;
+      }
+   }
+
+   idws = CALLOC_STRUCT(i965_libdrm_winsys);
+   if (!idws)
+      return NULL;
+
+   i965_libdrm_get_device_id(&deviceID);
+
+   i965_libdrm_winsys_init_buffer_functions(idws);
+
+   idws->fd = drmFD;
+   idws->id = deviceID;
+
+   idws->base.destroy = i965_libdrm_winsys_destroy;
+
+   idws->gem = drm_intel_bufmgr_gem_init(idws->fd, BRW_BATCH_SIZE);
+   if (!idws->gem) {
+      /* No buffer manager yet, so only the struct itself needs freeing. */
+      FREE(idws);
+      return NULL;
+   }
+   drm_intel_bufmgr_gem_enable_reuse(idws->gem);
+
+   idws->send_cmd = !debug_get_bool_option("BRW_NO_HW", FALSE);
+
+   screen = brw_create_screen(&idws->base, deviceID);
+   if (!screen) {
+      /* Same cleanup the Xlib winsys does when screen creation fails.
+       * NOTE(review): assumes brw_create_screen() does not destroy the
+       * winsys itself on failure -- confirm.
+       */
+      idws->base.destroy(&idws->base);
+      return NULL;
+   }
+
+   return screen;
+}
+
+/**
+ * drm_api hook: forwards to brw_create_context(); \p api is unused.
+ */
+static struct pipe_context *
+i965_libdrm_create_context(struct drm_api *api, struct pipe_screen *screen)
+{
+ return brw_create_context(screen);
+}
+
+/**
+ * drm_api hook: nothing to release here; only logs when BRW_DUMP is set.
+ */
+static void
+destroy(struct drm_api *api)
+{
+ if (BRW_DUMP)
+ debug_printf("%s\n", __FUNCTION__);
+
+}
+
+/* Table of drm_api entry points implemented in this file. */
+struct drm_api i965_libdrm_api =
+{
+ .create_context = i965_libdrm_create_context,
+ .create_screen = i965_libdrm_create_screen,
+ .texture_from_shared_handle = i965_libdrm_texture_from_shared_handle,
+ .shared_handle_from_texture = i965_libdrm_shared_handle_from_texture,
+ .local_handle_from_texture = i965_libdrm_local_handle_from_texture,
+ .destroy = destroy,
+};
+
+/**
+ * Public entry point: return the i965 drm_api wrapped by the trace driver.
+ */
+struct drm_api *
+drm_api_create(void)
+{
+   return trace_drm_create(&i965_libdrm_api);
+}
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
new file mode 100644
index 0000000000..a4a72b372d
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
@@ -0,0 +1,427 @@
+
+#include "i965_drm_winsys.h"
+#include "util/u_memory.h"
+
+#include "i915_drm.h"
+#include "intel_bufmgr.h"
+
+
+
+/* Debug names indexed by enum brw_buffer_type; also used as the libdrm
+ * buffer name when allocating.
+ * NOTE(review): these three tables have external linkage under very
+ * generic names -- consider making them static if nothing else uses them.
+ */
+const char *names[BRW_BUFFER_TYPE_MAX] = {
+ "TEXTURE",
+ "SCANOUT",
+ "VERTEX",
+ "CURBE",
+ "QUERY",
+ "SHADER_CONSTANTS",
+ "WM_SCRATCH",
+ "BATCH",
+ "GENERAL_STATE",
+ "SURFACE_STATE",
+ "PIXEL",
+ "GENERIC",
+};
+
+/* Debug names indexed by enum brw_buffer_usage. */
+const char *usages[BRW_USAGE_MAX] = {
+ "STATE",
+ "QUERY_RESULT",
+ "RENDER_TARGET",
+ "DEPTH_BUFFER",
+ "BLIT_SOURCE",
+ "BLIT_DEST",
+ "SAMPLER",
+ "VERTEX",
+ "SCRATCH"
+};
+
+
+/* Debug names indexed by enum brw_buffer_data_type. */
+const char *data_types[BRW_DATA_MAX] =
+{
+ "GS: CC_VP",
+ "GS: CC_UNIT",
+ "GS: WM_PROG",
+ "GS: SAMPLER_DEFAULT_COLOR",
+ "GS: SAMPLER",
+ "GS: WM_UNIT",
+ "GS: SF_PROG",
+ "GS: SF_VP",
+ "GS: SF_UNIT",
+ "GS: VS_UNIT",
+ "GS: VS_PROG",
+ "GS: GS_UNIT",
+ "GS: GS_PROG",
+ "GS: CLIP_VP",
+ "GS: CLIP_UNIT",
+ "GS: CLIP_PROG",
+ "SS: SURFACE",
+ "SS: SURF_BIND",
+ "CONSTANT DATA",
+ "BATCH DATA",
+ "(untyped)"
+};
+
+/**
+ * Winsys hook: allocate a GEM buffer object of the given type.
+ *
+ * Scanout buffers are flagged for GTT mapping; every other known type is
+ * allocated without extra flags.  On success *bo_out receives the new
+ * buffer holding one reference.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if either the wrapper
+ *         struct or the buffer object cannot be allocated.
+ */
+static enum pipe_error
+i965_libdrm_bo_alloc(struct brw_winsys_screen *sws,
+                     enum brw_buffer_type type,
+                     unsigned size,
+                     unsigned alignment,
+                     struct brw_winsys_buffer **bo_out)
+{
+   struct i965_libdrm_winsys *winsys = i965_libdrm_winsys(sws);
+   struct i965_libdrm_buffer *wrapper;
+
+   if (BRW_DUMP)
+      debug_printf("%s type %s sz %d align %d\n",
+                   __FUNCTION__, names[type], size, alignment );
+
+   wrapper = CALLOC_STRUCT(i965_libdrm_buffer);
+   if (wrapper == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   switch (type) {
+   case BRW_BUFFER_TYPE_SCANOUT:
+      wrapper->map_gtt = TRUE;
+      break;
+
+   case BRW_BUFFER_TYPE_TEXTURE:
+   case BRW_BUFFER_TYPE_VERTEX:
+   case BRW_BUFFER_TYPE_CURBE:
+   case BRW_BUFFER_TYPE_QUERY:
+   case BRW_BUFFER_TYPE_SHADER_CONSTANTS:
+   case BRW_BUFFER_TYPE_SHADER_SCRATCH:
+   case BRW_BUFFER_TYPE_BATCH:
+   case BRW_BUFFER_TYPE_GENERAL_STATE:
+   case BRW_BUFFER_TYPE_SURFACE_STATE:
+   case BRW_BUFFER_TYPE_PIXEL:
+   case BRW_BUFFER_TYPE_GENERIC:
+      /* No special handling. */
+      break;
+
+   default:
+      assert(0);
+      break;
+   }
+
+   wrapper->bo = drm_intel_bo_alloc(winsys->gem,
+                                    names[type],
+                                    size,
+                                    alignment);
+   if (wrapper->bo == NULL) {
+      assert(0);
+      FREE(wrapper);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   pipe_reference_init(&wrapper->base.reference, 1);
+   wrapper->base.size = size;
+   wrapper->base.sws = sws;
+
+   *bo_out = &wrapper->base;
+   return PIPE_OK;
+}
+
+/**
+ * Winsys hook: drop the libdrm reference on the buffer object and free
+ * the wrapper.
+ */
+static void
+i965_libdrm_bo_destroy(struct brw_winsys_buffer *buffer)
+{
+   struct i965_libdrm_buffer *wrapper = i965_libdrm_buffer(buffer);
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
+   drm_intel_bo_unreference(wrapper->bo);
+
+   /* 'wrapper' is the same allocation as 'buffer', viewed as the subclass. */
+   FREE(wrapper);
+}
+
+/**
+ * Winsys hook: record a relocation in \p buffer that points at \p buffer2.
+ *
+ * \param buffer   buffer containing the pointer to patch
+ * \param usage    how the target is used; selects the GEM read/write domains
+ * \param delta    value added to the target's address
+ * \param offset   byte offset of the pointer inside \p buffer
+ * \param buffer2  relocation target
+ * \return PIPE_OK on success, PIPE_ERROR for an unknown usage or when
+ *         libdrm rejects the relocation.
+ */
+static enum pipe_error
+i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer,
+                          enum brw_buffer_usage usage,
+                          unsigned delta,
+                          unsigned offset,
+                          struct brw_winsys_buffer *buffer2)
+{
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   struct i965_libdrm_buffer *buf2 = i965_libdrm_buffer(buffer2);
+   int read, write;
+   int ret;
+
+   /* buf2->data_type is a brw_buffer_data_type, so it must index
+    * data_types[]; indexing names[] with it reads past the end of that
+    * smaller table.
+    */
+   if (BRW_DUMP)
+      debug_printf("%s buf %p offset %x delta %x buf2 %p/%s/%s\n",
+                   __FUNCTION__, (void *)buffer,
+                   offset, delta,
+                   (void *)buffer2, data_types[buf2->data_type],
+                   usages[usage]);
+
+   /* NOTE(review): RENDER_TARGET has no write domain while BLIT_SOURCE
+    * has one and no read domain -- these look swapped or incomplete;
+    * confirm against the hardware programming model before changing.
+    */
+   switch (usage) {
+   case BRW_USAGE_STATE:
+      read = I915_GEM_DOMAIN_INSTRUCTION;
+      write = 0;
+      break;
+   case BRW_USAGE_QUERY_RESULT:
+      read = I915_GEM_DOMAIN_INSTRUCTION;
+      write = I915_GEM_DOMAIN_INSTRUCTION;
+      break;
+   case BRW_USAGE_RENDER_TARGET:
+      read = I915_GEM_DOMAIN_RENDER;
+      write = 0;
+      break;
+   case BRW_USAGE_DEPTH_BUFFER:
+      read = I915_GEM_DOMAIN_RENDER;
+      write = I915_GEM_DOMAIN_RENDER;
+      break;
+   case BRW_USAGE_BLIT_SOURCE:
+      read = 0;
+      write = I915_GEM_DOMAIN_RENDER;
+      break;
+   case BRW_USAGE_BLIT_DEST:
+      read = I915_GEM_DOMAIN_RENDER;
+      write = I915_GEM_DOMAIN_RENDER;
+      break;
+   case BRW_USAGE_SAMPLER:
+      read = I915_GEM_DOMAIN_SAMPLER;
+      write = 0;
+      break;
+   case BRW_USAGE_VERTEX:
+      read = I915_GEM_DOMAIN_VERTEX;
+      write = 0;
+      break;
+   case BRW_USAGE_SCRATCH:
+      read = 0;
+      write = 0;
+      break;
+   default:
+      assert(0);
+      return PIPE_ERROR;
+   }
+
+   /* Needed??
+   ((uint32_t *)buf->bo->virtual)[offset/4] = (delta +
+                                               buf2->bo->offset);
+   */
+
+   ret = dri_bo_emit_reloc( buf->bo, read, write, delta, offset, buf2->bo );
+   if (ret)
+      return PIPE_ERROR;
+
+   return PIPE_OK;
+}
+
+/**
+ * Winsys hook: submit a batch buffer for execution.
+ *
+ * Submission is skipped entirely when the winsys has send_cmd cleared.
+ *
+ * \return PIPE_OK, or PIPE_ERROR if libdrm fails to execute the buffer.
+ */
+static enum pipe_error
+i965_libdrm_bo_exec(struct brw_winsys_buffer *buffer,
+                    unsigned bytes_used)
+{
+   struct i965_libdrm_buffer *batch = i965_libdrm_buffer(buffer);
+   struct i965_libdrm_winsys *winsys = i965_libdrm_winsys(buffer->sws);
+
+   if (BRW_DUMP)
+      debug_printf("execute buffer %p, bytes %d\n", (void *)buffer, bytes_used);
+
+   if (!winsys->send_cmd)
+      return PIPE_OK;
+
+   if (dri_bo_exec(batch->bo, bytes_used, NULL, 0, 0) != 0)
+      return PIPE_ERROR;
+
+   return PIPE_OK;
+}
+
+/**
+ * Winsys hook: upload data into a buffer and emit its relocations.
+ *
+ * \param buffer     destination buffer
+ * \param data_type  kind of data, used only for debug dumping
+ * \param offset     byte offset of the upload inside \p buffer
+ * \param size       number of bytes to upload
+ * \param data       source bytes
+ * \param reloc      relocations to emit after the upload
+ * \param nr_reloc   number of entries in \p reloc
+ * \return PIPE_OK, PIPE_ERROR if the upload fails, or the first error
+ *         returned while emitting a relocation.
+ */
+static enum pipe_error
+i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer,
+                       enum brw_buffer_data_type data_type,
+                       size_t offset,
+                       size_t size,
+                       const void *data,
+                       const struct brw_winsys_reloc *reloc,
+                       unsigned nr_reloc)
+{
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws);
+   unsigned i;
+   int ret;
+
+   /* offset and size are size_t; cast them so they match the %d
+    * conversions in the format string.
+    */
+   if (BRW_DUMP)
+      debug_printf("%s buf %p off %d sz %d %s relocs: %d\n",
+                   __FUNCTION__,
+                   (void *)buffer, (int)offset, (int)size,
+                   data_types[data_type],
+                   nr_reloc);
+
+   if (BRW_DUMP)
+      brw_dump_data( idws->id,
+                     data_type,
+                     buf->bo->offset + offset,
+                     data, size );
+
+   /* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances???
+    */
+   ret = drm_intel_bo_subdata(buf->bo, offset, size, (void*)data);
+   if (ret)
+      return PIPE_ERROR;
+
+   for (i = 0; i < nr_reloc; i++) {
+      enum pipe_error err;
+
+      err = i965_libdrm_bo_emit_reloc(buffer, reloc[i].usage, reloc[i].delta,
+                                      reloc[i].offset, reloc[i].bo);
+
+      /* A dropped relocation leaves a stale pointer in the buffer, so
+       * report the failure instead of ignoring it.
+       */
+      if (err != PIPE_OK)
+         return err;
+   }
+
+   return PIPE_OK;
+}
+
+/**
+ * Winsys hook: return the result of drm_intel_bo_busy() for the buffer.
+ */
+static boolean
+i965_libdrm_bo_is_busy(struct brw_winsys_buffer *buffer)
+{
+   struct i965_libdrm_buffer *wrapper = i965_libdrm_buffer(buffer);
+   boolean busy;
+
+   if (BRW_DUMP)
+      debug_printf("%s %p\n", __FUNCTION__, (void *)buffer);
+
+   busy = drm_intel_bo_busy(wrapper->bo);
+
+   if (BRW_DUMP)
+      debug_printf("  --> %d\n", busy);
+
+   return busy;
+}
+
+/**
+ * Winsys hook: return the result of drm_intel_bo_references() for the
+ * buffer objects behind \p a and \p b.
+ */
+static boolean
+i965_libdrm_bo_references(struct brw_winsys_buffer *a,
+                          struct brw_winsys_buffer *b)
+{
+   drm_intel_bo *bo_a = i965_libdrm_buffer(a)->bo;
+   drm_intel_bo *bo_b = i965_libdrm_buffer(b)->bo;
+   boolean refs;
+
+   if (BRW_DUMP)
+      debug_printf("%s %p %p\n", __FUNCTION__, (void *)a, (void *)b);
+
+   refs = drm_intel_bo_references(bo_a, bo_b);
+
+   if (BRW_DUMP)
+      debug_printf("  --> %d\n", refs);
+
+   return refs;
+}
+
+/* XXX: couldn't this be handled by returning true/false on
+ * bo_emit_reloc?
+ */
+/**
+ * Winsys hook: ask libdrm whether a set of buffers fits in the aperture.
+ *
+ * \param iws      unused
+ * \param buffers  buffers to check
+ * \param count    number of entries in \p buffers
+ * \return PIPE_ERROR_OUT_OF_MEMORY if \p count exceeds the internal table;
+ *         otherwise the raw result of dri_bufmgr_check_aperture_space().
+ *
+ * Uses a static scratch array, so concurrent calls would share it.
+ */
+static enum pipe_error
+i965_libdrm_check_aperture_space(struct brw_winsys_screen *iws,
+                                 struct brw_winsys_buffer **buffers,
+                                 unsigned count)
+{
+   static drm_intel_bo *bos[128];
+   unsigned i;
+   int ret;
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
+   if (count > Elements(bos)) {
+      /* FALSE is 0, the same value as PIPE_OK, so returning it here
+       * would report success for a list that was never checked.
+       */
+      assert(0);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   for (i = 0; i < count; i++)
+      bos[i] = i965_libdrm_buffer(buffers[i])->bo;
+
+   /* XXX: converting from ??? to pipe_error:
+    */
+   ret = dri_bufmgr_check_aperture_space(bos, count);
+
+   if (BRW_DUMP)
+      debug_printf("  --> %d (ok == %d)\n", ret, PIPE_OK);
+
+   return ret;
+}
+
+/**
+ * Winsys hook: map a buffer and return its CPU address.
+ *
+ * The libdrm mapping is only created when the map count is zero; the
+ * count is incremented on every successful call.  Buffers flagged map_gtt
+ * are mapped through the GTT.  The data type is remembered for debug
+ * dumping.
+ *
+ * \return the mapped address, or NULL if libdrm fails to map the buffer.
+ */
+static void *
+i965_libdrm_bo_map(struct brw_winsys_buffer *buffer,
+                   enum brw_buffer_data_type data_type,
+                   unsigned offset,
+                   unsigned length,
+                   boolean write,
+                   boolean discard,
+                   boolean flush_explicit)
+{
+   struct i965_libdrm_buffer *wrapper = i965_libdrm_buffer(buffer);
+
+   if (BRW_DUMP)
+      debug_printf("%s %p %s %s\n", __FUNCTION__, (void *)buffer,
+                   write ? "read/write" : "read",
+                   write ? data_types[data_type] : "");
+
+   if (wrapper->map_count == 0) {
+      int err;
+
+      if (wrapper->map_gtt)
+         err = drm_intel_gem_bo_map_gtt(wrapper->bo);
+      else
+         err = drm_intel_bo_map(wrapper->bo, write);
+
+      if (err != 0)
+         return NULL;
+   }
+
+   wrapper->data_type = data_type;
+   wrapper->map_count += 1;
+
+   return wrapper->bo->virtual;
+}
+
+/**
+ * Winsys hook: called for a written range of a mapped buffer.
+ *
+ * Nothing is flushed here; when BRW_DUMP is set the range is logged and
+ * dumped using the data type recorded by the last map call.
+ */
+static void
+i965_libdrm_bo_flush_range(struct brw_winsys_buffer *buffer,
+ unsigned offset,
+ unsigned length)
+{
+ struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+ struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws);
+
+ if (BRW_DUMP)
+ debug_printf("%s %s offset %d len %d\n", __FUNCTION__,
+ data_types[buf->data_type],
+ offset, length);
+
+ /* NOTE(review): 'virtual + offset' is arithmetic on a void pointer if
+  * bo->virtual is declared void * -- a compiler extension, confirm.
+  */
+ if (BRW_DUMP)
+ brw_dump_data( idws->id,
+ buf->data_type,
+ buf->bo->offset + offset,
+ buf->bo->virtual + offset,
+ length );
+}
+
+/**
+ * Winsys hook: drop one map reference; the libdrm mapping is released
+ * only when the count reaches zero.
+ */
+static void
+i965_libdrm_bo_unmap(struct brw_winsys_buffer *buffer)
+{
+   struct i965_libdrm_buffer *wrapper = i965_libdrm_buffer(buffer);
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
+   wrapper->map_count--;
+
+   /* map_count is unsigned, so "still mapped" is simply non-zero. */
+   if (wrapper->map_count != 0)
+      return;
+
+   if (wrapper->map_gtt) {
+      drm_intel_gem_bo_unmap_gtt(wrapper->bo);
+   }
+   else {
+      drm_intel_bo_unmap(wrapper->bo);
+   }
+}
+
+/**
+ * Install this file's buffer callbacks into the winsys function table.
+ *
+ * NOTE(review): bo_wait_idle is not set here, whereas the Xlib winsys
+ * installs one -- confirm whether the driver ever calls it.
+ */
+void
+i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws)
+{
+ idws->base.bo_alloc = i965_libdrm_bo_alloc;
+ idws->base.bo_destroy = i965_libdrm_bo_destroy;
+ idws->base.bo_emit_reloc = i965_libdrm_bo_emit_reloc;
+ idws->base.bo_exec = i965_libdrm_bo_exec;
+ idws->base.bo_subdata = i965_libdrm_bo_subdata;
+ idws->base.bo_is_busy = i965_libdrm_bo_is_busy;
+ idws->base.bo_references = i965_libdrm_bo_references;
+ idws->base.check_aperture_space = i965_libdrm_check_aperture_space;
+ idws->base.bo_map = i965_libdrm_bo_map;
+ idws->base.bo_flush_range = i965_libdrm_bo_flush_range;
+ idws->base.bo_unmap = i965_libdrm_bo_unmap;
+}
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
new file mode 100644
index 0000000000..c6a7d4a8c5
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
@@ -0,0 +1,64 @@
+
+#ifndef INTEL_DRM_WINSYS_H
+#define INTEL_DRM_WINSYS_H
+
+#include "i965/brw_winsys.h"
+
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+
+
+/*
+ * Winsys
+ */
+
+
+/* 'base' must stay the first member: the cast helper below converts a
+ * brw_winsys_screen pointer straight to this type.
+ */
+struct i965_libdrm_winsys
+{
+ struct brw_winsys_screen base;
+ drm_intel_bufmgr *gem;
+
+ boolean send_cmd;
+
+ int fd; /**< DRM file descriptor */
+
+ unsigned id;
+};
+
+/* Downcast from the generic winsys screen; performs no checking. */
+static INLINE struct i965_libdrm_winsys *
+i965_libdrm_winsys(struct brw_winsys_screen *iws)
+{
+ return (struct i965_libdrm_winsys *)iws;
+}
+
+struct i965_libdrm_winsys *i965_libdrm_winsys_create(int fd, unsigned pci_id);
+
+void i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws);
+
+
+/* Buffer.
+ *
+ * 'base' must stay the first member: the cast helper below converts a
+ * brw_winsys_buffer pointer straight to this type.
+ */
+struct i965_libdrm_buffer {
+ struct brw_winsys_buffer base;
+
+ drm_intel_bo *bo;
+
+ void *ptr;
+ unsigned map_count;
+ unsigned data_type; /* valid while mapped */
+ unsigned tiling;
+
+ boolean map_gtt;
+ boolean flinked;
+ unsigned flink;
+};
+
+/* Downcast from the generic winsys buffer; performs no checking. */
+static INLINE struct i965_libdrm_buffer *
+i965_libdrm_buffer(struct brw_winsys_buffer *buffer)
+{
+ return (struct i965_libdrm_buffer *)buffer;
+}
+
+
+#endif
diff --git a/src/gallium/winsys/drm/i965/xlib/Makefile b/src/gallium/winsys/drm/i965/xlib/Makefile
new file mode 100644
index 0000000000..0efa0ca6f9
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/xlib/Makefile
@@ -0,0 +1,97 @@
+# src/gallium/winsys/drm/i965/xlib/Makefile
+
+# This makefile produces a "stand-alone" libGL.so which is based on
+# Xlib (no DRI HW acceleration)
+
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+
+GL_MAJOR = 1
+GL_MINOR = 5
+GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY)
+
+
+INCLUDE_DIRS = \
+ -I$(TOP)/include \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/src/mesa/main \
+ -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/gallium/drivers \
+ -I$(TOP)/src/gallium/drivers/i965 \
+ -I$(TOP)/src/gallium/drivers/i965/include \
+ -I$(TOP)/src/gallium/state_trackers/glx/xlib \
+ -I$(TOP)/src/gallium/auxiliary \
+ -I/usr/include/drm
+
+XLIB_WINSYS_SOURCES = \
+ xlib_i965.c \
+
+
+
+XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o)
+
+
+
+LIBS = \
+ $(TOP)/src/gallium/drivers/i965/libi965.a \
+ $(TOP)/src/gallium/drivers/trace/libtrace.a \
+ $(TOP)/src/gallium/state_trackers/glx/xlib/libxlib.a \
+ $(TOP)/src/mesa/libglapi.a \
+ $(TOP)/src/mesa/libmesagallium.a \
+ $(GALLIUM_AUXILIARIES)
+
+# $(TOP)/src/gallium/drivers/i965/lib/libi9xx.a \
+
+.SUFFIXES : .cpp
+
+.c.o:
+ $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDE_DIRS) $(DEFINES) $(CXXFLAGS) $< -o $@
+
+
+
+default: $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME)
+
+$(TOP)/$(LIB_DIR)/gallium:
+ @ mkdir -p $(TOP)/$(LIB_DIR)/gallium
+
+# Make the libGL.so library
+$(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) Makefile
+ $(TOP)/bin/mklib -o $(GL_LIB) \
+ -linker "$(CC)" \
+ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
+ -install $(TOP)/$(LIB_DIR)/gallium \
+ $(MKLIB_OPTIONS) $(XLIB_WINSYS_OBJECTS) \
+ -Wl,--start-group $(LIBS) -Wl,--end-group $(GL_LIB_DEPS)
+
+
+depend: $(XLIB_WINSYS_SOURCES)
+ @ echo "running $(MKDEP)"
+ @ rm -f depend # workaround oops on gutsy?!?
+ @ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(XLIB_WINSYS_SOURCES) \
+ > /dev/null 2>/dev/null
+
+
+install: default
+ $(INSTALL) -d $(INSTALL_DIR)/include/GL
+ $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
+ $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
+ @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
+ $(MINSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
+ fi
+
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
+
+clean:
+ -rm -f *.o
+
+
+include depend
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
new file mode 100644
index 0000000000..d2b9a1ab31
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -0,0 +1,522 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell
+ * Brian Paul
+ */
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "pipe/p_error.h"
+#include "pipe/p_context.h"
+
+#include "xm_winsys.h"
+
+#include "i965/brw_winsys.h"
+#include "i965/brw_screen.h"
+#include "i965/brw_reg.h"
+#include "i965/brw_structs_dump.h"
+
+#define MAX_VRAM (128*1024*1024)
+
+
+
+extern int brw_disasm (FILE *file,
+ const struct brw_instruction *inst,
+ unsigned count );
+
+extern int intel_decode(const uint32_t *data,
+ int count,
+ uint32_t hw_offset,
+ uint32_t devid);
+
+/* Buffer of the Xlib winsys: storage lives in ordinary malloc'ed memory
+ * at 'virtual', while 'offset' is the address written into relocations.
+ */
+struct xlib_brw_buffer
+{
+ struct brw_winsys_buffer base;
+ char *virtual;
+ unsigned offset;
+ unsigned type;
+ int map_count;
+ boolean modified;
+};
+
+
+/**
+ * Subclass of brw_winsys_screen for Xlib winsys
+ *
+ * 'used' is the running total from which buffer offsets are assigned.
+ */
+struct xlib_brw_winsys
+{
+ struct brw_winsys_screen base;
+ struct brw_chipset chipset;
+
+ unsigned size;
+ unsigned used;
+};
+
+/* Downcast from the generic winsys screen; performs no checking. */
+static struct xlib_brw_winsys *
+xlib_brw_winsys( struct brw_winsys_screen *screen )
+{
+ return (struct xlib_brw_winsys *)screen;
+}
+
+
+/* Downcast from the generic winsys buffer; performs no checking. */
+static struct xlib_brw_buffer *
+xlib_brw_buffer( struct brw_winsys_buffer *buffer )
+{
+ return (struct xlib_brw_buffer *)buffer;
+}
+
+
+
+/* Debug names indexed by enum brw_buffer_type. */
+const char *names[BRW_BUFFER_TYPE_MAX] = {
+ "TEXTURE",
+ "SCANOUT",
+ "VERTEX",
+ "CURBE",
+ "QUERY",
+ "SHADER_CONSTANTS",
+ "WM_SCRATCH",
+ "BATCH",
+ "GENERAL_STATE",
+ "SURFACE_STATE",
+ "PIXEL",
+ "GENERIC",
+};
+
+/* Debug names indexed by enum brw_buffer_usage. */
+const char *usages[BRW_USAGE_MAX] = {
+ "STATE",
+ "QUERY_RESULT",
+ "RENDER_TARGET",
+ "DEPTH_BUFFER",
+ "BLIT_SOURCE",
+ "BLIT_DEST",
+ "SAMPLER",
+ "VERTEX",
+ "SCRATCH"
+};
+
+
+/* Debug names indexed by enum brw_buffer_data_type. */
+const char *data_types[BRW_DATA_MAX] =
+{
+ "GS: CC_VP",
+ "GS: CC_UNIT",
+ "GS: WM_PROG",
+ "GS: SAMPLER_DEFAULT_COLOR",
+ "GS: SAMPLER",
+ "GS: WM_UNIT",
+ "GS: SF_PROG",
+ "GS: SF_VP",
+ "GS: SF_UNIT",
+ "GS: VS_UNIT",
+ "GS: VS_PROG",
+ "GS: GS_UNIT",
+ "GS: GS_PROG",
+ "GS: CLIP_VP",
+ "GS: CLIP_UNIT",
+ "GS: CLIP_PROG",
+ "SS: SURFACE",
+ "SS: SURF_BIND",
+ "CONSTANT DATA",
+ "BATCH DATA",
+ "(untyped)"
+};
+
+
+/**
+ * Winsys hook: allocate a buffer backed by malloc'ed memory.
+ *
+ * The buffer is given the next aligned offset after the winsys' running
+ * 'used' total.  That total is only advanced when the allocation succeeds,
+ * so a rejected request does not consume address space.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the request would exceed
+ *         MAX_VRAM or memory cannot be allocated.
+ */
+static enum pipe_error
+xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
+                   enum brw_buffer_type type,
+                   unsigned size,
+                   unsigned alignment,
+                   struct brw_winsys_buffer **bo_out )
+{
+   struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws);
+   struct xlib_brw_buffer *buf;
+   unsigned offset;
+   unsigned end;
+
+   if (BRW_DEBUG & DEBUG_WINSYS)
+      debug_printf("%s type %s sz %d align %d\n",
+                   __FUNCTION__, names[type], size, alignment );
+
+   buf = CALLOC_STRUCT(xlib_brw_buffer);
+   if (!buf)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Check the limit before touching the winsys state. */
+   offset = align(xbw->used, alignment);
+   end = offset + size;
+   if (end > MAX_VRAM)
+      goto err;
+
+   buf->virtual = MALLOC(size);
+   if (!buf->virtual && size)
+      goto err;
+
+   pipe_reference_init(&buf->base.reference, 1);
+
+   buf->offset = offset;
+   buf->type = type;
+   buf->base.size = size;
+   buf->base.sws = sws;
+
+   xbw->used = end;
+
+   /* XXX: possibly reentrant call to bo_destroy:
+    *
+    * NOTE(review): the DRM winsys assigns *bo_out directly; confirm that
+    * taking a second reference here on top of the initial one is intended.
+    */
+   bo_reference(bo_out, &buf->base);
+   return PIPE_OK;
+
+err:
+   assert(0);
+   /* buf->virtual has not been allocated on either path that gets here. */
+   FREE(buf);
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+/**
+ * Winsys hook: free a buffer together with its malloc'ed storage.
+ */
+static void
+xlib_brw_bo_destroy( struct brw_winsys_buffer *buffer )
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+
+   /* The storage was allocated separately in bo_alloc and would leak if
+    * only the wrapper were freed.
+    */
+   FREE(buf->virtual);
+   FREE(buf);
+}
+
+/**
+ * Winsys hook: apply a relocation immediately by writing the target's
+ * offset plus \p delta into \p buffer at byte offset \p offset.
+ *
+ * Always returns 0.
+ */
+static int
+xlib_brw_bo_emit_reloc( struct brw_winsys_buffer *buffer,
+                        enum brw_buffer_usage usage,
+                        unsigned delta,
+                        unsigned offset,
+                        struct brw_winsys_buffer *buffer2)
+{
+   struct xlib_brw_buffer *holder = xlib_brw_buffer(buffer);
+   struct xlib_brw_buffer *target = xlib_brw_buffer(buffer2);
+   uint32_t *slot;
+
+   if (BRW_DEBUG & DEBUG_WINSYS)
+      debug_printf("%s buf %p offset %x val %x + %x buf2 %p/%s/%s\n",
+                   __FUNCTION__, (void *)buffer, offset,
+                   target->offset, delta,
+                   (void *)buffer2, names[target->type], usages[usage]);
+
+   slot = (uint32_t *)(holder->virtual + offset);
+   *slot = target->offset + delta;
+
+   return 0;
+}
+
+/**
+ * Winsys hook: nothing is executed here; the call is only logged when
+ * winsys debugging is enabled.  Always returns 0.
+ */
+static int
+xlib_brw_bo_exec( struct brw_winsys_buffer *buffer,
+ unsigned bytes_used )
+{
+ if (BRW_DEBUG & DEBUG_WINSYS)
+ debug_printf("execute buffer %p, bytes %d\n", (void *)buffer, bytes_used);
+
+ return 0;
+}
+
+
+
+
+/**
+ * Winsys hook: copy data into a buffer's storage and apply relocations.
+ *
+ * Each relocation writes the target buffer's offset plus the relocation
+ * delta at (offset + reloc[i].offset) inside the destination.  The result
+ * is dumped when BRW_DUMP is set.  Always returns 0.
+ *
+ * NOTE(review): the DRM winsys passes reloc[i].offset to bo_emit_reloc
+ * without adding the upload offset -- confirm which convention is meant.
+ */
+static int
+xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
+                    enum brw_buffer_data_type data_type,
+                    size_t offset,
+                    size_t size,
+                    const void *data,
+                    const struct brw_winsys_reloc *reloc,
+                    unsigned nr_relocs)
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+   struct xlib_brw_winsys *xbw = xlib_brw_winsys(buffer->sws);
+   unsigned i;
+
+   /* offset and size are size_t; cast them so they match the %d
+    * conversions in the format string.
+    */
+   if (BRW_DEBUG & DEBUG_WINSYS)
+      debug_printf("%s buf %p off %d sz %d %s relocs: %d\n",
+                   __FUNCTION__,
+                   (void *)buffer, (int)offset, (int)size,
+                   data_types[data_type],
+                   nr_relocs);
+
+   assert(buf->base.size >= offset + size);
+   memcpy(buf->virtual + offset, data, size);
+
+   /* Apply the relocations:
+    */
+   for (i = 0; i < nr_relocs; i++) {
+      if (BRW_DEBUG & DEBUG_WINSYS)
+         debug_printf("\treloc[%d] usage %s off %d value %x+%x\n",
+                      i, usages[reloc[i].usage], reloc[i].offset,
+                      xlib_brw_buffer(reloc[i].bo)->offset, reloc[i].delta);
+
+      *(unsigned *)(buf->virtual + offset + reloc[i].offset) =
+         xlib_brw_buffer(reloc[i].bo)->offset + reloc[i].delta;
+   }
+
+   if (BRW_DUMP)
+      brw_dump_data( xbw->chipset.pci_id,
+                     data_type,
+                     buf->offset + offset,
+                     buf->virtual + offset, size );
+
+
+   return 0;
+}
+
+
+/**
+ * Winsys hook: unconditionally reports every buffer as busy.
+ */
+static boolean
+xlib_brw_bo_is_busy(struct brw_winsys_buffer *buffer)
+{
+ if (BRW_DEBUG & DEBUG_WINSYS)
+ debug_printf("%s %p\n", __FUNCTION__, (void *)buffer);
+ return TRUE;
+}
+
+/**
+ * Winsys hook: unconditionally reports that \p a references \p b.
+ */
+static boolean
+xlib_brw_bo_references(struct brw_winsys_buffer *a,
+ struct brw_winsys_buffer *b)
+{
+ if (BRW_DEBUG & DEBUG_WINSYS)
+ debug_printf("%s %p %p\n", __FUNCTION__, (void *)a, (void *)b);
+ return TRUE;
+}
+
+/**
+ * Winsys hook: sums the buffer sizes for the debug log and always reports
+ * that the buffers fit (PIPE_OK).
+ */
+static enum pipe_error
+xlib_brw_check_aperture_space( struct brw_winsys_screen *iws,
+                               struct brw_winsys_buffer **buffers,
+                               unsigned count )
+{
+   unsigned total_bytes = 0;
+   unsigned idx = 0;
+
+   while (idx < count) {
+      total_bytes += buffers[idx]->size;
+      idx++;
+   }
+
+   if (BRW_DEBUG & DEBUG_WINSYS)
+      debug_printf("%s %d bufs, tot_size: %d kb\n",
+                   __FUNCTION__, count,
+                   (total_bytes + 1023) / 1024);
+
+   return PIPE_OK;
+}
+
+/**
+ * Winsys hook: return the buffer's storage pointer and bump the map count.
+ *
+ * A write mapping marks the buffer as modified.
+ */
+static void *
+xlib_brw_bo_map(struct brw_winsys_buffer *buffer,
+                enum brw_buffer_data_type data_type,
+                unsigned offset,
+                unsigned length,
+                boolean write,
+                boolean discard,
+                boolean explicit)
+{
+   struct xlib_brw_buffer *xbuf = xlib_brw_buffer(buffer);
+
+   if (BRW_DEBUG & DEBUG_WINSYS)
+      debug_printf("%s %p %s %s\n", __FUNCTION__, (void *)buffer,
+                   write ? "read/write" : "read",
+                   write ? data_types[data_type] : "");
+
+   if (write) {
+      xbuf->modified = 1;
+   }
+
+   xbuf->map_count += 1;
+
+   return xbuf->virtual;
+}
+
+
+/**
+ * Winsys hook: intentionally empty in this winsys.
+ */
+static void
+xlib_brw_bo_flush_range( struct brw_winsys_buffer *buffer,
+ unsigned offset,
+ unsigned length )
+{
+}
+
+
+/**
+ * Winsys hook: drop one map reference and clear the modified flag once
+ * the last mapping of a written buffer goes away.
+ */
+static void
+xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer)
+{
+   struct xlib_brw_buffer *xbuf = xlib_brw_buffer(buffer);
+
+   if (BRW_DEBUG & DEBUG_WINSYS)
+      debug_printf("%s %p\n", __FUNCTION__, (void *)buffer);
+
+   xbuf->map_count -= 1;
+   assert(xbuf->map_count >= 0);
+
+   /* Nothing more to do while still mapped or if nothing was written. */
+   if (xbuf->map_count != 0 || !xbuf->modified)
+      return;
+
+   xbuf->modified = 0;
+
+   /* Consider dumping new buffer contents here, using the
+    * flush-range info to minimize verbosity.
+    */
+}
+
+
+/**
+ * Winsys hook: intentionally empty in this winsys.
+ */
+static void
+xlib_brw_bo_wait_idle( struct brw_winsys_buffer *buffer )
+{
+}
+
+
+/**
+ * Winsys destroy hook: frees the winsys struct itself.
+ */
+static void
+xlib_brw_winsys_destroy( struct brw_winsys_screen *sws )
+{
+ struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws);
+
+ FREE(xbw);
+}
+
+/**
+ * Allocate the Xlib winsys and fill in its function table.
+ *
+ * \return the base winsys screen, or NULL if allocation fails.
+ */
+static struct brw_winsys_screen *
+xlib_create_brw_winsys_screen( void )
+{
+   struct xlib_brw_winsys *xbw = CALLOC_STRUCT(xlib_brw_winsys);
+   struct brw_winsys_screen *base;
+
+   if (xbw == NULL)
+      return NULL;
+
+   xbw->used = 0;
+
+   base = &xbw->base;
+
+   base->destroy              = xlib_brw_winsys_destroy;
+   base->bo_alloc             = xlib_brw_bo_alloc;
+   base->bo_destroy           = xlib_brw_bo_destroy;
+   base->bo_emit_reloc        = xlib_brw_bo_emit_reloc;
+   base->bo_exec              = xlib_brw_bo_exec;
+   base->bo_subdata           = xlib_brw_bo_subdata;
+   base->bo_is_busy           = xlib_brw_bo_is_busy;
+   base->bo_references        = xlib_brw_bo_references;
+   base->check_aperture_space = xlib_brw_check_aperture_space;
+   base->bo_map               = xlib_brw_bo_map;
+   base->bo_flush_range       = xlib_brw_bo_flush_range;
+   base->bo_unmap             = xlib_brw_bo_unmap;
+   base->bo_wait_idle         = xlib_brw_bo_wait_idle;
+
+   return base;
+}
+
+
+/***********************************************************************
+ * Implementation of Xlib co-state-tracker's winsys interface
+ */
+
+/**
+ * xm_driver hook: nothing is displayed; the surface's offset and size are
+ * only logged when winsys debugging is enabled.  \p xm_buffer is unused.
+ */
+static void
+xlib_i965_display_surface(struct xmesa_buffer *xm_buffer,
+ struct pipe_surface *surf)
+{
+ struct brw_surface *surface = brw_surface(surf);
+ struct xlib_brw_buffer *bo = xlib_brw_buffer(surface->bo);
+
+ if (BRW_DEBUG & DEBUG_WINSYS)
+ debug_printf("%s offset %x+%x sz %dx%d\n", __FUNCTION__,
+ bo->offset,
+ surface->draw_offset,
+ surf->width,
+ surf->height);
+}
+
+/**
+ * Screen flush_frontbuffer hook: forwards to xlib_i965_display_surface()
+ * with no xmesa buffer.
+ */
+static void
+xlib_i965_flush_frontbuffer(struct pipe_screen *screen,
+ struct pipe_surface *surf,
+ void *context_private)
+{
+ xlib_i965_display_surface(NULL, surf);
+}
+
+
+/**
+ * xm_driver hook: create the Xlib winsys and an i965 screen for a fixed
+ * GM45 PCI id.
+ *
+ * The winsys is destroyed again if screen creation fails.
+ *
+ * \return the new screen, or NULL on failure.
+ */
+static struct pipe_screen *
+xlib_create_i965_screen( void )
+{
+   struct brw_winsys_screen *ws = xlib_create_brw_winsys_screen();
+   struct pipe_screen *scr;
+
+   if (ws == NULL)
+      return NULL;
+
+   scr = brw_create_screen(ws, PCI_CHIP_GM45_GM);
+   if (scr == NULL) {
+      ws->destroy( ws );
+      return NULL;
+   }
+
+   /* Keep a copy of the chipset info for use when dumping data. */
+   xlib_brw_winsys(ws)->chipset = brw_screen(scr)->chipset;
+
+   scr->flush_frontbuffer = xlib_i965_flush_frontbuffer;
+   return scr;
+}
+
+
+/**
+ * xm_driver hook: create an i965 context and attach the caller's private
+ * pointer to it.
+ *
+ * \return the new context, or NULL if creation fails.
+ */
+static struct pipe_context *
+xlib_create_i965_context( struct pipe_screen *screen,
+                          void *context_private )
+{
+   struct pipe_context *ctx = brw_create_context(screen);
+
+   if (ctx != NULL)
+      ctx->priv = context_private;
+
+   /* NULL here means creation failed; there is nothing to free. */
+   return ctx;
+}
+
+
+
+
+/* Driver table handed to the Xlib state tracker by _init() below. */
+struct xm_driver xlib_i965_driver =
+{
+ .create_pipe_screen = xlib_create_i965_screen,
+ .create_pipe_context = xlib_create_i965_context,
+ .display_surface = xlib_i965_display_surface
+};
+
+
+/* Register this driver at library load:
+ *
+ * The constructor attribute makes this run without an explicit call.
+ * NOTE(review): '_init' is a name toolchains commonly use themselves --
+ * consider a more specific one.
+ */
+static void _init( void ) __attribute__((constructor));
+static void _init( void )
+{
+ xmesa_set_driver( &xlib_i965_driver );
+}
+
+
+
+/***********************************************************************
+ *
+ * Butt-ugly hack to convince the linker not to throw away public GL
+ * symbols (they are all referenced from getprocaddress, I guess).
+ *
+ * linker_foo() is an externally visible function that simply forwards to
+ * glXGetProcAddress().
+ */
+extern void (*linker_foo(const unsigned char *procName))();
+extern void (*glXGetProcAddress(const unsigned char *procName))();
+
+extern void (*linker_foo(const unsigned char *procName))()
+{
+ return glXGetProcAddress(procName);
+}
diff --git a/src/gallium/winsys/drm/i965/xorg/Makefile b/src/gallium/winsys/drm/i965/xorg/Makefile
new file mode 100644
index 0000000000..d91d0006ef
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/xorg/Makefile
@@ -0,0 +1,57 @@
+TARGET = modesetting_drv.so
+CFILES = $(wildcard ./*.c)
+OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES))
+TOP = ../../../../../..
+
+include $(TOP)/configs/current
+
+INCLUDES = \
+ $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \
+ -I../gem \
+ -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/gallium/drivers \
+ -I$(TOP)/src/gallium/auxiliary \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/include \
+ -I$(TOP)/src/egl/main
+
+LIBS = \
+ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
+ $(TOP)/src/gallium/winsys/drm/i965/gem/libi965drm.a \
+ $(TOP)/src/gallium/drivers/i965/libi965.a \
+ $(TOP)/src/gallium/drivers/trace/libtrace.a \
+ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+ $(GALLIUM_AUXILIARIES)
+
+DRIVER_DEFINES = \
+ -DHAVE_CONFIG_H
+
+
+#############################################
+
+
+
+all default: $(TARGET)
+
+$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS)
+ $(TOP)/bin/mklib -noprefix -o $@ \
+ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel
+
+clean:
+ rm -rf $(OBJECTS) $(TARGET)
+
+install:
+ $(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+ $(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+
+
+##############################################
+
+
+.c.o:
+ $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@
+
+
+##############################################
+
+# Targets that do not name files.  This must be a rule (':'), not an
+# assignment ('='), otherwise make only defines a variable called .PHONY.
+.PHONY: all default clean install
diff --git a/src/gallium/winsys/drm/i965/xorg/intel_xorg.c b/src/gallium/winsys/drm/i965/xorg/intel_xorg.c
new file mode 100644
index 0000000000..ac691cb76b
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/xorg/intel_xorg.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ * Author: Alan Hourihane <alanh@tungstengraphics.com>
+ * Author: Jakob Bornecrantz <wallbraker@gmail.com>
+ *
+ */
+
+#include "../../../../state_trackers/xorg/xorg_winsys.h"
+
+static void intel_xorg_identify(int flags);
+static Bool intel_xorg_pci_probe(DriverPtr driver,
+ int entity_num,
+ struct pci_device *device,
+ intptr_t match_data);
+
+/* PCI match table referenced from the DriverRec below.  It has a single
+ * entry with 0x8086 in the first field and PCI_MATCH_ANY in the next
+ * three (presumably vendor / device / subvendor / subdevice -- confirm
+ * against struct pci_id_match), followed by a zeroed terminator entry.
+ */
+static const struct pci_id_match intel_xorg_device_match[] = {
+ {0x8086, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0},
+ {0, 0, 0},
+};
+
+/* Chipset name table handed to xf86PrintChipsets() by
+ * intel_xorg_identify().  One wildcard entry, then a {-1, NULL} entry
+ * closing the list.
+ */
+static SymTabRec intel_xorg_chipsets[] = {
+ {PCI_MATCH_ANY, "Intel Graphics Device"},
+ {-1, NULL}
+};
+
+/* Chipset table handed to xf86ConfigPciEntity() by
+ * intel_xorg_pci_probe().  One wildcard entry, then a {-1, -1, NULL}
+ * entry closing the list.
+ */
+static PciChipsets intel_xorg_pci_devices[] = {
+ {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL},
+ {-1, -1, NULL}
+};
+
+/* Version record for the module named "modesetting" (0.1.0, video
+ * driver class); modesettingModuleData points at it.
+ */
+static XF86ModuleVersionInfo intel_xorg_version = {
+ "modesetting",
+ MODULEVENDORSTRING,
+ MODINFOSTRING1,
+ MODINFOSTRING2,
+ XORG_VERSION_CURRENT,
+ 0, 1, 0, /* major, minor, patch */
+ ABI_CLASS_VIDEODRV,
+ ABI_VIDEODRV_VERSION,
+ MOD_CLASS_VIDEODRV,
+ {0, 0, 0, 0}
+};
+
+/*
+ * Xorg driver exported structures
+ */
+
+/* Driver record registered through xf86AddDriver() in
+ * intel_xorg_setup().  It names the driver "modesetting" and plugs in
+ * intel_xorg_identify, the xorg tracker's option list,
+ * intel_xorg_device_match and intel_xorg_pci_probe; the remaining slots
+ * are left NULL / 0.
+ */
+_X_EXPORT DriverRec modesetting = {
+ 1,
+ "modesetting",
+ intel_xorg_identify,
+ NULL,
+ xorg_tracker_available_options,
+ NULL,
+ 0,
+ NULL,
+ intel_xorg_device_match,
+ intel_xorg_pci_probe
+};
+
+static MODULESETUPPROTO(intel_xorg_setup);
+
+/* Exported module data: the version record, the setup function, and a
+ * NULL third slot (presumably the teardown hook -- intel_xorg_setup
+ * notes that there is no TearDownProc).
+ */
+_X_EXPORT XF86ModuleData modesettingModuleData = {
+ &intel_xorg_version,
+ intel_xorg_setup,
+ NULL
+};
+
+/*
+ * Xorg driver functions
+ */
+
+/*
+ * Module setup hook: registers the "modesetting" DriverRec the first
+ * time it is called.  A repeated call reports LDR_ONCEONLY through
+ * errmaj (when errmaj is non-NULL) and returns NULL.
+ */
+static pointer
+intel_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
+{
+    static Bool already_loaded = 0;
+
+    /* The module should only be loaded once, but guard against a repeat. */
+    if (already_loaded) {
+        if (errmaj)
+            *errmaj = LDR_ONCEONLY;
+        return NULL;
+    }
+
+    already_loaded = 1;
+    xf86AddDriver(&modesetting, module, HaveDriverFuncs);
+
+    /*
+     * Success has to be signalled with a non-NULL value even though
+     * this module has no TearDownProc.
+     */
+    return (pointer) 1;
+}
+
+/* Identify callback installed in the DriverRec: prints the
+ * intel_xorg_chipsets table through xf86PrintChipsets().  The flags
+ * argument is not used.
+ */
+static void
+intel_xorg_identify(int flags)
+{
+ xf86PrintChipsets("modesetting", "Driver for Modesetting Kernel Drivers",
+ intel_xorg_chipsets);
+}
+
+/*
+ * PCI probe callback installed in the DriverRec.
+ *
+ * Asks xf86ConfigPciEntity() for a screen for entity_num; when one is
+ * returned, labels it and installs the xorg state tracker's screen
+ * functions.  driver, device and match_data are not used.
+ *
+ * Returns non-zero when a screen was configured, zero otherwise.
+ */
+static Bool
+intel_xorg_pci_probe(DriverPtr driver,
+                     int entity_num, struct pci_device *device, intptr_t match_data)
+{
+    ScrnInfoPtr scrn;
+
+    scrn = xf86ConfigPciEntity(NULL, 0, entity_num, intel_xorg_pci_devices,
+                               NULL, NULL, NULL, NULL, NULL);
+    if (scrn != NULL) {
+        scrn->driverVersion = 1;
+        scrn->driverName = "i965";
+        scrn->name = "modesetting";
+        scrn->Probe = NULL;
+
+        /* Use all the functions from the xorg tracker */
+        xorg_tracker_set_functions(scrn);
+    }
+    return scrn != NULL;
+}
diff --git a/src/gallium/winsys/drm/intel/dri/Makefile b/src/gallium/winsys/drm/intel/dri/Makefile
index c0ecd9680e..26aae4122e 100644
--- a/src/gallium/winsys/drm/intel/dri/Makefile
+++ b/src/gallium/winsys/drm/intel/dri/Makefile
@@ -24,4 +24,3 @@ DRI_LIB_DEPS += -ldrm_intel
symlinks: $(TOP)/$(LIB_DIR)/gallium
@rm -f $(TOP)/$(LIB_DIR)/gallium/i965_dri.so
- ln -s i915_dri.so $(TOP)/$(LIB_DIR)/gallium/i965_dri.so
diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript
index b1b654d9f8..104e987083 100644
--- a/src/gallium/winsys/drm/intel/dri/SConscript
+++ b/src/gallium/winsys/drm/intel/dri/SConscript
@@ -15,6 +15,6 @@ drivers = [
env.LoadableModule(
target ='i915_dri.so',
source = COMMON_GALLIUM_SOURCES,
- LIBS = drivers + mesa + auxiliaries + env['LIBS'],
+ LIBS = drivers + mesa + gallium + env['LIBS'],
SHLIBPREFIX = '',
)
diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
index 9ed570ff6e..5ed2a10af1 100644
--- a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
+++ b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
@@ -32,6 +32,7 @@ intel_drm_get_device_id(unsigned int *device_id)
}
shutup_gcc = fgets(path, sizeof(path), file);
+ (void) shutup_gcc;
sscanf(path, "%x", device_id);
fclose(file);
}
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index dec7c06503..05194fc52a 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -206,7 +206,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api,
int retval, fd;
struct drm_gem_flink flink;
struct radeon_pipe_buffer* radeon_buffer;
- struct pipe_buffer *buffer;
+ struct pipe_buffer *buffer = NULL;
if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) {
return FALSE;
@@ -239,7 +239,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api,
unsigned *stride,
unsigned *handle)
{
- struct pipe_buffer *buffer;
+ struct pipe_buffer *buffer = NULL;
if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) {
return FALSE;
}
diff --git a/src/gallium/winsys/drm/radeon/dri/SConscript b/src/gallium/winsys/drm/radeon/dri/SConscript
index aea987a3ac..c4989d1b59 100644
--- a/src/gallium/winsys/drm/radeon/dri/SConscript
+++ b/src/gallium/winsys/drm/radeon/dri/SConscript
@@ -13,5 +13,5 @@ drivers = [
env.SharedLibrary(
target ='radeon_dri.so',
source = COMMON_GALLIUM_SOURCES,
- LIBS = st_dri + radeonwinsys + mesa + drivers + auxiliaries + env['LIBS'],
+ LIBS = st_dri + radeonwinsys + mesa + drivers + gallium + env['LIBS'],
)
diff --git a/src/gallium/winsys/drm/radeon/python/SConscript b/src/gallium/winsys/drm/radeon/python/SConscript
index 3200fd8d1b..91cae98697 100644
--- a/src/gallium/winsys/drm/radeon/python/SConscript
+++ b/src/gallium/winsys/drm/radeon/python/SConscript
@@ -29,5 +29,5 @@ if env['platform'] == 'linux':
env.SharedLibrary(
target ='_gallium',
source = sources,
- LIBS = [pyst] + drivers + auxiliaries + env['LIBS'],
+ LIBS = [pyst] + drivers + gallium + env['LIBS'],
)
diff --git a/src/gallium/winsys/drm/radeon/xorg/Makefile b/src/gallium/winsys/drm/radeon/xorg/Makefile
index 9fa16dab24..0eb1b3988f 100644
--- a/src/gallium/winsys/drm/radeon/xorg/Makefile
+++ b/src/gallium/winsys/drm/radeon/xorg/Makefile
@@ -1,11 +1,16 @@
-TARGET = modesetting_drv.so
-CFILES = $(wildcard ./*.c)
-OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES))
-GALLIUMDIR = ../../../..
TOP = ../../../../../..
+
+GALLIUMDIR = $(TOP)/src/gallium
+
+TARGET = radeong_drv.so
+
+CFILES = $(wildcard ./*.c)
+
include ${TOP}/configs/current
+OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES))
+
CFLAGS = -DHAVE_CONFIG_H \
-g -Wall -Wimplicit-function-declaration -fPIC \
$(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \
@@ -24,16 +29,21 @@ LIBS = \
$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
$(GALLIUM_AUXILIARIES)
+TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET)
#############################################
+all default: $(TARGET) $(TARGET_STAGING)
-
-all default: $(TARGET)
-
-$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a
+$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS)
$(TOP)/bin/mklib -noprefix -o $@ \
$(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_radeon
+$(TOP)/$(LIB_DIR)/gallium:
+ mkdir -p $@
+
+$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium
+ $(INSTALL) $(TARGET) $(TOP)/$(LIB_DIR)/gallium
+
clean:
rm -rf $(OBJECTS) $(TARGET)
diff --git a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c
index 837f2aa8fe..bb76cc0349 100644
--- a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c
+++ b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c
@@ -53,7 +53,7 @@ static PciChipsets radeon_xorg_pci_devices[] = {
};
static XF86ModuleVersionInfo radeon_xorg_version = {
- "modesetting",
+ "radeong",
MODULEVENDORSTRING,
MODINFOSTRING1,
MODINFOSTRING2,
@@ -69,9 +69,9 @@ static XF86ModuleVersionInfo radeon_xorg_version = {
* Xorg driver exported structures
*/
-_X_EXPORT DriverRec modesetting = {
+_X_EXPORT DriverRec radeong = {
1,
- "modesetting",
+ "radeong",
radeon_xorg_identify,
NULL,
xorg_tracker_available_options,
@@ -84,7 +84,7 @@ _X_EXPORT DriverRec modesetting = {
static MODULESETUPPROTO(radeon_xorg_setup);
-_X_EXPORT XF86ModuleData modesettingModuleData = {
+_X_EXPORT XF86ModuleData radeongModuleData = {
&radeon_xorg_version,
radeon_xorg_setup,
NULL
@@ -103,7 +103,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
*/
if (!setupDone) {
setupDone = 1;
- xf86AddDriver(&modesetting, module, HaveDriverFuncs);
+ xf86AddDriver(&radeong, module, HaveDriverFuncs);
/*
* The return value must be non-NULL on success even though there
@@ -120,7 +120,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
static void
radeon_xorg_identify(int flags)
{
- xf86PrintChipsets("modesetting", "Driver for Modesetting Kernel Drivers",
+ xf86PrintChipsets("radeong", "Driver for Radeon Gallium with KMS",
radeon_xorg_chipsets);
}
@@ -135,8 +135,8 @@ radeon_xorg_pci_probe(DriverPtr driver,
NULL, NULL, NULL, NULL, NULL);
if (scrn != NULL) {
scrn->driverVersion = 1;
- scrn->driverName = "radeon";
- scrn->name = "modesetting";
+ scrn->driverName = "radeong";
+ scrn->name = "radeong";
scrn->Probe = NULL;
entity = xf86GetEntityInfo(entity_num);
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
index 51e455f925..ccd0b418a1 100644
--- a/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
@@ -468,6 +468,15 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
VMW_FUNC;
memset(&gp_arg, 0, sizeof(gp_arg));
+ gp_arg.param = DRM_VMW_PARAM_3D;
+ ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+ &gp_arg, sizeof(gp_arg));
+ if (ret || gp_arg.value == 0) {
+ debug_printf("No 3D enabled (%i, %s)\n", ret, strerror(-ret));
+ goto out_err1;
+ }
+
+ memset(&gp_arg, 0, sizeof(gp_arg));
gp_arg.param = DRM_VMW_PARAM_FIFO_OFFSET;
ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
&gp_arg, sizeof(gp_arg));
diff --git a/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h
index 89bbf17ce9..2be7e1249b 100644
--- a/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h
+++ b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h
@@ -25,28 +25,32 @@
*
**************************************************************************/
-#ifndef _VMWGFX_DRM_H_
-#define _VMWGFX_DRM_H_
+#ifndef __VMWGFX_DRM_H__
+#define __VMWGFX_DRM_H__
#define DRM_VMW_MAX_SURFACE_FACES 6
#define DRM_VMW_MAX_MIP_LEVELS 24
#define DRM_VMW_EXT_NAME_LEN 128
-#define DRM_VMW_GET_PARAM 1
-#define DRM_VMW_EXTENSION 2
-#define DRM_VMW_CREATE_CONTEXT 3
-#define DRM_VMW_UNREF_CONTEXT 4
-#define DRM_VMW_CREATE_SURFACE 5
-#define DRM_VMW_UNREF_SURFACE 6
-#define DRM_VMW_REF_SURFACE 7
-#define DRM_VMW_EXECBUF 8
-#define DRM_VMW_ALLOC_DMABUF 9
-#define DRM_VMW_UNREF_DMABUF 10
-#define DRM_VMW_FIFO_DEBUG 11
-#define DRM_VMW_FENCE_WAIT 12
-#define DRM_VMW_OVERLAY 13
-#define DRM_VMW_CURSOR_BYPASS 14
+#define DRM_VMW_GET_PARAM 0
+#define DRM_VMW_ALLOC_DMABUF 1
+#define DRM_VMW_UNREF_DMABUF 2
+#define DRM_VMW_CURSOR_BYPASS 3
+/* guarded by DRM_VMW_PARAM_NUM_STREAMS != 0*/
+#define DRM_VMW_CONTROL_STREAM 4
+#define DRM_VMW_CLAIM_STREAM 5
+#define DRM_VMW_UNREF_STREAM 6
+/* guarded by DRM_VMW_PARAM_3D == 1 */
+#define DRM_VMW_CREATE_CONTEXT 7
+#define DRM_VMW_UNREF_CONTEXT 8
+#define DRM_VMW_CREATE_SURFACE 9
+#define DRM_VMW_UNREF_SURFACE 10
+#define DRM_VMW_REF_SURFACE 11
+#define DRM_VMW_EXECBUF 12
+#define DRM_VMW_FIFO_DEBUG 13
+#define DRM_VMW_FENCE_WAIT 14
+
/*************************************************************************/
/**
@@ -60,8 +64,11 @@
* Does the driver support the overlay ioctl.
*/
-#define DRM_VMW_PARAM_FIFO_OFFSET 0
-#define DRM_VMW_PARAM_OVERLAY_IOCTL 1
+#define DRM_VMW_PARAM_NUM_STREAMS 0
+#define DRM_VMW_PARAM_NUM_FREE_STREAMS 1
+#define DRM_VMW_PARAM_3D 2
+#define DRM_VMW_PARAM_FIFO_OFFSET 3
+
/**
* struct drm_vmw_getparam_arg
@@ -444,7 +451,7 @@ struct drm_vmw_fence_wait_arg {
/*************************************************************************/
/**
- * DRM_VMW_OVERLAY - Control overlays.
+ * DRM_VMW_CONTROL_STREAM - Control overlays, aka streams.
*
* This IOCTL controls the overlay units of the svga device.
* The SVGA overlay units does not work like regular hardware units in
@@ -469,7 +476,7 @@ struct drm_vmw_rect {
};
/**
- * struct drm_vmw_overlay_arg
+ * struct drm_vmw_control_stream_arg
*
* @stream_id: Stearm to control
* @enabled: If false all following arguments are ignored.
@@ -483,10 +490,10 @@ struct drm_vmw_rect {
* @src: Source rect, must be within the defined area above.
* @dst: Destination rect, x and y may be negative.
*
- * Argument to the DRM_VMW_OVERLAY Ioctl.
+ * Argument to the DRM_VMW_CONTROL_STREAM Ioctl.
*/
-struct drm_vmw_overlay_arg {
+struct drm_vmw_control_stream_arg {
uint32_t stream_id;
uint32_t enabled;
@@ -537,4 +544,31 @@ struct drm_vmw_cursor_bypass_arg {
int32_t yhot;
};
+/*************************************************************************/
+/**
+ * DRM_VMW_CLAIM_STREAM - Claim a single stream.
+ */
+
+/**
+ * struct drm_vmw_stream_arg
+ *
+ * @stream_id: Device unique stream ID.
+ *
+ * Output argument to the DRM_VMW_CLAIM_STREAM Ioctl.
+ * Input argument to the DRM_VMW_UNREF_STREAM Ioctl.
+ */
+
+struct drm_vmw_stream_arg {
+ uint32_t stream_id;
+ uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_UNREF_STREAM - Unclaim a stream.
+ *
+ * Return a single stream that was claimed by this process. Also makes
+ * sure that the stream has been stopped.
+ */
+
#endif
diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript
index 1019f577a5..84319f91ff 100644
--- a/src/gallium/winsys/drm/vmware/dri/SConscript
+++ b/src/gallium/winsys/drm/vmware/dri/SConscript
@@ -48,7 +48,7 @@ if env['platform'] == 'linux':
svgadrm,
svga,
mesa,
- auxiliaries,
+ gallium,
])
# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript
index b8968e7137..1e5d8ff7fe 100644
--- a/src/gallium/winsys/drm/vmware/xorg/SConscript
+++ b/src/gallium/winsys/drm/vmware/xorg/SConscript
@@ -38,12 +38,13 @@ if env['platform'] == 'linux':
st_xorg,
svgadrm,
svga,
- auxiliaries,
+ gallium,
])
sources = [
'vmw_ioctl.c',
'vmw_screen.c',
+ 'vmw_video.c',
'vmw_xorg.c',
]
diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h b/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h
index 7265f767a5..3efe851a4b 100644
--- a/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h
+++ b/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h
@@ -73,8 +73,6 @@ void vmw_video_stop_all(ScrnInfoPtr pScrn, struct vmw_driver *vmw);
* vmw_ioctl.c
*/
-int vmw_ioctl_supports_overlay(struct vmw_driver *vmw);
-
int vmw_ioctl_cursor_bypass(struct vmw_driver *vmw, int xhot, int yhot);
struct vmw_dma_buffer * vmw_ioctl_buffer_create(struct vmw_driver *vmw,
@@ -90,5 +88,14 @@ void vmw_ioctl_buffer_unmap(struct vmw_driver *vmw,
void vmw_ioctl_buffer_destroy(struct vmw_driver *vmw,
struct vmw_dma_buffer *buf);
+int vmw_ioctl_supports_streams(struct vmw_driver *vmw);
+
+int vmw_ioctl_num_streams(struct vmw_driver *vmw,
+ uint32_t *ntot, uint32_t *nfree);
+
+int vmw_ioctl_unref_stream(struct vmw_driver *vmw, uint32_t stream_id);
+
+int vmw_ioctl_claim_stream(struct vmw_driver *vmw, uint32_t *out);
+
#endif
diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c b/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c
index 0d1a0fcee6..ab2b5fadc4 100644
--- a/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c
+++ b/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c
@@ -75,12 +75,12 @@ vmw_ioctl_get_param(struct vmw_driver *vmw, uint32_t param, uint64_t *out)
}
int
-vmw_ioctl_supports_overlay(struct vmw_driver *vmw)
+vmw_ioctl_supports_streams(struct vmw_driver *vmw)
{
uint64_t value;
int ret;
- ret = vmw_ioctl_get_param(vmw, DRM_VMW_PARAM_OVERLAY_IOCTL, &value);
+ ret = vmw_ioctl_get_param(vmw, DRM_VMW_PARAM_NUM_STREAMS, &value);
if (ret)
return ret;
@@ -88,6 +88,58 @@ vmw_ioctl_supports_overlay(struct vmw_driver *vmw)
}
int
+vmw_ioctl_num_streams(struct vmw_driver *vmw,
+                      uint32_t *ntot, uint32_t *nfree)
+{
+    /*
+     * Query the total and the free stream counts.  The outputs are
+     * written only when both queries succeed; otherwise the failing
+     * query's return value is passed back unchanged.
+     */
+    uint64_t total, avail;
+    int err = vmw_ioctl_get_param(vmw, DRM_VMW_PARAM_NUM_STREAMS, &total);
+
+    if (!err)
+        err = vmw_ioctl_get_param(vmw, DRM_VMW_PARAM_NUM_FREE_STREAMS, &avail);
+    if (err)
+        return err;
+
+    *ntot = (uint32_t)total;
+    *nfree = (uint32_t)avail;
+    return 0;
+}
+
+/**
+ * Claim a stream through the DRM_VMW_CLAIM_STREAM ioctl.
+ *
+ * On success the claimed stream's id is stored in *out and 0 is
+ * returned.  Any ioctl failure is reported as -1 and *out is left
+ * untouched.
+ */
+int
+vmw_ioctl_claim_stream(struct vmw_driver *vmw, uint32_t *out)
+{
+    struct drm_vmw_stream_arg claimed;
+
+    if (drmCommandRead(vmw->fd, DRM_VMW_CLAIM_STREAM,
+                       &claimed, sizeof(claimed)) != 0)
+        return -1;
+
+    *out = claimed.stream_id;
+    return 0;
+}
+
+/**
+ * Give back a stream previously obtained from vmw_ioctl_claim_stream().
+ *
+ * @vmw: driver instance whose DRM fd is used for the ioctl.
+ * @stream_id: id of the stream to release.
+ *
+ * Returns 0 on success, otherwise the value returned by
+ * drmCommandWrite().
+ */
+int
+vmw_ioctl_unref_stream(struct vmw_driver *vmw, uint32_t stream_id)
+{
+    struct drm_vmw_stream_arg s_arg;
+
+    memset(&s_arg, 0, sizeof(s_arg));
+    s_arg.stream_id = stream_id;
+
+    /* stream_id is an input to the unref ioctl, so send it with a write
+     * command rather than reading a result back.
+     */
+    return drmCommandWrite(vmw->fd, DRM_VMW_UNREF_STREAM,
+                           &s_arg, sizeof(s_arg));
+}
+
+int
vmw_ioctl_cursor_bypass(struct vmw_driver *vmw, int xhot, int yhot)
{
struct drm_vmw_cursor_bypass_arg arg;
diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_video.c b/src/gallium/winsys/drm/vmware/xorg/vmw_video.c
index 5674e4f352..b065b96346 100644
--- a/src/gallium/winsys/drm/vmware/xorg/vmw_video.c
+++ b/src/gallium/winsys/drm/vmware/xorg/vmw_video.c
@@ -273,11 +273,17 @@ vmw_video_init(ScrnInfoPtr pScrn, struct vmw_driver *vmw)
XF86VideoAdaptorPtr *overlayAdaptors, *newAdaptors = NULL;
XF86VideoAdaptorPtr newAdaptor = NULL;
int numAdaptors;
+ unsigned int ntot, nfree;
debug_printf("%s: enter\n", __func__);
- if (vmw_ioctl_supports_overlay(vmw) != 0) {
- debug_printf("No overlay ioctl support\n");
+ if (vmw_ioctl_num_streams(vmw, &ntot, &nfree) != 0) {
+ debug_printf("No stream ioctl support\n");
+ return FALSE;
+ }
+
+ if (nfree == 0) {
+ debug_printf("No free streams\n");
return FALSE;
}
@@ -353,6 +359,7 @@ vmw_video_close(ScrnInfoPtr pScrn, struct vmw_driver *vmw)
for (i = 0; i < VMWARE_VID_NUM_PORTS; ++i) {
/* make sure the port is stoped as well */
vmw_xv_stop_video(pScrn, &video->port[i], TRUE);
+ vmw_ioctl_unref_stream(vmw, video->port[i].streamId);
}
/* XXX: I'm sure this function is missing code for turning off Xv */
@@ -448,7 +455,7 @@ vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_driver *vmw)
adaptor->pPortPrivates = video->port_ptr;
for (i = 0; i < VMWARE_VID_NUM_PORTS; ++i) {
- video->port[i].streamId = i;
+ vmw_ioctl_claim_stream(vmw, &video->port[i].streamId);
video->port[i].play = vmw_video_port_init;
video->port[i].flags = SVGA_VIDEO_FLAG_COLORKEY;
video->port[i].colorKey = VMWARE_VIDEO_COLORKEY;
@@ -577,7 +584,7 @@ vmw_video_port_play(ScrnInfoPtr pScrn, struct vmw_video_port *port,
short height, RegionPtr clipBoxes)
{
struct vmw_driver *vmw = vmw_driver(pScrn);
- struct drm_vmw_overlay_arg arg;
+ struct drm_vmw_control_stream_arg arg;
unsigned short w, h;
int size;
int ret;
@@ -636,7 +643,7 @@ vmw_video_port_play(ScrnInfoPtr pScrn, struct vmw_video_port *port,
}
}
- ret = drmCommandWrite(vmw->fd, DRM_VMW_OVERLAY, &arg, sizeof(arg));
+ ret = drmCommandWrite(vmw->fd, DRM_VMW_CONTROL_STREAM, &arg, sizeof(arg));
if (ret) {
vmw_video_port_cleanup(pScrn, port);
return XvBadAlloc;
@@ -846,7 +853,7 @@ vmw_xv_stop_video(ScrnInfoPtr pScrn, pointer data, Bool cleanup)
{
struct vmw_driver *vmw = vmw_driver(pScrn);
struct vmw_video_port *port = data;
- struct drm_vmw_overlay_arg arg;
+ struct drm_vmw_control_stream_arg arg;
int ret;
debug_printf("%s: cleanup is %s\n", __func__, cleanup ? "TRUE" : "FALSE");
@@ -862,7 +869,7 @@ vmw_xv_stop_video(ScrnInfoPtr pScrn, pointer data, Bool cleanup)
arg.stream_id = port->streamId;
arg.enabled = FALSE;
- ret = drmCommandWrite(vmw->fd, DRM_VMW_OVERLAY, &arg, sizeof(arg));
+ ret = drmCommandWrite(vmw->fd, DRM_VMW_CONTROL_STREAM, &arg, sizeof(arg));
assert(ret == 0);
vmw_video_port_cleanup(pScrn, port);
diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile
index 2997f6b79c..3965bd949f 100644
--- a/src/gallium/winsys/g3dvl/nouveau/Makefile
+++ b/src/gallium/winsys/g3dvl/nouveau/Makefile
@@ -19,11 +19,7 @@ CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \
LDFLAGS += -L${DRMDIR}/lib \
-L${DRIDIR}/lib \
-L${GALLIUMDIR}/winsys/drm/nouveau/common \
- -L${GALLIUMDIR}/auxiliary/draw \
- -L${GALLIUMDIR}/auxiliary/tgsi \
- -L${GALLIUMDIR}/auxiliary/translate \
- -L${GALLIUMDIR}/auxiliary/rtasm \
- -L${GALLIUMDIR}/auxiliary/cso_cache \
+ -L${GALLIUMDIR}/auxiliary \
-L${GALLIUMDIR}/drivers/nv04 \
-L${GALLIUMDIR}/drivers/nv10 \
-L${GALLIUMDIR}/drivers/nv20 \
@@ -31,7 +27,7 @@ LDFLAGS += -L${DRMDIR}/lib \
-L${GALLIUMDIR}/drivers/nv40 \
-L${GALLIUMDIR}/drivers/nv50
-LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm
+LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -lgallium -lm
#############################################
diff --git a/src/gallium/winsys/g3dvl/xlib/Makefile b/src/gallium/winsys/g3dvl/xlib/Makefile
index cf765ef51a..9877660a27 100644
--- a/src/gallium/winsys/g3dvl/xlib/Makefile
+++ b/src/gallium/winsys/g3dvl/xlib/Makefile
@@ -25,13 +25,7 @@ SOURCES = xsp_winsys.c
OBJECTS = $(SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
LIBS = $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
- $(TOP)/src/gallium/auxiliary/vl/libvl.a \
- $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \
- $(TOP)/src/gallium/auxiliary/draw/libdraw.a \
- $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \
- $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \
- $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \
- $(TOP)/src/gallium/auxiliary/util/libutil.a
+ $(TOP)/src/gallium/auxiliary/libgallium.a
.c.o:
$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript
index 74f6b2fd47..4cbc86f331 100644
--- a/src/gallium/winsys/gdi/SConscript
+++ b/src/gallium/winsys/gdi/SConscript
@@ -42,8 +42,10 @@ if env['platform'] == 'windows':
drivers += [trace]
+ env['no_import_lib'] = 1
+
env.SharedLibrary(
target ='opengl32',
source = sources,
- LIBS = wgl + glapi + mesa + drivers + auxiliaries + glsl + env['LIBS'],
+ LIBS = wgl + glapi + mesa + drivers + gallium + glsl + env['LIBS'],
)
diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile
index 3dc38a78e4..9482e8f9b1 100644
--- a/src/gallium/winsys/xlib/Makefile
+++ b/src/gallium/winsys/xlib/Makefile
@@ -23,17 +23,14 @@ INCLUDE_DIRS = \
-I$(TOP)/src/gallium/auxiliary
DEFINES += \
- -DGALLIUM_SOFTPIPE \
- -DGALLIUM_TRACE \
- -DGALLIUM_BRW
+ -DGALLIUM_SOFTPIPE
#-DGALLIUM_CELL will be defined by the config */
XLIB_WINSYS_SOURCES = \
xlib.c \
xlib_cell.c \
xlib_llvmpipe.c \
- xlib_softpipe.c \
- xlib_trace.c
+ xlib_softpipe.c
XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o)
@@ -84,11 +81,11 @@ depend: $(XLIB_WINSYS_SOURCES)
install: default
- $(INSTALL) -d $(INSTALL_DIR)/include/GL
- $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
- $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
+ $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/GL
+ $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR)
+ $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(DESTDIR)$(INSTALL_DIR)/include/GL
@if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
- $(MINSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
+ $(MINSTALL) $(TOP)/$(LIB_DIR)/libGL* $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR); \
fi
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
index dfe550f733..713841aeb1 100644
--- a/src/gallium/winsys/xlib/SConscript
+++ b/src/gallium/winsys/xlib/SConscript
@@ -22,7 +22,7 @@ if env['platform'] == 'linux' \
'xlib.c',
]
- drivers = []
+ drivers = [trace]
if 'softpipe' in env['drivers']:
env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE')
@@ -42,16 +42,11 @@ if env['platform'] == 'linux' \
sources += ['xlib_cell.c']
drivers += [cell]
- if 'trace' in env['drivers']:
- env.Append(CPPDEFINES = 'GALLIUM_TRACE')
- sources += ['xlib_trace.c']
- drivers += [trace]
-
# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
libgl = env.SharedLibrary(
target ='GL',
source = sources,
- LIBS = st_xlib + glapi + mesa + drivers + auxiliaries + env['LIBS'],
+ LIBS = st_xlib + glapi + mesa + glsl + drivers + gallium + env['LIBS'],
)
env.InstallSharedLibrary(libgl, version=(1, 5))
diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c
index 163cc8863c..6dbe05f193 100644
--- a/src/gallium/winsys/xlib/xlib.c
+++ b/src/gallium/winsys/xlib/xlib.c
@@ -42,7 +42,6 @@
*/
enum mode {
- MODE_TRACE,
MODE_CELL,
MODE_LLVMPIPE,
MODE_SOFTPIPE
@@ -51,9 +50,6 @@ enum mode {
static enum mode get_mode()
{
- if (getenv("XMESA_TRACE"))
- return MODE_TRACE;
-
#ifdef GALLIUM_CELL
if (!getenv("GALLIUM_NOCELL"))
return MODE_CELL;
@@ -73,11 +69,6 @@ static void _init( void )
enum mode xlib_mode = get_mode();
switch (xlib_mode) {
- case MODE_TRACE:
-#if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE)
- xmesa_set_driver( &xlib_trace_driver );
-#endif
- break;
case MODE_CELL:
#if defined(GALLIUM_CELL)
xmesa_set_driver( &xlib_cell_driver );
diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h
index f0855035f7..8e091d0c08 100644
--- a/src/gallium/winsys/xlib/xlib.h
+++ b/src/gallium/winsys/xlib/xlib.h
@@ -5,7 +5,6 @@
#include "pipe/p_compiler.h"
#include "xm_winsys.h"
-extern struct xm_driver xlib_trace_driver;
extern struct xm_driver xlib_softpipe_driver;
extern struct xm_driver xlib_llvmpipe_driver;
extern struct xm_driver xlib_cell_driver;
diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c
new file mode 100644
index 0000000000..fc9addd09e
--- /dev/null
+++ b/src/gallium/winsys/xlib/xlib_brw_context.c
@@ -0,0 +1,209 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell
+ * Brian Paul
+ */
+
+
+/* #include "glxheader.h" */
+/* #include "xmesaP.h" */
+
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "i965simple/brw_winsys.h"
+#include "xlib_brw_aub.h"
+#include "xlib_brw.h"
+
+
+
+
+#define XBCWS_BATCHBUFFER_SIZE 1024
+
+
+/* The backend to the brw driver (ie struct brw_winsys) is actually a
+ * per-context entity.
+ */
+struct xlib_brw_context_winsys {
+ struct brw_winsys brw_context_winsys; /**< batch buffer funcs */
+ struct aub_context *aub;
+
+ struct pipe_winsys *pipe_winsys;
+
+ unsigned batch_data[XBCWS_BATCHBUFFER_SIZE];
+ unsigned batch_nr;
+ unsigned batch_size;
+ unsigned batch_alloc;
+};
+
+
+/* Turn a brw_winsys into an xlib_brw_context_winsys:
+ */
+static inline struct xlib_brw_context_winsys *
+xlib_brw_context_winsys( struct brw_winsys *sws )
+{
+ return (struct xlib_brw_context_winsys *)sws;
+}
+
+
+/* Simple batchbuffer interface:
+ */
+
+static unsigned *xbcws_batch_start( struct brw_winsys *sws,
+ unsigned dwords,
+ unsigned relocs )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ if (xbcws->batch_size < xbcws->batch_nr + dwords)
+ return NULL;
+
+ xbcws->batch_alloc = xbcws->batch_nr + dwords;
+ return (void *)1; /* not a valid pointer! */
+}
+
+static void xbcws_batch_dword( struct brw_winsys *sws,
+ unsigned dword )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ assert(xbcws->batch_nr < xbcws->batch_alloc);
+ xbcws->batch_data[xbcws->batch_nr++] = dword;
+}
+
+static void xbcws_batch_reloc( struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned access_flags,
+ unsigned delta )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ assert(xbcws->batch_nr < xbcws->batch_alloc);
+ xbcws->batch_data[xbcws->batch_nr++] =
+ ( xlib_brw_get_buffer_offset( NULL, buf, access_flags ) +
+ delta );
+}
+
+static void xbcws_batch_end( struct brw_winsys *sws )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ assert(xbcws->batch_nr <= xbcws->batch_alloc);
+ xbcws->batch_alloc = 0;
+}
+
+static void xbcws_batch_flush( struct brw_winsys *sws,
+ struct pipe_fence_handle **fence )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+ assert(xbcws->batch_nr <= xbcws->batch_size);
+
+ if (xbcws->batch_nr) {
+ xlib_brw_commands_aub( xbcws->pipe_winsys,
+ xbcws->batch_data,
+ xbcws->batch_nr );
+ }
+
+ xbcws->batch_nr = 0;
+}
+
+
+
+/* Really a per-device function, just pass through:
+ */
+static unsigned xbcws_get_buffer_offset( struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned access_flags )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ return xlib_brw_get_buffer_offset( xbcws->pipe_winsys,
+ buf,
+ access_flags );
+}
+
+
+/* Really a per-device function, just pass through:
+ */
+static void xbcws_buffer_subdata_typed( struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned data_type )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ xlib_brw_buffer_subdata_typed( xbcws->pipe_winsys,
+ buf,
+ offset,
+ size,
+ data,
+ data_type );
+}
+
+
+/**
+ * Create i965 hardware rendering context, but plugged into a
+ * dump-to-aubfile backend.
+ */
+struct pipe_context *
+xlib_create_brw_context( struct pipe_screen *screen,
+ void *unused )
+{
+ struct xlib_brw_context_winsys *xbcws = CALLOC_STRUCT( xlib_brw_context_winsys );
+
+ /* Fill in this struct with callbacks that i965simple will need to
+ * communicate with the window system, buffer manager, etc.
+ */
+ xbcws->brw_context_winsys.batch_start = xbcws_batch_start;
+ xbcws->brw_context_winsys.batch_dword = xbcws_batch_dword;
+ xbcws->brw_context_winsys.batch_reloc = xbcws_batch_reloc;
+ xbcws->brw_context_winsys.batch_end = xbcws_batch_end;
+ xbcws->brw_context_winsys.batch_flush = xbcws_batch_flush;
+ xbcws->brw_context_winsys.buffer_subdata_typed = xbcws_buffer_subdata_typed;
+ xbcws->brw_context_winsys.get_buffer_offset = xbcws_get_buffer_offset;
+
+ xbcws->pipe_winsys = screen->winsys; /* redundant */
+
+ xbcws->batch_size = XBCWS_BATCHBUFFER_SIZE;
+
+ /* Create the i965simple context:
+ */
+#ifdef GALLIUM_CELL
+ return NULL;
+#else
+ return brw_create( screen,
+ &xbcws->brw_context_winsys,
+ 0 );
+#endif
+}
diff --git a/src/gallium/winsys/xlib/xlib_trace.c b/src/gallium/winsys/xlib/xlib_trace.c
deleted file mode 100644
index dbea655ab4..0000000000
--- a/src/gallium/winsys/xlib/xlib_trace.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-
-/*
- * Authors:
- * Keith Whitwell
- * Brian Paul
- */
-
-
-#include "xlib.h"
-
-#include "trace/tr_screen.h"
-#include "trace/tr_context.h"
-#include "trace/tr_texture.h"
-
-#include "pipe/p_screen.h"
-
-
-
-static struct pipe_screen *
-xlib_create_trace_screen( void )
-{
- struct pipe_screen *screen, *trace_screen;
-
- screen = xlib_softpipe_driver.create_pipe_screen();
- if (screen == NULL)
- goto fail;
-
- /* Wrap it:
- */
- trace_screen = trace_screen_create(screen);
- if (trace_screen == NULL)
- goto fail;
-
- return trace_screen;
-
-fail:
- if (screen)
- screen->destroy( screen );
- return NULL;
-}
-
-static struct pipe_context *
-xlib_create_trace_context( struct pipe_screen *_screen,
- void *priv )
-{
- struct trace_screen *tr_scr = trace_screen( _screen );
- struct pipe_screen *screen = tr_scr->screen;
- struct pipe_context *pipe, *trace_pipe;
-
- pipe = xlib_softpipe_driver.create_pipe_context( screen, priv );
- if (pipe == NULL)
- goto fail;
-
- /* Wrap it:
- */
- trace_pipe = trace_context_create(_screen, pipe);
- if (trace_pipe == NULL)
- goto fail;
-
- trace_pipe->priv = priv;
-
- return trace_pipe;
-
-fail:
- if (pipe)
- pipe->destroy( pipe );
- return NULL;
-}
-
-static void
-xlib_trace_display_surface( struct xmesa_buffer *buffer,
- struct pipe_surface *_surf )
-{
- struct trace_surface *tr_surf = trace_surface( _surf );
- struct pipe_surface *surf = tr_surf->surface;
-
- xlib_softpipe_driver.display_surface( buffer, surf );
-}
-
-
-struct xm_driver xlib_trace_driver =
-{
- .create_pipe_screen = xlib_create_trace_screen,
- .create_pipe_context = xlib_create_trace_context,
- .display_surface = xlib_trace_display_surface,
-};