From b642730be93149baa7556e5791393168ab396175 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 17:35:24 +0900 Subject: Code reorganization: move files into their places. This is in a separate commit to ensure renames are properly preserved. --- src/gallium/drivers/cell/Makefile | 12 + src/gallium/drivers/cell/common.h | 220 +++ src/gallium/drivers/cell/ppu/Makefile | 76 + src/gallium/drivers/cell/ppu/cell_batch.c | 217 +++ src/gallium/drivers/cell/ppu/cell_batch.h | 58 + src/gallium/drivers/cell/ppu/cell_clear.c | 76 + src/gallium/drivers/cell/ppu/cell_clear.h | 43 + src/gallium/drivers/cell/ppu/cell_context.c | 287 +++ src/gallium/drivers/cell/ppu/cell_context.h | 135 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 164 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.h | 42 + src/gallium/drivers/cell/ppu/cell_flush.c | 84 + src/gallium/drivers/cell/ppu/cell_flush.h | 38 + src/gallium/drivers/cell/ppu/cell_render.c | 210 +++ src/gallium/drivers/cell/ppu/cell_render.h | 39 + src/gallium/drivers/cell/ppu/cell_spu.c | 155 ++ src/gallium/drivers/cell/ppu/cell_spu.h | 82 + src/gallium/drivers/cell/ppu/cell_state.h | 115 ++ src/gallium/drivers/cell/ppu/cell_state_blend.c | 109 ++ src/gallium/drivers/cell/ppu/cell_state_clip.c | 84 + src/gallium/drivers/cell/ppu/cell_state_derived.c | 192 ++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 103 ++ src/gallium/drivers/cell/ppu/cell_state_emit.h | 36 + src/gallium/drivers/cell/ppu/cell_state_fs.c | 171 ++ .../drivers/cell/ppu/cell_state_rasterizer.c | 106 ++ src/gallium/drivers/cell/ppu/cell_state_sampler.c | 84 + src/gallium/drivers/cell/ppu/cell_state_surface.c | 71 + src/gallium/drivers/cell/ppu/cell_state_vertex.c | 63 + src/gallium/drivers/cell/ppu/cell_surface.c | 179 ++ src/gallium/drivers/cell/ppu/cell_surface.h | 42 + src/gallium/drivers/cell/ppu/cell_texture.c | 252 +++ src/gallium/drivers/cell/ppu/cell_texture.h | 80 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 294 +++ src/gallium/drivers/cell/ppu/cell_vbuf.h | 38 + src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 120 ++ src/gallium/drivers/cell/ppu/cell_winsys.c | 40 + src/gallium/drivers/cell/ppu/cell_winsys.h | 50 + src/gallium/drivers/cell/spu/Makefile | 72 + src/gallium/drivers/cell/spu/spu_blend.c | 62 + src/gallium/drivers/cell/spu/spu_blend.h | 37 + src/gallium/drivers/cell/spu/spu_colorpack.h | 110 ++ src/gallium/drivers/cell/spu/spu_exec.c | 1948 ++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_exec.h | 172 ++ src/gallium/drivers/cell/spu/spu_main.c | 567 ++++++ src/gallium/drivers/cell/spu/spu_main.h | 177 ++ src/gallium/drivers/cell/spu/spu_render.c | 301 +++ src/gallium/drivers/cell/spu/spu_render.h | 38 + src/gallium/drivers/cell/spu/spu_texture.c | 217 +++ src/gallium/drivers/cell/spu/spu_texture.h | 47 + src/gallium/drivers/cell/spu/spu_tile.c | 83 + src/gallium/drivers/cell/spu/spu_tile.h | 73 + src/gallium/drivers/cell/spu/spu_tri.c | 926 ++++++++++ src/gallium/drivers/cell/spu/spu_tri.h | 37 + src/gallium/drivers/cell/spu/spu_util.c | 165 ++ src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 673 +++++++ src/gallium/drivers/cell/spu/spu_vertex_shader.c | 231 +++ src/gallium/drivers/cell/spu/spu_vertex_shader.h | 63 + src/gallium/drivers/cell/spu/spu_ztest.h | 135 ++ src/gallium/drivers/failover/Makefile | 21 + src/gallium/drivers/failover/fo_context.c | 155 ++ src/gallium/drivers/failover/fo_context.h | 114 ++ src/gallium/drivers/failover/fo_state.c | 457 +++++ src/gallium/drivers/failover/fo_state_emit.c | 137 ++ src/gallium/drivers/failover/fo_winsys.h | 45 + src/gallium/drivers/i915simple/Makefile | 38 + src/gallium/drivers/i915simple/SConscript | 29 + src/gallium/drivers/i915simple/i915_batch.h | 54 + src/gallium/drivers/i915simple/i915_blit.c | 162 ++ src/gallium/drivers/i915simple/i915_blit.h | 55 + src/gallium/drivers/i915simple/i915_clear.c | 47 + src/gallium/drivers/i915simple/i915_context.c | 320 ++++ src/gallium/drivers/i915simple/i915_context.h | 304 +++ src/gallium/drivers/i915simple/i915_debug.c | 901 +++++++++ src/gallium/drivers/i915simple/i915_debug.h | 117 ++ src/gallium/drivers/i915simple/i915_debug_fp.c | 366 ++++ src/gallium/drivers/i915simple/i915_flush.c | 81 + src/gallium/drivers/i915simple/i915_fpc.h | 213 +++ src/gallium/drivers/i915simple/i915_fpc_emit.c | 375 ++++ .../drivers/i915simple/i915_fpc_translate.c | 1135 ++++++++++++ src/gallium/drivers/i915simple/i915_prim_emit.c | 215 +++ src/gallium/drivers/i915simple/i915_prim_vbuf.c | 254 +++ src/gallium/drivers/i915simple/i915_reg.h | 978 ++++++++++ src/gallium/drivers/i915simple/i915_state.c | 694 +++++++ src/gallium/drivers/i915simple/i915_state.h | 50 + .../drivers/i915simple/i915_state_derived.c | 177 ++ .../drivers/i915simple/i915_state_dynamic.c | 308 ++++ src/gallium/drivers/i915simple/i915_state_emit.c | 374 ++++ .../drivers/i915simple/i915_state_immediate.c | 221 +++ .../drivers/i915simple/i915_state_inlines.h | 230 +++ .../drivers/i915simple/i915_state_sampler.c | 231 +++ src/gallium/drivers/i915simple/i915_strings.c | 83 + src/gallium/drivers/i915simple/i915_surface.c | 191 ++ src/gallium/drivers/i915simple/i915_texture.c | 536 ++++++ src/gallium/drivers/i915simple/i915_texture.h | 17 + src/gallium/drivers/i915simple/i915_winsys.h | 115 ++ src/gallium/drivers/i965simple/Makefile | 66 + src/gallium/drivers/i965simple/SConscript | 55 + src/gallium/drivers/i965simple/brw_batch.h | 59 + src/gallium/drivers/i965simple/brw_blit.c | 218 +++ src/gallium/drivers/i965simple/brw_blit.h | 33 + src/gallium/drivers/i965simple/brw_cc.c | 269 +++ src/gallium/drivers/i965simple/brw_clip.c | 206 +++ src/gallium/drivers/i965simple/brw_clip.h | 170 ++ src/gallium/drivers/i965simple/brw_clip_line.c | 245 +++ src/gallium/drivers/i965simple/brw_clip_point.c | 47 + src/gallium/drivers/i965simple/brw_clip_state.c | 92 + src/gallium/drivers/i965simple/brw_clip_tri.c | 566 ++++++ src/gallium/drivers/i965simple/brw_clip_unfilled.c | 477 +++++ src/gallium/drivers/i965simple/brw_clip_util.c | 351 ++++ src/gallium/drivers/i965simple/brw_context.c | 245 +++ src/gallium/drivers/i965simple/brw_context.h | 690 +++++++ src/gallium/drivers/i965simple/brw_curbe.c | 368 ++++ src/gallium/drivers/i965simple/brw_defines.h | 852 +++++++++ src/gallium/drivers/i965simple/brw_draw.c | 239 +++ src/gallium/drivers/i965simple/brw_draw.h | 55 + src/gallium/drivers/i965simple/brw_draw_upload.c | 299 +++ src/gallium/drivers/i965simple/brw_eu.c | 130 ++ src/gallium/drivers/i965simple/brw_eu.h | 888 +++++++++ src/gallium/drivers/i965simple/brw_eu_debug.c | 90 + src/gallium/drivers/i965simple/brw_eu_emit.c | 1080 +++++++++++ src/gallium/drivers/i965simple/brw_eu_util.c | 126 ++ src/gallium/drivers/i965simple/brw_flush.c | 80 + src/gallium/drivers/i965simple/brw_gs.c | 196 ++ src/gallium/drivers/i965simple/brw_gs.h | 75 + src/gallium/drivers/i965simple/brw_gs_emit.c | 148 ++ src/gallium/drivers/i965simple/brw_gs_state.c | 89 + src/gallium/drivers/i965simple/brw_misc_state.c | 486 +++++ src/gallium/drivers/i965simple/brw_reg.h | 76 + src/gallium/drivers/i965simple/brw_sf.c | 351 ++++ src/gallium/drivers/i965simple/brw_sf.h | 122 ++ src/gallium/drivers/i965simple/brw_sf_emit.c | 382 ++++ src/gallium/drivers/i965simple/brw_sf_state.c | 180 ++ src/gallium/drivers/i965simple/brw_shader_info.c | 49 + src/gallium/drivers/i965simple/brw_state.c | 424 +++++ src/gallium/drivers/i965simple/brw_state.h | 158 ++ src/gallium/drivers/i965simple/brw_state_batch.c | 113 ++ src/gallium/drivers/i965simple/brw_state_cache.c | 443 +++++ src/gallium/drivers/i965simple/brw_state_pool.c | 137 ++ src/gallium/drivers/i965simple/brw_state_upload.c | 202 ++ src/gallium/drivers/i965simple/brw_strings.c | 72 + src/gallium/drivers/i965simple/brw_structs.h | 1348 ++++++++++++++ src/gallium/drivers/i965simple/brw_surface.c | 210 +++ src/gallium/drivers/i965simple/brw_tex_layout.c | 353 ++++ src/gallium/drivers/i965simple/brw_tex_layout.h | 15 + src/gallium/drivers/i965simple/brw_urb.c | 186 ++ src/gallium/drivers/i965simple/brw_util.c | 104 ++ src/gallium/drivers/i965simple/brw_util.h | 43 + src/gallium/drivers/i965simple/brw_vs.c | 120 ++ src/gallium/drivers/i965simple/brw_vs.h | 82 + src/gallium/drivers/i965simple/brw_vs_emit.c | 1332 +++++++++++++ src/gallium/drivers/i965simple/brw_vs_state.c | 102 + src/gallium/drivers/i965simple/brw_winsys.h | 205 ++ src/gallium/drivers/i965simple/brw_wm.c | 210 +++ src/gallium/drivers/i965simple/brw_wm.h | 142 ++ src/gallium/drivers/i965simple/brw_wm_decl.c | 383 ++++ src/gallium/drivers/i965simple/brw_wm_glsl.c | 1079 +++++++++++ src/gallium/drivers/i965simple/brw_wm_iz.c | 214 +++ .../drivers/i965simple/brw_wm_sampler_state.c | 273 +++ src/gallium/drivers/i965simple/brw_wm_state.c | 194 ++ .../drivers/i965simple/brw_wm_surface_state.c | 304 +++ src/gallium/drivers/softpipe/Makefile | 50 + src/gallium/drivers/softpipe/SConscript | 42 + src/gallium/drivers/softpipe/sp_clear.c | 73 + src/gallium/drivers/softpipe/sp_clear.h | 43 + src/gallium/drivers/softpipe/sp_context.c | 333 ++++ src/gallium/drivers/softpipe/sp_context.h | 152 ++ src/gallium/drivers/softpipe/sp_draw_arrays.c | 164 ++ src/gallium/drivers/softpipe/sp_flush.c | 76 + src/gallium/drivers/softpipe/sp_flush.h | 35 + src/gallium/drivers/softpipe/sp_headers.h | 82 + src/gallium/drivers/softpipe/sp_prim_setup.c | 1247 +++++++++++++ src/gallium/drivers/softpipe/sp_prim_setup.h | 79 + src/gallium/drivers/softpipe/sp_prim_vbuf.c | 221 +++ src/gallium/drivers/softpipe/sp_prim_vbuf.h | 38 + src/gallium/drivers/softpipe/sp_quad.c | 118 ++ src/gallium/drivers/softpipe/sp_quad.h | 70 + src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 108 ++ src/gallium/drivers/softpipe/sp_quad_blend.c | 749 ++++++++ src/gallium/drivers/softpipe/sp_quad_bufloop.c | 72 + src/gallium/drivers/softpipe/sp_quad_colormask.c | 110 ++ src/gallium/drivers/softpipe/sp_quad_coverage.c | 88 + src/gallium/drivers/softpipe/sp_quad_depth_test.c | 276 +++ src/gallium/drivers/softpipe/sp_quad_earlyz.c | 88 + src/gallium/drivers/softpipe/sp_quad_fs.c | 390 ++++ src/gallium/drivers/softpipe/sp_quad_occlusion.c | 85 + src/gallium/drivers/softpipe/sp_quad_output.c | 90 + src/gallium/drivers/softpipe/sp_quad_stencil.c | 352 ++++ src/gallium/drivers/softpipe/sp_quad_stipple.c | 94 + src/gallium/drivers/softpipe/sp_query.c | 107 ++ src/gallium/drivers/softpipe/sp_query.h | 39 + src/gallium/drivers/softpipe/sp_state.h | 187 ++ src/gallium/drivers/softpipe/sp_state_blend.c | 98 + src/gallium/drivers/softpipe/sp_state_clip.c | 83 + src/gallium/drivers/softpipe/sp_state_derived.c | 235 +++ src/gallium/drivers/softpipe/sp_state_fs.c | 179 ++ src/gallium/drivers/softpipe/sp_state_rasterizer.c | 62 + src/gallium/drivers/softpipe/sp_state_sampler.c | 93 + src/gallium/drivers/softpipe/sp_state_surface.c | 109 ++ src/gallium/drivers/softpipe/sp_state_vertex.c | 64 + src/gallium/drivers/softpipe/sp_surface.c | 159 ++ src/gallium/drivers/softpipe/sp_surface.h | 42 + src/gallium/drivers/softpipe/sp_tex_sample.c | 916 +++++++++ src/gallium/drivers/softpipe/sp_tex_sample.h | 17 + src/gallium/drivers/softpipe/sp_texture.c | 166 ++ src/gallium/drivers/softpipe/sp_texture.h | 71 + src/gallium/drivers/softpipe/sp_tile_cache.c | 585 ++++++ src/gallium/drivers/softpipe/sp_tile_cache.h | 104 ++ src/gallium/drivers/softpipe/sp_winsys.h | 57 + 208 files changed, 47397 insertions(+) create mode 100644 src/gallium/drivers/cell/Makefile create mode 100644 src/gallium/drivers/cell/common.h create mode 100644 src/gallium/drivers/cell/ppu/Makefile create mode 100644 src/gallium/drivers/cell/ppu/cell_batch.c create mode 100644 src/gallium/drivers/cell/ppu/cell_batch.h create mode 100644 src/gallium/drivers/cell/ppu/cell_clear.c create mode 100644 src/gallium/drivers/cell/ppu/cell_clear.h create mode 100644 src/gallium/drivers/cell/ppu/cell_context.c create mode 100644 src/gallium/drivers/cell/ppu/cell_context.h create mode 100644 src/gallium/drivers/cell/ppu/cell_draw_arrays.c create mode 100644 src/gallium/drivers/cell/ppu/cell_draw_arrays.h create mode 100644 src/gallium/drivers/cell/ppu/cell_flush.c create mode 100644 src/gallium/drivers/cell/ppu/cell_flush.h create mode 100644 src/gallium/drivers/cell/ppu/cell_render.c create mode 100644 src/gallium/drivers/cell/ppu/cell_render.h create mode 100644 src/gallium/drivers/cell/ppu/cell_spu.c create mode 100644 src/gallium/drivers/cell/ppu/cell_spu.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state_blend.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_clip.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_derived.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_emit.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_emit.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state_fs.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_rasterizer.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_sampler.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_surface.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_vertex.c create mode 100644 src/gallium/drivers/cell/ppu/cell_surface.c create mode 100644 src/gallium/drivers/cell/ppu/cell_surface.h create mode 100644 src/gallium/drivers/cell/ppu/cell_texture.c create mode 100644 src/gallium/drivers/cell/ppu/cell_texture.h create mode 100644 src/gallium/drivers/cell/ppu/cell_vbuf.c create mode 100644 src/gallium/drivers/cell/ppu/cell_vbuf.h create mode 100644 src/gallium/drivers/cell/ppu/cell_vertex_shader.c create mode 100644 src/gallium/drivers/cell/ppu/cell_winsys.c create mode 100644 src/gallium/drivers/cell/ppu/cell_winsys.h create mode 100644 src/gallium/drivers/cell/spu/Makefile create mode 100644 src/gallium/drivers/cell/spu/spu_blend.c create mode 100644 src/gallium/drivers/cell/spu/spu_blend.h create mode 100644 src/gallium/drivers/cell/spu/spu_colorpack.h create mode 100644 src/gallium/drivers/cell/spu/spu_exec.c create mode 100644 src/gallium/drivers/cell/spu/spu_exec.h create mode 100644 src/gallium/drivers/cell/spu/spu_main.c create mode 100644 src/gallium/drivers/cell/spu/spu_main.h create mode 100644 src/gallium/drivers/cell/spu/spu_render.c create mode 100644 src/gallium/drivers/cell/spu/spu_render.h create mode 100644 src/gallium/drivers/cell/spu/spu_texture.c create mode 100644 src/gallium/drivers/cell/spu/spu_texture.h create mode 100644 src/gallium/drivers/cell/spu/spu_tile.c create mode 100644 src/gallium/drivers/cell/spu/spu_tile.h create mode 100644 src/gallium/drivers/cell/spu/spu_tri.c create mode 100644 src/gallium/drivers/cell/spu/spu_tri.h create mode 100644 src/gallium/drivers/cell/spu/spu_util.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_fetch.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_shader.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_shader.h create mode 100644 src/gallium/drivers/cell/spu/spu_ztest.h create mode 100644 src/gallium/drivers/failover/Makefile create mode 100644 src/gallium/drivers/failover/fo_context.c create mode 100644 src/gallium/drivers/failover/fo_context.h create mode 100644 src/gallium/drivers/failover/fo_state.c create mode 100644 src/gallium/drivers/failover/fo_state_emit.c create mode 100644 src/gallium/drivers/failover/fo_winsys.h create mode 100644 src/gallium/drivers/i915simple/Makefile create mode 100644 src/gallium/drivers/i915simple/SConscript create mode 100644 src/gallium/drivers/i915simple/i915_batch.h create mode 100644 src/gallium/drivers/i915simple/i915_blit.c create mode 100644 src/gallium/drivers/i915simple/i915_blit.h create mode 100644 src/gallium/drivers/i915simple/i915_clear.c create mode 100644 src/gallium/drivers/i915simple/i915_context.c create mode 100644 src/gallium/drivers/i915simple/i915_context.h create mode 100644 src/gallium/drivers/i915simple/i915_debug.c create mode 100644 src/gallium/drivers/i915simple/i915_debug.h create mode 100644 src/gallium/drivers/i915simple/i915_debug_fp.c create mode 100644 src/gallium/drivers/i915simple/i915_flush.c create mode 100644 src/gallium/drivers/i915simple/i915_fpc.h create mode 100644 src/gallium/drivers/i915simple/i915_fpc_emit.c create mode 100644 src/gallium/drivers/i915simple/i915_fpc_translate.c create mode 100644 src/gallium/drivers/i915simple/i915_prim_emit.c create mode 100644 src/gallium/drivers/i915simple/i915_prim_vbuf.c create mode 100644 src/gallium/drivers/i915simple/i915_reg.h create mode 100644 src/gallium/drivers/i915simple/i915_state.c create mode 100644 src/gallium/drivers/i915simple/i915_state.h create mode 100644 src/gallium/drivers/i915simple/i915_state_derived.c create mode 100644 src/gallium/drivers/i915simple/i915_state_dynamic.c create mode 100644 src/gallium/drivers/i915simple/i915_state_emit.c create mode 100644 src/gallium/drivers/i915simple/i915_state_immediate.c create mode 100644 src/gallium/drivers/i915simple/i915_state_inlines.h create mode 100644 src/gallium/drivers/i915simple/i915_state_sampler.c create mode 100644 src/gallium/drivers/i915simple/i915_strings.c create mode 100644 src/gallium/drivers/i915simple/i915_surface.c create mode 100644 src/gallium/drivers/i915simple/i915_texture.c create mode 100644 src/gallium/drivers/i915simple/i915_texture.h create mode 100644 src/gallium/drivers/i915simple/i915_winsys.h create mode 100644 src/gallium/drivers/i965simple/Makefile create mode 100644 src/gallium/drivers/i965simple/SConscript create mode 100644 src/gallium/drivers/i965simple/brw_batch.h create mode 100644 src/gallium/drivers/i965simple/brw_blit.c create mode 100644 src/gallium/drivers/i965simple/brw_blit.h create mode 100644 src/gallium/drivers/i965simple/brw_cc.c create mode 100644 src/gallium/drivers/i965simple/brw_clip.c create mode 100644 src/gallium/drivers/i965simple/brw_clip.h create mode 100644 src/gallium/drivers/i965simple/brw_clip_line.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_point.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_state.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_tri.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_unfilled.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_util.c create mode 100644 src/gallium/drivers/i965simple/brw_context.c create mode 100644 src/gallium/drivers/i965simple/brw_context.h create mode 100644 src/gallium/drivers/i965simple/brw_curbe.c create mode 100644 src/gallium/drivers/i965simple/brw_defines.h create mode 100644 src/gallium/drivers/i965simple/brw_draw.c create mode 100644 src/gallium/drivers/i965simple/brw_draw.h create mode 100644 src/gallium/drivers/i965simple/brw_draw_upload.c create mode 100644 src/gallium/drivers/i965simple/brw_eu.c create mode 100644 src/gallium/drivers/i965simple/brw_eu.h create mode 100644 src/gallium/drivers/i965simple/brw_eu_debug.c create mode 100644 src/gallium/drivers/i965simple/brw_eu_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_eu_util.c create mode 100644 src/gallium/drivers/i965simple/brw_flush.c create mode 100644 src/gallium/drivers/i965simple/brw_gs.c create mode 100644 src/gallium/drivers/i965simple/brw_gs.h create mode 100644 src/gallium/drivers/i965simple/brw_gs_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_gs_state.c create mode 100644 src/gallium/drivers/i965simple/brw_misc_state.c create mode 100644 src/gallium/drivers/i965simple/brw_reg.h create mode 100644 src/gallium/drivers/i965simple/brw_sf.c create mode 100644 src/gallium/drivers/i965simple/brw_sf.h create mode 100644 src/gallium/drivers/i965simple/brw_sf_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_sf_state.c create mode 100644 src/gallium/drivers/i965simple/brw_shader_info.c create mode 100644 src/gallium/drivers/i965simple/brw_state.c create mode 100644 src/gallium/drivers/i965simple/brw_state.h create mode 100644 src/gallium/drivers/i965simple/brw_state_batch.c create mode 100644 src/gallium/drivers/i965simple/brw_state_cache.c create mode 100644 src/gallium/drivers/i965simple/brw_state_pool.c create mode 100644 src/gallium/drivers/i965simple/brw_state_upload.c create mode 100644 src/gallium/drivers/i965simple/brw_strings.c create mode 100644 src/gallium/drivers/i965simple/brw_structs.h create mode 100644 src/gallium/drivers/i965simple/brw_surface.c create mode 100644 src/gallium/drivers/i965simple/brw_tex_layout.c create mode 100644 src/gallium/drivers/i965simple/brw_tex_layout.h create mode 100644 src/gallium/drivers/i965simple/brw_urb.c create mode 100644 src/gallium/drivers/i965simple/brw_util.c create mode 100644 src/gallium/drivers/i965simple/brw_util.h create mode 100644 src/gallium/drivers/i965simple/brw_vs.c create mode 100644 src/gallium/drivers/i965simple/brw_vs.h create mode 100644 src/gallium/drivers/i965simple/brw_vs_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_vs_state.c create mode 100644 src/gallium/drivers/i965simple/brw_winsys.h create mode 100644 src/gallium/drivers/i965simple/brw_wm.c create mode 100644 src/gallium/drivers/i965simple/brw_wm.h create mode 100644 src/gallium/drivers/i965simple/brw_wm_decl.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_glsl.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_iz.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_sampler_state.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_state.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_surface_state.c create mode 100644 src/gallium/drivers/softpipe/Makefile create mode 100644 src/gallium/drivers/softpipe/SConscript create mode 100644 src/gallium/drivers/softpipe/sp_clear.c create mode 100644 src/gallium/drivers/softpipe/sp_clear.h create mode 100644 src/gallium/drivers/softpipe/sp_context.c create mode 100644 src/gallium/drivers/softpipe/sp_context.h create mode 100644 src/gallium/drivers/softpipe/sp_draw_arrays.c create mode 100644 src/gallium/drivers/softpipe/sp_flush.c create mode 100644 src/gallium/drivers/softpipe/sp_flush.h create mode 100644 src/gallium/drivers/softpipe/sp_headers.h create mode 100644 src/gallium/drivers/softpipe/sp_prim_setup.c create mode 100644 src/gallium/drivers/softpipe/sp_prim_setup.h create mode 100644 src/gallium/drivers/softpipe/sp_prim_vbuf.c create mode 100644 src/gallium/drivers/softpipe/sp_prim_vbuf.h create mode 100644 src/gallium/drivers/softpipe/sp_quad.c create mode 100644 src/gallium/drivers/softpipe/sp_quad.h create mode 100644 src/gallium/drivers/softpipe/sp_quad_alpha_test.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_blend.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_bufloop.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_colormask.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_coverage.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_depth_test.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_earlyz.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_fs.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_occlusion.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_output.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_stencil.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_stipple.c create mode 100644 src/gallium/drivers/softpipe/sp_query.c create mode 100644 src/gallium/drivers/softpipe/sp_query.h create mode 100644 src/gallium/drivers/softpipe/sp_state.h create mode 100644 src/gallium/drivers/softpipe/sp_state_blend.c create mode 100644 src/gallium/drivers/softpipe/sp_state_clip.c create mode 100644 src/gallium/drivers/softpipe/sp_state_derived.c create mode 100644 src/gallium/drivers/softpipe/sp_state_fs.c create mode 100644 src/gallium/drivers/softpipe/sp_state_rasterizer.c create mode 100644 src/gallium/drivers/softpipe/sp_state_sampler.c create mode 100644 src/gallium/drivers/softpipe/sp_state_surface.c create mode 100644 src/gallium/drivers/softpipe/sp_state_vertex.c create mode 100644 src/gallium/drivers/softpipe/sp_surface.c create mode 100644 src/gallium/drivers/softpipe/sp_surface.h create mode 100644 src/gallium/drivers/softpipe/sp_tex_sample.c create mode 100644 src/gallium/drivers/softpipe/sp_tex_sample.h create mode 100644 src/gallium/drivers/softpipe/sp_texture.c create mode 100644 src/gallium/drivers/softpipe/sp_texture.h create mode 100644 src/gallium/drivers/softpipe/sp_tile_cache.c create mode 100644 src/gallium/drivers/softpipe/sp_tile_cache.h create mode 100644 src/gallium/drivers/softpipe/sp_winsys.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile new file mode 100644 index 0000000000..47aef7b05f --- /dev/null +++ b/src/gallium/drivers/cell/Makefile @@ -0,0 +1,12 @@ +# Cell Gallium driver Makefile + + +default: + ( cd spu ; make ) + ( cd ppu ; make ) + + + +clean: + ( cd spu ; make clean ) + ( cd ppu ; make clean ) diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h new file mode 100644 index 0000000000..4de514c358 --- /dev/null +++ b/src/gallium/drivers/cell/common.h @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Types and tokens which are common to the SPU and PPU code. + */ + + +#ifndef CELL_COMMON_H +#define CELL_COMMON_H + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_format.h" + + +/** The standard assert macro doesn't seem to work reliably */ +#define ASSERT(x) \ + if (!(x)) { \ + ubyte *p = NULL; \ + fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \ + __FILE__, __LINE__, __FUNCTION__, #x); \ + *p = 0; \ + exit(1); \ + } + + +/** for sanity checking */ +#define ASSERT_ALIGN16(ptr) \ + ASSERT((((unsigned long) (ptr)) & 0xf) == 0); + + +/** round up value to next multiple of 4 */ +#define ROUNDUP4(k) (((k) + 0x3) & ~0x3) + +/** round up value to next multiple of 8 */ +#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) + +/** round up value to next multiple of 16 */ +#define ROUNDUP16(k) (((k) + 0xf) & ~0xf) + + +#define CELL_MAX_SPUS 6 + +#define TILE_SIZE 32 + + +/** + * The low byte of a mailbox word contains the command opcode. + * Remaining higher bytes are command specific. + */ +#define CELL_CMD_OPCODE_MASK 0xff + +#define CELL_CMD_EXIT 1 +#define CELL_CMD_CLEAR_SURFACE 2 +#define CELL_CMD_FINISH 3 +#define CELL_CMD_RENDER 4 +#define CELL_CMD_BATCH 5 +#define CELL_CMD_RELEASE_VERTS 6 +#define CELL_CMD_STATE_FRAMEBUFFER 10 +#define CELL_CMD_STATE_DEPTH_STENCIL 11 +#define CELL_CMD_STATE_SAMPLER 12 +#define CELL_CMD_STATE_TEXTURE 13 +#define CELL_CMD_STATE_VERTEX_INFO 14 +#define CELL_CMD_STATE_VIEWPORT 15 +#define CELL_CMD_STATE_VS_ARRAY_INFO 16 +#define CELL_CMD_STATE_BLEND 17 +#define CELL_CMD_VS_EXECUTE 18 + + +#define CELL_NUM_BUFFERS 4 +#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ + +#define CELL_BUFFER_STATUS_FREE 10 +#define CELL_BUFFER_STATUS_USED 20 + + + +/** + * Tell SPUs about the framebuffer size, location + */ +struct cell_command_framebuffer +{ + uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ + int width, height; + void *color_start, *depth_start; + enum pipe_format color_format, depth_format; +}; + + +/** + * Clear framebuffer to the given value/color. + */ +struct cell_command_clear_surface +{ + uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ + uint surface; /**< Temporary: 0=color, 1=Z */ + uint value; +}; + + +/** + * Array info used by the vertex shader's vertex puller. + */ +struct cell_array_info +{ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. */ + uint format; /**< Pipe format of each entry. */ +} ALIGN16_ATTRIB; + + +struct cell_shader_info +{ + unsigned num_outputs; + + uint64_t declarations; + unsigned num_declarations; + uint64_t instructions; + unsigned num_instructions; + uint64_t uniforms; + uint64_t immediates; + unsigned num_immediates; +} ALIGN16_ATTRIB; + + +#define SPU_VERTS_PER_BATCH 64 +struct cell_command_vs +{ + uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ + struct cell_shader_info shader; + unsigned num_elts; + unsigned elts[SPU_VERTS_PER_BATCH]; + uint64_t vOut[SPU_VERTS_PER_BATCH]; + float plane[12][4]; + unsigned nr_planes; + unsigned nr_attrs; +} ALIGN16_ATTRIB; + + +struct cell_command_render +{ + uint64_t opcode; /**< CELL_CMD_RENDER */ + uint prim_type; /**< PIPE_PRIM_x */ + uint num_verts; + uint vertex_size; /**< bytes per vertex */ + uint num_indexes; + uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ + float xmin, ymin, xmax, ymax; /* XXX another dummy field */ + uint min_index; + boolean inline_verts; +}; + + +struct cell_command_release_verts +{ + uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +struct cell_command_texture +{ + void *start; /**< Address in main memory */ + uint width, height; +}; + + +/** XXX unions don't seem to work */ +/* XXX this should go away; all commands should be placed in batch buffers */ +struct cell_command +{ +#if 0 + struct cell_command_framebuffer fb; + struct cell_command_clear_surface clear; + struct cell_command_render render; +#endif + struct cell_command_vs vs; +} ALIGN16_ATTRIB; + + +/** This is the object passed to spe_create_thread() */ +struct cell_init_info +{ + unsigned id; + unsigned num_spus; + struct cell_command *cmd; + + /** Buffers for command batches, vertex/index data */ + ubyte *buffers[CELL_NUM_BUFFERS]; + uint *buffer_status; /**< points at cell_context->buffer_status */ +} ALIGN16_ATTRIB; + + +#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile new file mode 100644 index 0000000000..50060f5cd3 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -0,0 +1,76 @@ +# Gallium3D Cell driver: PPU code + +# This makefile builds the g3dcell.a library which gets pulled into +# the main libGL.so library + + +TOP = ../../../../.. +include $(TOP)/configs/linux-cell + + +#PROG = gl4 + +CELL_LIB = libcell.a + +SPU_CODE_MODULE = ../spu/g3d_spu.a + + +SOURCES = \ + cell_batch.c \ + cell_clear.c \ + cell_context.c \ + cell_draw_arrays.c \ + cell_flush.c \ + cell_state_blend.c \ + cell_state_clip.c \ + cell_state_derived.c \ + cell_state_emit.c \ + cell_state_fs.c \ + cell_state_rasterizer.c \ + cell_state_sampler.c \ + cell_state_surface.c \ + cell_state_vertex.c \ + cell_spu.c \ + cell_surface.c \ + cell_texture.c \ + cell_vbuf.c \ + cell_vertex_shader.c \ + cell_winsys.c + + +OBJECTS = $(SOURCES:.c=.o) \ + +INCLUDE_DIRS = -I$(TOP)/src/mesa + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + + +default: $(CELL_LIB) + + +$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) +# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) + ar -ru $(CELL_LIB) $(OBJECTS) + +#$(PROG): $(PPU_OBJECTS) +# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) + + + +clean: + rm -f *.o *~ $(CELL_LIB) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + + + diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c new file mode 100644 index 0000000000..f45e5f25b6 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_spu.h" + + + +uint +cell_get_empty_buffer(struct cell_context *cell) +{ + uint buf = 0, tries = 0; + + /* Find a buffer that's marked as free by all SPUs */ + while (1) { + uint spu, num_free = 0; + + for (spu = 0; spu < cell->num_spus; spu++) { + if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { + num_free++; + + if (num_free == cell->num_spus) { + /* found a free buffer, now mark status as used */ + for (spu = 0; spu < cell->num_spus; spu++) { + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + } + /* + printf("PPU: ALLOC BUFFER %u\n", buf); + */ + return buf; + } + } + else { + break; + } + } + + /* try next buf */ + buf = (buf + 1) % CELL_NUM_BUFFERS; + + tries++; + if (tries == 100) { + /* + printf("PPU WAITING for buffer...\n"); + */ + } + } +} + + +void +cell_batch_flush(struct cell_context *cell) +{ + static boolean flushing = FALSE; + uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + uint spu, cmd_word; + + assert(!flushing); + + if (size == 0) + return; + + flushing = TRUE; + + assert(batch < CELL_NUM_BUFFERS); + + /* + printf("cell_batch_dispatch: buf %u at %p, size %u\n", + batch, &cell->batch_buffer[batch][0], size); + */ + + /* + * Build "BATCH" command and sent to all SPUs. + */ + cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); + + for (spu = 0; spu < cell->num_spus; spu++) { + assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); + send_mbox_message(cell_global.spe_contexts[spu], cmd_word); + } + + /* When the SPUs are done copying the buffer into their locals stores + * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] + * array indicating that the PPU can re-use the buffer. + */ + + batch = cell_get_empty_buffer(cell); + + cell->buffer_size[batch] = 0; /* empty */ + cell->cur_batch = batch; + + flushing = FALSE; +} + + +uint +cell_batch_free_space(const struct cell_context *cell) +{ + uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + return free; +} + + +/** + * Append data to current batch. + */ +void +cell_batch_append(struct cell_context *cell, const void *data, uint bytes) +{ + uint size; + + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif + + size = cell->buffer_size[cell->cur_batch]; + + if (size + bytes > CELL_BUFFER_SIZE) { + cell_batch_flush(cell); + size = 0; + } + + ASSERT(size + bytes <= CELL_BUFFER_SIZE); + + memcpy(cell->buffer[cell->cur_batch] + size, data, bytes); + + cell->buffer_size[cell->cur_batch] = size + bytes; +} + + +void * +cell_batch_alloc(struct cell_context *cell, uint bytes) +{ + return cell_batch_alloc_aligned(cell, bytes, 1); +} + + +void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment) +{ + void *pos; + uint size, padbytes; + + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(alignment > 0); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif + + size = cell->buffer_size[cell->cur_batch]; + + padbytes = (alignment - (size % alignment)) % alignment; + + if (padbytes + size + bytes > CELL_BUFFER_SIZE) { + cell_batch_flush(cell); + size = 0; + } + else { + size += padbytes; + } + + ASSERT(size % alignment == 0); + ASSERT(size + bytes <= CELL_BUFFER_SIZE); + + pos = (void *) (cell->buffer[cell->cur_batch] + size); + + cell->buffer_size[cell->cur_batch] = size + bytes; + + return pos; +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h new file mode 100644 index 0000000000..a6eee0a8b1 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_BATCH_H +#define CELL_BATCH_H + +#include "pipe/p_compiler.h" + + +struct cell_context; + + +extern uint +cell_get_empty_buffer(struct cell_context *cell); + +extern void +cell_batch_flush(struct cell_context *cell); + +extern uint +cell_batch_free_space(const struct cell_context *cell); + +extern void +cell_batch_append(struct cell_context *cell, const void *data, uint bytes); + +extern void * +cell_batch_alloc(struct cell_context *cell, uint bytes); + +extern void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment); + + +#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c new file mode 100644 index 0000000000..07b908eec5 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + +#include +#include +#include +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/cell/common.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" + + +void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct cell_context *cell = cell_context(pipe); + uint surfIndex; + + if (cell->dirty) + cell_update_derived(cell); + + + if (!cell->cbuf_map[0]) + cell->cbuf_map[0] = pipe_surface_map(ps); + + if (ps == cell->framebuffer.zsbuf) { + surfIndex = 1; + } + else { + surfIndex = 0; + } + + + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) + cell_batch_alloc(cell, sizeof(*clr)); + clr->opcode = CELL_CMD_CLEAR_SURFACE; + clr->surface = surfIndex; + clr->value = clearValue; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h new file mode 100644 index 0000000000..ff47d43f4c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CLEAR_H +#define CELL_CLEAR_H + + +struct pipe_context; +struct pipe_surface; + + +extern void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + + +#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c new file mode 100644 index 0000000000..bbe1fd7a11 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -0,0 +1,287 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/cell/common.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_flush.h" +#include "cell_render.h" +#include "cell_state.h" +#include "cell_surface.h" +#include "cell_spu.h" +#include "cell_texture.h" +#include "cell_vbuf.h" + + + +static boolean +cell_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ + /*struct cell_context *cell = cell_context( pipe );*/ + + switch (type) { + case PIPE_TEXTURE: + /* cell supports all texture formats, XXX for now anyway */ + return TRUE; + case PIPE_SURFACE: + /* cell supports all (off-screen) surface formats, XXX for now */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +static int cell_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + +static float cell_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + + +static const char * +cell_get_name( struct pipe_context *pipe ) +{ + return "Cell"; +} + +static const char * +cell_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + + + +static void +cell_destroy_context( struct pipe_context *pipe ) +{ + struct cell_context *cell = cell_context(pipe); + + cell_spu_exit(cell); + + align_free(cell); +} + + +static struct draw_context * +cell_draw_create(struct cell_context *cell) +{ + struct draw_context *draw = draw_create(); + + if (getenv("GALLIUM_CELL_VS")) { + /* plug in SPU-based vertex transformation code */ + draw->shader_queue_flush = cell_vertex_shader_queue_flush; + draw->driver_private = cell; + } + + return draw; +} + + +struct pipe_context * +cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) +{ + struct cell_context *cell; + uint spu, buf; + + /* some fields need to be 16-byte aligned, so align the whole object */ + cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); + if (!cell) + return NULL; + + memset(cell, 0, sizeof(*cell)); + + cell->winsys = cws; + cell->pipe.winsys = winsys; + cell->pipe.destroy = cell_destroy_context; + + /* queries */ + cell->pipe.is_format_supported = cell_is_format_supported; + cell->pipe.get_name = cell_get_name; + cell->pipe.get_vendor = cell_get_vendor; + cell->pipe.get_param = cell_get_param; + cell->pipe.get_paramf = cell_get_paramf; + + + /* state setters */ + cell->pipe.create_blend_state = cell_create_blend_state; + cell->pipe.bind_blend_state = cell_bind_blend_state; + cell->pipe.delete_blend_state = cell_delete_blend_state; + + cell->pipe.create_sampler_state = cell_create_sampler_state; + cell->pipe.bind_sampler_state = cell_bind_sampler_state; + cell->pipe.delete_sampler_state = cell_delete_sampler_state; + + cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; + cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; + cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; + + cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; + cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; + cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; + + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_blend_color = cell_set_blend_color; + cell->pipe.set_clip_state = cell_set_clip_state; + cell->pipe.set_constant_buffer = cell_set_constant_buffer; + + cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; + + cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; + cell->pipe.set_scissor_state = cell_set_scissor_state; + cell->pipe.set_viewport_state = cell_set_viewport_state; + + cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; + cell->pipe.set_vertex_element = cell_set_vertex_element; + + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + + cell->pipe.clear = cell_clear_surface; + cell->pipe.flush = cell_flush; + + /* textures */ + cell->pipe.texture_create = cell_texture_create; + cell->pipe.texture_release = cell_texture_release; + cell->pipe.get_tex_surface = cell_get_tex_surface; + + cell->pipe.set_sampler_texture = cell_set_sampler_texture; + +#if 0 + cell->pipe.begin_query = cell_begin_query; + cell->pipe.end_query = cell_end_query; + cell->pipe.wait_query = cell_wait_query; +#endif + + cell_init_surface_functions(cell); + + cell->draw = cell_draw_create(cell); + + cell_init_vbuf(cell); + draw_set_rasterize_stage(cell->draw, cell->vbuf); + + /* + * SPU stuff + */ + cell->num_spus = 6; + + cell_start_spus(cell); + + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; + + /* init batch buffer status values, + * mark 0th buffer as used, rest as free. + */ + for (spu = 0; spu < cell->num_spus; spu++) { + if (buf == 0) + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + else + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; + } + } + + return &cell->pipe; +} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h new file mode 100644 index 0000000000..3b63419b5e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CONTEXT_H +#define CELL_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/draw/draw_vbuf.h" +#include "cell_winsys.h" +#include "pipe/cell/common.h" + + +struct cell_vbuf_render; + +struct cell_vertex_shader_state +{ + struct pipe_shader_state shader; + void *draw_data; +}; + + +struct cell_fragment_shader_state +{ + struct pipe_shader_state shader; + void *data; +}; + + +struct cell_context +{ + struct pipe_context pipe; + + struct cell_winsys *winsys; + + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct cell_vertex_shader_state *vs; + const struct cell_fragment_shader_state *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct cell_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + + ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; + ubyte *zsbuf_map; + + struct pipe_surface *tex_surf; + uint *tex_map; + + uint dirty; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *render_stage; + + /** For post-transformed vertex buffering: */ + struct cell_vbuf_render *vbuf_render; + struct draw_stage *vbuf; + + struct vertex_info vertex_info; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + + uint num_spus; + + /** Buffers for command batches, vertex/index data */ + uint buffer_size[CELL_NUM_BUFFERS]; + ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + + int cur_batch; /**< which buffer is being filled w/ commands */ + + /** [4] to ensure 16-byte alignment for each status word */ + uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + +}; + + + + +static INLINE struct cell_context * +cell_context(struct pipe_context *pipe) +{ + return (struct cell_context *) pipe; +} + + +extern struct pipe_context * +cell_create_context(struct pipe_winsys *ws, struct cell_winsys *cws); + +extern void +cell_vertex_shader_queue_flush(struct draw_context *draw); + + + + +#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c new file mode 100644 index 0000000000..717cd8370f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_state.h" + +#include "pipe/draw/draw_context.h" + + + +static void +cell_map_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX]); +} + +static void +cell_unmap_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + +boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ +boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *sp = cell_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + /* first, check that the primitive is not malformed. It is the + * state tracker's responsibility to do send only correctly formed + * primitives down. It currently isn't doing that though... + */ +#if 1 + count = draw_trim_prim( mode, count ); +#else + if (!draw_validate_prim( mode, count )) + assert(0); +#endif + + if (sp->dirty) + cell_update_derived( sp ); + +#if 0 + cell_map_surfaces(sp); +#endif + cell_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + void *buf = pipe->winsys->buffer_map(pipe->winsys, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* draw! */ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + } + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + } + + /* Note: leave drawing surfaces mapped */ + cell_unmap_constant_buffers(sp); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h new file mode 100644 index 0000000000..d5df4aa05f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_DRAW_ARRAYS_H +#define CELL_DRAW_ARRAYS_H + + +boolean cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + + +#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c new file mode 100644 index 0000000000..f62bc4650c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_render.h" +#include "pipe/draw/draw_context.h" + + +void +cell_flush(struct pipe_context *pipe, unsigned flags) +{ + struct cell_context *cell = cell_context(pipe); + + if (flags & PIPE_FLUSH_SWAPBUFFERS) + flags |= PIPE_FLUSH_WAIT; + + draw_flush( cell->draw ); + cell_flush_int(pipe, flags); +} + + +/** internal flush */ +void +cell_flush_int(struct pipe_context *pipe, unsigned flags) +{ + static boolean flushing = FALSE; /* recursion catcher */ + struct cell_context *cell = cell_context(pipe); + uint i; + + ASSERT(!flushing); + flushing = TRUE; + + if (flags & PIPE_FLUSH_WAIT) { + uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); + *cmd = CELL_CMD_FINISH; + } + + cell_batch_flush(cell); + +#if 0 + /* Send CMD_FINISH to all SPUs */ + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); + } +#endif + + if (flags & PIPE_FLUSH_WAIT) { + /* Wait for ack */ + for (i = 0; i < cell->num_spus; i++) { + uint k = wait_mbox_message(cell_global.spe_contexts[i]); + assert(k == CELL_CMD_FINISH); + } + } + + flushing = FALSE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h new file mode 100644 index 0000000000..eda351b1cb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_FLUSH +#define CELL_FLUSH + +extern void +cell_flush(struct pipe_context *pipe, unsigned flags); + +extern void +cell_flush_int(struct pipe_context *pipe, unsigned flags); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c new file mode 100644 index 0000000000..4ab277a4b2 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Last stage of 'draw' pipeline: send tris to SPUs. + * \author Brian Paul + */ + +#include "cell_context.h" +#include "cell_render.h" +#include "cell_spu.h" +#include "pipe/p_util.h" +#include "pipe/draw/draw_private.h" + + +struct render_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct cell_context *cell; +}; + + +static INLINE struct render_stage * +render_stage(struct draw_stage *stage) +{ + return (struct render_stage *) stage; +} + + +static void render_begin( struct draw_stage *stage ) +{ +#if 0 + struct render_stage *render = render_stage(stage); + struct cell_context *sp = render->cell; + const struct pipe_shader_state *fs = &render->cell->fs->shader; + render->quad.nr_attrs = render->cell->nr_frag_attrs; + + render->firstFpInput = fs->input_semantic_name[0]; + + sp->quad.first->begin(sp->quad.first); +#endif +} + + +static void render_end( struct draw_stage *stage ) +{ +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ + struct render_stage *render = render_stage(stage); + /*render->cell->line_stipple_counter = 0;*/ +} + + +static void +render_point(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +static void +render_line(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +/** Write a vertex into the prim buffer */ +static void +save_vertex(struct cell_prim_buffer *buf, uint pos, + const struct vertex_header *vert) +{ + uint attr, j; + + for (attr = 0; attr < 2; attr++) { + for (j = 0; j < 4; j++) { + buf->vertex[pos][attr][j] = vert->data[attr][j]; + } + } + + /* update bounding box */ + if (vert->data[0][0] < buf->xmin) + buf->xmin = vert->data[0][0]; + if (vert->data[0][0] > buf->xmax) + buf->xmax = vert->data[0][0]; + if (vert->data[0][1] < buf->ymin) + buf->ymin = vert->data[0][1]; + if (vert->data[0][1] > buf->ymax) + buf->ymax = vert->data[0][1]; +} + + +static void +render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + struct render_stage *rs = render_stage(stage); + struct cell_context *cell = rs->cell; + struct cell_prim_buffer *buf = &cell->prim_buffer; + uint i; + + if (buf->num_verts + 3 > CELL_MAX_VERTS) { + cell_flush_prim_buffer(cell); + } + + i = buf->num_verts; + assert(i+2 <= CELL_MAX_VERTS); + save_vertex(buf, i+0, prim->v[0]); + save_vertex(buf, i+1, prim->v[1]); + save_vertex(buf, i+2, prim->v[2]); + buf->num_verts += 3; +} + + +/** + * Send the a RENDER command to all SPUs to have them render the prims + * in the current prim_buffer. + */ +void +cell_flush_prim_buffer(struct cell_context *cell) +{ + uint i; + + if (cell->prim_buffer.num_verts == 0) + return; + + for (i = 0; i < cell->num_spus; i++) { + struct cell_command_render *render = &cell_global.command[i].render; + render->prim_type = PIPE_PRIM_TRIANGLES; + render->num_verts = cell->prim_buffer.num_verts; + render->vertex_size = cell->vertex_info->size * 4; + render->xmin = cell->prim_buffer.xmin; + render->ymin = cell->prim_buffer.ymin; + render->xmax = cell->prim_buffer.xmax; + render->ymax = cell->prim_buffer.ymax; + render->vertex_data = &cell->prim_buffer.vertex; + ASSERT_ALIGN16(render->vertex_data); + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); + } + + cell->prim_buffer.num_verts = 0; + + cell->prim_buffer.xmin = 1e100; + cell->prim_buffer.ymin = 1e100; + cell->prim_buffer.xmax = -1e100; + cell->prim_buffer.ymax = -1e100; + + /* XXX temporary, need to double-buffer the prim buffer until we get + * a real command buffer/list system. + */ + cell_flush(&cell->pipe, 0x0); +} + + + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new draw/render stage. This will be plugged into the + * draw module as the last pipeline stage. + */ +struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) +{ + struct render_stage *render = CALLOC_STRUCT(render_stage); + + render->cell = cell; + render->stage.draw = cell->draw; + render->stage.begin = render_begin; + render->stage.point = render_point; + render->stage.line = render_line; + render->stage.tri = render_tri; + render->stage.end = render_end; + render->stage.reset_stipple_counter = reset_stipple_counter; + render->stage.destroy = render_destroy; + + /* + render->quad.coef = render->coef; + render->quad.posCoef = &render->posCoef; + */ + + return &render->stage; +} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h new file mode 100644 index 0000000000..826dcbafeb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_RENDER_H +#define CELL_RENDER_H + +struct cell_context; +struct draw_stage; + +extern void +cell_flush_prim_buffer(struct cell_context *cell); + +extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); + +#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c new file mode 100644 index 0000000000..7c83a47e57 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -0,0 +1,155 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include + +#include "cell_spu.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/cell/common.h" + + +/* +helpful headers: +/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h +*/ + + +struct cell_global_info cell_global; + + +/** + * Write a 1-word message to the given SPE mailbox. + */ +void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) +{ + spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); +} + + +/** + * Wait for a 1-word message to arrive in given mailbox. + */ +uint +wait_mbox_message(spe_context_ptr_t ctx) +{ + do { + unsigned data; + int count = spe_out_mbox_read(ctx, &data, 1); + + if (count == 1) { + return data; + } + + if (count < 0) { + /* error */ ; + } + } while (1); +} + + +static void *cell_thread_function(void *arg) +{ + struct cell_init_info *init = (struct cell_init_info *) arg; + unsigned entry = SPE_DEFAULT_ENTRY; + + ASSERT_ALIGN16(init); + + if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, + init, NULL, NULL) < 0) { + fprintf(stderr, "spe_context_run() failed\n"); + exit(1); + } + + pthread_exit(NULL); +} + + +/** + * Create the SPU threads + */ +void +cell_start_spus(struct cell_context *cell) +{ + uint i, j; + + assert(cell->num_spus <= MAX_SPUS); + + ASSERT_ALIGN16(&cell_global.command[0]); + ASSERT_ALIGN16(&cell_global.command[1]); + + ASSERT_ALIGN16(&cell_global.inits[0]); + ASSERT_ALIGN16(&cell_global.inits[1]); + + for (i = 0; i < cell->num_spus; i++) { + cell_global.inits[i].id = i; + cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].cmd = &cell_global.command[i]; + for (j = 0; j < CELL_NUM_BUFFERS; j++) { + cell_global.inits[i].buffers[j] = cell->buffer[j]; + } + cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + + cell_global.spe_contexts[i] = spe_context_create(0, NULL); + if (!cell_global.spe_contexts[i]) { + fprintf(stderr, "spe_context_create() failed\n"); + exit(1); + } + + if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { + fprintf(stderr, "spe_program_load() failed\n"); + exit(1); + } + + pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function, + &cell_global.inits[i]); + } +} + + +/** + * Tell all the SPUs to stop/exit. + */ +void +cell_spu_exit(struct cell_context *cell) +{ + uint i; + + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); + } + + /* wait for threads to exit */ + for (i = 0; i < cell->num_spus; i++) { + void *value; + pthread_join(cell_global.spe_threads[i], &value); + cell_global.spe_threads[i] = 0; + cell_global.spe_contexts[i] = 0; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h new file mode 100644 index 0000000000..19eff94f96 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_SPU +#define CELL_SPU + + +#include +#include +#include "pipe/cell/common.h" + +#include "cell_context.h" + + +#define MAX_SPUS 8 + +/** + * Global vars, for now anyway. + */ +struct cell_global_info +{ + /** + * SPU/SPE handles, etc + */ + spe_context_ptr_t spe_contexts[MAX_SPUS]; + pthread_t spe_threads[MAX_SPUS]; + + /** + * Data sent to SPUs + */ + struct cell_init_info inits[MAX_SPUS]; + struct cell_command command[MAX_SPUS]; +}; + + +extern struct cell_global_info cell_global; + + +/** This is the handle for the actual SPE code */ +extern spe_program_handle_t g3d_spu; + + +extern void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); + +extern uint +wait_mbox_message(spe_context_ptr_t ctx); + + +extern void +cell_start_spus(struct cell_context *cell); + + +extern void +cell_spu_exit(struct cell_context *cell); + + +#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h new file mode 100644 index 0000000000..3a71ba14fa --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -0,0 +1,115 @@ + + +#ifndef CELL_STATE_H +#define CELL_STATE_H + + +#define CELL_NEW_VIEWPORT 0x1 +#define CELL_NEW_RASTERIZER 0x2 +#define CELL_NEW_FS 0x4 +#define CELL_NEW_BLEND 0x8 +#define CELL_NEW_CLIP 0x10 +#define CELL_NEW_SCISSOR 0x20 +#define CELL_NEW_STIPPLE 0x40 +#define CELL_NEW_FRAMEBUFFER 0x80 +#define CELL_NEW_ALPHA_TEST 0x100 +#define CELL_NEW_DEPTH_STENCIL 0x200 +#define CELL_NEW_SAMPLER 0x400 +#define CELL_NEW_TEXTURE 0x800 +#define CELL_NEW_VERTEX 0x1000 +#define CELL_NEW_VS 0x2000 +#define CELL_NEW_CONSTANTS 0x4000 +#define CELL_NEW_VERTEX_INFO 0x8000 + + + +extern void +cell_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + + + +extern void * +cell_create_blend_state(struct pipe_context *, const struct pipe_blend_state *); +extern void cell_bind_blend_state(struct pipe_context *, void *); +extern void cell_delete_blend_state(struct pipe_context *, void *); + +extern void cell_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + + +void * +cell_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); + +extern void +cell_bind_sampler_state(struct pipe_context *, unsigned, void *); + +extern void +cell_delete_sampler_state(struct pipe_context *, void *); + + +extern void * +cell_create_depth_stencil_alpha_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); + +extern void +cell_bind_depth_stencil_alpha_state(struct pipe_context *, void *); + +extern void +cell_delete_depth_stencil_alpha_state(struct pipe_context *, void *); + + +void *cell_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void cell_bind_fs_state(struct pipe_context *, void *); +void cell_delete_fs_state(struct pipe_context *, void *); +void *cell_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void cell_bind_vs_state(struct pipe_context *, void *); +void cell_delete_vs_state(struct pipe_context *, void *); + + +void * +cell_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void cell_bind_rasterizer_state(struct pipe_context *, void *); +void cell_delete_rasterizer_state(struct pipe_context *, void *); + + +void cell_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void cell_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture); + +void cell_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void cell_set_texture_state( struct pipe_context *, + unsigned unit, struct pipe_texture * ); + +void cell_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void cell_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + +void cell_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + + +void cell_update_derived( struct cell_context *softpipe ); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c new file mode 100644 index 0000000000..4fc60548c8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_blend.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" + + + +void * +cell_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + + +void +cell_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend = (const struct pipe_blend_state *)blend; + + cell->dirty |= CELL_NEW_BLEND; +} + + +void +cell_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + + +void +cell_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend_color = *blend_color; + + cell->dirty |= CELL_NEW_BLEND; +} + + + + +void * +cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + + +void +cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->depth_stencil + = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + + cell->dirty |= CELL_NEW_DEPTH_STENCIL; +} + + +void +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) +{ + FREE(depth); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c new file mode 100644 index 0000000000..4f43665941 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_clip.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "cell_context.h" +#include "cell_state.h" +#include "pipe/draw/draw_context.h" + + +void cell_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(cell->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +void cell_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct cell_context *cell = cell_context(pipe); + + cell->viewport = *viewport; /* struct copy */ + cell->dirty |= CELL_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(cell->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. + */ +} + + +void cell_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->scissor, scissor, sizeof(*scissor) ); + cell->dirty |= CELL_NEW_SCISSOR; +} + + +void cell_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); + cell->dirty |= CELL_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c new file mode 100644 index 0000000000..56daf5dfde --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -0,0 +1,192 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_vertex.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_state.h" +#include "cell_state_emit.h" + + +static int +find_vs_output(const struct pipe_shader_state *vs, + uint semantic_name, + uint semantic_index) +{ + uint i; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == semantic_name && + vs->output_semantic_index[i] == semantic_index) + return i; + } + return -1; +} + + +/** + * Determine how to map vertex program outputs to fragment program inputs. + * Basically, this will be used when computing the triangle interpolation + * coefficients from the post-transform vertex attributes. + */ +static void +calculate_vertex_layout( struct cell_context *cell ) +{ + const struct pipe_shader_state *vs = &cell->vs->shader; + const struct pipe_shader_state *fs = &cell->fs->shader; + const enum interp_mode colorInterp + = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo = &cell->vertex_info; + uint i; + int src; + +#if 0 + if (cell->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; + vinfo_vbuf->num_attribs = 0; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); + vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; + } +#endif + + /* reset vinfo */ + vinfo->num_attribs = 0; + + /* we always want to emit vertex pos */ + src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + for (i = 0; i < fs->num_inputs; i++) { + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + /* already done above */ + break; + + case TGSI_SEMANTIC_COLOR: + src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, + fs->input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); +#if 1 + if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ + src = 0; +#endif + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + fs->input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + draw_compute_vertex_size(vinfo); + + /* XXX only signal this if format really changes */ + cell->dirty |= CELL_NEW_VERTEX_INFO; +} + + +#if 0 +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct cell_context *sp) +{ + unsigned surfWidth, surfHeight; + + if (sp->framebuffer.num_cbufs > 0) { + surfWidth = sp->framebuffer.cbufs[0]->width; + surfHeight = sp->framebuffer.cbufs[0]->height; + } + else { + /* no surface? */ + surfWidth = sp->scissor.maxx; + surfHeight = sp->scissor.maxy; + } + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} +#endif + + + +void cell_update_derived( struct cell_context *cell ) +{ + if (cell->dirty & (CELL_NEW_RASTERIZER | + CELL_NEW_FS | + CELL_NEW_VS)) + calculate_vertex_layout( cell ); + +#if 0 + if (cell->dirty & (CELL_NEW_SCISSOR | + CELL_NEW_DEPTH_STENCIL_ALPHA | + CELL_NEW_FRAMEBUFFER)) + compute_cliprect(cell); +#endif + + cell_emit_state(cell); + + cell->dirty = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c new file mode 100644 index 0000000000..5d2a786449 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_state_emit.h" +#include "cell_batch.h" +#include "cell_texture.h" + + +static void +emit_state_cmd(struct cell_context *cell, uint cmd, + const void *state, uint state_size) +{ + uint64_t *dst = (uint64_t *) + cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size)); + *dst = cmd; + memcpy(dst + 1, state, state_size); +} + + + +void +cell_emit_state(struct cell_context *cell) +{ + if (cell->dirty & CELL_NEW_FRAMEBUFFER) { + struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; + struct pipe_surface *zbuf = cell->framebuffer.zsbuf; + struct cell_command_framebuffer *fb + = cell_batch_alloc(cell, sizeof(*fb)); + fb->opcode = CELL_CMD_STATE_FRAMEBUFFER; + fb->color_start = cell->cbuf_map[0]; + fb->color_format = cbuf->format; + fb->depth_start = cell->zsbuf_map; + fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; + fb->width = cell->framebuffer.cbufs[0]->width; + fb->height = cell->framebuffer.cbufs[0]->height; + } + + if (cell->dirty & CELL_NEW_BLEND) { + emit_state_cmd(cell, CELL_CMD_STATE_BLEND, + cell->blend, + sizeof(struct pipe_blend_state)); + } + + if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, + cell->depth_stencil, + sizeof(struct pipe_depth_stencil_alpha_state)); + } + + if (cell->dirty & CELL_NEW_SAMPLER) { + emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, + cell->sampler[0], sizeof(struct pipe_sampler_state)); + } + + if (cell->dirty & CELL_NEW_TEXTURE) { + struct cell_command_texture texture; + if (cell->texture[0]) { + texture.start = cell->texture[0]->tiled_data; + texture.width = cell->texture[0]->base.width[0]; + texture.height = cell->texture[0]->base.height[0]; + } + else { + texture.start = NULL; + texture.width = 0; + texture.height = 0; + } + + emit_state_cmd(cell, CELL_CMD_STATE_TEXTURE, + &texture, sizeof(struct cell_command_texture)); + } + + if (cell->dirty & CELL_NEW_VERTEX_INFO) { + emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, + &cell->vertex_info, sizeof(struct vertex_info)); + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h new file mode 100644 index 0000000000..59f8affe8d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_STATE_EMIT_H +#define CELL_STATE_EMIT_H + + +extern void +cell_emit_state(struct cell_context *cell); + + +#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c new file mode 100644 index 0000000000..3f46a87d18 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -0,0 +1,171 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/draw/draw_context.h" +#if 0 +#include "pipe/p_shader_tokens.h" +#include "pipe/llvm/gallivm.h" +#include "pipe/tgsi/util/tgsi_dump.h" +#include "pipe/tgsi/exec/tgsi_sse2.h" +#endif + +#include "cell_context.h" +#include "cell_state.h" + + +void * +cell_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + /*struct cell_context *cell = cell_context(pipe);*/ + struct cell_fragment_shader_state *state; + + state = CALLOC_STRUCT(cell_fragment_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + +#if 0 + if (cell->dump_fs) { + tgsi_dump(state->shader.tokens, 0); + } + +#if defined(__i386__) || defined(__386__) + if (cell->use_sse) { + x86_init_func( &state->sse2_program ); + tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); + } +#endif + +#ifdef MESA_LLVM + state->llvm_prog = 0; + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +#endif +#endif + + return state; +} + + +void +cell_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->fs = (struct cell_fragment_shader_state *) fs; + + cell->dirty |= CELL_NEW_FS; +} + + +void +cell_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_fragment_shader_state *state = + (struct cell_fragment_shader_state *) fs; + + FREE( state ); +} + + +void * +cell_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *state; + + state = CALLOC_STRUCT(cell_vertex_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + + state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +void +cell_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->vs = (const struct cell_vertex_shader_state *) vs; + + draw_bind_vertex_shader(cell->draw, cell->vs->draw_data); + + cell->dirty |= CELL_NEW_VS; +} + + +void +cell_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + struct cell_vertex_shader_state *state = + (struct cell_vertex_shader_state *) vs; + + draw_delete_vertex_shader(cell->draw, state->draw_data); + FREE( state ); +} + + +void +cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + pipe_buffer_reference(ws, + &cell->constants[shader].buffer, + buf->buffer); + cell->constants[shader].size = buf->size; + + cell->dirty |= CELL_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c new file mode 100644 index 0000000000..d8128ece54 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" + + + +struct spu_rasterizer_state +{ + unsigned flatshade:1; +#if 0 + unsigned light_twoside:1; + unsigned front_winding:2; /**< PIPE_WINDING_x */ + unsigned cull_mode:2; /**< PIPE_WINDING_x */ + unsigned fill_cw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned fill_ccw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned offset_cw:1; + unsigned offset_ccw:1; +#endif + unsigned scissor:1; + unsigned poly_smooth:1; + unsigned poly_stipple_enable:1; + unsigned point_smooth:1; +#if 0 + unsigned point_sprite:1; + unsigned point_size_per_vertex:1; /**< size computed in vertex shader */ +#endif + unsigned multisample:1; /* XXX maybe more ms state in future */ + unsigned line_smooth:1; + unsigned line_stipple_enable:1; + unsigned line_stipple_factor:8; /**< [1..256] actually */ + unsigned line_stipple_pattern:16; +#if 0 + unsigned bypass_clipping:1; +#endif + unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ + + float line_width; + float point_size; /**< used when no per-vertex size */ +#if 0 + float offset_units; + float offset_scale; + ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */ +#endif +}; + + + +void * +cell_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *setup) +{ + struct pipe_rasterizer_state *state + = MALLOC(sizeof(struct pipe_rasterizer_state)); + memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); + return state; +} + + +void +cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(cell->draw, setup); + + cell->rasterizer = (struct pipe_rasterizer_state *)setup; + + cell->dirty |= CELL_NEW_RASTERIZER; +} + + +void +cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c new file mode 100644 index 0000000000..ade6cc8338 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_sampler.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + +void * +cell_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + +void +cell_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + cell->sampler[unit] = (struct pipe_sampler_state *)sampler; + + cell->dirty |= CELL_NEW_SAMPLER; +} + + +void +cell_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + +void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->texture[sampler] = texture; + + cell_update_texture_mapping(cell); + + cell->dirty |= CELL_NEW_TEXTURE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_surface.c b/src/gallium/drivers/cell/ppu/cell_state_surface.c new file mode 100644 index 0000000000..287610b76b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_surface.c @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_state.h" + + +void +cell_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct cell_context *cell = cell_context(pipe); + + if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { + struct pipe_surface *csurf = fb->cbufs[0]; + struct pipe_surface *zsurf = fb->zsbuf; + uint i; + + /* unmap old surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { + pipe_surface_unmap(cell->framebuffer.cbufs[i]); + cell->cbuf_map[i] = NULL; + } + } + + if (cell->framebuffer.zsbuf && cell->zsbuf_map) { + pipe_surface_unmap(cell->framebuffer.zsbuf); + cell->zsbuf_map = NULL; + } + + /* update my state */ + cell->framebuffer = *fb; + + /* map new surfaces */ + if (csurf) + cell->cbuf_map[0] = pipe_surface_map(csurf); + + if (zsurf) + cell->zsbuf_map = pipe_surface_map(zsurf); + + cell->dirty |= CELL_NEW_FRAMEBUFFER; + } +} + diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c new file mode 100644 index 0000000000..0f01e920f9 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + + +#include "cell_context.h" +#include "cell_state.h" + +#include "pipe/draw/draw_context.h" + + +void +cell_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *attrib) +{ + struct cell_context *cell = cell_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + cell->vertex_element[index] = *attrib; /* struct copy */ + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_element(cell->draw, index, attrib); +} + + +void +cell_set_vertex_buffer(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer) +{ + struct cell_context *cell = cell_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + cell->vertex_buffer[index] = *buffer; /* struct copy */ + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_buffer(cell->draw, index, buffer); +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c new file mode 100644 index 0000000000..fca93e4742 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" +#include "cell_context.h" +#include "cell_surface.h" + + +/* Upload data to a rectangular sub-region. Lots of choices how to do this: + * + * - memcpy by span to current destination + * - upload data as new buffer and blit + * + * Currently always memcpy. + */ +static void +cell_surface_data(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + const void *src, unsigned src_pitch, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, width, height, src, src_pitch, srcx, srcy); + + pipe_surface_unmap(dst); +} + + +static void +cell_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + assert( dst->cpp == src->cpp ); + + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src), + do_flip ? -src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); +} + + +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void +cell_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + assert(dst->pitch > 0); + assert(width <= dst->pitch); + + switch (dst->cpp) { + case 1: + { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: + { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: + { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst->pitch * 4; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); +} + + +void +cell_init_surface_functions(struct cell_context *cell) +{ + cell->pipe.surface_copy = cell_surface_copy; + cell->pipe.surface_fill = cell_surface_fill; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h new file mode 100644 index 0000000000..9e58f32944 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef CELL_SURFACE_H +#define CELL_SURFACE_H + + +struct cell_context; + + +extern void +cell_init_surface_functions(struct cell_context *cell); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c new file mode 100644 index 0000000000..c8ef36002f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -0,0 +1,252 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +cell_texture_layout(struct cell_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for ( level = 0 ; level <= pt->last_level ; level++ ) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +{ + struct cell_texture *spt = CALLOC_STRUCT(cell_texture); + if (!spt) + return NULL; + + spt->base = *templat; + + cell_texture_layout(spt); + + spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + return &spt->base; +} + + +void +cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct cell_texture *spt = cell_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +/** + * Called via pipe->get_tex_surface() + */ +struct pipe_surface * +cell_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct cell_texture *spt = cell_texture(pt); + struct pipe_surface *ps; + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + + +static void +tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = h / tile_size, w_t = w / tile_size; + + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* fill in tile (i, j) */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; + for (i = 0; i < tile_size; i++) { + for (j = 0; j < tile_size; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + *tdst++ = src[srci * h + srcj]; + } + } + } + } +} + + + +/** + * Convert linear texture image data to tiled format for SPU usage. + */ +static void +cell_tile_texture(struct cell_context *cell, + struct cell_texture *texture) +{ + uint face = 0, level = 0, zslice = 0; + struct pipe_surface *surf; + const uint w = texture->base.width[0], h = texture->base.height[0]; + const uint *src; + + /* temporary restrictions: */ + assert(w >= TILE_SIZE); + assert(h >= TILE_SIZE); + assert(w % TILE_SIZE == 0); + assert(h % TILE_SIZE == 0); + + surf = cell_get_tex_surface(&cell->pipe, &texture->base, face, level, zslice); + ASSERT(surf); + + src = (const uint *) pipe_surface_map(surf); + + if (texture->tiled_data) { + align_free(texture->tiled_data); + } + texture->tiled_data = align_malloc(w * h * 4, 16); + + tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src); + + pipe_surface_unmap(surf); + + pipe_surface_reference(&surf, NULL); +} + + + +void +cell_update_texture_mapping(struct cell_context *cell) +{ + uint face = 0, level = 0, zslice = 0; + + if (cell->texture[0]) + cell_tile_texture(cell, cell->texture[0]); +#if 0 + if (cell->tex_surf && cell->tex_map) { + pipe_surface_unmap(cell->tex_surf); + cell->tex_map = NULL; + } + + /* XXX free old surface */ + + cell->tex_surf = cell_get_tex_surface(&cell->pipe, + &cell->texture[0]->base, + face, level, zslice); + + cell->tex_map = pipe_surface_map(cell->tex_surf); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h new file mode 100644 index 0000000000..0264fed88e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_TEXTURE_H +#define CELL_TEXTURE_H + + +struct pipe_context; +struct pipe_texture; + + +/** + * Subclass of pipe_texture + */ +struct cell_texture +{ + struct pipe_texture base; + + unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + unsigned long buffer_size; + + void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/ +}; + + +/** cast wrapper */ +static INLINE struct cell_texture * +cell_texture(struct pipe_texture *pt) +{ + return (struct cell_texture *) pt; +} + + + +extern struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); + +extern void +cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + +extern struct pipe_surface * +cell_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice); + + +extern void +cell_update_texture_mapping(struct cell_context *cell); + + +#endif /* CELL_TEXTURE */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c new file mode 100644 index 0000000000..e9fafe492e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include "cell_batch.h" +#include "cell_context.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_vbuf.h" +#include "pipe/draw/draw_vbuf.h" + + +/** Allow vertex data to be inlined after RENDER command */ +#define ALLOW_INLINE_VERTS 1 + + +/** + * Subclass of vbuf_render because we need a cell_context pointer in + * a few places. + */ +struct cell_vbuf_render +{ + struct vbuf_render base; + struct cell_context *cell; + uint prim; /**< PIPE_PRIM_x */ + uint vertex_size; /**< in bytes */ + void *vertex_buffer; /**< just for debug, really */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +/** cast wrapper */ +static struct cell_vbuf_render * +cell_vbuf_render(struct vbuf_render *vbr) +{ + return (struct cell_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +cell_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + return &cvbr->cell->vertex_info; +} + + +static void * +cell_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ + + assert(cvbr->vertex_buf == ~0); + cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); + cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + + /* + printf("%s vertex_buf = %u count = %u\n", + __FUNCTION__, cvbr->vertex_buf, vertices_used); + */ + + /* Tell SPUs they can release the vert buf */ + if (cvbr->vertex_buf != ~0U) { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) + cell_batch_alloc(cell, sizeof(struct cell_command_release_verts)); + release->opcode = CELL_CMD_RELEASE_VERTS; + release->vertex_buf = cvbr->vertex_buf; + } + + cvbr->vertex_buf = ~0; + cell_flush_int(&cell->pipe, 0x0); + + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + + +static void +cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->prim = prim; + /*printf("cell_set_prim %u\n", prim);*/ +} + + +static void +cell_vbuf_draw(struct vbuf_render *vbr, + const ushort *indices, + uint nr_indices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + float xmin, ymin, xmax, ymax; + uint i; + uint nr_vertices = 0, min_index = ~0; + const void *vertices = cvbr->vertex_buffer; + const uint vertex_size = cvbr->vertex_size; + + for (i = 0; i < nr_indices; i++) { + if (indices[i] > nr_vertices) + nr_vertices = indices[i]; + if (indices[i] < min_index) + min_index = indices[i]; + } + nr_vertices++; + +#if 0 + /*if (min_index > 0)*/ + printf("%s min_index = %u\n", __FUNCTION__, min_index); +#endif + +#if 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", + nr_indices, nr_vertices); + printf(" "); + for (i = 0; i < nr_indices; i += 3) { + printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); + } + printf("\n"); +#elif 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", + nr_indices, nr_vertices, + indices[0], indices[1], indices[2]); + printf("ind space = %u, vert space = %u, space = %u\n", + nr_indices * 2, + nr_vertices * 4 * cell->vertex_info.size, + cell_batch_free_space(cell)); +#endif + + /* compute x/y bounding box */ + xmin = ymin = 1e50; + xmax = ymax = -1e50; + for (i = min_index; i < nr_vertices; i++) { + const float *v = (float *) ((ubyte *) vertices + i * vertex_size); + if (v[0] < xmin) + xmin = v[0]; + if (v[0] > xmax) + xmax = v[0]; + if (v[1] < ymin) + ymin = v[1]; + if (v[1] > ymax) + ymax = v[1]; + } +#if 0 + printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); + fflush(stdout); +#endif + + if (cvbr->prim != PIPE_PRIM_TRIANGLES) + return; /* only render tris for now */ + + /* build/insert batch RENDER command */ + { + const uint index_bytes = ROUNDUP8(nr_indices * 2); + const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; + const uint batch_size = sizeof(struct cell_command_render) + index_bytes; + + struct cell_command_render *render + = (struct cell_command_render *) + cell_batch_alloc(cell, batch_size); + + render->opcode = CELL_CMD_RENDER; + render->prim_type = cvbr->prim; + + render->num_indexes = nr_indices; + render->min_index = min_index; + + /* append indices after render command */ + memcpy(render + 1, indices, nr_indices * 2); + + /* if there's room, append vertices after the indices, else leave + * vertices in the original/separate buffer. + */ + render->vertex_size = 4 * cell->vertex_info.size; + render->num_verts = nr_vertices; + if (ALLOW_INLINE_VERTS && + min_index == 0 && + vertex_bytes + 16 <= cell_batch_free_space(cell)) { + /* vertex data inlined, after indices, at 16-byte boundary */ + void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16); + memcpy(dst, vertices, vertex_bytes); + render->inline_verts = TRUE; + render->vertex_buf = ~0; + } + else { + /* vertex data in separate buffer */ + render->inline_verts = FALSE; + ASSERT(cvbr->vertex_buf >= 0); + render->vertex_buf = cvbr->vertex_buf; + } + + render->xmin = xmin; + render->ymin = ymin; + render->xmax = xmax; + render->ymax = ymax; + } + +#if 0 + /* helpful for debug */ + cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT); +#endif +} + + +static void +cell_vbuf_destroy(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->cell->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +cell_init_vbuf(struct cell_context *cell) +{ + assert(cell->draw); + + cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); + + /* The max number of indexes is what can fix into a batch buffer, + * minus the render and release-verts commands. + */ + cell->vbuf_render->base.max_indices + = (CELL_BUFFER_SIZE + - sizeof(struct cell_command_render) + - sizeof(struct cell_command_release_verts)) + / sizeof(ushort); + cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; + + cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; + cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; + cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; + cell->vbuf_render->base.draw = cell_vbuf_draw; + cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; + cell->vbuf_render->base.destroy = cell_vbuf_destroy; + + cell->vbuf_render->cell = cell; +#if 1 + cell->vbuf_render->vertex_buf = ~0; +#endif + + cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); +} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h new file mode 100644 index 0000000000..d265cbf770 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_VBUF_H +#define CELL_VBUF_H + + +struct cell_context; + +extern void +cell_init_vbuf(struct cell_context *cell); + + +#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c new file mode 100644 index 0000000000..80dd500b34 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -0,0 +1,120 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cell_vertex_shader.c + * Vertex shader interface routines for Cell. + * + * \author Ian Romanick + */ + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_spu.h" +#include "cell_batch.h" + +#include "pipe/cell/common.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. + */ +void +cell_vertex_shader_queue_flush(struct draw_context *draw) +{ + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct cell_command_vs *const vs = &cell_global.command[0].vs; + uint64_t *batch; + struct cell_array_info *array_info; + unsigned i, j; + + assert(draw->vs.queue_nr != 0); + + /* XXX: do this on statechange: + */ + draw_update_vertex_fetch(draw); + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); + + batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; + + array_info = (struct cell_array_info *) &batch[1]; + assert(draw->vertex_fetch.src_ptr[i] != NULL); + array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; + array_info->attr = i; + array_info->pitch = draw->vertex_fetch.pitch[i]; + array_info->format = draw->vertex_element[i].src_format; + } + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + + sizeof(struct pipe_viewport_state)); + batch[0] = CELL_CMD_STATE_VIEWPORT; + (void) memcpy(&batch[1], &draw->viewport, + sizeof(struct pipe_viewport_state)); + + cell_batch_flush(cell); + + vs->opcode = CELL_CMD_VS_EXECUTE; + vs->shader.num_outputs = draw->num_vs_outputs; + vs->shader.declarations = (uintptr_t) draw->machine.Declarations; + vs->shader.num_declarations = draw->machine.NumDeclarations; + vs->shader.instructions = (uintptr_t) draw->machine.Instructions; + vs->shader.num_instructions = draw->machine.NumInstructions; + vs->shader.uniforms = (uintptr_t) draw->user.constants; + vs->shader.immediates = (uintptr_t) draw->machine.Imms; + vs->shader.num_immediates = draw->machine.ImmLimit / 4; + vs->nr_attrs = draw->vertex_fetch.nr_attrs; + + (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); + vs->nr_planes = draw->nr_planes; + + for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { + const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); + + for (j = 0; j < n; j++) { + vs->elts[j] = draw->vs.queue[i + j].elt; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest; + } + + for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { + vs->elts[j] = vs->elts[0]; + vs->vOut[j] = vs->vOut[0]; + } + + vs->num_elts = n; + send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); + + cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); + } + + draw->vs.queue_nr = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c new file mode 100644 index 0000000000..ebabce3c8f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.c @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "cell_winsys.h" + + +struct cell_winsys * +cell_get_winsys(uint format) +{ + struct cell_winsys *cws = CALLOC_STRUCT(cell_winsys); + if (cws) + cws->preferredFormat = format; + return cws; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h new file mode 100644 index 0000000000..ae2af5696b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_WINSYS_H +#define CELL_WINSYS_H + +#include "pipe/p_compiler.h" + + +/** + * Very simple winsys at this time. + * Will probably eventually add SPU control info. + */ +struct cell_winsys +{ + uint preferredFormat; +}; + + +extern struct cell_winsys * +cell_get_winsys(uint format); + + + +#endif diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile new file mode 100644 index 0000000000..f202971d73 --- /dev/null +++ b/src/gallium/drivers/cell/spu/Makefile @@ -0,0 +1,72 @@ +# Gallium3D Cell driver: SPU code + +# This makefile builds the g3d_spu.a file that's linked into the +# PPU code/library. + + +TOP = ../../../../.. +include $(TOP)/configs/linux-cell + + +PROG = g3d + +PROG_SPU = $(PROG)_spu +PROG_SPU_A = $(PROG)_spu.a +PROG_SPU_EMBED_O = $(PROG)_spu-embed.o + + +SOURCES = \ + spu_main.c \ + spu_blend.c \ + spu_render.c \ + spu_texture.c \ + spu_tile.c \ + spu_tri.c \ + spu_exec.c \ + spu_util.c \ + spu_vertex_fetch.c \ + spu_vertex_shader.c + +SPU_OBJECTS = $(SOURCES:.c=.o) \ + +SPU_ASM_OUT = $(SOURCES:.c=.s) \ + +INCLUDE_DIRS = -I$(TOP)/src/mesa + + +.c.o: + $(SPU_CC) $(SPU_CFLAGS) -c $< + +.c.s: + $(SPU_CC) $(SPU_CFLAGS) -S $< + + +# The .a file will be linked into the main/PPU executable +default: $(PROG_SPU_A) + +$(PROG_SPU_A): $(PROG_SPU_EMBED_O) + $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) + +$(PROG_SPU_EMBED_O): $(PROG_SPU) + $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) + +$(PROG_SPU): $(SPU_OBJECTS) + $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) + + + +asmfiles: $(SPU_ASM_OUT) + + +clean: + rm -f *~ *.o *.a *.d *.s $(PROG_SPU) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + diff --git a/src/gallium/drivers/cell/spu/spu_blend.c b/src/gallium/drivers/cell/spu/spu_blend.c new file mode 100644 index 0000000000..23ec0eeb45 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_blend.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "spu_main.h" +#include "spu_blend.h" +#include "spu_colorpack.h" + + +void +blend_quad(uint itx, uint ity, vector float colors[4]) +{ + /* simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending */ + vector float fbc00 = spu_unpack_color(spu.ctile.ui[ity][itx]); + vector float fbc01 = spu_unpack_color(spu.ctile.ui[ity][itx+1]); + vector float fbc10 = spu_unpack_color(spu.ctile.ui[ity+1][itx]); + vector float fbc11 = spu_unpack_color(spu.ctile.ui[ity+1][itx+1]); + + vector float alpha00 = spu_splats(spu_extract(colors[0], 3)); + vector float alpha01 = spu_splats(spu_extract(colors[1], 3)); + vector float alpha10 = spu_splats(spu_extract(colors[2], 3)); + vector float alpha11 = spu_splats(spu_extract(colors[3], 3)); + + vector float one_minus_alpha00 = spu_sub(spu_splats(1.0f), alpha00); + vector float one_minus_alpha01 = spu_sub(spu_splats(1.0f), alpha01); + vector float one_minus_alpha10 = spu_sub(spu_splats(1.0f), alpha10); + vector float one_minus_alpha11 = spu_sub(spu_splats(1.0f), alpha11); + + colors[0] = spu_add(spu_mul(colors[0], alpha00), + spu_mul(fbc00, one_minus_alpha00)); + colors[1] = spu_add(spu_mul(colors[1], alpha01), + spu_mul(fbc01, one_minus_alpha01)); + colors[2] = spu_add(spu_mul(colors[2], alpha10), + spu_mul(fbc10, one_minus_alpha10)); + colors[3] = spu_add(spu_mul(colors[3], alpha11), + spu_mul(fbc11, one_minus_alpha11)); +} + diff --git a/src/gallium/drivers/cell/spu/spu_blend.h b/src/gallium/drivers/cell/spu/spu_blend.h new file mode 100644 index 0000000000..2b594b578b --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_blend.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_BLEND_H +#define SPU_BLEND_H + + +extern void +blend_quad(uint itx, uint ity, vector float colors[4]); + + +#endif /* SPU_BLEND_H */ diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h new file mode 100644 index 0000000000..e9fee8a3a6 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#ifndef SPU_COLORPACK_H +#define SPU_COLORPACK_H + + +#include + + +static INLINE unsigned int +spu_pack_R8G8B8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + + out = spu_shuffle(out, out, ((vector unsigned char) { + 0, 4, 8, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }) ); + + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_A8R8G8B8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_B8G8R8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, shuffle); + return spu_extract(out, 0); +} + + +static INLINE vector float +spu_unpack_color(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 0, 0, 0, 0, + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15}) ); + return spu_convtf(color_u4, 32); +} + + +static INLINE vector float +spu_unpack_A8R8G8B8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15, + 0, 0, 0, 0}) ); + + return spu_convtf(color_u4, 32); +} + + +#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c new file mode 100644 index 0000000000..e51008b9b3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -0,0 +1,1948 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). + * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +#include "pipe/tgsi/util/tgsi_util.h" +#include "spu_exec.h" +#include "spu_main.h" +#include "spu_vertex_shader.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call spu_exec_machine_run() many times. + */ +void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor) +{ + qword zero; + qword not_zero; + uint i; + + mach->Samplers = samplers; + mach->Processor = processor; + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + + zero = si_xor(zero, zero); + not_zero = si_xori(zero, 0xff); + + /* Setup constants. */ + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); + + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); +} + + +static INLINE qword +micro_abs(qword src) +{ + return si_rotmi(si_shli(src, 1), -1); +} + +static INLINE qword +micro_ceil(qword src) +{ + return (qword) _ceilf4((vec_float4) src); +} + +static INLINE qword +micro_cos(qword src) +{ + return (qword) _cosf4((vec_float4) src); +} + +static const qword br_shuf = { + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +}; + +static const qword bl_shuf = { + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +}; + +static const qword tl_shuf = { + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +}; + +static qword +micro_ddx(qword src) +{ + qword bottom_right = si_shufb(src, src, br_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(bottom_right, bottom_left); +} + +static qword +micro_ddy(qword src) +{ + qword top_left = si_shufb(src, src, tl_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(top_left, bottom_left); +} + +static INLINE qword +micro_div(qword src0, qword src1) +{ + return (qword) _divf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_flr(qword src) +{ + return (qword) _floorf4((vec_float4) src); +} + +static qword +micro_frc(qword src) +{ + return si_fs(src, (qword) _floorf4((vec_float4) src)); +} + +static INLINE qword +micro_ge(qword src0, qword src1) +{ + return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); +} + +static qword +micro_lg2(qword src) +{ + return (qword) _log2f4((vec_float4) src); +} + +static INLINE qword +micro_lt(qword src0, qword src1) +{ + const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); + + return si_xori(tmp, 0xff); +} + +static INLINE qword +micro_max(qword src0, qword src1) +{ + return si_selb(src1, src0, si_fcgt(src0, src1)); +} + +static INLINE qword +micro_min(qword src0, qword src1) +{ + return si_selb(src0, src1, si_fcgt(src0, src1)); +} + +static qword +micro_neg(qword src) +{ + return si_xor(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_set_sign(qword src) +{ + return si_or(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_pow(qword src0, qword src1) +{ + return (qword) _powf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_rnd(qword src) +{ + const qword half = (qword) spu_splats(0.5f); + + /* May be able to use _roundf4. There may be some difference, though. + */ + return (qword) _floorf4((vec_float4) si_fa(src, half)); +} + +static INLINE qword +micro_ishr(qword src0, qword src1) +{ + return si_rotma(src0, si_sfi(src1, 0)); +} + +static qword +micro_trunc(qword src) +{ + return (qword) _truncf4((vec_float4) src); +} + +static qword +micro_sin(qword src) +{ + return (qword) _sinf4((vec_float4) src); +} + +static INLINE qword +micro_sqrt(qword src) +{ + return (qword) _sqrtf4((vec_float4) src); +} + +static void +fetch_src_file_channel( + const struct spu_exec_machine *mach, + const uint file, + const uint swizzle, + const union spu_exec_channel *index, + union spu_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: { + unsigned char buffer[32] ALIGN16_ATTRIB; + unsigned i; + + for (i = 0; i < 4; i++) { + const float *ptr = mach->Consts[index->i[i]]; + const uint64_t addr = (uint64_t)(uintptr_t) ptr; + const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; + + mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) + + (sizeof(float) * swizzle)], sizeof(float)); + } + break; + } + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + assert( index->i[1] < (int) mach->ImmLimit ); + assert( index->i[2] < (int) mach->ImmLimit ); + assert( index->i[3] < (int) mach->ImmLimit ); + + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct spu_exec_machine *mach, + union spu_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union spu_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + CHAN_X); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.q = si_mpyi(index.q, 17); + break; + case TGSI_FILE_CONSTANT: + index.q = si_shli(index.q, 12); + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + chan->q = micro_abs(chan->q); + break; + + case TGSI_UTIL_SIGN_SET: + chan->q = micro_set_sign(chan->q); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + chan->q = micro_neg(chan->q); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); + } +} + +static void +store_dest( + struct spu_exec_machine *mach, + const union spu_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union spu_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kilp(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union spu_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct spu_sampler *sampler, + const union spu_exec_channel *s, + const union spu_exec_channel *t, + const union spu_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union spu_exec_channel *r, + union spu_exec_channel *g, + union spu_exec_channel *b, + union spu_exec_channel *a ) +{ + qword rgba[4]; + qword out[4]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + + _transpose_matrix4x4(out, rgba); + r->q = out[0]; + g->q = out[1]; + b->q = out[2]; + a->q = out[3]; +} + + +static void +exec_tex(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union spu_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[1], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[1].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + + +static void +constant_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +static void +linear_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +static void +perspective_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* interpolation_func)( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration(struct spu_exec_machine *mach, + const struct tgsi_full_declaration *decl) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + interpolation_func interp; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Interpolation.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + interp = constant_interpolation; + break; + + case TGSI_INTERPOLATE_LINEAR: + interp = linear_interpolation; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = perspective_interpolation; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + interp( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + interp( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union spu_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_cflts(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + + FETCH( &r[2], 0, CHAN_W ); + r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); + r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); + r[1].q = micro_pow(r[1].q, r[2].q); + + /* r0 = (r0 > 0.0) ? r1 : 0.0 + */ + r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, + r[0].q); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sqrt(r[0].q); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + assert (0); + break; + + case TGSI_OPCODE_LOG: + assert (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fm(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fa(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + r[0].q = si_fm(r[0].q, r[1].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_min(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_max(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = micro_ge(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ge(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + FETCH( &r[2], 2, chan_index ); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fs(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + r[1].q = si_fs(r[1].q, r[2].q); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_frc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_flr(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_rnd(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_lg2(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = micro_pow(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + /* r2 = (r0 * r1) - (r3 * r5) + */ + r[2].q = si_fm(r[3].q, r[5].q); + r[2].q = si_fms(r[0].q, r[1].q, r[2].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + FETCH(&r[5], 0, CHAN_X); + + /* r3 = (r3 * r2) - (r1 * r5) + */ + r[1].q = si_fm(r[1].q, r[5].q); + r[3].q = si_fms(r[3].q, r[2].q, r[1].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + /* r5 = (r5 * r4) - (r0 * r2) + */ + r[0].q = si_fm(r[0].q, r[2].q); + r[5].q = si_fms(r[5].q, r[4].q, r[0].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + r[0].q = micro_abs(r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 1, CHAN_W); + + r[0].q = si_fa(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_cos(r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddx(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddy(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + /* for enabled ExecMask bits, set the killed bit */ + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fcgt(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sin(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fcgt(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + /* r0 = (r0 < 0.0) ? r1 : r2 + */ + r[3].q = si_xor(r[3].q, r[3].q); + r[0].q = micro_lt(r[0].q, r[3].q); + r[0].q = si_selb(r[1].q, r[2].q, r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + r[1].q = micro_cos(r[0].q); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + r[1].q = micro_sin(r[0].q); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ceil(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_csflt(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_xorbi(r[0].q, 0xff); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_trunc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_shl(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ishr(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_and(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_or(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_xor(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + if (mach->LoopMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +spu_exec_machine_run( struct spu_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + for (i = 0; i < mach->NumDeclarations; i++) { + uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; + struct tgsi_full_declaration decl; + unsigned long decl_addr = (unsigned long) (mach->Declarations+i); + unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); + + mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); + exec_declaration( mach, &decl ); + } + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; + struct tgsi_full_instruction inst; + unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); + unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); + + assert(pc < mach->NumInstructions); + mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); + exec_instruction( mach, & inst, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h new file mode 100644 index 0000000000..b4c7661ef6 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined SPU_EXEC_H +#define SPU_EXEC_H + +#include "pipe/p_compiler.h" +#include "pipe/tgsi/exec/tgsi_exec.h" + +#if defined __cplusplus +extern "C" { +#endif + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union spu_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; + qword q; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct spu_exec_vector +{ + union spu_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct spu_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct spu_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct spu_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct spu_exec_machine +{ + /* + * 32 program temporaries + * 4 internal temporaries + * 1 address + */ + struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_ADDRS + 1] + ALIGN16_ATTRIB; + + struct spu_exec_vector *Addrs; + + struct spu_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct spu_exec_vector *Inputs; + struct spu_exec_vector *Outputs; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct spu_interp_coef *InterpCoefs; + struct spu_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; +}; + + +extern void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor); + +extern uint +spu_exec_machine_run( struct spu_exec_machine *mach ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* SPU_EXEC_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c new file mode 100644 index 0000000000..e375197fe6 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -0,0 +1,567 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* main() for Cell SPU code */ + + +#include +#include + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_texture.h" +#include "spu_tile.h" +//#include "spu_test.h" +#include "spu_vertex_shader.h" +#include "pipe/cell/common.h" +#include "pipe/p_defines.h" + + +/* +helpful headers: +/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h +/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h +*/ + +boolean Debug = FALSE; + +struct spu_global spu; + +struct spu_vs_context draw; + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +/** + * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled + * tiles back to the main framebuffer. + */ +static void +really_clear_tiles(uint surfaceIndex) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (surfaceIndex == 0) { + clear_c_tile(&spu.ctile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + } + } + } + else { + clear_z_tile(&spu.ztile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); + } + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (Debug) + printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, + clear->surface, clear->value); + +#define CLEAR_OPT 1 +#if CLEAR_OPT + /* set all tile's status to CLEAR */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + spu.fb.color_clear_value = clear->value; + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + spu.fb.depth_clear_value = clear->value; + } + return; +#endif + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + clear_c_tile(&spu.ctile); + } + else { + spu.fb.depth_clear_value = clear->value; + clear_z_tile(&spu.ztile); + } + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + /* XXX we don't want this here, but it fixes bad tile results */ + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif + + if (Debug) + printf("SPU %u: CLEAR SURF done\n", spu.init.id); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + if (Debug) + printf("SPU %u: RELEASE VERTS %u\n", + spu.init.id, release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + if (Debug) + printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + spu.init.id, + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + if (spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM) + spu.fb.zsize = 4; + else if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) + spu.fb.zsize = 2; + else + spu.fb.zsize = 0; + + if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else + ASSERT(0); +} + + +static void +cmd_state_blend(const struct pipe_blend_state *state) +{ + if (Debug) + printf("SPU %u: BLEND: enabled %d\n", + spu.init.id, + state->blend_enable); + + memcpy(&spu.blend, state, sizeof(*state)); +} + + +static void +cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state) +{ + if (Debug) + printf("SPU %u: DEPTH_STENCIL: ztest %d\n", + spu.init.id, + state->depth.enabled); + + memcpy(&spu.depth_stencil, state, sizeof(*state)); +} + + +static void +cmd_state_sampler(const struct pipe_sampler_state *state) +{ + if (Debug) + printf("SPU %u: SAMPLER\n", + spu.init.id); + + memcpy(&spu.sampler[0], state, sizeof(*state)); + if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture = sample_texture_bilinear; + else + spu.sample_texture = sample_texture_nearest; +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + if (Debug) + printf("SPU %u: TEXTURE at %p size %u x %u\n", + spu.init.id, texture->start, texture->width, texture->height); + + memcpy(&spu.texture, texture, sizeof(*texture)); + spu.tex_size = (vector float) + { spu.texture.width, spu.texture.height, 0.0, 0.0}; + spu.tex_size_mask = (vector unsigned int) + { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + if (Debug) { + printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, + vinfo->num_attribs); + } + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_ATTRIB_MAX); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_finish(void) +{ + if (Debug) + printf("SPU %u: FINISH\n", spu.init.id); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands + * The opcode param encodes the location of the buffer and its size. + */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); + uint pos; + + if (Debug) + printf("SPU %u: BATCH buffer %u, len %u, from %p\n", + spu.init.id, buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + if (Debug) + printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + release_buffer(buf); + + for (pos = 0; pos < usize; /* no incr */) { + switch (buffer[pos]) { + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; + } + break; + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 8; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += pos_incr; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } + break; + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_STATE_BLEND: + cmd_state_blend((struct pipe_blend_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); + break; + case CELL_CMD_STATE_DEPTH_STENCIL: + cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); + break; + case CELL_CMD_STATE_SAMPLER: + cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); + break; + case CELL_CMD_STATE_TEXTURE: + cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); + break; + default: + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); + ASSERT(0); + break; + } + } + + if (Debug) + printf("SPU %u: BATCH complete\n", spu.init.id); +} + + +/** + * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. + */ +static void +main_loop(void) +{ + struct cell_command cmd; + int exitFlag = 0; + + if (Debug) + printf("SPU %u: Enter main loop\n", spu.init.id); + + ASSERT((sizeof(struct cell_command) & 0xf) == 0); + ASSERT_ALIGN16(&cmd); + + while (!exitFlag) { + unsigned opcode; + int tag = 0; + + if (Debug) + printf("SPU %u: Wait for cmd...\n", spu.init.id); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + + if (Debug) + printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); + + /* command payload */ + mfc_get(&cmd, /* dest */ + (unsigned int) spu.init.cmd, /* src */ + sizeof(struct cell_command), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + + /* + * NOTE: most commands should be contained in a batch buffer + */ + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + if (Debug) + printf("SPU %u: EXIT\n", spu.init.id); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: + spu_execute_vertex_shader(&draw, &cmd.vs); + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode!\n"); + } + + } + + if (Debug) + printf("SPU %u: Exit main loop\n", spu.init.id); +} + + + +static void +one_time_init(void) +{ + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); + invalidate_tex_cache(); +} + + + +/* In some versions of the SDK the SPE main takes 'unsigned long' as a + * parameter. In others it takes 'unsigned long long'. Use a define to + * select between the two. + */ +#ifdef SPU_MAIN_PARAM_LONG_LONG +typedef unsigned long long main_param_t; +#else +typedef unsigned long main_param_t; +#endif + +/** + * SPE entrypoint. + */ +int +main(main_param_t speid, main_param_t argp) +{ + int tag = 0; + + (void) speid; + + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); + + one_time_init(); + + if (Debug) + printf("SPU: main() speid=%lu\n", speid); + + mfc_get(&spu.init, /* dest */ + (unsigned int) argp, /* src */ + sizeof(struct cell_init_info), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + +#if 0 + if (spu.init.id==0) + spu_test_misc(); +#endif + + main_loop(); + + return 0; +} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h new file mode 100644 index 0000000000..1710a17512 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -0,0 +1,177 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_MAIN_H +#define SPU_MAIN_H + + +#include + +#include "pipe/cell/common.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/p_state.h" + + + +#define MAX_WIDTH 1024 +#define MAX_HEIGHT 1024 + + +typedef union { + ushort us[TILE_SIZE][TILE_SIZE]; + uint ui[TILE_SIZE][TILE_SIZE]; + vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; + vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ + + +struct spu_framebuffer { + void *color_start; /**< addr of color surface in main memory */ + void *depth_start; /**< addr of depth surface in main memory */ + enum pipe_format color_format; + enum pipe_format depth_format; + uint width, height; /**< size in pixels */ + uint width_tiles, height_tiles; /**< width and height in tiles */ + + uint color_clear_value; + uint depth_clear_value; + + uint zsize; /**< 0, 2 or 4 bytes per Z */ +} ALIGN16_ATTRIB; + + +/** + * All SPU global/context state will be in singleton object of this type: + */ +struct spu_global +{ + struct cell_init_info init; + + struct spu_framebuffer fb; + struct pipe_blend_state blend_stencil; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_blend_state blend; + struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct cell_command_texture texture; + + struct vertex_info vertex_info; + + /* XXX more state to come */ + + + /** current color and Z tiles */ + tile_t ctile ALIGN16_ATTRIB; + tile_t ztile ALIGN16_ATTRIB; + + /** Current tiles' status */ + ubyte cur_ctile_status, cur_ztile_status; + + /** Status of all tiles in framebuffer */ + ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + + + /** for converting RGBA to PIPE_FORMAT_x colors */ + vector unsigned char color_shuffle; + + vector float tex_size; + vector unsigned int tex_size_mask; /**< == int(size - 1) */ + + vector float (*sample_texture)(vector float texcoord); + +} ALIGN16_ATTRIB; + + +extern struct spu_global spu; +extern boolean Debug; + + + + +/* DMA TAGS */ + +#define TAG_SURFACE_CLEAR 10 +#define TAG_VERTEX_BUFFER 11 +#define TAG_READ_TILE_COLOR 12 +#define TAG_READ_TILE_Z 13 +#define TAG_WRITE_TILE_COLOR 14 +#define TAG_WRITE_TILE_Z 15 +#define TAG_INDEX_BUFFER 16 +#define TAG_BATCH_BUFFER 17 +#define TAG_MISC 18 +#define TAG_TEXTURE_TILE 19 +#define TAG_INSTRUCTION_FETCH 20 + + + +static INLINE void +wait_on_mask(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_any(); +} + + +static INLINE void +wait_on_mask_all(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_all(); +} + + + + + +static INLINE void +memset16(ushort *d, ushort value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +static INLINE void +memset32(uint *d, uint value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +#endif /* SPU_MAIN_H */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c new file mode 100644 index 0000000000..932fb500b3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -0,0 +1,301 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include +#include +#include + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "pipe/cell/common.h" + + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. %u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; + } + } + + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_GETTING; + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); + } + + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. + */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.depth_stencil.depth.enabled) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + + + if (Debug) { + printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " + "inline_vert=%u\n", + spu.init.id, + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + /* + printf(" bound: %g, %g .. %g, %g\n", + render->xmin, render->ymin, render->xmax, render->ymax); + */ + } + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + + if (render->inline_verts) { + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; + + get_cz_tiles(tx, ty); + + uint drawn = 0; + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + drawn += tri_draw(v0, v1, v2, tx, ty); + } + + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + + /* write color/z tiles back to main framebuffer, if dirtied */ + put_cz_tiles(tx, ty); + + wait_put_cz_tiles(); /* XXX seems unnecessary... */ + + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; + } + + if (Debug) + printf("SPU %u: RENDER done\n", + spu.init.id); +} + + diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h new file mode 100644 index 0000000000..fbcdc5ec31 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_RENDER_H +#define SPU_RENDER_H + +#include "pipe/cell/common.h" + +extern void +cmd_render(const struct cell_command_render *render, uint *pos_incr); + +#endif /* SPU_RENDER_H */ + diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c new file mode 100644 index 0000000000..3962aaa4a9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_colorpack.h" + + +/** + * Number of texture tiles to cache. + * Note that this will probably be the largest consumer of SPU local store/ + * memory for this driver! + */ +#define CACHE_SIZE 16 + +static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; + +static vector unsigned int tex_tile_xy[CACHE_SIZE]; + + + +/** + * Mark all tex cache entries as invalid. + */ +void +invalidate_tex_cache(void) +{ + /* XXX memset? */ + uint i; + for (i = 0; i < CACHE_SIZE; i++) { + tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); + } +} + + +/** + * Return the cache pos/index which corresponds to tile (tx,ty) + */ +static INLINE uint +cache_pos(vector unsigned int txty) +{ + uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; + return pos; +} + + +/** + * Make sure the tile for texel (i,j) is present, return its position/index + * in the cache. + */ +static uint +get_tex_tile(vector unsigned int ij) +{ + /* tile address: tx,ty */ + const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ + const uint pos = cache_pos(txty); + + if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || + (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { + + /* texture cache miss, fetch tile from main memory */ + const uint tiles_per_row = spu.texture.width / TILE_SIZE; + const uint bytes_per_tile = sizeof(tile_t); + const void *src = (const ubyte *) spu.texture.start + + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; + + printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", + spu.init.id, + spu_extract(txty,0), + spu_extract(txty,1), + pos, + spu_extract(tex_tile_xy[pos],0), + spu_extract(tex_tile_xy[pos],1)); + + ASSERT_ALIGN16(tex_tiles[pos].ui); + ASSERT_ALIGN16(src); + + mfc_get(tex_tiles[pos].ui, /* dest */ + (unsigned int) src, + bytes_per_tile, /* size */ + TAG_TEXTURE_TILE, + 0, /* tid */ + 0 /* rid */); + + wait_on_mask(1 << TAG_TEXTURE_TILE); + + tex_tile_xy[pos] = txty; + } + else { +#if 0 + printf("SPU %u: tex cache HIT at %d, %d\n", + spu.init.id, tx, ty); +#endif + } + + return pos; +} + + +/** + * Get texture sample at texcoord. + * XXX this is extremely primitive for now. + */ +vector float +sample_texture_nearest(vector float texcoord) +{ + vector float tc = spu_mul(texcoord, spu.tex_size); + vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ + itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ + vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ + uint pos = get_tex_tile(itc); + uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; + return spu_unpack_A8R8G8B8(texel); +} + + +vector float +sample_texture_bilinear(vector float texcoord) +{ + static const vector unsigned int offset10 = {1, 0, 0, 0}; + static const vector unsigned int offset01 = {0, 1, 0, 0}; + + vector float tc = spu_mul(texcoord, spu.tex_size); + tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ + + /* integer texcoords S,T: */ + vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ + vector unsigned int itc01 = spu_add(itc00, offset01); + vector unsigned int itc10 = spu_add(itc00, offset10); + vector unsigned int itc11 = spu_add(itc10, offset01); + + /* mask (GL_REPEAT) */ + itc00 = spu_and(itc00, spu.tex_size_mask); + itc01 = spu_and(itc01, spu.tex_size_mask); + itc10 = spu_and(itc10, spu.tex_size_mask); + itc11 = spu_and(itc11, spu.tex_size_mask); + + /* intra tile addr */ + vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); + vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); + vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); + vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); + + /* get tile cache positions */ + uint pos00 = get_tex_tile(itc00); + uint pos01, pos10, pos11; + if ((spu_extract(ij00, 0) < TILE_SIZE-1) && + (spu_extract(ij00, 1) < TILE_SIZE-1)) { + /* all texels are in the same tile */ + pos01 = pos10 = pos11 = pos00; + } + else { + pos01 = get_tex_tile(itc01); + pos10 = get_tex_tile(itc10); + pos11 = get_tex_tile(itc11); + } + + /* get texels from tiles and convert to float[4] */ + vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); + vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); + vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); + vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. + */ + vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); + vector signed int itc1024 = spu_convts(tc1024, 0); + itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); + vector float weight = spu_convtf(itc1024, 10); + + /* smeared frac and 1-frac */ + vector float sfrac = spu_splats(spu_extract(weight, 0)); + vector float tfrac = spu_splats(spu_extract(weight, 1)); + vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); + vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); + + /* multiply the samples (colors) by the S/T weights */ + texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); + texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); + texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); + texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); + + /* compute sum of weighted samples */ + vector float texel_sum = spu_add(texel00, texel01); + texel_sum = spu_add(texel_sum, texel10); + texel_sum = spu_add(texel_sum, texel11); + + return texel_sum; +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h new file mode 100644 index 0000000000..95eb87080f --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_TEXTURE_H +#define SPU_TEXTURE_H + + +#include "pipe/p_compiler.h" + + +extern void +invalidate_tex_cache(void); + + +extern vector float +sample_texture_nearest(vector float texcoord); + + +extern vector float +sample_texture_bilinear(vector float texcoord); + + +#endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c new file mode 100644 index 0000000000..12dc246328 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#include "spu_tile.h" +#include "spu_main.h" + + +void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) +{ + const uint offset = ty * spu.fb.width_tiles + tx; + const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); + const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start; + + src += offset * bytesPerTile; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + ASSERT_ALIGN16(tile); + /* + printf("get_tile: dest: %p src: 0x%x size: %d\n", + tile, (unsigned int) src, bytesPerTile); + */ + mfc_get(tile->ui, /* dest in local memory */ + (unsigned int) src, /* src in main memory */ + bytesPerTile, + tag, + 0, /* tid */ + 0 /* rid */); +} + + +void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) +{ + const uint offset = ty * spu.fb.width_tiles + tx; + const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); + ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start; + + dst += offset * bytesPerTile; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + ASSERT_ALIGN16(tile); + /* + printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n", + spu.init.id, + tile, (unsigned int) dst, bytesPerTile); + */ + mfc_put((void *) tile->ui, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + bytesPerTile, + tag, + 0, /* tid */ + 0 /* rid */); +} + diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h new file mode 100644 index 0000000000..e53340a55a --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_TILE_H +#define SPU_TILE_H + + +#include +#include +#include "spu_main.h" +#include "pipe/cell/common.h" + + + +void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); + +void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); + + + +static INLINE void +clear_c_tile(tile_t *ctile) +{ + memset32((uint*) ctile->ui, + spu.fb.color_clear_value, + TILE_SIZE * TILE_SIZE); +} + + +static INLINE void +clear_z_tile(tile_t *ztile) +{ + if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + memset16((ushort*) ztile->us, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } + else { + ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); + memset32((uint*) ztile->ui, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } +} + + +#endif /* SPU_TILE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c new file mode 100644 index 0000000000..be9624cf7d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -0,0 +1,926 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Triangle rendering within a tile. + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_util.h" +#include "spu_blend.h" +#include "spu_colorpack.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_tri.h" + +#include "spu_ztest.h" + + +/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ +typedef vector unsigned int mask_t; + +typedef union +{ + vector float v; + float f[4]; +} float4; + + +/** + * Simplified types taken from other parts of Gallium + */ +struct vertex_header { + vector float data[1]; +}; + + + +/* XXX fix this */ +#undef CEILF +#define CEILF(X) ((float) (int) ((X) + 0.99999)) + + +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +#define DEBUG_VERTS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +struct interp_coef +{ + float4 a0; + float4 dadx; + float4 dady; +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const struct vertex_header *vmax; + const struct vertex_header *vmid; + const struct vertex_header *vmin; + const struct vertex_header *vprovoke; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + uint tx, ty; + + int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; + +#if 0 + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; +#else + struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; +#endif + +#if 0 + struct quad_header quad; +#endif + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; +}; + + + +static struct setup_stage setup; + + + + +#if 0 +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} +#endif + +#if 0 +/** + * Clip setup.quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_stage *setup) +{ + const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup.quad.x0 >= maxx || + setup.quad.y0 >= maxy || + setup.quad.x0 + 1 < minx || + setup.quad.y0 + 1 < miny) { + /* totally clipped */ + setup.quad.mask = 0x0; + return; + } + if (setup.quad.x0 < minx) + setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup.quad.y0 < miny) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup.quad.x0 == maxx - 1) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup.quad.y0 == maxy - 1) + setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} +#endif + +#if 0 +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_stage *setup) +{ + quad_clip(setup); + if (setup.quad.mask) { + struct softpipe_context *sp = setup.softpipe; + sp->quad.first->run(sp->quad.first, &setup.quad); + } +} +#endif + +/** + * Evaluate attribute coefficients (plane equations) to compute + * attribute values for the four fragments in a quad. + * Eg: four colors will be compute. + */ +static INLINE void +eval_coeff(uint slot, float x, float y, vector float result[4]) +{ + switch (spu.vertex_info.interp_mode[slot]) { + case INTERP_CONSTANT: + result[QUAD_TOP_LEFT] = + result[QUAD_TOP_RIGHT] = + result[QUAD_BOTTOM_LEFT] = + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; + break; + + case INTERP_LINEAR: + /* fall-through, for now */ + default: + { + register vector float dadx = setup.coef[slot].dadx.v; + register vector float dady = setup.coef[slot].dady.v; + register vector float topLeft + = spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + result[QUAD_TOP_LEFT] = topLeft; + result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); + result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); + result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); + } + } +} + + +static INLINE vector float +eval_z(float x, float y) +{ + const uint slot = 0; + const float dzdx = setup.coef[slot].dadx.f[2]; + const float dzdy = setup.coef[slot].dady.f[2]; + const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; + return spu_add(topLeftv, derivs); +} + + +static INLINE mask_t +do_depth_test(int x, int y, mask_t quadmask) +{ + float4 zvals; + mask_t mask; + + zvals.v = eval_z((float) x, (float) y); + + if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + int ix = (x - setup.cliprect_minx) / 4; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); + } + else { + int ix = (x - setup.cliprect_minx) / 2; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); + } + + if (spu_extract(spu_orx(mask), 0)) + spu.cur_ztile_status = TILE_STATUS_DIRTY; + + return mask; +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + * Note: about 1/5 to 1/7 of the time, mask is zero and this function + * should be skipped. But adding the test for that slows things down + * overall. + */ +static INLINE void +emit_quad( int x, int y, mask_t mask ) +{ +#if 0 + struct softpipe_context *sp = setup.softpipe; + setup.quad.x0 = x; + setup.quad.y0 = y; + setup.quad.mask = mask; + sp->quad.first->run(sp->quad.first, &setup.quad); +#else + + if (spu.depth_stencil.depth.enabled) { + mask = do_depth_test(x, y, mask); + } + + /* If any bits in mask are set... */ + if (spu_extract(spu_orx(mask), 0)) { + const int ix = x - setup.cliprect_minx; + const int iy = y - setup.cliprect_miny; + const vector unsigned char shuffle = spu.color_shuffle; + vector float colors[4]; + + spu.cur_ctile_status = TILE_STATUS_DIRTY; + + if (spu.texture.start) { + /* texture mapping */ + vector float texcoords[4]; + eval_coeff(2, (float) x, (float) y, texcoords); + + if (spu_extract(mask, 0)) + colors[0] = spu.sample_texture(texcoords[0]); + if (spu_extract(mask, 1)) + colors[1] = spu.sample_texture(texcoords[1]); + if (spu_extract(mask, 2)) + colors[2] = spu.sample_texture(texcoords[2]); + if (spu_extract(mask, 3)) + colors[3] = spu.sample_texture(texcoords[3]); + } + else { + /* simple shading */ + eval_coeff(1, (float) x, (float) y, colors); + } + +#if 1 + if (spu.blend.blend_enable) + blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); +#endif + + if (spu_extract(mask, 0)) + spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); + if (spu_extract(mask, 1)) + spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); + if (spu_extract(mask, 2)) + spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); + if (spu_extract(mask, 3)) + spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); + +#if 0 + /* SIMD_Z with swizzled color buffer (someday) */ + vector unsigned int uicolors = *((vector unsigned int *) &colors); + spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask); +#endif + } + +#endif +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * The mask is a uint4 vector and each element will be 0 or 0xffffffff. + */ +static INLINE mask_t calculate_mask( int x ) +{ + /* This is a little tricky. + * Use & instead of && to avoid branches. + * Use negation to convert true/false to ~0/0 values. + */ + mask_t mask; + mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); + mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); + mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); + mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( void ) +{ + int minleft, maxright; + int x; + + switch (setup.span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup.span.left[0], setup.span.left[1]); + maxright = MAX2(setup.span.right[0], setup.span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup.span.left[0]; + maxright = setup.span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup.span.left[1]; + maxright = setup.span.right[1]; + break; + + default: + return; + } + + + /* OK, we're very likely to need the tile data now. + * clear or finish waiting if needed. + */ + if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ctile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_COLOR); + spu.cur_ctile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_c_tile(&spu.ctile); + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); + + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ztile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_Z); + spu.cur_ztile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_z_tile(&spu.ztile); + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { +#if 1 + emit_quad( x, setup.span.y, calculate_mask( x ) ); +#endif + } + + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct vertex_header *v) +{ + int i; + fprintf(stderr, "Vertex: (%p)\n", v); + for (i = 0; i < setup.quad.nr_attrs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + } +} +#endif + + +static boolean setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) +{ + +#if DEBUG_VERTS + fprintf(stderr, "Triangle:\n"); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); +#endif + + setup.vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = spu_extract(v0->data[0], 1); + float y1 = spu_extract(v1->data[0], 1); + float y2 = spu_extract(v2->data[0], 1); + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup.vmin = v0; + setup.vmid = v1; + setup.vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup.vmin = v2; + setup.vmid = v0; + setup.vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup.vmin = v0; + setup.vmid = v2; + setup.vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup.vmin = v1; + setup.vmid = v0; + setup.vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup.vmin = v2; + setup.vmid = v1; + setup.vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup.vmin = v1; + setup.vmid = v2; + setup.vmax = v0; + } + } + } + + /* Check if triangle is completely outside the tile bounds */ + if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) + return FALSE; + if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) + return FALSE; + + setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); + setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup.emaj.dx * setup.ebot.dy - + setup.ebot.dx * setup.emaj.dy); + + setup.oneoverarea = 1.0f / area; + /* + _mesa_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup.oneoverarea, area, prim->det ); + */ + } + +#if 0 + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); +#endif + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex->data[slot]. + * The result will be put into setup.coef[slot].a0. + * \param slot which attribute slot + */ +static INLINE void +const_coeff(uint slot) +{ + setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0.v = setup.vprovoke->data[slot]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static INLINE void +tri_linear_coeff(uint slot, uint firstComp, uint lastComp) +{ + uint i; + const float *vmin_d = (float *) &setup.vmin->data[slot]; + const float *vmid_d = (float *) &setup.vmid->data[slot]; + const float *vmax_d = (float *) &setup.vmax->data[slot]; + const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; + const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + + for (i = firstComp; i < lastComp; i++) { + float botda = vmid_d[i] - vmin_d[i]; + float majda = vmax_d[i] - vmin_d[i]; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; + + ASSERT(slot < PIPE_MAX_SHADER_INPUTS); + + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + setup.coef[slot].a0.f[i] = (vmin_d[i] - + (setup.coef[slot].dadx.f[i] * x + + setup.coef[slot].dady.f[i] * y)); + } + + /* + _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup.coef[slot].a0[i], + setup.coef[slot].dadx.f[i], + setup.coef[slot].dady.f[i]); + */ +} + + +/** + * As above, but interp setup all four vector components. + */ +static INLINE void +tri_linear_coeff4(uint slot) +{ + const vector float vmin_d = setup.vmin->data[slot]; + const vector float vmid_d = setup.vmid->data[slot]; + const vector float vmax_d = setup.vmax->data[slot]; + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + + setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + + +#if 0 +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( unsigned slot, + unsigned i ) +{ + /* premultiply by 1/w: + */ + float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; + float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; + float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; + + float botda = mida - mina; + float majda = maxa - mina; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; + + /* + printf("tri persp %d,%d: %f %f %f\n", slot, i, + setup.vmin->data[slot][i], + setup.vmid->data[slot][i], + setup.vmax->data[slot][i] + ); + */ + + assert(slot < PIPE_MAX_SHADER_INPUTS); + assert(i <= 3); + + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + setup.coef[slot].a0.f[i] = (mina - + (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + + setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); +} +#endif + + +/** + * Compute the setup.coef[] array dadx, dady, a0 values. + * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients(void) +{ +#if 1 + uint i; + + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + switch (spu.vertex_info.interp_mode[i]) { + case INTERP_NONE: + break; + case INTERP_POS: + /*tri_linear_coeff(i, 2, 3);*/ + /* XXX interp W if PERSPECTIVE... */ + tri_linear_coeff4(i); + break; + case INTERP_CONSTANT: + const_coeff(i); + break; + case INTERP_LINEAR: + tri_linear_coeff4(i); + break; + case INTERP_PERSPECTIVE: + tri_linear_coeff4(i); /* temporary */ + break; + default: + ASSERT(0); + } + } +#else + ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); + ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || + spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); + tri_linear_coeff(0, 2, 3); /* slot 0, z */ + tri_linear_coeff(1, 0, 4); /* slot 1, color */ +#endif +} + + +static void setup_tri_edges(void) +{ + float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; + float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; + + float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; + float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; + + setup.emaj.sy = CEILF(vmin_y); + setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); + setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; + setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; + + setup.etop.sy = CEILF(vmid_y); + setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); + setup.etop.dxdy = setup.etop.dx / setup.etop.dy; + setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; + + setup.ebot.sy = CEILF(vmin_y); + setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); + setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; + setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const int minx = setup.cliprect_minx; + const int maxx = setup.cliprect_maxx; + const int miny = setup.cliprect_miny; + const int maxy = setup.cliprect_maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + ASSERT((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup.span.y) { + flush_spans(); + setup.span.y = block(_y); + } + + setup.span.left[_y&1] = left; + setup.span.right[_y&1] = right; + setup.span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Draw triangle into tile at (tx, ty) (tile coords) + * The tile data should have already been fetched. + */ +boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +{ + setup.tx = tx; + setup.ty = ty; + + /* set clipping bounds to tile bounds */ + setup.cliprect_minx = tx * TILE_SIZE; + setup.cliprect_miny = ty * TILE_SIZE; + setup.cliprect_maxx = (tx + 1) * TILE_SIZE; + setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + + if (!setup_sort_vertices((struct vertex_header *) v0, + (struct vertex_header *) v1, + (struct vertex_header *) v2)) { + return FALSE; /* totally clipped */ + } + + setup_tri_coefficients(); + setup_tri_edges(); + + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; + /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup.oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); + subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); + subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); + } + + flush_spans(); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h new file mode 100644 index 0000000000..aa694dd7c9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_TRI_H +#define SPU_TRI_H + + +extern boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); + + +#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c new file mode 100644 index 0000000000..ac373240c1 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -0,0 +1,165 @@ +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +//#include "tgsi_build.h" +#include "pipe/tgsi/util/tgsi_util.h" + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + component ); + } + + return swizzle; +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + assert( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c new file mode 100644 index 0000000000..45e3c26c00 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -0,0 +1,673 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include +#include + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_exec.h" +#include "spu_vertex_shader.h" +#include "spu_main.h" + +#define CACHE_NAME attribute +#define CACHED_TYPE qword +#define CACHE_TYPE CACHE_TYPE_RO +#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER +#define CACHE_LOG2NNWAY 2 +#define CACHE_LOG2NSETS 6 +#include + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME attribute +#define CACHE_NWAY 4 +#define CACHE_NSETS (1U << 6) + + +#define DRAW_DBG 0 + +static const qword fetch_shuffle_data[] = { + /* Shuffle used by CVT_64_FLOAT + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED + */ + { + 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, + 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED + */ + { + 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, + 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, + }, + + /* High value shuffle used by trans4x4. + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 + }, + + /* Low value shuffle used by trans4x4. + */ + { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F + } +}; + + +static INLINE void +trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out, + const qword *shuffle) +{ + qword t1 = si_shufb(row0, row2, shuffle[3]); + qword t2 = si_shufb(row0, row2, shuffle[4]); + qword t3 = si_shufb(row1, row3, shuffle[3]); + qword t4 = si_shufb(row1, row3, shuffle[4]); + + out[0] = si_shufb(t1, t3, shuffle[3]); + out[1] = si_shufb(t1, t3, shuffle[4]); + out[2] = si_shufb(t2, t4, shuffle[3]); + out[3] = si_shufb(t2, t4, shuffle[4]); +} + + +/** + * Fetch between 1 and 32 bytes from an unaligned address + */ +static INLINE void +fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ + qword tmp[4]; + const int shift = ea & 0x0f; + const unsigned aligned_start_ea = ea & ~0x0f; + const unsigned aligned_end_ea = (ea + size) & ~0x0f; + const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; + unsigned i; + + + if (shift == 0) { + /* Data is already aligned. Fetch directly into the destination buffer. + */ + for (i = 0; i < num_entries; i++) { + dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); + } + } else { + /* Fetch data from the cache to the local buffer. + */ + for (i = 0; i < num_entries; i++) { + tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); + } + + + /* Fix the alignment of the data and write to the destination buffer. + */ + for (i = 0; i < ((size + 15) / 16); i++) { + dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift), + (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16)); + } + } +} + + +#define CVT_32_FLOAT(q, s) (*(q)) + +static INLINE qword +CVT_64_FLOAT(const qword *qw, const qword *shuffle) +{ + qword a = si_frds(qw[0]); + qword b = si_frds(si_rotqbyi(qw[0], 8)); + qword c = si_frds(qw[1]); + qword d = si_frds(si_rotqbyi(qw[1], 8)); + + qword ab = si_shufb(a, b, shuffle[0]); + qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8)); + + return si_or(ab, cd); +} + + +static INLINE qword +CVT_8_USCALED(const qword *qw, const qword *shuffle) +{ + return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0); +} + + +static INLINE qword +CVT_16_USCALED(const qword *qw, const qword *shuffle) +{ + return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0); +} + + +static INLINE qword +CVT_32_USCALED(const qword *qw, const qword *shuffle) +{ + (void) shuffle; + return si_cuflt(*qw, 0); +} + +static INLINE qword +CVT_8_SSCALED(const qword *qw, const qword *shuffle) +{ + return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0); +} + + +static INLINE qword +CVT_16_SSCALED(const qword *qw, const qword *shuffle) +{ + return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0); +} + + +static INLINE qword +CVT_32_SSCALED(const qword *qw, const qword *shuffle) +{ + (void) shuffle; + return si_csflt(*qw, 0); +} + + +static INLINE qword +CVT_8_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 255.0f); + return si_fm(CVT_8_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_16_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 65535.0f); + return si_fm(CVT_16_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_32_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 4294967295.0f); + return si_fm(CVT_32_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_8_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 127.0f); + return si_fm(CVT_8_SSCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_16_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 32767.0f); + return si_fm(CVT_16_SSCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_32_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 2147483647.0f); + return si_fm(CVT_32_SSCALED(qw, shuffle), scale); +} + +#define SZ_4 si_il(0U) +#define SZ_3 si_fsmbi(0x000f) +#define SZ_2 si_fsmbi(0x00ff) +#define SZ_1 si_fsmbi(0x0fff) + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \ +static void \ +fetch_##NAME(qword *out, const qword *in, qword defaults, \ + const qword *shuffle) \ +{ \ + qword tmp[4]; \ + \ + tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \ + tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \ + tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \ + tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \ + trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \ +} + + +FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 ) + +FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 ) + +FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 ) + +FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 ) + +FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 ) + +FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 ) + +FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 ) + +FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 ) + +FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 ) + +FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 ) + +FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 ) + +FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 ) + +FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 ) + +FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 ) + +FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 ) + + + +static spu_fetch_func get_fetch_func( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return fetch_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + + case 0: + return NULL; /* not sure why this is needed */ + + default: + assert(0); + return NULL; + } +} + + +static unsigned get_vertex_size( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return 8; + case PIPE_FORMAT_R64G64_FLOAT: + return 2 * 8; + case PIPE_FORMAT_R64G64B64_FLOAT: + return 3 * 8; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return 4 * 8; + + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32_USCALED: + case PIPE_FORMAT_R32_UNORM: + case PIPE_FORMAT_R32_FLOAT: + return 4; + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + return 2 * 4; + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32B32_USCALED: + case PIPE_FORMAT_R32G32B32_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + return 3 * 4; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_USCALED: + case PIPE_FORMAT_R32G32B32A32_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return 4 * 4; + + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_USCALED: + return 2; + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16G16_UNORM: + return 2 * 2; + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16_UNORM: + return 3 * 2; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + return 4 * 2; + + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8_UNORM: + return 1; + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8_UNORM: + return 2 * 1; + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8B8_UNORM: + return 3 * 1; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8A8_UNORM: + return 4 * 1; + + case 0: + return 0; /* not sure why this is needed */ + + default: + assert(0); + return 0; + } +} + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + unsigned nr_attrs = draw->vertex_fetch.nr_attrs; + unsigned attr; + + assert(count <= 4); + +#if DRAW_DBG + printf("SPU: %s count = %u, nr_attrs = %u\n", + __FUNCTION__, count, nr_attrs); +#endif + + /* loop over vertex attributes (vertex shader inputs) + */ + for (attr = 0; attr < nr_attrs; attr++) { + const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 }; + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const uint64_t src = draw->vertex_fetch.src_ptr[attr]; + const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; + unsigned i; + unsigned idx; + const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; + const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; + qword in[2 * 4]; + + + /* Fetch four attributes for four vertices. + */ + idx = 0; + for (i = 0; i < count; i++) { + const uint64_t addr = src + (elts[i] * pitch); + +#if DRAW_DBG + printf("SPU: fetching = 0x%llx\n", addr); +#endif + + fetch_unaligned(& in[idx], addr, bytes_per_entry); + idx += quads_per_entry; + } + + /* Be nice and zero out any missing vertices. + */ + (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); + + + /* Convert all 4 vertices to vectors of float. + */ + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values, + fetch_shuffle_data); + } +} + + +void spu_update_vertex_fetch( struct spu_vs_context *draw ) +{ + unsigned i; + + + /* Invalidate the vertex cache. + */ + for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { + CACHELINE_CLEARVALID(i); + } + + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + draw->vertex_fetch.fetch[i] = + get_fetch_func(draw->vertex_fetch.format[i]); + draw->vertex_fetch.size[i] = + get_vertex_size(draw->vertex_fetch.format[i]); + } + + draw->vertex_fetch.fetch_func = generic_vertex_fetch; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c new file mode 100644 index 0000000000..c1cbbb6d1e --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -0,0 +1,231 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + * Ian Romanick + */ + +#include + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_context.h" +#include "pipe/cell/common.h" +#include "spu_main.h" + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<machine; + unsigned int j; + + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + + machine->Processor = TGSI_PROCESSOR_VERTEX; + + ASSERT_ALIGN16(draw->constants); + machine->Consts = (float (*)[4]) draw->constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + spu_vertex_fetch( draw, machine, elts, count ); + + /* run shader */ + spu_exec_machine_run( machine ); + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + unsigned char buffer[sizeof(struct vertex_header) + + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + struct vertex_header *const tmpOut = + (struct vertex_header *) buffer; + const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) + + (sizeof(float) * 4 + * draw->num_vs_outputs)); + + mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, + draw->nr_planes); + tmpOut->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + tmpOut->data[0][0] = x * scale[0] + trans[0]; + tmpOut->data[0][1] = y * scale[1] + trans[1]; + tmpOut->data[0][2] = z * scale[2] + trans[2]; + tmpOut->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + } /* loop over vertices */ +} + + +static void +spu_bind_vertex_shader(struct spu_vs_context *draw, + void *uniforms, + void *planes, + unsigned nr_planes, + unsigned num_outputs + ) +{ + draw->constants = (float (*)[4]) uniforms; + + (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); + draw->nr_planes = nr_planes; + draw->num_vs_outputs = num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); +} + + +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] + ALIGN16_ATTRIB; + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + const uint64_t immediate_addr = vs->shader.immediates; + const unsigned immediate_size = + ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) + + (immediate_addr & 0x0f)); + + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, + TAG_VERTEX_BUFFER, 0, 0); + + draw->machine.Instructions = (struct tgsi_full_instruction *) + vs->shader.instructions; + draw->machine.NumInstructions = vs->shader.num_instructions; + + draw->machine.Declarations = (struct tgsi_full_declaration *) + vs->shader.declarations; + draw->machine.NumDeclarations = vs->shader.num_declarations; + + draw->vertex_fetch.nr_attrs = vs->nr_attrs; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], + sizeof(float) * 4 * vs->shader.num_immediates); + + spu_bind_vertex_shader(draw, vs->shader.uniforms, + vs->plane, vs->nr_planes, + vs->shader.num_outputs); + + for (i = 0; i < vs->num_elts; i += 4) { + const unsigned batch_size = MIN2(vs->num_elts - i, 4); + + run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); + } +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h new file mode 100644 index 0000000000..b5bf31e67d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -0,0 +1,63 @@ +#ifndef SPU_VERTEX_SHADER_H +#define SPU_VERTEX_SHADER_H + +#include "pipe/p_format.h" +#include "spu_exec.h" + +struct spu_vs_context; + +typedef void (*spu_fetch_func)(qword *out, const qword *in, qword defaults, + const qword *shuffle_data); +typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count ); + +struct spu_vs_context { + struct pipe_viewport_state viewport; + + struct { + uint64_t src_ptr[PIPE_ATTRIB_MAX]; + unsigned pitch[PIPE_ATTRIB_MAX]; + unsigned size[PIPE_ATTRIB_MAX]; + enum pipe_format format[PIPE_ATTRIB_MAX]; + unsigned nr_attrs; + boolean dirty; + + spu_fetch_func fetch[PIPE_ATTRIB_MAX]; + spu_full_fetch_func fetch_func; + } vertex_fetch; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + struct spu_exec_machine machine; + const float (*constants)[4]; + + unsigned num_vs_outputs; +}; + +extern void spu_update_vertex_fetch(struct spu_vs_context *draw); + +static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + if (draw->vertex_fetch.dirty) { + spu_update_vertex_fetch(draw); + draw->vertex_fetch.dirty = 0; + } + + (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); +} + +struct cell_command_vs; + +extern void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs); + +#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h new file mode 100644 index 0000000000..ce8ad00339 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_ztest.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Zbuffer/depth test code. + */ + + +#ifndef SPU_ZTEST_H +#define SPU_ZTEST_H + + +#ifdef __SPU__ +#include +#endif + + + +/** + * Perform Z testing for a 16-bit/value Z buffer. + * + * \param zvals vector of four fragment zvalues as floats + * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this + * contains the Z values for 2 quads, 8 pixels. + * \param x x coordinate of quad (only lsbit is significant) + * \param inMask indicates which fragments in the quad are alive + * \return new mask indicating which fragments are alive after ztest + */ +static INLINE vector unsigned int +spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, + uint x, vector unsigned int inMask) +{ +#define ZERO 0x80 + vector unsigned int zvals_ui4, zbuf_ui4, mask; + + /* convert floats to uints in [0, 65535] */ + zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ + zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ + + /* XXX this conditional could be removed with a bit of work */ + if (x & 1) { + /* convert zbuffer values from ushorts to uints */ + /* gather lower four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, + ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve lower 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 16, 17, 18, 19, 20, 21, 22, 23, + 2, 3, 6, 7, 10, 11, 14, 15})); + } + else { + /* convert zbuffer values from ushorts to uints */ + /* gather upper four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, + ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve upper 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 2, 3, 6, 7, 10, 11, 14, 15, + 24, 25, 26, 27, 28, 29, 30, 31})); + } + return mask; +#undef ZERO +} + + +/** + * As above, but Zbuffer values as 32-bit uints + */ +static INLINE vector unsigned int +spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, + vector unsigned int inMask) +{ + vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; + + /* convert floats to uints in [0, 0xffffffff] */ + zvals_ui4 = spu_convtu(zvals, 32); + /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); + + return mask; +} + + +#endif /* SPU_ZTEST_H */ diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile new file mode 100644 index 0000000000..72d0895c74 --- /dev/null +++ b/src/gallium/drivers/failover/Makefile @@ -0,0 +1,21 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = failover + +DRIVER_SOURCES = \ + fo_state.c \ + fo_state_emit.c \ + fo_context.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c new file mode 100644 index 0000000000..7ce4a7df17 --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.c @@ -0,0 +1,155 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_context.h" + +#include "fo_context.h" +#include "fo_winsys.h" + + + +static void failover_destroy( struct pipe_context *pipe ) +{ + struct failover_context *failover = failover_context( pipe ); + + free( failover ); +} + + + +static boolean failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct failover_context *failover = failover_context( pipe ); + + /* If there has been any statechange since last time, try hardware + * rendering again: + */ + if (failover->dirty) { + failover->mode = FO_HW; + } + + /* Try hardware: + */ + if (failover->mode == FO_HW) { + if (!failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count )) { + + failover->hw->flush( failover->hw, ~0 ); + failover->mode = FO_SW; + } + } + + /* Possibly try software: + */ + if (failover->mode == FO_SW) { + + if (failover->dirty) + failover_state_emit( failover ); + + failover->sw->draw_elements( failover->sw, + indexBuffer, + indexSize, + prim, + start, + count ); + + /* Be ready to switch back to hardware rendering without an + * intervening flush. Unlikely to be much performance impact to + * this: + */ + failover->sw->flush( failover->sw, ~0 ); + } + + return TRUE; +} + + +static boolean failover_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return failover_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *failover_create( struct pipe_context *hw, + struct pipe_context *sw ) +{ + struct failover_context *failover = CALLOC_STRUCT(failover_context); + if (failover == NULL) + return NULL; + + failover->hw = hw; + failover->sw = sw; + failover->pipe.winsys = hw->winsys; + failover->pipe.destroy = failover_destroy; + failover->pipe.is_format_supported = hw->is_format_supported; + failover->pipe.get_name = hw->get_name; + failover->pipe.get_vendor = hw->get_vendor; + failover->pipe.get_param = hw->get_param; + failover->pipe.get_paramf = hw->get_paramf; + + failover->pipe.draw_arrays = failover_draw_arrays; + failover->pipe.draw_elements = failover_draw_elements; + failover->pipe.clear = hw->clear; + + /* No software occlusion fallback (or other optional functionality) + * at this point - if the hardware doesn't support it, don't + * advertise it to the application. + */ + failover->pipe.begin_query = hw->begin_query; + failover->pipe.end_query = hw->end_query; + + failover_init_state_functions( failover ); + +#if 0 + failover->pipe.surface_alloc = hw->surface_alloc; +#endif + failover->pipe.get_tex_surface = hw->get_tex_surface; + + failover->pipe.surface_copy = hw->surface_copy; + failover->pipe.surface_fill = hw->surface_fill; + failover->pipe.texture_create = hw->texture_create; + failover->pipe.texture_release = hw->texture_release; + failover->pipe.flush = hw->flush; + + failover->dirty = 0; + + return &failover->pipe; +} + diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h new file mode 100644 index 0000000000..1dc87291c9 --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.h @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef FO_CONTEXT_H +#define FO_CONTEXT_H + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + + + +#define FO_NEW_VIEWPORT 0x1 +#define FO_NEW_RASTERIZER 0x2 +#define FO_NEW_FRAGMENT_SHADER 0x4 +#define FO_NEW_BLEND 0x8 +#define FO_NEW_CLIP 0x10 +#define FO_NEW_SCISSOR 0x20 +#define FO_NEW_STIPPLE 0x40 +#define FO_NEW_FRAMEBUFFER 0x80 +#define FO_NEW_ALPHA_TEST 0x100 +#define FO_NEW_DEPTH_STENCIL 0x200 +#define FO_NEW_SAMPLER 0x400 +#define FO_NEW_TEXTURE 0x800 +#define FO_NEW_VERTEX 0x2000 +#define FO_NEW_VERTEX_SHADER 0x4000 +#define FO_NEW_BLEND_COLOR 0x8000 +#define FO_NEW_CLEAR_COLOR 0x10000 +#define FO_NEW_VERTEX_BUFFER 0x20000 +#define FO_NEW_VERTEX_ELEMENT 0x40000 + + + +#define FO_HW 0 +#define FO_SW 1 + +struct fo_state { + void *sw_state; + void *hw_state; +}; +struct failover_context { + struct pipe_context pipe; /**< base class */ + + + /* The most recent drawing state as set by the driver: + */ + const struct fo_state *blend; + const struct fo_state *sampler[PIPE_MAX_SAMPLERS]; + const struct fo_state *depth_stencil; + const struct fo_state *rasterizer; + const struct fo_state *fragment_shader; + const struct fo_state *vertex_shader; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + + unsigned dirty; + unsigned dirty_sampler; + unsigned dirty_texture; + unsigned dirty_vertex_buffer; + unsigned dirty_vertex_element; + + + unsigned mode; + struct pipe_context *hw; + struct pipe_context *sw; +}; + + + +void failover_init_state_functions( struct failover_context *failover ); +void failover_state_emit( struct failover_context *failover ); + +static INLINE struct failover_context * +failover_context( struct pipe_context *pipe ) +{ + return (struct failover_context *)pipe; +} + + +#endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c new file mode 100644 index 0000000000..0fc5568da1 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state.c @@ -0,0 +1,457 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "fo_context.h" + + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + + +static void * +failover_create_blend_state( struct pipe_context *pipe, + const struct pipe_blend_state *blend ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_blend_state(failover->sw, blend); + state->hw_state = failover->hw->create_blend_state(failover->hw, blend); + + return state; +} + +static void +failover_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)blend; + failover->blend = state; + failover->dirty |= FO_NEW_BLEND; + failover->sw->bind_blend_state( failover->sw, state->sw_state ); + failover->hw->bind_blend_state( failover->hw, state->hw_state ); +} + +static void +failover_delete_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct fo_state *state = (struct fo_state*)blend; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_blend_state(failover->sw, state->sw_state); + failover->hw->delete_blend_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->blend_color = *blend_color; + failover->dirty |= FO_NEW_BLEND_COLOR; + failover->sw->set_blend_color( failover->sw, blend_color ); + failover->hw->set_blend_color( failover->hw, blend_color ); +} + +static void +failover_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->clip = *clip; + failover->dirty |= FO_NEW_CLIP; + failover->sw->set_clip_state( failover->sw, clip ); + failover->hw->set_clip_state( failover->hw, clip ); +} + + +static void * +failover_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); + state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)depth_stencil; + failover->depth_stencil = state; + failover->dirty |= FO_NEW_DEPTH_STENCIL; + failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state); +} + +static void +failover_delete_depth_stencil_state(struct pipe_context *pipe, + void *ds) +{ + struct fo_state *state = (struct fo_state*)ds; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *framebuffer) +{ + struct failover_context *failover = failover_context(pipe); + + failover->framebuffer = *framebuffer; + failover->dirty |= FO_NEW_FRAMEBUFFER; + failover->sw->set_framebuffer_state( failover->sw, framebuffer ); + failover->hw->set_framebuffer_state( failover->hw, framebuffer ); +} + + +static void * +failover_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_fs_state(failover->sw, templ); + state->hw_state = failover->hw->create_fs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)fs; + failover->fragment_shader = state; + failover->dirty |= FO_NEW_FRAGMENT_SHADER; + failover->sw->bind_fs_state(failover->sw, state->sw_state); + failover->hw->bind_fs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_fs_state(struct pipe_context *pipe, + void *fs) +{ + struct fo_state *state = (struct fo_state*)fs; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_fs_state(failover->sw, state->sw_state); + failover->hw->delete_fs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void * +failover_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_vs_state(failover->sw, templ); + state->hw_state = failover->hw->create_vs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)vs; + failover->vertex_shader = state; + failover->dirty |= FO_NEW_VERTEX_SHADER; + failover->sw->bind_vs_state(failover->sw, state->sw_state); + failover->hw->bind_vs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct fo_state *state = (struct fo_state*)vs; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_vs_state(failover->sw, state->sw_state); + failover->hw->delete_vs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->poly_stipple = *stipple; + failover->dirty |= FO_NEW_STIPPLE; + failover->sw->set_polygon_stipple( failover->sw, stipple ); + failover->hw->set_polygon_stipple( failover->hw, stipple ); +} + + +static void * +failover_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); + state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)raster; + failover->rasterizer = state; + failover->dirty |= FO_NEW_RASTERIZER; + failover->sw->bind_rasterizer_state(failover->sw, state->sw_state); + failover->hw->bind_rasterizer_state(failover->hw, state->hw_state); +} + +static void +failover_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct fo_state *state = (struct fo_state*)raster; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_rasterizer_state(failover->sw, state->sw_state); + failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->scissor = *scissor; + failover->dirty |= FO_NEW_SCISSOR; + failover->sw->set_scissor_state( failover->sw, scissor ); + failover->hw->set_scissor_state( failover->hw, scissor ); +} + + +static void * +failover_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); + state->hw_state = failover->hw->create_sampler_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)sampler; + failover->sampler[unit] = state; + failover->dirty |= FO_NEW_SAMPLER; + failover->dirty_sampler |= (1<sw->bind_sampler_state(failover->sw, unit, + state->sw_state); + failover->hw->bind_sampler_state(failover->hw, unit, + state->hw_state); +} + +static void +failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) +{ + struct fo_state *state = (struct fo_state*)sampler; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_sampler_state(failover->sw, state->sw_state); + failover->hw->delete_sampler_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_sampler_texture(struct pipe_context *pipe, + unsigned unit, + struct pipe_texture *texture) +{ + struct failover_context *failover = failover_context(pipe); + + failover->texture[unit] = texture; + failover->dirty |= FO_NEW_TEXTURE; + failover->dirty_texture |= (1<sw->set_sampler_texture( failover->sw, unit, texture ); + failover->hw->set_sampler_texture( failover->hw, unit, texture ); +} + + +static void +failover_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->viewport = *viewport; + failover->dirty |= FO_NEW_VIEWPORT; + failover->sw->set_viewport_state( failover->sw, viewport ); + failover->hw->set_viewport_state( failover->hw, viewport ); +} + + +static void +failover_set_vertex_buffer(struct pipe_context *pipe, + unsigned unit, + const struct pipe_vertex_buffer *vertex_buffer) +{ + struct failover_context *failover = failover_context(pipe); + + failover->vertex_buffer[unit] = *vertex_buffer; + failover->dirty |= FO_NEW_VERTEX_BUFFER; + failover->dirty_vertex_buffer |= (1<sw->set_vertex_buffer( failover->sw, unit, vertex_buffer ); + failover->hw->set_vertex_buffer( failover->hw, unit, vertex_buffer ); +} + + +static void +failover_set_vertex_element(struct pipe_context *pipe, + unsigned unit, + const struct pipe_vertex_element *vertex_element) +{ + struct failover_context *failover = failover_context(pipe); + + failover->vertex_element[unit] = *vertex_element; + failover->dirty |= FO_NEW_VERTEX_ELEMENT; + failover->dirty_vertex_element |= (1<sw->set_vertex_element( failover->sw, unit, vertex_element ); + failover->hw->set_vertex_element( failover->hw, unit, vertex_element ); +} + +void +failover_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct failover_context *failover = failover_context(pipe); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + failover->sw->set_constant_buffer(failover->sw, shader, index, buf); + failover->hw->set_constant_buffer(failover->hw, shader, index, buf); +} + + +void +failover_init_state_functions( struct failover_context *failover ) +{ + failover->pipe.create_blend_state = failover_create_blend_state; + failover->pipe.bind_blend_state = failover_bind_blend_state; + failover->pipe.delete_blend_state = failover_delete_blend_state; + failover->pipe.create_sampler_state = failover_create_sampler_state; + failover->pipe.bind_sampler_state = failover_bind_sampler_state; + failover->pipe.delete_sampler_state = failover_delete_sampler_state; + failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; + failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; + failover->pipe.delete_depth_stencil_alpha_state = failover_delete_depth_stencil_state; + failover->pipe.create_rasterizer_state = failover_create_rasterizer_state; + failover->pipe.bind_rasterizer_state = failover_bind_rasterizer_state; + failover->pipe.delete_rasterizer_state = failover_delete_rasterizer_state; + failover->pipe.create_fs_state = failover_create_fs_state; + failover->pipe.bind_fs_state = failover_bind_fs_state; + failover->pipe.delete_fs_state = failover_delete_fs_state; + failover->pipe.create_vs_state = failover_create_vs_state; + failover->pipe.bind_vs_state = failover_bind_vs_state; + failover->pipe.delete_vs_state = failover_delete_vs_state; + + failover->pipe.set_blend_color = failover_set_blend_color; + failover->pipe.set_clip_state = failover_set_clip_state; + failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; + failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; + failover->pipe.set_scissor_state = failover_set_scissor_state; + failover->pipe.set_sampler_texture = failover_set_sampler_texture; + failover->pipe.set_viewport_state = failover_set_viewport_state; + failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; + failover->pipe.set_vertex_element = failover_set_vertex_element; + failover->pipe.set_constant_buffer = failover_set_constant_buffer; +} diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c new file mode 100644 index 0000000000..c663dd4947 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "fo_context.h" + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + +/* Bring the software pipe uptodate with current state. + * + * With constant state objects we would probably just send all state + * to both rasterizers all the time??? + */ +void +failover_state_emit( struct failover_context *failover ) +{ + unsigned i; + + if (failover->dirty & FO_NEW_BLEND) + failover->sw->bind_blend_state( failover->sw, + failover->blend->sw_state ); + + if (failover->dirty & FO_NEW_BLEND_COLOR) + failover->sw->set_blend_color( failover->sw, &failover->blend_color ); + + if (failover->dirty & FO_NEW_CLIP) + failover->sw->set_clip_state( failover->sw, &failover->clip ); + + if (failover->dirty & FO_NEW_DEPTH_STENCIL) + failover->sw->bind_depth_stencil_alpha_state( failover->sw, + failover->depth_stencil->sw_state ); + + if (failover->dirty & FO_NEW_FRAMEBUFFER) + failover->sw->set_framebuffer_state( failover->sw, &failover->framebuffer ); + + if (failover->dirty & FO_NEW_FRAGMENT_SHADER) + failover->sw->bind_fs_state( failover->sw, + failover->fragment_shader->sw_state ); + + if (failover->dirty & FO_NEW_VERTEX_SHADER) + failover->sw->bind_vs_state( failover->sw, + failover->vertex_shader->sw_state ); + + if (failover->dirty & FO_NEW_STIPPLE) + failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple ); + + if (failover->dirty & FO_NEW_RASTERIZER) + failover->sw->bind_rasterizer_state( failover->sw, + failover->rasterizer->sw_state ); + + if (failover->dirty & FO_NEW_SCISSOR) + failover->sw->set_scissor_state( failover->sw, &failover->scissor ); + + if (failover->dirty & FO_NEW_VIEWPORT) + failover->sw->set_viewport_state( failover->sw, &failover->viewport ); + + if (failover->dirty & FO_NEW_SAMPLER) { + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (failover->dirty_sampler & (1<sw->bind_sampler_state( failover->sw, i, + failover->sampler[i]->sw_state ); + } + } + } + + if (failover->dirty & FO_NEW_TEXTURE) { + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (failover->dirty_texture & (1<sw->set_sampler_texture( failover->sw, i, + failover->texture[i] ); + } + } + } + + if (failover->dirty & FO_NEW_VERTEX_BUFFER) { + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (failover->dirty_vertex_buffer & (1<sw->set_vertex_buffer( failover->sw, i, + &failover->vertex_buffer[i] ); + } + } + } + + if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (failover->dirty_vertex_element & (1<sw->set_vertex_element( failover->sw, i, + &failover->vertex_element[i] ); + } + } + } + + failover->dirty = 0; + failover->dirty_vertex_element = 0; + failover->dirty_vertex_buffer = 0; + failover->dirty_texture = 0; + failover->dirty_sampler = 0; +} diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h new file mode 100644 index 0000000000..a8ce997a1f --- /dev/null +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef FO_WINSYS_H +#define FO_WINSYS_H + + +/* This is the interface that failover requires any window system + * hosting it to implement. This is the only include file in failover + * which is public. + */ + + +struct pipe_context; + + +struct pipe_context *failover_create( struct pipe_context *hw, + struct pipe_context *sw ); + + +#endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile new file mode 100644 index 0000000000..2f91de3afc --- /dev/null +++ b/src/gallium/drivers/i915simple/Makefile @@ -0,0 +1,38 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i915simple + +DRIVER_SOURCES = \ + i915_blit.c \ + i915_clear.c \ + i915_flush.c \ + i915_context.c \ + i915_context.c \ + i915_debug.c \ + i915_debug_fp.c \ + i915_state.c \ + i915_state_immediate.c \ + i915_state_dynamic.c \ + i915_state_derived.c \ + i915_state_emit.c \ + i915_state_sampler.c \ + i915_strings.c \ + i915_prim_emit.c \ + i915_prim_vbuf.c \ + i915_texture.c \ + i915_fpc_emit.c \ + i915_fpc_translate.c \ + i915_surface.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript new file mode 100644 index 0000000000..f5fb96b995 --- /dev/null +++ b/src/gallium/drivers/i915simple/SConscript @@ -0,0 +1,29 @@ +Import('*') + +env = env.Clone() + +i915simple = env.ConvenienceLibrary( + target = 'i915simple', + source = [ + 'i915_blit.c', + 'i915_clear.c', + 'i915_context.c', + 'i915_debug.c', + 'i915_debug_fp.c', + 'i915_flush.c', + 'i915_fpc_emit.c', + 'i915_fpc_translate.c', + 'i915_prim_emit.c', + 'i915_prim_vbuf.c', + 'i915_state.c', + 'i915_state_derived.c', + 'i915_state_dynamic.c', + 'i915_state_emit.c', + 'i915_state_immediate.c', + 'i915_state_sampler.c', + 'i915_strings.c', + 'i915_surface.c', + 'i915_texture.c', + ]) + +Export('i915simple') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h new file mode 100644 index 0000000000..fb88cd6db0 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BATCH_H +#define I915_BATCH_H + +#include "i915_winsys.h" +#include "i915_debug.h" + +#define BATCH_LOCALS + +#define BEGIN_BATCH( dwords, relocs ) \ + (i915->batch_start = i915->winsys->batch_start( i915->winsys, dwords, relocs )) + +#define OUT_BATCH( dword ) \ + i915->winsys->batch_dword( i915->winsys, dword ) + +#define OUT_RELOC( buf, flags, delta ) \ + i915->winsys->batch_reloc( i915->winsys, buf, flags, delta ) + +#define ADVANCE_BATCH() + +#define FLUSH_BATCH() do { \ + if (0) i915_dump_batchbuffer( i915 ); \ + i915->winsys->batch_flush( i915->winsys ); \ + i915->batch_start = NULL; \ + i915->hardware_dirty = ~0; \ +} while (0) + +#endif diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c new file mode 100644 index 0000000000..db4671ff55 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -0,0 +1,162 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_blit.h" +#include "i915_reg.h" +#include "i915_batch.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void +i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= (short) cpp; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25); + CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | + XY_COLOR_BLT_WRITE_RGB); + break; + default: + return; + } + +// DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", +// __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + + + if (!BEGIN_BATCH(6, 1)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(6, 1)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH(color); + ADVANCE_BATCH(); +} + + +void +i915_copy_blit( struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h ) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + BATCH_LOCALS; + + + I915_DBG(i915, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + src_pitch *= (short) cpp; + dst_pitch *= (short) cpp; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = + (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = + (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + break; + default: + return; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + /* Hardware can handle negative pitches but loses the ability to do + * proper overlapping blits in that case. We don't really have a + * need for either at this stage. + */ + assert (dst_pitch > 0 && src_pitch > 0); + + + if (!BEGIN_BATCH(8, 2)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(8, 2)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC(dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH(((int) src_pitch & 0xffff)); + OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); + ADVANCE_BATCH(); +} + + diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h new file mode 100644 index 0000000000..6e5b44e124 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BLIT_H +#define I915_BLIT_H + +#include "i915_context.h" + +extern void i915_copy_blit(struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short srcx, short srcy, + short dstx, short dsty, + short w, short h ); + +extern void i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, unsigned color); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c new file mode 100644 index 0000000000..cde69daacc --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_clear.c @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "i915_context.h" +#include "i915_state.h" + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c new file mode 100644 index 0000000000..497623a700 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -0,0 +1,320 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_state.h" +#include "i915_batch.h" +#include "i915_texture.h" +#include "i915_reg.h" + +#include "pipe/draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +i915_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_A8_L8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + switch (type) { + case PIPE_TEXTURE: + list = tex_supported; + break; + case PIPE_SURFACE: + list = surface_supported; + break; + default: + assert(0); + } + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +static int +i915_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +i915_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static void i915_destroy( struct pipe_context *pipe ) +{ + struct i915_context *i915 = i915_context( pipe ); + + draw_destroy( i915->draw ); + + FREE( i915 ); +} + + + + +static boolean +i915_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context( pipe ); + struct draw_context *draw = i915->draw; + unsigned i; + + if (i915->dirty) + i915_update_derived( i915 ); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (i915->vertex_buffer[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + i915->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + draw_set_mapped_constant_buffer(draw, + i915->current.constants[PIPE_SHADER_VERTEX]); + + /* draw! */ + draw_arrays(i915->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (i915->vertex_buffer[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + + +static boolean i915_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, + struct i915_winsys *i915_winsys, + unsigned pci_id ) +{ + struct i915_context *i915; + unsigned is_i945 = 0; + + switch (pci_id) { + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + break; + + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + is_i945 = 1; + break; + + default: + pipe_winsys->printf(pipe_winsys, + "%s: unknown pci id 0x%x, cannot create context\n", + __FUNCTION__, pci_id); + return NULL; + } + + i915 = CALLOC_STRUCT(i915_context); + if (i915 == NULL) + return NULL; + + i915->winsys = i915_winsys; + i915->pipe.winsys = pipe_winsys; + + i915->pipe.destroy = i915_destroy; + i915->pipe.is_format_supported = i915_is_format_supported; + i915->pipe.get_param = i915_get_param; + i915->pipe.get_paramf = i915_get_paramf; + + i915->pipe.clear = i915_clear; + + + i915->pipe.draw_arrays = i915_draw_arrays; + i915->pipe.draw_elements = i915_draw_elements; + + /* + * Create drawing context and plug our rendering stage into it. + */ + i915->draw = draw_create(); + assert(i915->draw); + if (GETENV("I915_VBUF")) { + draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); + } + else { + draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); + } + + i915_init_surface_functions(i915); + i915_init_state_functions(i915); + i915_init_flush_functions(i915); + i915_init_string_functions(i915); + + i915->pci_id = pci_id; + i915->flags.is_i945 = is_i945; + + i915->pipe.texture_create = i915_texture_create; + i915->pipe.texture_release = i915_texture_release; + + i915->dirty = ~0; + i915->hardware_dirty = ~0; + + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch_start = NULL; + + /* + * XXX we could plug GL selection/feedback into the drawing pipeline + * by specifying a different setup/render stage. + */ + + return &i915->pipe; +} + diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h new file mode 100644 index 0000000000..b4ea63c3e7 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -0,0 +1,304 @@ + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/draw/draw_vertex.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + /** user constants, plus extra constants from shader translation */ + uint num_constants[PIPE_SHADER_TYPES]; + + uint *program; + uint program_len; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + const struct pipe_shader_state *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + + unsigned dirty; + + unsigned *batch_start; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; + unsigned pci_id; + + struct { + unsigned is_i945:1; + } flags; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<winsys->printf( stream->winsys, buffer ); + va_end( args ); +} + + +static boolean debug( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned i; + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + +static const char *get_prim_name( unsigned val ) +{ + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; break; + case PRIM3D_TRISTRIP: return "TRISTRIP"; break; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; + case PRIM3D_TRIFAN: return "TRIFAN"; break; + case PRIM3D_POLY: return "POLY"; break; + case PRIM3D_LINELIST: return "LINELIST"; break; + case PRIM3D_LINESTRIP: return "LINESTRIP"; break; + case PRIM3D_RECTLIST: return "RECTLIST"; break; + case PRIM3D_POINTLIST: return "POINTLIST"; break; + case PRIM3D_DIB: return "DIB"; break; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; + default: return "????"; break; + } +} + +static boolean debug_prim( struct debug_stream *stream, const char *name, + boolean dump_floats, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i; + + + + PRINTF(stream, "%s %s (%d dwords):\n", name, prim, len); + PRINTF(stream, "\t0x%08x\n", ptr[0]); + for (i = 1; i < len; i++) { + if (dump_floats) + PRINTF(stream, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]); + else + PRINTF(stream, "\t0x%08x\n", ptr[i]); + } + + + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + + +static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + PRINTF(stream, "%s (%d dwords):\n", name, len); + i915_disassemble_program( stream, ptr, len ); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned old_offset = stream->offset + len * sizeof(unsigned); + unsigned i; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + if (stream->offset < old_offset) + PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + PRINTF(stream, "\n... skipping from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + + + return TRUE; +} + + +static boolean debug_variable_length_prim( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i, len; + + ushort *idx = (ushort *)(ptr+1); + for (i = 0; idx[i] != 0xffff; i++) + ; + + len = 1+(i+2)/2; + + PRINTF(stream, "3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static void +BITS( + struct debug_stream *stream, + unsigned dw, + unsigned hi, + unsigned lo, + const char *fmt, + ... ) +{ + va_list args; + char buffer[256]; + unsigned himask = ~0UL >> (31 - (hi)); + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + stream->winsys->printf( stream->winsys, buffer ); + va_end( args ); + + PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); +} + +#define MBZ( dw, hi, lo) do { \ + unsigned x = (dw) >> (lo); \ + unsigned lomask = (1 << (lo)) - 1; \ + unsigned himask; \ + himask = (1UL << (hi)) - 1; \ + assert ((x & himask & ~lomask) == 0); \ +} while (0) + +static void +FLAG( + struct debug_stream *stream, + unsigned dw, + unsigned bit, + const char *fmt, + ... ) +{ + if (((dw) >> (bit)) & 1) { + va_list args; + char buffer[256]; + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + stream->winsys->printf( stream->winsys, buffer ); + va_end( args ); + + PRINTF(stream, "\n"); + } +} + +static boolean debug_load_immediate( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 4) & 0xff; + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + if (bits & (1<<0)) { + PRINTF(stream, "\t LIS0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + BITS(stream, ptr[j], 0, 0, "vb invalidate disable"); + j++; + } + if (bits & (1<<1)) { + PRINTF(stream, "\t LIS1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 29, 24, "vb dword width"); + BITS(stream, ptr[j], 21, 16, "vb dword pitch"); + BITS(stream, ptr[j], 15, 0, "vb max index"); + j++; + } + if (bits & (1<<2)) { + int i; + PRINTF(stream, "\t LIS2: 0x%08x\n", ptr[j]); + for (i = 0; i < 8; i++) { + unsigned tc = (ptr[j] >> (i * 4)) & 0xf; + if (tc != 0xf) + BITS(stream, tc, 3, 0, "tex coord %d", i); + } + j++; + } + if (bits & (1<<3)) { + PRINTF(stream, "\t LIS3: 0x%08x\n", ptr[j]); + j++; + } + if (bits & (1<<4)) { + PRINTF(stream, "\t LIS4: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 23, "point width"); + BITS(stream, ptr[j], 22, 19, "line width"); + FLAG(stream, ptr[j], 18, "alpha flatshade"); + FLAG(stream, ptr[j], 17, "fog flatshade"); + FLAG(stream, ptr[j], 16, "spec flatshade"); + FLAG(stream, ptr[j], 15, "rgb flatshade"); + BITS(stream, ptr[j], 14, 13, "cull mode"); + FLAG(stream, ptr[j], 12, "vfmt: point width"); + FLAG(stream, ptr[j], 11, "vfmt: specular/fog"); + FLAG(stream, ptr[j], 10, "vfmt: rgba"); + FLAG(stream, ptr[j], 9, "vfmt: depth offset"); + BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)"); + FLAG(stream, ptr[j], 5, "force dflt diffuse"); + FLAG(stream, ptr[j], 4, "force dflt specular"); + FLAG(stream, ptr[j], 3, "local depth offset enable"); + FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord"); + FLAG(stream, ptr[j], 1, "sprite point"); + FLAG(stream, ptr[j], 0, "antialiasing"); + j++; + } + if (bits & (1<<5)) { + PRINTF(stream, "\t LIS5: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 28, "rgba write disables"); + FLAG(stream, ptr[j], 27, "force dflt point width"); + FLAG(stream, ptr[j], 26, "last pixel enable"); + FLAG(stream, ptr[j], 25, "global z offset enable"); + FLAG(stream, ptr[j], 24, "fog enable"); + BITS(stream, ptr[j], 23, 16, "stencil ref"); + BITS(stream, ptr[j], 15, 13, "stencil test"); + BITS(stream, ptr[j], 12, 10, "stencil fail op"); + BITS(stream, ptr[j], 9, 7, "stencil pass z fail op"); + BITS(stream, ptr[j], 6, 4, "stencil pass z pass op"); + FLAG(stream, ptr[j], 3, "stencil write enable"); + FLAG(stream, ptr[j], 2, "stencil test enable"); + FLAG(stream, ptr[j], 1, "color dither enable"); + FLAG(stream, ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + PRINTF(stream, "\t LIS6: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "alpha test enable"); + BITS(stream, ptr[j], 30, 28, "alpha func"); + BITS(stream, ptr[j], 27, 20, "alpha ref"); + FLAG(stream, ptr[j], 19, "depth test enable"); + BITS(stream, ptr[j], 18, 16, "depth func"); + FLAG(stream, ptr[j], 15, "blend enable"); + BITS(stream, ptr[j], 14, 12, "blend func"); + BITS(stream, ptr[j], 11, 8, "blend src factor"); + BITS(stream, ptr[j], 7, 4, "blend dst factor"); + FLAG(stream, ptr[j], 3, "depth write enable"); + FLAG(stream, ptr[j], 2, "color write enable"); + BITS(stream, ptr[j], 1, 0, "provoking vertex"); + j++; + } + + + PRINTF(stream, "\n"); + + assert(j == len); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + +static boolean debug_load_indirect( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 8) & 0x3f; + unsigned i, j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + for (i = 0; i < 6; i++) { + if (bits & (1<offset += len * sizeof(unsigned); + + return TRUE; +} + +static void BR13( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + FLAG(stream, val, 30, "clipping enable"); + BITS(stream, val, 25, 24, "color depth (3==32bpp)"); + BITS(stream, val, 23, 16, "raster op"); + BITS(stream, val, 15, 0, "dest pitch"); +} + + +static void BR22( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y1"); + BITS(stream, val, 15, 0, "dest x1"); +} + +static void BR23( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y2"); + BITS(stream, val, 15, 0, "dest x2"); +} + +static void BR09( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- dest address\n", val); +} + +static void BR26( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "src y1"); + BITS(stream, val, 15, 0, "src x1"); +} + +static void BR11( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 15, 0, "src pitch"); +} + +static void BR12( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- src address\n", val); +} + +static void BR16( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- color\n", val); +} + +static boolean debug_copy_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_color_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_modes4( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 21, 18, "logicop func"); + FLAG(stream, ptr[j], 17, "stencil test mask modify-enable"); + FLAG(stream, ptr[j], 16, "stencil write mask modify-enable"); + BITS(stream, ptr[j], 15, 8, "stencil test mask"); + BITS(stream, ptr[j], 7, 0, "stencil write mask"); + j++; + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_map_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TMn.0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG(stream, ptr[j], 1, "vertical line stride"); + FLAG(stream, ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + PRINTF(stream, "\t TMn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "height"); + BITS(stream, ptr[j], 20, 10, "width"); + BITS(stream, ptr[j], 9, 7, "surface format"); + BITS(stream, ptr[j], 6, 3, "texel format"); + FLAG(stream, ptr[j], 2, "use fence regs"); + FLAG(stream, ptr[j], 1, "tiled surface"); + FLAG(stream, ptr[j], 0, "tile walk ymajor"); + j++; + } + { + PRINTF(stream, "\t TMn.2: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "dword pitch"); + BITS(stream, ptr[j], 20, 15, "cube face enables"); + BITS(stream, ptr[j], 14, 9, "max lod"); + FLAG(stream, ptr[j], 8, "mip layout right"); + BITS(stream, ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_sampler_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TSn.0: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "reverse gamma"); + FLAG(stream, ptr[j], 30, "planar to packed"); + FLAG(stream, ptr[j], 29, "yuv->rgb"); + BITS(stream, ptr[j], 28, 27, "chromakey index"); + BITS(stream, ptr[j], 26, 22, "base mip level"); + BITS(stream, ptr[j], 21, 20, "mip mode filter"); + BITS(stream, ptr[j], 19, 17, "mag mode filter"); + BITS(stream, ptr[j], 16, 14, "min mode filter"); + BITS(stream, ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG(stream, ptr[j], 4, "shadow enable"); + FLAG(stream, ptr[j], 3, "max-aniso-4"); + BITS(stream, ptr[j], 2, 0, "shadow func"); + j++; + } + + { + PRINTF(stream, "\t TSn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG(stream, ptr[j], 17, "kill pixel enable"); + FLAG(stream, ptr[j], 16, "keyed tex filter mode"); + FLAG(stream, ptr[j], 15, "chromakey enable"); + BITS(stream, ptr[j], 14, 12, "tcx wrap mode"); + BITS(stream, ptr[j], 11, 9, "tcy wrap mode"); + BITS(stream, ptr[j], 8, 6, "tcz wrap mode"); + FLAG(stream, ptr[j], 5, "normalized coords"); + BITS(stream, ptr[j], 4, 1, "map (surface) index"); + FLAG(stream, ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + PRINTF(stream, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_dest_vars( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "early classic ztest"); + FLAG(stream, ptr[j], 30, "opengl tex default color"); + FLAG(stream, ptr[j], 29, "bypass iz"); + FLAG(stream, ptr[j], 28, "lod preclamp"); + BITS(stream, ptr[j], 27, 26, "dither pattern"); + FLAG(stream, ptr[j], 25, "linear gamma blend"); + FLAG(stream, ptr[j], 24, "debug dither"); + BITS(stream, ptr[j], 23, 20, "dstorg x"); + BITS(stream, ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS(stream, ptr[j], 14, 12, "422 write select"); + BITS(stream, ptr[j], 11, 8, "cbuf format"); + BITS(stream, ptr[j], 3, 2, "zbuf format"); + FLAG(stream, ptr[j], 1, "vert line stride"); + FLAG(stream, ptr[j], 1, "vert line stride offset"); + j++; + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_buf_info( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 28, 28, "aux buffer id"); + BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG(stream, ptr[j], 23, "use fence regs"); + FLAG(stream, ptr[j], 22, "tiled surface"); + FLAG(stream, ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS(stream, ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; + } + + PRINTF(stream, "\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean i915_debug_packet( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug(stream, "MI_NOOP", 1); + case 0x3: + return debug(stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug(stream, "MI_FLUSH", 1); + case 0xA: + debug(stream, "MI_BATCH_BUFFER_END", 1); + return FALSE; + case 0x22: + return debug(stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + (void)debug(stream, "UNKNOWN 0x0 case!", 1); + assert(0); + break; + } + break; + case 0x1: + (void) debug(stream, "UNKNOWN 0x1 case!", 1); + assert(0); + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug(stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug(stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); + case 0x9: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug(stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug(stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + (void) debug(stream, "UNKNOWN 0x1c case!", 1); + assert(0); + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + assert(0); + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug(stream, "???", (cmd & 0xffff) + 1); + else + return debug(stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) + return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + else if (cmd & (1 << 17)) + { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim(stream); + else + return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } + else + return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2); + break; + default: + return debug(stream, "", 0); + } + default: + assert(0); + return 0; + } + + assert(0); + return 0; +} + + + +void +i915_dump_batchbuffer( struct i915_context *i915 ) +{ + struct debug_stream stream; + unsigned *start = i915->batch_start; + unsigned *end = i915->winsys->batch_start( i915->winsys, 0, 0 ); + unsigned long bytes = (unsigned long) (end - start) * 4; + boolean done = FALSE; + + stream.offset = 0; + stream.ptr = (char *)start; + stream.print_addresses = 0; + stream.winsys = i915->pipe.winsys; + + if (!start || !end) { + stream.winsys->printf( stream.winsys, "\n\nBATCH: ???\n"); + return; + } + + stream.winsys->printf( stream.winsys, "\n\nBATCH: (%d)\n", bytes / 4); + + while (!done && + stream.offset < bytes) + { + if (!i915_debug_packet( &stream )) + break; + + assert(stream.offset <= bytes && + stream.offset >= 0); + } + + stream.winsys->printf( stream.winsys, "END-BATCH\n\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h new file mode 100644 index 0000000000..0bcd094233 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef I915_DEBUG_H +#define I915_DEBUG_H + +#include + +struct i915_context; + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; + struct pipe_winsys *winsys; +}; + + +/* Internal functions + */ +void i915_disassemble_program(struct debug_stream *stream, + const unsigned *program, unsigned sz); + +void i915_print_ureg(const char *msg, unsigned ureg); + + +#define DEBUG_BATCH 0x1 +#define DEBUG_BLIT 0x2 +#define DEBUG_BUFFER 0x4 +#define DEBUG_CONSTANTS 0x8 +#define DEBUG_CONTEXT 0x10 +#define DEBUG_DRAW 0x20 +#define DEBUG_DYNAMIC 0x40 +#define DEBUG_FLUSH 0x80 +#define DEBUG_MAP 0x100 +#define DEBUG_PROGRAM 0x200 +#define DEBUG_REGIONS 0x400 +#define DEBUG_SAMPLER 0x800 +#define DEBUG_STATIC 0x1000 +#define DEBUG_SURFACE 0x2000 +#define DEBUG_WINSYS 0x4000 + +#include "pipe/p_compiler.h" + +#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) + +#include "pipe/p_winsys.h" + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + if ((i915)->debug & FILE_DEBUG_FLAG) { + va_list args; + char buffer[256]; + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + i915->pipe.winsys->printf( i915->pipe.winsys, buffer ); + va_end( args ); + } +} + +#else + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + (void) i915; + (void) fmt; +} + +#endif + + +void i915_dump_batchbuffer( struct i915_context *i915 ); + + + +void i915_debug_init( struct i915_context *i915 ); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c new file mode 100644 index 0000000000..ebfdb3d93c --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -0,0 +1,366 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + char buffer[256]; + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + stream->winsys->printf( stream->winsys, buffer ); + va_end( args ); +} + + +static const char *opcodes[0x20] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + + +static const int args[0x20] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 m ul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +static const char *regname[0x8] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + PRINTF(stream, "T_DIFFUSE"); + return; + case T_SPECULAR: + PRINTF(stream, "T_SPECULAR"); + return; + case T_FOG_W: + PRINTF(stream, "T_FOG_W"); + return; + default: + PRINTF(stream, "T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + PRINTF(stream, "oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + PRINTF(stream, "oD"); + return; + } + break; + default: + break; + } + + PRINTF(stream, "%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + PRINTF(stream, "."); + + for (i = 3; i >= 0; i--) { + if (reg & (1 << ((i * 4) + 3))) + PRINTF(stream, "-"); + + switch ((reg >> (i * 4)) & 0x7) { + case 0: + PRINTF(stream, "x"); + break; + case 1: + PRINTF(stream, "y"); + break; + case 2: + PRINTF(stream, "z"); + break; + case 3: + PRINTF(stream, "w"); + break; + case 4: + PRINTF(stream, "0"); + break; + case 5: + PRINTF(stream, "1"); + break; + default: + PRINTF(stream, "?"); + break; + } + } +} + + +static void +print_src_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + print_reg_neg_swizzle(stream, dword); +} + + +static void +print_dest_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + PRINTF(stream, "."); + if (dword & A0_DEST_CHANNEL_X) + PRINTF(stream, "x"); + if (dword & A0_DEST_CHANNEL_Y) + PRINTF(stream, "y"); + if (dword & A0_DEST_CHANNEL_Z) + PRINTF(stream, "z"); + if (dword & A0_DEST_CHANNEL_W) + PRINTF(stream, "w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r) + + +static void +print_arith_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(stream, program[0]); + if (program[0] & A0_DEST_SATURATE) + PRINTF(stream, " = SATURATE "); + else + PRINTF(stream, " = "); + } + + PRINTF(stream, "%s ", opcodes[opcode]); + + print_src_reg(stream, GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC2_REG(program[2])); + PRINTF(stream, "\n"); + return; +} + + +static void +print_tex_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, " = "); + + PRINTF(stream, "%s ", opcodes[opcode]); + + PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_texkil_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "TEXKIL "); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_dcl_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "%s ", opcodes[opcode]); + print_dest_reg(stream, + program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, "\n"); +} + + +void +i915_disassemble_program(struct debug_stream *stream, + const unsigned * program, unsigned sz) +{ + unsigned size = program[0] & 0x1ff; + unsigned i; + + PRINTF(stream, "\t\tBEGIN\n"); + + assert(size + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + unsigned opcode = program[0] & (0x1f << 24); + + PRINTF(stream, "\t\t"); + + if ((int) opcode >= A0_NOP && opcode <= A0_SLT) + print_arith_op(stream, opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL) + print_tex_op(stream, opcode >> 24, program); + else if (opcode == T0_TEXKILL) + print_texkil_op(stream, opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(stream, opcode >> 24, program); + else + PRINTF(stream, "Unknown opcode 0x%x\n", opcode); + } + + PRINTF(stream, "\t\tEND\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c new file mode 100644 index 0000000000..3c2069b827 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -0,0 +1,81 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_batch.h" + + +/** + * In future we may want a fence-like interface instead of finish. + */ +static void i915_flush( struct pipe_context *pipe, + unsigned flags ) +{ + struct i915_context *i915 = i915_context(pipe); + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + unsigned flush = MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush |= INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush |= FLUSH_MAP_CACHE; + + if (!BEGIN_BATCH(1, 0)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(1, 0)); + } + OUT_BATCH( flush ); + ADVANCE_BATCH(); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH(); + + if (flags & PIPE_FLUSH_WAIT) { + i915->winsys->batch_finish(i915->winsys); + } +} + + + +void i915_init_flush_functions( struct i915_context *i915 ) +{ + i915->pipe.flush = i915_flush; +} diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h new file mode 100644 index 0000000000..8c7b68aefb --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -0,0 +1,213 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 + + + +/** + * Program translation state + */ +struct i915_fp_compile { + const struct pipe_shader_state *shader; + + struct vertex_info *vertex_info; + + uint declarations[I915_PROGRAM_SIZE]; + uint program[I915_PROGRAM_SIZE]; + + uint input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + uint input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + + uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + + /** points into the i915->current.constants array: */ + float (*constants)[4]; + uint num_constants; + uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */ + + uint *csr; /**< Cursor, points into program. */ + + uint *decl; /**< Cursor, points into declarations. */ + + uint decl_s; /**< flags for which s regs need to be decl'd */ + uint decl_t; /**< flags for which t regs need to be decl'd */ + + uint temp_flag; /**< Tracks temporary regs which are in use */ + uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ + + uint nr_tex_indirect; + uint nr_tex_insn; + uint nr_alu_insn; + uint nr_decl_insn; + + boolean error; /**< Set if i915_program_error() is called */ + uint wpos_tex; + uint NumNativeInstructions; + uint NumNativeAluInstructions; + uint NumNativeTexInstructions; + uint NumNativeTexIndirections; +}; + + +/* Having zero and one in here makes the definition of swizzle a lot + * easier. + */ +#define UREG_TYPE_SHIFT 29 +#define UREG_NR_SHIFT 24 +#define UREG_CHANNEL_X_NEGATE_SHIFT 23 +#define UREG_CHANNEL_X_SHIFT 20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT 19 +#define UREG_CHANNEL_Y_SHIFT 16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT 15 +#define UREG_CHANNEL_Z_SHIFT 12 +#define UREG_CHANNEL_W_NEGATE_SHIFT 11 +#define UREG_CHANNEL_W_SHIFT 8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 +#define UREG_CHANNEL_ZERO_SHIFT 4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ 1 +#define UREG_CHANNEL_ONE_SHIFT 0 + +#define UREG_BAD 0xffffffff /* not a valid ureg */ + +#define X SRC_X +#define Y SRC_Y +#define Z SRC_Z +#define W SRC_W +#define ZERO SRC_ZERO +#define ONE SRC_ONE + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \ + ((nr) << UREG_NR_SHIFT) | \ + (X << UREG_CHANNEL_X_SHIFT) | \ + (Y << UREG_CHANNEL_Y_SHIFT) | \ + (Z << UREG_CHANNEL_Z_SHIFT) | \ + (W << UREG_CHANNEL_W_SHIFT) | \ + (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \ + (ONE << UREG_CHANNEL_ONE_SHIFT)) + +#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20)) +#define CHANNEL_SRC( src, channel ) (src>>(channel*4)) + +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)®_TYPE_MASK) +#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)®_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +/* One neat thing about the UREG representation: + */ +static INLINE int +swizzle(int reg, uint x, uint y, uint z, uint w) +{ + assert(x <= SRC_ONE); + assert(y <= SRC_ONE); + assert(z <= SRC_ONE); + assert(w <= SRC_ONE); + return ((reg & ~UREG_XYZW_CHANNEL_MASK) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); +} + + + +/*********************************************************************** + * Public interface for the compiler + */ +extern void i915_translate_fragment_program( struct i915_context *i915 ); + + + +extern uint i915_get_temp(struct i915_fp_compile *p); +extern uint i915_get_utemp(struct i915_fp_compile *p); +extern void i915_release_utemps(struct i915_fp_compile *p); + + +extern uint i915_emit_texld(struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, uint coord, uint op); + +extern uint i915_emit_arith(struct i915_fp_compile *p, + uint op, + uint dest, + uint mask, + uint saturate, + uint src0, uint src1, uint src2); + +extern uint i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags); + + +extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); + +extern uint i915_emit_const2f(struct i915_fp_compile *p, + float c0, float c1); + +extern uint i915_emit_const4fv(struct i915_fp_compile *p, + const float * c); + +extern uint i915_emit_const4f(struct i915_fp_compile *p, + float c0, float c1, + float c2, float c3); + + +/*====================================================================== + * i915_fpc_debug.c + */ +extern void i915_disassemble_program(const uint * program, uint sz); + + +/*====================================================================== + * i915_fpc_translate.c + */ + +extern void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...); + +extern void +i915_translate_fragment_program(struct i915_context *i915); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c new file mode 100644 index 0000000000..74924ff0a1 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -0,0 +1,375 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. + */ +#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<temp_flag); + if (!bit) { + i915_program_error(p, "i915_get_temp: out of temporaries\n"); + return 0; + } + + p->temp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_R, (bit - 1)); +} + + +uint +i915_get_utemp(struct i915_fp_compile * p) +{ + int bit = ffs(~p->utemp_flag); + if (!bit) { + i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + return 0; + } + + p->utemp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_U, (bit - 1)); +} + +void +i915_release_utemps(struct i915_fp_compile *p) +{ + p->utemp_flag = ~0x7; +} + + +uint +i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags) +{ + uint reg = UREG(type, nr); + + if (type == REG_TYPE_T) { + if (p->decl_t & (1 << nr)) + return reg; + + p->decl_t |= (1 << nr); + } + else if (type == REG_TYPE_S) { + if (p->decl_s & (1 << nr)) + return reg; + + p->decl_s |= (1 << nr); + } + else + return reg; + + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + + p->nr_decl_insn++; + return reg; +} + +uint +i915_emit_arith(struct i915_fp_compile * p, + uint op, + uint dest, + uint mask, + uint saturate, uint src0, uint src1, uint src2) +{ + uint c[3]; + uint nr_const = 0; + + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) + c[nr_const++] = 0; + if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) + c[nr_const++] = 1; + if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) + c[nr_const++] = 2; + + /* Recursively call this function to MOV additional const values + * into temporary registers. Use utemp registers for this - + * currently shouldn't be possible to run out, but keep an eye on + * this. + */ + if (nr_const > 1) { + uint s[3], first, i, old_utemp_flag; + + s[0] = src0; + s[1] = src1; + s[2] = src2; + old_utemp_flag = p->utemp_flag; + + first = GET_UREG_NR(s[c[0]]); + for (i = 1; i < nr_const; i++) { + if (GET_UREG_NR(s[c[i]]) != first) { + uint tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + s[c[i]], 0, 0); + s[c[i]] = tmp; + } + } + + src0 = s[0]; + src1 = s[1]; + src2 = s[2]; + p->utemp_flag = old_utemp_flag; /* restore */ + } + + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + + p->nr_alu_insn++; + return dest; +} + +uint i915_emit_texld( struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, + uint coord, + uint op ) +{ + uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + if (coord != k) { + /* No real way to work around this in the general case - need to + * allocate and declare a new temporary register (a utemp won't + * do). Will fallback for now. + */ + i915_program_error(p, "Can't (yet) swizzle TEX arguments"); + assert(0); + return 0; + } + + /* Don't worry about saturate as we only support + */ + if (destmask != A0_DEST_CHANNEL_ALL) { + uint tmp = i915_get_utemp(p); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); + i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); + return dest; + } + else { + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + + if (GET_UREG_TYPE(coord) != REG_TYPE_T) { + p->nr_tex_indirect++; + } + + *(p->csr++) = (op | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); + + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + + p->nr_tex_insn++; + return dest; + } +} + + +uint +i915_emit_const1f(struct i915_fp_compile * p, float c0) +{ + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); + if (c0 == 1.0) + return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + continue; + for (idx = 0; idx < 4; idx++) { + if (!(p->constant_flags[reg] & (1 << idx)) || + p->constants[reg][idx] == c0) { + p->constants[reg][idx] = c0; + p->constant_flags[reg] |= 1 << idx; + if (reg + 1 > p->num_constants) + p->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const1f: out of constants\n"); + return 0; +} + +uint +i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) +{ + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); + if (c0 == 1.0) + return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + + if (c1 == 0.0) + return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); + if (c1 == 1.0) + return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == 0xf || + p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + continue; + for (idx = 0; idx < 3; idx++) { + if (!(p->constant_flags[reg] & (3 << idx))) { + p->constants[reg][idx + 0] = c0; + p->constants[reg][idx + 1] = c1; + p->constant_flags[reg] |= 3 << idx; + if (reg + 1 > p->num_constants) + p->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const2f: out of constants\n"); + return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, + float c0, float c1, float c2, float c3) +{ + unsigned reg; + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == 0xf && + p->constants[reg][0] == c0 && + p->constants[reg][1] == c1 && + p->constants[reg][2] == c2 && + p->constants[reg][3] == c3) { + return UREG(REG_TYPE_CONST, reg); + } + else if (p->constant_flags[reg] == 0) { + + p->constants[reg][0] = c0; + p->constants[reg][1] = c1; + p->constants[reg][2] = c2; + p->constants[reg][3] = c3; + p->constant_flags[reg] = 0xf; + if (reg + 1 > p->num_constants) + p->num_constants = reg + 1; + return UREG(REG_TYPE_CONST, reg); + } + } + + i915_program_error(p, "i915_emit_const4f: out of constants\n"); + return 0; +} + + +uint +i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ + return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} + + +#if 00000/*UNUSED*/ +/* Reserve a slot in the constant file for a Mesa state parameter. + * These will later need to be tracked on statechanges, but that is + * done elsewhere. + */ +uint +i915_emit_param4fv(struct i915_fp_compile * p, const float * values) +{ + struct i915_fragment_program *fp = p->fp; + int i; + + for (i = 0; i < fp->nr_params; i++) { + if (fp->param[i].values == values) + return UREG(REG_TYPE_CONST, fp->param[i].reg); + } + + if (p->constants->nr_constants == I915_MAX_CONSTANT || + fp->nr_params == I915_MAX_CONSTANT) { + i915_program_error(p, "i915_emit_param4fv: out of constants\n"); + return 0; + } + + { + int reg = p->constants->nr_constants++; + int i = fp->nr_params++; + + assert (p->constant_flags[reg] == 0); + p->constant_flags[reg] = I915_CONSTFLAG_PARAM; + + fp->param[i].values = values; + fp->param[i].reg = reg; + + return UREG(REG_TYPE_CONST, reg); + } +} +#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c new file mode 100644 index 0000000000..868f0c7e04 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -0,0 +1,1135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +#include "pipe/draw/draw_vertex.h" + + +/** + * Simple pass-through fragment shader to use when we don't have + * a real shader (or it fails to compile for some reason). + */ +static unsigned passthrough[] = +{ + _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), + + /* declare input color: + */ + (D0_DCL | + (REG_TYPE_T << D0_TYPE_SHIFT) | + (T_DIFFUSE << D0_NR_SHIFT) | + D0_CHANNEL_ALL), + 0, + 0, + + /* move to output color: + */ + (A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)), + 0x01230000, /* .xyzw */ + 0 +}; + + +/* 1, -1/3!, 1/5!, -1/7! */ +static const float sin_constants[4] = { 1.0, + -1.0f / (3 * 2 * 1), + 1.0f / (5 * 4 * 3 * 2 * 1), + -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -1/2!, 1/4!, -1/6! */ +static const float cos_constants[4] = { 1.0, + -1.0f / (2 * 1), + 1.0f / (4 * 3 * 2 * 1), + -1.0f / (6 * 5 * 4 * 3 * 2 * 1) +}; + + + +/** + * component-wise negation of ureg + */ +static INLINE int +negate(int reg, int x, int y, int z, int w) +{ + /* Another neat thing about the UREG representation */ + return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | + ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | + ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | + ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + +static void +i915_use_passthrough_shader(struct i915_context *i915) +{ + debug_printf("**** Using i915 pass-through fragment shader\n"); + + i915->current.program = (uint *) MALLOC(sizeof(passthrough)); + if (i915->current.program) { + memcpy(i915->current.program, passthrough, sizeof(passthrough)); + i915->current.program_len = Elements(passthrough); + } + + i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0; + i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0; +} + + +void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...) +{ + va_list args; + char buffer[1024]; + + debug_printf("i915_program_error: "); + va_start( args, msg ); + vsprintf( buffer, msg, args ); + va_end( args ); + debug_printf(buffer); + debug_printf("\n"); + + p->error = 1; +} + + + +/** + * Construct a ureg for the given source register. Will emit + * constants, apply swizzling and negation as needed. + */ +static uint +src_vector(struct i915_fp_compile *p, + const struct tgsi_full_src_register *source) +{ + uint index = source->SrcRegister.Index; + uint src, sem_name, sem_ind; + + switch (source->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + i915_program_error(p, "Exceeded max temporary reg"); + return 0; + } + src = UREG(REG_TYPE_R, index); + break; + case TGSI_FILE_INPUT: + /* XXX: Packing COL1, FOGC into a single attribute works for + * texenv programs, but will fail for real fragment programs + * that use these attributes and expect them to be a full 4 + * components wide. Could use a texcoord to pass these + * attributes if necessary, but that won't work in the general + * case. + * + * We also use a texture coordinate to pass wpos when possible. + */ + + /* use vertex format info to map a slot number to a VF attrib */ + assert(index < p->vertex_info->num_attribs); + + sem_name = p->input_semantic_name[index]; + sem_ind = p->input_semantic_index[index]; + + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + debug_printf("SKIP SEM POS\n"); + /* + assert(p->wpos_tex != -1); + src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); + */ + break; + case TGSI_SEMANTIC_COLOR: + if (sem_ind == 0) { + src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); + } + else { + /* secondary color */ + assert(sem_ind == 1); + src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); + src = swizzle(src, X, Y, Z, ONE); + } + break; + case TGSI_SEMANTIC_FOG: + src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); + src = swizzle(src, W, W, W, W); + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); + break; + default: + i915_program_error(p, "Bad source->Index"); + return 0; + } + break; + + case TGSI_FILE_IMMEDIATE: + /* XXX unfinished - need to append immediates onto const buffer */ + /* fall-through */ + case TGSI_FILE_CONSTANT: + src = UREG(REG_TYPE_CONST, index); + break; + + default: + i915_program_error(p, "Bad source->File"); + return 0; + } + + if (source->SrcRegister.Extended) { + src = swizzle(src, + source->SrcRegisterExtSwz.ExtSwizzleX, + source->SrcRegisterExtSwz.ExtSwizzleY, + source->SrcRegisterExtSwz.ExtSwizzleZ, + source->SrcRegisterExtSwz.ExtSwizzleW); + } + else { + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); + } + + + /* There's both negate-all-components and per-component negation. + * Try to handle both here. + */ + { + int nx = source->SrcRegisterExtSwz.NegateX; + int ny = source->SrcRegisterExtSwz.NegateY; + int nz = source->SrcRegisterExtSwz.NegateZ; + int nw = source->SrcRegisterExtSwz.NegateW; + if (source->SrcRegister.Negate) { + nx = !nx; + ny = !ny; + nz = !nz; + nw = !nw; + } + src = negate(src, nx, ny, nz, nw); + } + + /* no abs() or post-abs negation */ +#if 0 + /* XXX assertions disabled to allow arbfplight.c to run */ + /* XXX enable these assertions, or fix things */ + assert(!source->SrcRegisterExtMod.Absolute); + assert(!source->SrcRegisterExtMod.Negate); +#endif + return src; +} + + +/** + * Construct a ureg for a destination register. + */ +static uint +get_result_vector(struct i915_fp_compile *p, + const struct tgsi_full_dst_register *dest) +{ + switch (dest->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + uint sem_name = p->output_semantic_name[dest->DstRegister.Index]; + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + return UREG(REG_TYPE_OD, 0); + case TGSI_SEMANTIC_COLOR: + return UREG(REG_TYPE_OC, 0); + default: + i915_program_error(p, "Bad inst->DstReg.Index/semantics"); + return 0; + } + } + case TGSI_FILE_TEMPORARY: + return UREG(REG_TYPE_R, dest->DstRegister.Index); + default: + i915_program_error(p, "Bad inst->DstReg.File"); + return 0; + } +} + + +/** + * Compute flags for saturation and writemask. + */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ + const uint writeMask + = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint flags = 0x0; + + if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + flags |= A0_DEST_SATURATE; + + if (writeMask & TGSI_WRITEMASK_X) + flags |= A0_DEST_CHANNEL_X; + if (writeMask & TGSI_WRITEMASK_Y) + flags |= A0_DEST_CHANNEL_Y; + if (writeMask & TGSI_WRITEMASK_Z) + flags |= A0_DEST_CHANNEL_Z; + if (writeMask & TGSI_WRITEMASK_W) + flags |= A0_DEST_CHANNEL_W; + + return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_1D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_2D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_RECT: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_3D: + return D0_SAMPLE_TYPE_VOLUME; + case TGSI_TEXTURE_CUBE: + return D0_SAMPLE_TYPE_CUBE; + default: + i915_program_error(p, "TexSrc type"); + return 0; + } +} + + +/** + * Generate texel lookup instruction. + */ +static void +emit_tex(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode) +{ + uint texture = inst->InstructionExtTexture.Texture; + uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint tex = translate_tex_src_target( p, texture ); + uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); + uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + + i915_emit_texld( p, + get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_flags( inst ), + sampler, + coord, + opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode the i915 opcode + * \param numArgs the number of input/src arguments + */ +static void +emit_simple_arith(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + uint arg1, arg2, arg3; + + assert(numArgs <= 3); + + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); + + i915_emit_arith( p, + opcode, + get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_flags( inst ), 0, + arg1, + arg2, + arg3 ); +} + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +/* + * Translate TGSI instruction to i915 instruction. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? + * LIT -- results seem a little different to sw mesa + * LOG -- different to mesa on negative numbers, but this is conformant. + */ +static void +i915_translate_instruction(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst) +{ + uint writemask; + uint src0, src1, src2, flags; + uint tmp = 0; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + i915_emit_arith(p, + A0_MAX, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src0, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_ADD: + emit_simple_arith(p, inst, A0_ADD, 2); + break; + + case TGSI_OPCODE_CMP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), + 0, src0, src2, src1); /* NOTE: order of src2, src1 */ + break; + + case TGSI_OPCODE_COS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 + * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * result = DP4 t0, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, Y, X, ONE), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + break; + + case TGSI_OPCODE_DP3: + emit_simple_arith(p, inst, A0_DP3, 2); + break; + + case TGSI_OPCODE_DP4: + emit_simple_arith(p, inst, A0_DP4, 2); + break; + + case TGSI_OPCODE_DPH: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, Y, Z, ONE), src1, 0); + break; + + case TGSI_OPCODE_DST: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, ONE, Y, Z, ONE), + swizzle(src1, ONE, Y, ONE, W), 0); + break; + + case TGSI_OPCODE_END: + /* no-op */ + break; + + case TGSI_OPCODE_EX2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_FLR: + emit_simple_arith(p, inst, A0_FLR, 1); + break; + + case TGSI_OPCODE_FRC: + emit_simple_arith(p, inst, A0_FRC, 1); + break; + + case TGSI_OPCODE_KIL: + /* unconditional kill */ + assert(0); /* not tested yet */ +#if 0 + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ + 0, src0, T0_TEXKILL); +#endif + break; + + case TGSI_OPCODE_KILP: + /* kill if src[0].x < 0 || src[0].y < 0 ... */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ + 0, src0, T0_TEXKILL); + break; + + case TGSI_OPCODE_LG2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_LOG, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_LIT: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* tmp = max( a.xyzw, a.00zw ) + * XXX: Clamp tmp.w to -128..128 + * tmp.y = log(tmp.y) + * tmp.y = tmp.w * tmp.y + * tmp.y = exp(tmp.y) + * result = cmp (a.11-x1, a.1x01, a.1xy1 ) + */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, + src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + + i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, Y, ZERO, ZERO), + swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + + i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), + swizzle(tmp, ONE, X, ZERO, ONE), + swizzle(tmp, ONE, X, Y, ONE)); + + break; + + case TGSI_OPCODE_LRP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + flags = get_result_flags(inst); + tmp = i915_get_utemp(p); + + /* b*a + c*(1-a) + * + * b*a + c - ca + * + * tmp = b*a + c, + * result = (-c)*a + tmp + */ + i915_emit_arith(p, A0_MAD, tmp, + flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + + i915_emit_arith(p, A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); + break; + + case TGSI_OPCODE_MAD: + emit_simple_arith(p, inst, A0_MAD, 3); + break; + + case TGSI_OPCODE_MAX: + emit_simple_arith(p, inst, A0_MAX, 2); + break; + + case TGSI_OPCODE_MIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + i915_emit_arith(p, + A0_MAX, + tmp, flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); + + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + break; + + case TGSI_OPCODE_MOV: + /* aka TGSI_OPCODE_SWZ */ + emit_simple_arith(p, inst, A0_MOV, 1); + break; + + case TGSI_OPCODE_MUL: + emit_simple_arith(p, inst, A0_MUL, 2); + break; + + case TGSI_OPCODE_POW: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + /* XXX: masking on intermediate values, here and elsewhere. + */ + i915_emit_arith(p, + A0_LOG, + tmp, A0_DEST_CHANNEL_X, 0, + swizzle(src0, X, X, X, X), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, swizzle(tmp, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RET: + /* XXX: no-op? */ + break; + + case TGSI_OPCODE_RCP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RCP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RSQ: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RSQ, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_SCS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * scs.x = DP4 t1, sin_constants + * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * scs.y = DP4 t1, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(src0, X, X, ONE, ONE), + swizzle(src0, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + if (writemask & TGSI_WRITEMASK_Y) { + uint tmp1; + + if (writemask & TGSI_WRITEMASK_X) + tmp1 = i915_get_utemp(p); + else + tmp1 = tmp; + + i915_emit_arith(p, + A0_MUL, + tmp1, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_Y, 0, + swizzle(tmp1, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + } + + if (writemask & TGSI_WRITEMASK_X) { + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_X, 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + } + break; + + case TGSI_OPCODE_SGE: + emit_simple_arith(p, inst, A0_SGE, 2); + break; + + case TGSI_OPCODE_SIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * result = DP4 t1.wzyx, sin_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + break; + + case TGSI_OPCODE_SLT: + emit_simple_arith(p, inst, A0_SLT, 2); + break; + + case TGSI_OPCODE_SUB: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src1, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_TEX: + if (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide + == TGSI_EXTSWIZZLE_W) { + emit_tex(p, inst, T0_TEXLDP); + } + else { + emit_tex(p, inst, T0_TEXLD); + } + break; + + case TGSI_OPCODE_TXB: + emit_tex(p, inst, T0_TEXLDB); + break; + + case TGSI_OPCODE_XPD: + /* Cross product: + * result.x = src0.y * src1.z - src0.z * src1.y; + * result.y = src0.z * src1.x - src0.x * src1.z; + * result.z = src0.x * src1.y - src0.y * src1.x; + * result.w = undef; + */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, Z, X, Y, ONE), + swizzle(src1, Y, Z, X, ONE), 0); + + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, Y, Z, X, ONE), + swizzle(src1, Z, X, Y, ONE), + negate(tmp, 1, 1, 1, 0)); + break; + + default: + i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); + return; + } + + i915_release_utemps(p); +} + + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_INPUT) { + /* save input register info for use in src_vector() */ + uint ind, sem, semi; + ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; + semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + /*debug_printf("FS Input DECL [%u] sem %u\n", ind, sem);*/ + p->input_semantic_name[ind] = sem; + p->input_semantic_index[ind] = semi; + } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_OUTPUT) { + /* save output register info for use in get_result_vector() */ + uint ind, sem, semi; + ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; + semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + /*debug_printf("FS Output DECL [%u] sem %u\n", ind, sem);*/ + p->output_semantic_name[ind] = sem; + p->output_semantic_index[ind] = semi; + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* XXX append the immediate to the const buffer... */ + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + i915_translate_instruction(p, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + + } /* while */ + + tgsi_parse_free (&parse); +} + + +static struct i915_fp_compile * +i915_init_compile(struct i915_context *i915, + const struct pipe_shader_state *fs) +{ + struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + + p->shader = i915->fs; + + p->vertex_info = &i915->current.vertex_info; + + /* new constants found during translation get appended after the + * user-provided constants. + */ + p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT]; + p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]; + + p->nr_tex_indirect = 1; /* correct? */ + p->nr_tex_insn = 0; + p->nr_alu_insn = 0; + p->nr_decl_insn = 0; + + memset(p->constant_flags, 0, sizeof(p->constant_flags)); + + p->csr = p->program; + p->decl = p->declarations; + p->decl_s = 0; + p->decl_t = 0; + p->temp_flag = 0xffff000; + p->utemp_flag = ~0x7; + + p->wpos_tex = -1; + + /* initialize the first program word */ + *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; + + return p; +} + + +/* Copy compile results to the fragment program struct and destroy the + * compilation context. + */ +static void +i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) +{ + unsigned long program_size = (unsigned long) (p->csr - p->program); + unsigned long decl_size = (unsigned long) (p->decl - p->declarations); + + if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) + i915_program_error(p, "Exceeded max nr indirect texture lookups"); + + if (p->nr_tex_insn > I915_MAX_TEX_INSN) + i915_program_error(p, "Exceeded max TEX instructions"); + + if (p->nr_alu_insn > I915_MAX_ALU_INSN) + i915_program_error(p, "Exceeded max ALU instructions"); + + if (p->nr_decl_insn > I915_MAX_DECL_INSN) + i915_program_error(p, "Exceeded max DECL instructions"); + + /* free old program, if present */ + if (i915->current.program) { + FREE(i915->current.program); + i915->current.program_len = 0; + } + + if (p->error) { + p->NumNativeInstructions = 0; + p->NumNativeAluInstructions = 0; + p->NumNativeTexInstructions = 0; + p->NumNativeTexIndirections = 0; + + i915_use_passthrough_shader(i915); + } + else { + p->NumNativeInstructions + = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; + p->NumNativeAluInstructions = p->nr_alu_insn; + p->NumNativeTexInstructions = p->nr_tex_insn; + p->NumNativeTexIndirections = p->nr_tex_indirect; + + /* patch in the program length */ + p->declarations[0] |= program_size + decl_size - 2; + + /* Copy compilation results to fragment program struct: + */ + i915->current.program + = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); + if (i915->current.program) { + i915->current.program_len = program_size + decl_size; + + memcpy(i915->current.program, + p->declarations, + decl_size * sizeof(uint)); + + memcpy(i915->current.program + decl_size, + p->program, + program_size * sizeof(uint)); + } + + /* update number of constants */ + i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants; + assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT] + >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]); + } + + /* Release the compilation struct: + */ + FREE(p); +} + + +/** + * Find an unused texture coordinate slot to use for fragment WPOS. + * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). + */ +static void +i915_find_wpos_space(struct i915_fp_compile *p) +{ +#if 0 + const uint inputs + = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ + uint i; + + p->wpos_tex = -1; + + if (inputs & (1 << TGSI_ATTRIB_POS)) { + for (i = 0; i < I915_TEX_UNITS; i++) { + if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { + p->wpos_tex = i; + return; + } + } + + i915_program_error(p, "No free texcoord for wpos value"); + } +#else + if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + /* frag shader using the fragment position input */ +#if 0 + assert(0); +#endif + } +#endif +} + + + + +/** + * Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +static void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ + /* XXX assuming pos/depth is always in output[0] */ + if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + const uint depth = UREG(REG_TYPE_OD, 0); + + i915_emit_arith(p, + A0_MOV, /* opcode */ + depth, /* dest reg */ + A0_DEST_CHANNEL_W, /* write mask */ + 0, /* saturate? */ + swizzle(depth, X, Y, Z, Z), /* src0 */ + 0, 0 /* src1, src2 */); + } +} + + +void +i915_translate_fragment_program( struct i915_context *i915 ) +{ + struct i915_fp_compile *p = i915_init_compile(i915, i915->fs); + const struct tgsi_token *tokens = i915->fs->tokens; + + i915_find_wpos_space(p); + + i915_translate_instructions(p, tokens); + i915_fixup_depth_write(p); + + i915_fini_compile(i915, p); +} diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c new file mode 100644 index 0000000000..c4a706c37d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -0,0 +1,215 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/draw/draw_private.h" +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_batch.h" + + + +/** + * Primitive emit to hardware. No support for vertex buffers or any + * nice fast paths. + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct i915_context *i915; +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +/** + * Extract the needed fields from vertex_header and emit i915 dwords. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. + */ +static INLINE void +emit_hw_vertex( struct i915_context *i915, + const struct vertex_header *vertex) +{ + const struct vertex_info *vinfo = &i915->current.vertex_info; + uint i; + uint count = 0; /* for debug/sanity */ + + for (i = 0; i < vinfo->num_attribs; i++) { + switch (vinfo->emit[i]) { + case EMIT_OMIT: + /* no-op */ + break; + case EMIT_1F: + OUT_BATCH( fui(vertex->data[i][0]) ); + count++; + break; + case EMIT_2F: + OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(vertex->data[i][1]) ); + count += 2; + break; + case EMIT_3F: + OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(vertex->data[i][1]) ); + OUT_BATCH( fui(vertex->data[i][2]) ); + count += 3; + break; + case EMIT_4F: + OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(vertex->data[i][1]) ); + OUT_BATCH( fui(vertex->data[i][2]) ); + OUT_BATCH( fui(vertex->data[i][3]) ); + count += 4; + break; + case EMIT_4UB: + OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ), + float_to_ubyte( vertex->data[i][1] ), + float_to_ubyte( vertex->data[i][0] ), + float_to_ubyte( vertex->data[i][3] )) ); + count += 1; + break; + default: + assert(0); + } + } + assert(count == vinfo->size); +} + + + +static INLINE void +emit_prim( struct draw_stage *stage, + struct prim_header *prim, + unsigned hwprim, + unsigned nr ) +{ + struct i915_context *i915 = setup_stage(stage)->i915; + unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + unsigned i; + + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + FLUSH_BATCH(); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + assert(0); + return; + } + } + + /* Emit each triangle as a single primitive. I told you this was + * simple. + */ + OUT_BATCH(_3DPRIMITIVE | + hwprim | + ((4 + vertex_size * nr)/4 - 2)); + + for (i = 0; i < nr; i++) + emit_hw_vertex(i915, prim->v[i]); +} + + +static void +setup_tri( struct draw_stage *stage, struct prim_header *prim ) +{ + emit_prim( stage, prim, PRIM3D_TRILIST, 3 ); +} + + +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_LINELIST, 2 ); +} + + +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_POINTLIST, 1 ); +} + + +static void setup_flush( struct draw_stage *stage, unsigned flags ) +{ +} + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. This gets plugged into + * the 'draw' module's pipeline. + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ) +{ + struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + + setup->i915 = i915; + setup->stage.draw = i915->draw; + setup->stage.point = setup_point; + setup->stage.line = setup_line; + setup->stage.tri = setup_tri; + setup->stage.flush = setup_flush; + setup->stage.reset_stipple_counter = reset_stipple_counter; + setup->stage.destroy = render_destroy; + + return &setup->stage; +} diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c new file mode 100644 index 0000000000..e069773fd4 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -0,0 +1,254 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca + * \author Keith Whitwell + */ + + +#include "pipe/draw/draw_vbuf.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_state.h" + + +/** + * Primitive renderer for i915. + */ +struct i915_vbuf_render { + struct vbuf_render base; + + struct i915_context *i915; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Hardware primitive */ + unsigned hwprim; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct i915_vbuf_render * +i915_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct i915_vbuf_render *)render; +} + + +static const struct vertex_info * +i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + return &i915->current.vertex_info; +} + + +static void * +i915_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_winsys *winsys = i915->pipe.winsys; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + /* FIXME: handle failure */ + assert(!i915->vbo); + i915->vbo = winsys->buffer_create(winsys, 64, I915_BUFFER_USAGE_LIT_VERTEX, + size); + + i915->dirty |= I915_NEW_VBO; + + return winsys->buffer_map(winsys, + i915->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); +} + + +static void +i915_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + switch(prim) { + case PIPE_PRIM_POINTS: + i915_render->hwprim = PRIM3D_POINTLIST; + break; + case PIPE_PRIM_LINES: + i915_render->hwprim = PRIM3D_LINELIST; + break; + case PIPE_PRIM_TRIANGLES: + i915_render->hwprim = PRIM3D_TRILIST; + break; + default: + assert(0); + } +} + + +static void +i915_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + + assert(nr_indices); + + assert((i915->dirty & ~I915_NEW_VBO) == 0); + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + FLUSH_BATCH(); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + return; + } + } + + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + for (i = 0; i + 1 < nr_indices; i += 2) { + OUT_BATCH( indices[i] | + (indices[i + 1] << 16) ); + } + if (i < nr_indices) { + OUT_BATCH( indices[i] ); + } +} + + +static void +i915_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_winsys *winsys = i915->pipe.winsys; + + assert(i915->vbo); + winsys->buffer_unmap(winsys, i915->vbo); + pipe_buffer_reference(winsys, &i915->vbo, NULL); +} + + +static void +i915_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + FREE(i915_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +i915_vbuf_render_create( struct i915_context *i915 ) +{ + struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); + + i915_render->i915 = i915; + + i915_render->base.max_vertex_buffer_bytes = 128*1024; + + /* NOTE: it must be such that state and vertices indices fit in a single + * batch buffer. + */ + i915_render->base.max_indices = 16*1024; + + i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; + i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; + i915_render->base.set_primitive = i915_vbuf_render_set_primitive; + i915_render->base.draw = i915_vbuf_render_draw; + i915_render->base.release_vertices = i915_vbuf_render_release_vertices; + i915_render->base.destroy = i915_vbuf_render_destroy; + + return &i915_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = i915_vbuf_render_create(i915); + if(!render) + return NULL; + + stage = draw_vbuf_stage( i915->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915simple/i915_reg.h new file mode 100644 index 0000000000..04620fec68 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_reg.h @@ -0,0 +1,978 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_REG_H +#define I915_REG_H + + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + + + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + + + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + + +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLOR_BUF_8BIT 0 +#define COLOR_BUF_RGB555 (1<<8) +#define COLOR_BUF_RGB565 (2<<8) +#define COLOR_BUF_ARGB8888 (3<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + + + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + + + + + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + + + +#define DST_BLND_FACT(f) ((f)<= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + + + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + + +/* p207 */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244 */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16)) +#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) +#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) + + +#define MI_FLUSH ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE (1<<0) +#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) + + +#define CMD_3D (0x3<<29) + + +#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define I915PACKCOLOR4444(r,g,b,a) \ + ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) + +#define I915PACKCOLOR1555(r,g,b,a) \ + ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ + ((a) ? 0x8000 : 0)) + +#define I915PACKCOLOR565(r,g,b) \ + ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) + +#define I915PACKCOLOR8888(r,g,b,a) \ + ((a<<24) | (r<<16) | (g<<8) | b) + + + + +#define BR00_BITBLT_CLIENT 0x40000000 +#define BR00_OP_COLOR_BLT 0x10000000 +#define BR00_OP_SRC_COPY_BLT 0x10C00000 +#define BR13_SOLID_PATTERN 0x80000000 + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define MI_WAIT_FOR_EVENT ((0x3<<23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +#define MI_BATCH_BUFFER (0x30<<23) +#define MI_BATCH_BUFFER_START (0x31<<23) +#define MI_BATCH_BUFFER_END (0xa<<23) + + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c new file mode 100644 index 0000000000..abd5571b88 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -0,0 +1,694 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + + +#include "pipe/draw/draw_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_state_inlines.h" + + +/* The i915 (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned +translate_wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return TEXCOORDMODE_CLAMP_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return TEXCOORDMODE_CLAMP_BORDER; +// case PIPE_TEX_WRAP_MIRRORED_REPEAT: +// return TEXCOORDMODE_MIRROR; + default: + return TEXCOORDMODE_WRAP; + } +} + +static unsigned translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return FILTER_LINEAR; + default: + assert(0); + return FILTER_NEAREST; + } +} + +static unsigned translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return MIPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return MIPFILTER_LINEAR; + default: + assert(0); + return MIPFILTER_NONE; + } +} + + +/* None of this state is actually used for anything yet. + */ +static void * +i915_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); + + { + unsigned eqRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + unsigned eqA = blend->alpha_func; + unsigned srcA = blend->alpha_src_factor; + unsigned dstA = blend->alpha_dst_factor; + + /* Special handling for MIN/MAX filter modes handled at + * state_tracker level. + */ + + if (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB) { + + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_ENABLE | + IAB_MODIFY_FUNC | + IAB_MODIFY_SRC_FACTOR | + IAB_MODIFY_DST_FACTOR | + SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) | + DST_ABLND_FACT(i915_translate_blend_factor(dstA)) | + (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT)); + } + else { + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + 0); + } + } + + cso_data->modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func))); + + if (blend->logicop_enable) + cso_data->LIS5 |= S5_LOGICOP_ENABLE; + + if (blend->dither) + cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + + if ((blend->colormask & PIPE_MASK_R) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_RED; + + if ((blend->colormask & PIPE_MASK_G) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; + + if ((blend->colormask & PIPE_MASK_B) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; + + if ((blend->colormask & PIPE_MASK_A) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; + + if (blend->blend_enable) { + unsigned funcRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | + SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | + DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) | + (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT)); + } + + return cso_data; +} + +static void i915_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->blend = (struct i915_blend_state*)blend; + + i915->dirty |= I915_NEW_BLEND; +} + + +static void i915_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void i915_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->blend_color = *blend_color; + + i915->dirty |= I915_NEW_BLEND; +} + +static void * +i915_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state ); + const unsigned ws = sampler->wrap_s; + const unsigned wt = sampler->wrap_t; + const unsigned wr = sampler->wrap_r; + unsigned minFilt, magFilt; + unsigned mipFilt; + + cso->templ = sampler; + + mipFilt = translate_mip_filter(sampler->min_mip_filter); + if (sampler->max_anisotropy > 1.0) { + minFilt = FILTER_ANISOTROPIC; + magFilt = FILTER_ANISOTROPIC; + if (sampler->max_anisotropy > 2.0) { + cso->state[0] |= SS2_MAX_ANISO_4; + } + } + else { + minFilt = translate_img_filter( sampler->min_img_filter ); + magFilt = translate_img_filter( sampler->mag_img_filter ); + } + + { + int b = (int) (sampler->lod_bias * 16.0); + b = CLAMP(b, -256, 255); + cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); + } + + /* Shadow: + */ + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + { + cso->state[0] |= (SS2_SHADOW_ENABLE | + i915_translate_compare_func(sampler->compare_func)); + + minFilt = FILTER_4X4_FLAT; + magFilt = FILTER_4X4_FLAT; + } + + cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | + (mipFilt << SS2_MIP_FILTER_SHIFT) | + (magFilt << SS2_MAG_FILTER_SHIFT)); + + cso->state[1] |= + ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); + + if (sampler->normalized_coords) + cso->state[1] |= SS3_NORMALIZED_COORDS; + + if (0) /* XXX not tested yet */ + { + int minlod = (int) (16.0 * sampler->min_lod); + minlod = CLAMP(minlod, 0, 16 * 11); + cso->state[1] |= (minlod << SS3_MIN_LOD_SHIFT); + } + + { + ubyte r = float_to_ubyte(sampler->border_color[0]); + ubyte g = float_to_ubyte(sampler->border_color[1]); + ubyte b = float_to_ubyte(sampler->border_color[2]); + ubyte a = float_to_ubyte(sampler->border_color[3]); + cso->state[2] = I915PACKCOLOR8888(r, g, b, a); + } + return cso; +} + +static void i915_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct i915_context *i915 = i915_context(pipe); + + assert(unit < PIPE_MAX_SAMPLERS); + i915->sampler[unit] = (const struct i915_sampler_state*)sampler; + + i915->dirty |= I915_NEW_SAMPLER; +} + +static void i915_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE(sampler); +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. + */ + +static void * +i915_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); + + { + int testmask = depth_stencil->stencil[0].value_mask & 0xff; + int writemask = depth_stencil->stencil[0].write_mask & 0xff; + + cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(testmask) | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_WRITE_MASK(writemask)); + } + + if (depth_stencil->stencil[0].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[0].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op); + int ref = depth_stencil->stencil[0].ref_value & 0xff; + + cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE | + (ref << S5_STENCIL_REF_SHIFT) | + (test << S5_STENCIL_TEST_FUNC_SHIFT) | + (fop << S5_STENCIL_FAIL_SHIFT) | + (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); + } + + if (depth_stencil->stencil[1].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[1].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); + int ref = depth_stencil->stencil[1].ref_value & 0xff; + int tmask = depth_stencil->stencil[1].value_mask & 0xff; + int wmask = depth_stencil->stencil[1].write_mask & 0xff; + + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_FUNCS | + BFO_ENABLE_STENCIL_TWO_SIDE | + BFO_ENABLE_STENCIL_REF | + BFO_STENCIL_TWO_SIDE | + (ref << BFO_STENCIL_REF_SHIFT) | + (test << BFO_STENCIL_TEST_SHIFT) | + (fop << BFO_STENCIL_FAIL_SHIFT) | + (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT)); + + cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS | + BFM_ENABLE_STENCIL_TEST_MASK | + BFM_ENABLE_STENCIL_WRITE_MASK | + (tmask << BFM_STENCIL_TEST_MASK_SHIFT) | + (wmask << BFM_STENCIL_WRITE_MASK_SHIFT)); + } + else { + /* This actually disables two-side stencil: The bit set is a + * modify-enable bit to indicate we are changing the two-side + * setting. Then there is a symbolic zero to show that we are + * setting the flag to zero/off. + */ + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_TWO_SIDE | + 0); + cso->bfo[1] = 0; + } + + if (depth_stencil->depth.enabled) { + int func = i915_translate_compare_func(depth_stencil->depth.func); + + cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE | + (func << S6_DEPTH_TEST_FUNC_SHIFT)); + + if (depth_stencil->depth.writemask) + cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE; + } + + if (depth_stencil->alpha.enabled) { + int test = i915_translate_compare_func(depth_stencil->alpha.func); + ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref); + + cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | + (test << S6_ALPHA_TEST_FUNC_SHIFT) | + (((unsigned) refByte) << S6_ALPHA_REF_SHIFT)); + } + + return cso; +} + +static void i915_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; + + i915->dirty |= I915_NEW_DEPTH_STENCIL; +} + +static void i915_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + FREE(depth_stencil); +} + + +static void i915_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct i915_context *i915 = i915_context(pipe); + + memcpy( &i915->scissor, scissor, sizeof(*scissor) ); + i915->dirty |= I915_NEW_SCISSOR; +} + + +static void i915_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +static void * i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + return 0; +} + +static void i915_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->fs = (struct pipe_shader_state *)fs; + + i915->dirty |= I915_NEW_FS; +} + +static void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + /*do nothing*/ +} + +static void * +i915_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + return draw_create_vertex_shader(i915->draw, templ); +} + +static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* Make a copy of shader constants. + * During fragment program translation we may add additional + * constants to the array. + * + * We want to consider the situation where some user constants + * (ex: a material color) may change frequently but the shader program + * stays the same. In that case we should only be updating the first + * N constants, leaving any extras from shader translation alone. + */ + { + void *mapped; + if (buf->size && + (mapped = ws->buffer_map(ws, buf->buffer, + PIPE_BUFFER_USAGE_CPU_READ))) { + memcpy(i915->current.constants[shader], mapped, buf->size); + ws->buffer_unmap(ws, buf->buffer); + i915->current.num_user_constants[shader] + = buf->size / (4 * sizeof(float)); + } + else { + i915->current.num_user_constants[shader] = 0; + } + } + + i915->dirty |= I915_NEW_CONSTANTS; +} + + +static void i915_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->texture[sampler] = (struct i915_texture*)texture; /* ptr, not struct */ + + i915->dirty |= I915_NEW_TEXTURE; +} + + + +static void i915_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->framebuffer = *fb; /* struct copy */ + + i915->dirty |= I915_NEW_FRAMEBUFFER; +} + + + +static void i915_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct i915_context *i915 = i915_context(pipe); + + draw_set_clip_state(i915->draw, clip); + + i915->dirty |= I915_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void i915_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->viewport = *viewport; /* struct copy */ + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(i915->draw, &i915->viewport); + + i915->dirty |= I915_NEW_VIEWPORT; +} + + +static void * +i915_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); + + cso->templ = rasterizer; + cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + cso->light_twoside = rasterizer->light_twoside; + cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; + cso->ds[1].f = rasterizer->offset_scale; + if (rasterizer->poly_stipple_enable) { + cso->st |= ST1_ENABLE; + } + + if (rasterizer->scissor) + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT; + else + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT; + + switch (rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + cso->LIS4 |= S4_CULLMODE_NONE; + break; + case PIPE_WINDING_CW: + cso->LIS4 |= S4_CULLMODE_CW; + break; + case PIPE_WINDING_CCW: + cso->LIS4 |= S4_CULLMODE_CCW; + break; + case PIPE_WINDING_BOTH: + cso->LIS4 |= S4_CULLMODE_BOTH; + break; + } + + { + int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf); + + cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT; + + if (rasterizer->line_smooth) + cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE; + } + + { + int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff); + + cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT; + } + + if (rasterizer->flatshade) { + cso->LIS4 |= (S4_FLATSHADE_ALPHA | + S4_FLATSHADE_COLOR | + S4_FLATSHADE_SPECULAR); + } + + cso->LIS7 = fui( rasterizer->offset_units ); + + + return cso; +} + +static void i915_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->rasterizer = (struct i915_rasterizer_state *)setup; + + /* pass-through to draw module */ + draw_set_rasterizer_state(i915->draw, i915->rasterizer->templ); + + i915->dirty |= I915_NEW_RASTERIZER; +} + +static void i915_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + FREE(setup); +} + +static void i915_set_vertex_buffer( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer ) +{ + struct i915_context *i915 = i915_context(pipe); + i915->vertex_buffer[index] = *buffer; + /* pass-through to draw module */ + draw_set_vertex_buffer(i915->draw, index, buffer); +} + +static void i915_set_vertex_element( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *element) +{ + struct i915_context *i915 = i915_context(pipe); + /* pass-through to draw module */ + draw_set_vertex_element(i915->draw, index, element); +} + + + +void +i915_init_state_functions( struct i915_context *i915 ) +{ + i915->pipe.create_blend_state = i915_create_blend_state; + i915->pipe.bind_blend_state = i915_bind_blend_state; + i915->pipe.delete_blend_state = i915_delete_blend_state; + + i915->pipe.create_sampler_state = i915_create_sampler_state; + i915->pipe.bind_sampler_state = i915_bind_sampler_state; + i915->pipe.delete_sampler_state = i915_delete_sampler_state; + + i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; + i915->pipe.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state; + i915->pipe.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state; + + i915->pipe.create_rasterizer_state = i915_create_rasterizer_state; + i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state; + i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state; + i915->pipe.create_fs_state = i915_create_fs_state; + i915->pipe.bind_fs_state = i915_bind_fs_state; + i915->pipe.delete_fs_state = i915_delete_fs_state; + i915->pipe.create_vs_state = i915_create_vs_state; + i915->pipe.bind_vs_state = i915_bind_vs_state; + i915->pipe.delete_vs_state = i915_delete_vs_state; + + i915->pipe.set_blend_color = i915_set_blend_color; + i915->pipe.set_clip_state = i915_set_clip_state; + i915->pipe.set_constant_buffer = i915_set_constant_buffer; + i915->pipe.set_framebuffer_state = i915_set_framebuffer_state; + + i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; + i915->pipe.set_scissor_state = i915_set_scissor_state; + i915->pipe.set_sampler_texture = i915_set_sampler_texture; + i915->pipe.set_viewport_state = i915_set_viewport_state; + i915->pipe.set_vertex_buffer = i915_set_vertex_buffer; + i915->pipe.set_vertex_element = i915_set_vertex_element; +} diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915simple/i915_state.h new file mode 100644 index 0000000000..86c6b0027d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef I915_STATE_H +#define I915_STATE_H + +struct i915_context; + + +struct i915_tracked_state { + unsigned dirty; + void (*update)( struct i915_context * ); +}; + +void i915_update_immediate( struct i915_context *i915 ); +void i915_update_dynamic( struct i915_context *i915 ); +void i915_update_derived( struct i915_context *i915 ); +void i915_update_samplers( struct i915_context *i915 ); +void i915_update_textures(struct i915_context *i915); + +void i915_emit_hardware_state( struct i915_context *i915 ); + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c new file mode 100644 index 0000000000..653983e4a9 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -0,0 +1,177 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_vertex.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "i915_fpc.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Determine which post-transform / pre-rasterization vertex attributes + * we need. + * Derived from: fs, setup states. + */ +static void calculate_vertex_layout( struct i915_context *i915 ) +{ + const struct pipe_shader_state *fs = i915->fs; + const enum interp_mode colorInterp = i915->rasterizer->color_interp; + struct vertex_info vinfo; + uint front0 = 0, back0 = 0, front1 = 0, back1 = 0; + boolean needW = 0; + uint i; + boolean texCoords[8]; + uint src = 0; + + memset(texCoords, 0, sizeof(texCoords)); + memset(&vinfo, 0, sizeof(vinfo)); + + /* pos */ + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++); + /* Note: we'll set the S4_VFMT_XYZ[W] bits below */ + + for (i = 0; i < fs->num_inputs; i++) { + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + if (fs->input_semantic_index[i] == 0) { + front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + else { + assert(fs->input_semantic_index[i] == 1); + front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + { + const uint unit = fs->input_semantic_index[i]; + uint hwtc; + texCoords[unit] = TRUE; + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + hwtc = TEXCOORDFMT_4D; + needW = TRUE; + vinfo.hwfmt[1] |= hwtc << (unit * 4); + } + break; + case TGSI_SEMANTIC_FOG: + debug_printf("i915 fogcoord not implemented yet\n"); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++); + break; + default: + assert(0); + } + + } + + /* finish up texcoord fields */ + for (i = 0; i < 8; i++) { + if (!texCoords[i]) { + const uint hwtc = TEXCOORDFMT_NOT_PRESENT; + vinfo.hwfmt[1] |= hwtc << (i* 4); + } + } + + /* go back and fill in the vertex position info now that we have needW */ + if (needW) { + vinfo.hwfmt[0] |= S4_VFMT_XYZW; + vinfo.emit[0] = EMIT_4F; + } + else { + vinfo.hwfmt[0] |= S4_VFMT_XYZ; + vinfo.emit[0] = EMIT_3F; + } + + /* Additional attributes required for setup: Just twosided + * lighting. Edgeflag is dealt with specially by setting bits in + * the vertex header. + */ + if (i915->rasterizer->light_twoside) { + if (front0) { + back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + } + if (back0) { + back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + } + } + + draw_compute_vertex_size(&vinfo); + + if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { + /* Need to set this flag so that the LIS2/4 registers get set. + * It also means the i915_update_immediate() function must be called + * after this one, in i915_update_derived(). + */ + i915->dirty |= I915_NEW_VERTEX_FORMAT; + + memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo)); + } +} + + + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void i915_update_derived( struct i915_context *i915 ) +{ + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS)) + calculate_vertex_layout( i915 ); + + if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) + i915_update_samplers(i915); + + if (i915->dirty & I915_NEW_TEXTURE) + i915_update_textures(i915); + + if (i915->dirty) + i915_update_immediate( i915 ); + + if (i915->dirty) + i915_update_dynamic( i915 ); + + if (i915->dirty & I915_NEW_FS) { + i915_translate_fragment_program(i915); + i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ + } + + /* HW emit currently references framebuffer state directly: + */ + if (i915->dirty & I915_NEW_FRAMEBUFFER) + i915->hardware_dirty |= I915_HW_STATIC; + + i915->dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c new file mode 100644 index 0000000000..8cfbdddd19 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -0,0 +1,308 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_batch.h" +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "pipe/p_util.h" + +#define FILE_DEBUG_FLAG DEBUG_STATE + +/* State that we have chosen to store in the DYNAMIC segment of the + * i915 indirect state mechanism. + * + * Can't cache these in the way we do the static state, as there is no + * start/size in the command packet, instead an 'end' value that gets + * incremented. + * + * Additionally, there seems to be a requirement to re-issue the full + * (active) state every time a 4kb boundary is crossed. + */ + +static INLINE void set_dynamic_indirect( struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords ) +{ + unsigned i; + + for (i = 0; i < dwords; i++) + i915->current.dynamic[offset + i] = src[i]; + + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + +/*********************************************************************** + * Modes4: stencil masks and logicop + */ +static void upload_MODES4( struct i915_context *i915 ) +{ + unsigned modes4 = 0; + + /* I915_NEW_STENCIL */ + modes4 |= i915->depth_stencil->stencil_modes4; + /* I915_NEW_BLEND */ + modes4 |= i915->blend->modes4; + + /* Always, so that we know when state is in-active: + */ + set_dynamic_indirect( i915, + I915_DYNAMIC_MODES4, + &modes4, + 1 ); +} + +const struct i915_tracked_state i915_upload_MODES4 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + upload_MODES4 +}; + + + + +/*********************************************************************** + */ + +static void upload_BFO( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_BFO_0, + &(i915->depth_stencil->bfo[0]), + 2 ); +} + +const struct i915_tracked_state i915_upload_BFO = { + I915_NEW_DEPTH_STENCIL, + upload_BFO +}; + + +/*********************************************************************** + */ + + +static void upload_BLENDCOLOR( struct i915_context *i915 ) +{ + unsigned bc[2]; + + memset( bc, 0, sizeof(bc) ); + + /* I915_NEW_BLEND {_COLOR} + */ + { + const float *color = i915->blend_color.color; + + bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; + bc[1] = pack_ui32_float4( color[0], + color[1], + color[2], + color[3] ); + } + + set_dynamic_indirect( i915, + I915_DYNAMIC_BC_0, + bc, + 2 ); +} + +const struct i915_tracked_state i915_upload_BLENDCOLOR = { + I915_NEW_BLEND, + upload_BLENDCOLOR +}; + +/*********************************************************************** + */ + + +static void upload_IAB( struct i915_context *i915 ) +{ + unsigned iab = i915->blend->iab; + + + set_dynamic_indirect( i915, + I915_DYNAMIC_IAB, + &iab, + 1 ); +} + +const struct i915_tracked_state i915_upload_IAB = { + I915_NEW_BLEND, + upload_IAB +}; + + +/*********************************************************************** + */ + + + +static void upload_DEPTHSCALE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2 ); +} + +const struct i915_tracked_state i915_upload_DEPTHSCALE = { + I915_NEW_RASTERIZER, + upload_DEPTHSCALE +}; + + + +/*********************************************************************** + * Polygon stipple + * + * The i915 supports a 4x4 stipple natively, GL wants 32x32. + * Fortunately stipple is usually a repeating pattern. + * + * XXX: does stipple pattern need to be adjusted according to + * the window position? + * + * XXX: possibly need workaround for conform paths test. + */ + +static void upload_STIPPLE( struct i915_context *i915 ) +{ + unsigned st[2]; + + st[0] = _3DSTATE_STIPPLE; + st[1] = 0; + + /* I915_NEW_RASTERIZER + */ + st[1] |= i915->rasterizer->st; + + + /* I915_NEW_STIPPLE + */ + { + const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple; + ubyte p[4]; + + p[0] = mask[12] & 0xf; + p[1] = mask[8] & 0xf; + p[2] = mask[4] & 0xf; + p[3] = mask[0] & 0xf; + + /* Not sure what to do about fallbacks, so for now just dont: + */ + st[1] |= ((p[0] << 0) | + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); + } + + + set_dynamic_indirect( i915, + I915_DYNAMIC_STP_0, + &st[0], + 2 ); +} + + +const struct i915_tracked_state i915_upload_STIPPLE = { + I915_NEW_RASTERIZER | I915_NEW_STIPPLE, + upload_STIPPLE +}; + + + +/*********************************************************************** + * Scissor. + */ +static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1 ); +} + +const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { + I915_NEW_RASTERIZER, + upload_SCISSOR_ENABLE +}; + + + +static void upload_SCISSOR_RECT( struct i915_context *i915 ) +{ + unsigned x1 = i915->scissor.minx; + unsigned y1 = i915->scissor.miny; + unsigned x2 = i915->scissor.maxx; + unsigned y2 = i915->scissor.maxy; + unsigned sc[3]; + + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; + sc[1] = (y1 << 16) | (x1 & 0xffff); + sc[2] = (y2 << 16) | (x2 & 0xffff); + + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3 ); +} + + +const struct i915_tracked_state i915_upload_SCISSOR_RECT = { + I915_NEW_SCISSOR, + upload_SCISSOR_RECT +}; + + + + + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_MODES4, + &i915_upload_BFO, + &i915_upload_BLENDCOLOR, + &i915_upload_IAB, + &i915_upload_DEPTHSCALE, + &i915_upload_STIPPLE, + &i915_upload_SCISSOR_ENABLE, + &i915_upload_SCISSOR_RECT +}; + +/* These will be dynamic indirect state commands, but for now just end + * up on the batch buffer with everything else. + */ +void i915_update_dynamic( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} + diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c new file mode 100644 index 0000000000..3339287f49 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -0,0 +1,374 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +static unsigned translate_format( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_R5G6B5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format( enum pipe_format zformat ) +{ + switch (zformat) { + case PIPE_FORMAT_S8Z24_UNORM: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + + +/** + * Examine framebuffer state to determine width, height. + */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, + uint *width, uint *height) +{ + if (fb->cbufs[0]) { + *width = fb->cbufs[0]->width; + *height = fb->cbufs[0]->height; + return TRUE; + } + else if (fb->zsbuf) { + *width = fb->zsbuf->width; + *height = fb->zsbuf->height; + return TRUE; + } + else { + *width = *height = 0; + return FALSE; + } +} + + +/* Push the state into the sarea and/or texture memory. + */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + /* XXX: there must be an easier way */ + const unsigned dwords = ( 14 + + 7 + + I915_MAX_DYNAMIC + + 8 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_MAX_CONSTANT*4 + + i915->current.program_len + + 6 + ) * 3/2; /* plus 50% margin */ + const unsigned relocs = ( I915_TEX_UNITS + + 3 + ) * 3/2; /* plus 50% margin */ + +#if 0 + debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); +#endif + + if(!BEGIN_BATCH(dwords, relocs)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(dwords, relocs)); + } + + /* 14 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_INVARIENT) + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D); + + /* Need to initialize this to zero. + */ + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + + /* disable indirect state for now + */ + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); + OUT_BATCH(0); + } + + /* 7 dwords, 1 relocs */ + if (i915->hardware_dirty & I915_HW_IMMEDIATE) + { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | + I1_LOAD_S(1) | + I1_LOAD_S(2) | + I1_LOAD_S(4) | + I1_LOAD_S(5) | + I1_LOAD_S(6) | + (5)); + + if(i915->vbo) + OUT_RELOC(i915->vbo, + I915_BUFFER_ACCESS_READ, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + /* FIXME: we should not do this */ + OUT_BATCH(0); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + } + + /* I915_MAX_DYNAMIC dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_DYNAMIC) + { + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + OUT_BATCH(i915->current.dynamic[i]); + } + } + + /* 8 dwords, 2 relocs */ + if (i915->hardware_dirty & I915_HW_STATIC) + { + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + + if (cbuf_surface) { + unsigned pitch = (cbuf_surface->pitch * cbuf_surface->cpp); + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(pitch) | /* pitch in bytes */ + BUF_3D_USE_FENCE); + + OUT_RELOC(cbuf_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + 0); + } + + /* What happens if no zbuf?? + */ + if (depth_surface) { + unsigned zpitch = (depth_surface->pitch * depth_surface->cpp); + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_DEPTH | + BUF_3D_PITCH(zpitch) | /* pitch in bytes */ + BUF_3D_USE_FENCE); + + OUT_RELOC(depth_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + 0); + } + + { + unsigned cformat, zformat = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) + zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat ); + } + } + +#if 01 + /* texture images */ + /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ + if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) + { + /* XXX: we were refering to sampler state + * (current.sampler_enable_nr) below, but only checking + * I915_HW_MAP above. Should probably calculate the enabled + * flags separately - but there will be further rework of + * state so perhaps not necessary yet. + */ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct pipe_buffer *buf = + i915->texture[unit]->buffer; + uint offset = 0; + assert(buf); + + count++; + + OUT_RELOC(buf, + I915_BUFFER_ACCESS_READ, + offset); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } + } +#endif + +#if 01 + /* samplers */ + /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_SAMPLER) + { + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } + } +#endif + + /* constants */ + /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT]; + assert(nr <= I915_MAX_CONSTANT); + if (nr > 0) { + const uint *c + = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT]; + uint i; + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + for (i = 0; i < nr; i++) { + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } + } + + /* Fragment program */ + /* i915->current.program_len dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->current.program_len > 0); + for (i = 0; i < i915->current.program_len; i++) { + OUT_BATCH(i915->current.program[i]); + } + } + + /* drawing surface size */ + /* 6 dwords, 0 relocs */ + { + uint w, h; + boolean k = framebuffer_size(&i915->framebuffer, &w, &h); + assert(k); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); + OUT_BATCH(0); + OUT_BATCH(0); + } + + + i915->hardware_dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c new file mode 100644 index 0000000000..07031fc6c5 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -0,0 +1,221 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "p_util.h" + + +/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. + * Would like to opportunistically recombine all these fragments into + * a single packet containing only what has changed, but for now emit + * as multiple packets. + */ + + + + +/*********************************************************************** + * S0,S1: Vertex buffer state. + */ +static void upload_S0S1(struct i915_context *i915) +{ + unsigned LIS0, LIS1; + + /* INTEL_NEW_VBO */ + /* TODO: re-use vertex buffers here? */ + LIS0 = 0; + + /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! + */ + { + unsigned vertex_size = i915->current.vertex_info.size; + + LIS1 = ((vertex_size << 24) | + (vertex_size << 16)); + } + + /* INTEL_NEW_VBO */ + /* TODO: use a vertex generation number to track vbo changes */ + if (1 || + i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + { + i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; + i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S0S1 = { + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, + upload_S0S1 +}; + + + + +/*********************************************************************** + * S4: Vertex format, rasterization state + */ +static void upload_S2S4(struct i915_context *i915) +{ + unsigned LIS2, LIS4; + + /* I915_NEW_VERTEX_FORMAT */ + { + LIS2 = i915->current.vertex_info.hwfmt[1]; + LIS4 = i915->current.vertex_info.hwfmt[0]; + /* + debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); + */ + assert(LIS4); /* should never be zero? */ + } + + LIS4 |= i915->rasterizer->LIS4; + + if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || + LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { + + i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; + i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + + +const struct i915_tracked_state i915_upload_S2S4 = { + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, + upload_S2S4 +}; + + + +/*********************************************************************** + * + */ +static void upload_S5( struct i915_context *i915 ) +{ + unsigned LIS5 = 0; + + LIS5 |= i915->depth_stencil->stencil_LIS5; + + LIS5 |= i915->blend->LIS5; + +#if 0 + /* I915_NEW_RASTERIZER */ + if (i915->state.Polygon->OffsetFill) { + LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; + } +#endif + + + if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { + i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S5 = { + (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), + upload_S5 +}; + + +/*********************************************************************** + */ +static void upload_S6( struct i915_context *i915 ) +{ + unsigned LIS6 = (S6_COLOR_WRITE_ENABLE | + (2 << S6_TRISTRIP_PV_SHIFT)); + + /* I915_NEW_BLEND + */ + LIS6 |= i915->blend->LIS6; + + /* I915_NEW_DEPTH + */ + LIS6 |= i915->depth_stencil->depth_LIS6; + + if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { + i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S6 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + upload_S6 +}; + + +/*********************************************************************** + */ +static void upload_S7( struct i915_context *i915 ) +{ + unsigned LIS7; + + /* I915_NEW_RASTERIZER + */ + LIS7 = i915->rasterizer->LIS7; + + if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { + i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S7 = { + I915_NEW_RASTERIZER, + upload_S7 +}; + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_S0S1, + &i915_upload_S2S4, + &i915_upload_S5, + &i915_upload_S6, + &i915_upload_S7 +}; + +/* + */ +void i915_update_immediate( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h new file mode 100644 index 0000000000..0934ac79a4 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -0,0 +1,230 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_STATE_INLINES_H +#define I915_STATE_INLINES_H + +#include "p_compiler.h" +#include "p_defines.h" +#include "i915_reg.h" + + +static INLINE unsigned +i915_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_NEVER; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LESS; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_ALWAYS; + default: + return COMPAREFUNC_ALWAYS; + } +} + +static INLINE unsigned +i915_translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return BLENDFACT_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BLENDFACT_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BLENDFACT_DST_COLR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BLENDFACT_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BLENDFACT_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + default: + return BLENDFACT_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_func(unsigned mode) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BLENDFUNC_ADD; + case PIPE_BLEND_MIN: + return BLENDFUNC_MIN; + case PIPE_BLEND_MAX: + return BLENDFUNC_MAX; + case PIPE_BLEND_SUBTRACT: + return BLENDFUNC_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLENDFUNC_REVERSE_SUBTRACT; + default: + return 0; + } +} + + +static INLINE unsigned +i915_translate_logic_op(unsigned opcode) +{ + switch (opcode) { + case PIPE_LOGICOP_CLEAR: + return LOGICOP_CLEAR; + case PIPE_LOGICOP_AND: + return LOGICOP_AND; + case PIPE_LOGICOP_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case PIPE_LOGICOP_COPY: + return LOGICOP_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return LOGICOP_COPY_INV; + case PIPE_LOGICOP_AND_INVERTED: + return LOGICOP_AND_INV; + case PIPE_LOGICOP_NOOP: + return LOGICOP_NOOP; + case PIPE_LOGICOP_XOR: + return LOGICOP_XOR; + case PIPE_LOGICOP_OR: + return LOGICOP_OR; + case PIPE_LOGICOP_OR_INVERTED: + return LOGICOP_OR_INV; + case PIPE_LOGICOP_NOR: + return LOGICOP_NOR; + case PIPE_LOGICOP_EQUIV: + return LOGICOP_EQUIV; + case PIPE_LOGICOP_INVERT: + return LOGICOP_INV; + case PIPE_LOGICOP_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case PIPE_LOGICOP_NAND: + return LOGICOP_NAND; + case PIPE_LOGICOP_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} + + + +static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr ) +{ + boolean ok; + + switch (hw_prim) { + case PRIM3D_POINTLIST: + ok = (nr >= 1); + assert(ok); + break; + case PRIM3D_LINELIST: + ok = (nr >= 2) && (nr % 2) == 0; + assert(ok); + break; + case PRIM3D_LINESTRIP: + ok = (nr >= 2); + assert(ok); + break; + case PRIM3D_TRILIST: + ok = (nr >= 3) && (nr % 3) == 0; + assert(ok); + break; + case PRIM3D_TRISTRIP: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_TRIFAN: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_POLY: + ok = (nr >= 3); + assert(ok); + break; + default: + assert(0); + ok = 0; + break; + } + + return ok; +} + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c new file mode 100644 index 0000000000..9c1a5bbbd6 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -0,0 +1,231 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" + + +/** + * Compute i915 texture sampling state. + * + * Recalculate all state from scratch. Perhaps not the most + * efficient, but this has gotten complex enough that we need + * something which is understandable and reliable. + * \param state returns the 3 words of compute state + */ +static void update_sampler(struct i915_context *i915, + uint unit, + const struct i915_sampler_state *sampler, + const struct i915_texture *tex, + unsigned state[3] ) +{ + const struct pipe_texture *pt = &tex->base; + + /* Need to do this after updating the maps, which call the + * intel_finalize_mipmap_tree and hence can update firstLevel: + */ + state[0] = sampler->state[0]; + state[1] = sampler->state[1]; + state[2] = sampler->state[2]; + + if (pt->format == PIPE_FORMAT_YCBCR || + pt->format == PIPE_FORMAT_YCBCR_REV) + state[0] |= SS2_COLORSPACE_CONVERSION; + + /* 3D textures don't seem to respect the border color. + * Fallback if there's ever a danger that they might refer to + * it. + * + * Effectively this means fallback on 3D clamp or + * clamp_to_border. + * + * XXX: Check if this is true on i945. + * XXX: Check if this bug got fixed in release silicon. + */ +#if 0 + { + const unsigned ws = sampler->templ->wrap_s; + const unsigned wt = sampler->templ->wrap_t; + const unsigned wr = sampler->templ->wrap_r; + if (pt->target == PIPE_TEXTURE_3D && + (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST || + sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) && + (ws == PIPE_TEX_WRAP_CLAMP || + wt == PIPE_TEX_WRAP_CLAMP || + wr == PIPE_TEX_WRAP_CLAMP || + ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { + if (i915->strict_conformance) { + assert(0); + /* sampler->fallback = true; */ + /* TODO */ + } + } + } +#endif + + state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); +} + + +void i915_update_samplers( struct i915_context *i915 ) +{ + uint unit; + + i915->current.sampler_enable_nr = 0; + i915->current.sampler_enable_flags = 0x0; + + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + update_sampler( i915, + unit, + i915->sampler[unit], /* sampler state */ + i915->texture[unit], /* texture */ + i915->current.sampler[unit] /* the result */ + ); + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); + } + } + + i915->hardware_dirty |= I915_HW_SAMPLER; +} + + +static uint +translate_texture_format(enum pipe_format pipeFormat) +{ + switch (pipeFormat) { + case PIPE_FORMAT_U_L8: + return MAPSURF_8BIT | MT_8BIT_L8; + case PIPE_FORMAT_U_I8: + return MAPSURF_8BIT | MT_8BIT_I8; + case PIPE_FORMAT_U_A8: + return MAPSURF_8BIT | MT_8BIT_A8; + case PIPE_FORMAT_U_A8_L8: + return MAPSURF_16BIT | MT_16BIT_AY88; + case PIPE_FORMAT_R5G6B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_RGB565; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB1555; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB4444; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return MAPSURF_32BIT | MT_32BIT_ARGB8888; + case PIPE_FORMAT_YCBCR_REV: + return (MAPSURF_422 | MT_422_YCRCB_NORMAL); + case PIPE_FORMAT_YCBCR: + return (MAPSURF_422 | MT_422_YCRCB_SWAPY); +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); +#endif + case PIPE_FORMAT_Z16_UNORM: + return (MAPSURF_16BIT | MT_16BIT_L16); +#if 0 + case PIPE_FORMAT_RGBA_DXT1: + case PIPE_FORMAT_RGB_DXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); + case PIPE_FORMAT_RGBA_DXT3: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); + case PIPE_FORMAT_RGBA_DXT5: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); +#endif + case PIPE_FORMAT_S8Z24_UNORM: + return (MAPSURF_32BIT | MT_32BIT_xL824); + default: + debug_printf("i915: translate_texture_format() bad image format %x\n", + pipeFormat); + assert(0); + return 0; + } +} + + +static void +i915_update_texture(struct i915_context *i915, uint unit, + uint state[6]) +{ + const struct i915_texture *tex = i915->texture[unit]; + const struct pipe_texture *pt = &tex->base; + uint format, pitch; + const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint num_levels = pt->last_level; + + assert(tex); + assert(width); + assert(height); + assert(depth); + + format = translate_texture_format(pt->format); + pitch = tex->pitch * pt->cpp; + + assert(format); + assert(pitch); + + /* MS3 state */ + state[0] = + (((height - 1) << MS3_HEIGHT_SHIFT) + | ((width - 1) << MS3_WIDTH_SHIFT) + | format + | MS3_USE_FENCE_REGS); + + /* MS4 state */ + state[1] = + ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) + | MS4_CUBE_FACE_ENA_MASK + | ((num_levels * 4) << MS4_MAX_LOD_SHIFT) + | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); +} + + +void +i915_update_textures(struct i915_context *i915) +{ + uint unit; + + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + i915_update_texture(i915, unit, i915->current.texbuffer[unit]); + } + } + + i915->hardware_dirty |= I915_HW_MAP; +} diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c new file mode 100644 index 0000000000..c713bf7208 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_reg.h" + + +static const char *i915_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char *i915_get_name( struct pipe_context *pipe ) +{ + static char buffer[128]; + const char *chipset; + + switch (i915_context(pipe)->pci_id) { + case PCI_CHIP_I915_G: + chipset = "915G"; + break; + case PCI_CHIP_I915_GM: + chipset = "915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Q33"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "pipe/i915 (chipset: %s)", chipset); + return buffer; +} + + +void +i915_init_string_functions(struct i915_context *i915) +{ + i915->pipe.get_name = i915_get_name; + i915->pipe.get_vendor = i915_get_vendor; +} diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c new file mode 100644 index 0000000000..de0cc5fe06 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_blit.h" +#include "i915_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" + + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +i915_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +i915_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert( dst != src ); + assert( dst->cpp == src->cpp ); + + if (0) { + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src), + do_flip ? -(int) src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); + } + else { + i915_copy_blit( i915_context(pipe), + do_flip, + dst->cpp, + (short) src->pitch, src->buffer, src->offset, + (short) dst->pitch, dst->buffer, dst->offset, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); + } +} + +/* Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +static void +i915_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + if (0) { + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + switch (dst->cpp) { + case 1: { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); + } + else { + i915_fill_blit( i915_context(pipe), + dst->cpp, + (short) dst->pitch, + dst->buffer, dst->offset, + (short) dstx, (short) dsty, + (short) width, (short) height, + value ); + } +} + + +void +i915_init_surface_functions(struct i915_context *i915) +{ + i915->pipe.get_tex_surface = i915_get_tex_surface; + + i915->pipe.surface_copy = i915_surface_copy; + i915->pipe.surface_fill = i915_surface_fill; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c new file mode 100644 index 0000000000..6d37ae3d74 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -0,0 +1,536 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "i915_context.h" +#include "i915_texture.h" +#include "i915_debug.h" + + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + + +static void +i915_miptree_set_level_info(struct i915_texture *tex, + unsigned level, + unsigned nr_images, + unsigned x, unsigned y, unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp; + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + + +static void +i915_miptree_set_image_offset(struct i915_texture *tex, + unsigned level, unsigned img, unsigned x, unsigned y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = (x + y * tex->pitch); + + /* + DBG("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); + */ +} + + +static void +i945_miptree_layout_2d( struct i915_texture *tex ) +{ + struct pipe_texture *pt = &tex->base; + int align_h = 2, align_w = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + + tex->pitch = pt->width[0]; + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap level. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap level out past the width of its parent. + */ + if (pt->last_level > 0) { + unsigned mip1_width = align_int(minify(pt->width[0]), align_w) + + minify(minify(pt->width[0])); + + if (mip1_width > pt->width[0]) + tex->pitch = mip1_width; + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. + */ + tex->pitch = align_int(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->total_height = 0; + + for (level = 0; level <= pt->last_level; level++) { + unsigned img_height; + + i915_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); + + if (pt->compressed) + img_height = MAX2(1, height/4); + else + img_height = align_int(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_height = MAX2(tex->total_height, y + img_height); + + /* Layout_below: step right after second mipmap level. + */ + if (level == 1) { + x += align_int(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } +} + + +static const int initial_offsets[6][2] = { + {0, 0}, + {0, 2}, + {1, 0}, + {1, 2}, + {1, 1}, + {1, 3} +}; + +static const int step_offsets[6][2] = { + {0, 2}, + {0, 2}, + {-1, 2}, + {-1, 2}, + {-1, 1}, + {-1, 1} +}; + + +static boolean +i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: { + const unsigned dim = pt->width[0]; + unsigned face; + unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* double pitch for cube layouts */ + tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + tex->total_height = dim * 4; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, + 0, 0, + /*OLD: tex->pitch, tex->total_height,*/ + lvlWidth, lvlHeight, + 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * dim; + unsigned y = initial_offsets[face][1] * dim; + unsigned d = dim; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + d >>= 1; + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + } + } + break; + } + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned stack_height = 0; + + /* Calculate the size of a single slice. + */ + tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + + /* XXX: hardware expects/requires 9 levels at minimum. + */ + for (level = 0; level <= MAX2(8, pt->last_level); + level++) { + i915_miptree_set_level_info(tex, level, depth, 0, tex->total_height, + width, height, depth); + + + stack_height += MAX2(2, height); + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + + /* Fixup depth image_offsets: + */ + depth = pt->depth[0]; + for (level = 0; level <= pt->last_level; level++) { + unsigned i; + for (i = 0; i < depth; i++) + i915_miptree_set_image_offset(tex, level, i, + 0, i * stack_height); + + depth = minify(depth); + } + + + /* Multiply slice size by texture depth for total size. It's + * remarkable how wasteful of memory the i915 texture layouts + * are. They are largely fixed in the i945. + */ + tex->total_height = stack_height * pt->depth[0]; + break; + } + + default:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned img_height; + + tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + tex->total_height = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, + 0, tex->total_height, + width, height, 1); + + if (pt->compressed) + img_height = MAX2(1, height / 4); + else + img_height = (MAX2(2, height) + 1) & ~1; + + tex->total_height += img_height; + + width = minify(width); + height = minify(height); + } + break; + } + } + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + */ + + return TRUE; +} + + +static boolean +i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE:{ + const unsigned dim = pt->width[0]; + unsigned face; + unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. + */ + if (dim > 32) + tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + else + tex->pitch = 14 * 8; + + tex->total_height = dim * 4 + 4; + + /* Set all the levels to effectively occupy the whole rectangular region. + */ + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, + 0, 0, + lvlWidth, lvlHeight, 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * dim; + unsigned y = initial_offsets[face][1] * dim; + unsigned d = dim; + + if (dim == 4 && face >= 4) { + y = tex->total_height - 4; + x = (face - 4) * 8; + } + else if (dim < 4 && (face > 0)) { + y = tex->total_height - 4; + x = face * 8; + } + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + + d >>= 1; + + switch (d) { + case 4: + switch (face) { + case PIPE_TEX_FACE_POS_X: + case PIPE_TEX_FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case PIPE_TEX_FACE_POS_Y: + case PIPE_TEX_FACE_NEG_Y: + y += 12; + x -= 8; + break; + case PIPE_TEX_FACE_POS_Z: + case PIPE_TEX_FACE_NEG_Z: + y = tex->total_height - 4; + x = (face - 4) * 8; + break; + } + + case 2: + y = tex->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + + default: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + } + } + } + break; + } + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + unsigned level; + + tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + tex->total_height = 0; + + pack_y_pitch = MAX2(pt->height[0], 2); + pack_x_pitch = tex->pitch; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; + int x = 0; + int y = 0; + unsigned q, j; + + i915_miptree_set_level_info(tex, level, nr_images, + 0, tex->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + i915_miptree_set_image_offset(tex, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_height += y; + + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= tex->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + } + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + break; + } + + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: +// case PIPE_TEXTURE_RECTANGLE: + i945_miptree_layout_2d(tex); + break; + default: + assert(0); + return FALSE; + } + + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + */ + + return TRUE; +} + + +struct pipe_texture * +i915_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) +{ + struct i915_texture *tex = CALLOC_STRUCT(i915_texture); + + if (tex) { + struct i915_context *i915 = i915_context(pipe); + + tex->base = *templat; + + if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) : + i915_miptree_layout(pipe, tex)) + tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + return &tex->base; +} + + +void +i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct i915_texture *tex = (struct i915_texture *)*pt; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + FREE(tex->image_offset[i]); + + FREE(tex); + } + *pt = NULL; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h new file mode 100644 index 0000000000..330d111dc7 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -0,0 +1,17 @@ + +#ifndef I915_TEXTURE_H +#define I915_TEXTURE_H + +struct pipe_context; +struct pipe_texture; + + +struct pipe_texture * +i915_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); + +extern void +i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + + +#endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h new file mode 100644 index 0000000000..fe49710852 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -0,0 +1,115 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i915simple requires any window system + * hosting it to implement. This is the only include file in i915simple + * which is public. + * + */ + +#ifndef I915_WINSYS_H +#define I915_WINSYS_H + + +#include "pipe/p_defines.h" + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct pipe_buffer; +struct pipe_winsys; + + +/** + * Additional winsys interface for i915simple. + * + * It is an over-simple batchbuffer mechanism. Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct i915_winsys { + + /** + * Reserve space on batch buffer. + * + * Returns a null pointer if there is insufficient space in the batch buffer + * to hold the requested number of dwords and relocations. + * + * The number of dwords should also include the number of relocations. + */ + unsigned *(*batch_start)( struct i915_winsys *sws, + unsigned dwords, + unsigned relocs ); + + void (*batch_dword)( struct i915_winsys *sws, + unsigned dword ); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. + * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. + */ + void (*batch_reloc)( struct i915_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ); + + void (*batch_flush)( struct i915_winsys *sws ); + void (*batch_finish)( struct i915_winsys *sws ); +}; + +#define I915_BUFFER_ACCESS_WRITE 0x1 +#define I915_BUFFER_ACCESS_READ 0x2 + +#define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *i915_create( struct pipe_winsys *, + struct i915_winsys *, + unsigned pci_id ); + + +#endif diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile new file mode 100644 index 0000000000..48c00ab50b --- /dev/null +++ b/src/gallium/drivers/i965simple/Makefile @@ -0,0 +1,66 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i965simple + +DRIVER_SOURCES = \ + brw_blit.c \ + brw_flush.c \ + brw_strings.c \ + brw_surface.c \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_draw.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_misc_state.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_shader_info.c \ + brw_state.c \ + brw_state_batch.c \ + brw_state_cache.c \ + brw_state_pool.c \ + brw_state_upload.c \ + brw_tex_layout.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_wm.c \ + brw_wm_iz.c \ + brw_wm_decl.c \ + brw_wm_glsl.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(COMMON_BM_SOURCES) \ + $(MINIGLX_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +DRIVER_DEFINES = -I. + +include ../Makefile.template + +symlinks: diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript new file mode 100644 index 0000000000..74621de84c --- /dev/null +++ b/src/gallium/drivers/i965simple/SConscript @@ -0,0 +1,55 @@ +Import('*') + +env = env.Clone() + +i965simple = env.ConvenienceLibrary( + target = 'i965simple', + source = [ + 'brw_blit.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_flush.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_shader_info.c', + 'brw_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', + 'brw_state_pool.c', + 'brw_state_upload.c', + 'brw_strings.c', + 'brw_surface.c', + 'brw_tex_layout.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_wm.c', + 'brw_wm_decl.c', + 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + ]) + +Export('i965simple') diff --git a/src/gallium/drivers/i965simple/brw_batch.h b/src/gallium/drivers/i965simple/brw_batch.h new file mode 100644 index 0000000000..5f5932a488 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_batch.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_BATCH_H +#define BRW_BATCH_H + +#include "brw_winsys.h" + +#define BATCH_LOCALS + +#define INTEL_BATCH_NO_CLIPRECTS 0x1 +#define INTEL_BATCH_CLIPRECTS 0x2 + +#define BEGIN_BATCH( dwords, relocs ) \ + brw->winsys->batch_start(brw->winsys, dwords, relocs) + +#define OUT_BATCH( dword ) \ + brw->winsys->batch_dword(brw->winsys, dword) + +#define OUT_RELOC( buf, flags, delta ) \ + brw->winsys->batch_reloc(brw->winsys, buf, flags, delta) + +#define ADVANCE_BATCH() \ + brw->winsys->batch_end( brw->winsys ) + +/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer. + */ +#define FLUSH_BATCH(fence) do { \ + brw->winsys->batch_flush(brw->winsys, fence); \ + brw->hardware_dirty = ~0; \ +} while (0) + +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s))) + +#endif diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c new file mode 100644 index 0000000000..8494f70493 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include +#include + +#include "brw_batch.h" +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void brw_fill_blit(struct brw_context *brw, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (0xF0 << 16) | (1<<24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (0xF0 << 16) | (1<<24) | (1<<25); + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (y << 16) | x ); + OUT_BATCH( ((y+h) << 16) | (x+w) ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset ); + OUT_BATCH( color ); + ADVANCE_BATCH(); +} + +static unsigned translate_raster_op(unsigned logicop) +{ + switch(logicop) { + case PIPE_LOGICOP_CLEAR: return 0x00; + case PIPE_LOGICOP_AND: return 0x88; + case PIPE_LOGICOP_AND_REVERSE: return 0x44; + case PIPE_LOGICOP_COPY: return 0xCC; + case PIPE_LOGICOP_AND_INVERTED: return 0x22; + case PIPE_LOGICOP_NOOP: return 0xAA; + case PIPE_LOGICOP_XOR: return 0x66; + case PIPE_LOGICOP_OR: return 0xEE; + case PIPE_LOGICOP_NOR: return 0x11; + case PIPE_LOGICOP_EQUIV: return 0x99; + case PIPE_LOGICOP_INVERT: return 0x55; + case PIPE_LOGICOP_OR_REVERSE: return 0xDD; + case PIPE_LOGICOP_COPY_INVERTED: return 0x33; + case PIPE_LOGICOP_OR_INVERTED: return 0xBB; + case PIPE_LOGICOP_NAND: return 0x77; + case PIPE_LOGICOP_SET: return 0xFF; + default: return 0; + } +} + + +/* Copy BitBlt + */ +void brw_copy_blit(struct brw_context *brw, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + BATCH_LOCALS; + + + DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_x, src_y, + dst_buffer, dst_pitch, dst_x, dst_y, + w,h,logic_op); + + assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 ); + assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 ); + + src_pitch *= cpp; + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | + (1<<25); + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (src_tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + dst_pitch &= 0xffff; + src_pitch &= 0xffff; + + /* Initial y values don't seem to work with negative pitches. If + * we adjust the offsets manually (below), it seems to work fine. + * + * On the other hand, if we always adjust, the hardware doesn't + * know which blit directions to use, so overlapping copypixels get + * the wrong result. + */ + if (dst_pitch > 0 && src_pitch > 0) { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (dst_y << 16) | dst_x ); + OUT_BATCH( (dst_y2 << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset ); + OUT_BATCH( (src_y << 16) | src_x ); + OUT_BATCH( src_pitch ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset ); + ADVANCE_BATCH(); + } + else { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); + OUT_BATCH( (0 << 16) | dst_x ); + OUT_BATCH( (h << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset + dst_y * dst_pitch ); + OUT_BATCH( (src_pitch & 0xffff) ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset + src_y * src_pitch ); + ADVANCE_BATCH(); + } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h new file mode 100644 index 0000000000..111c5d91d3 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.h @@ -0,0 +1,33 @@ +#ifndef BRW_BLIT_H +#define BRW_BLIT_H + +#include "pipe/p_compiler.h" + +struct pipe_buffer; +struct brw_context; + +void brw_fill_blit(struct brw_context *intel, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color); +void brw_copy_blit(struct brw_context *intel, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op); +#endif diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c new file mode 100644 index 0000000000..337e4f95f6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -0,0 +1,269 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" + + +static int brw_translate_compare_func(int func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_ALWAYS; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return BRW_COMPAREFUNCTION_ALWAYS; +} + +static int brw_translate_stencil_op(int op) +{ + switch(op) { + case PIPE_STENCIL_OP_KEEP: + return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return BRW_STENCILOP_INVERT; + default: + return BRW_STENCILOP_ZERO; + } +} + + +static int brw_translate_logic_op(int opcode) +{ + switch(opcode) { + case PIPE_LOGICOP_CLEAR: + return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_AND: + return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_AND_REVERSE: + return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_COPY: + return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return BRW_LOGICOPFUNCTION_COPY_INVERTED; + case PIPE_LOGICOP_AND_INVERTED: + return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_NOOP: + return BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_XOR: + return BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_OR: + return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_OR_INVERTED: + return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_NOR: + return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_EQUIV: + return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_INVERT: + return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_OR_REVERSE: + return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_NAND: + return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_SET: + return BRW_LOGICOPFUNCTION_SET; + default: + return BRW_LOGICOPFUNCTION_SET; + } +} + + +static void upload_cc_vp( struct brw_context *brw ) +{ + struct brw_cc_viewport ccv; + + memset(&ccv, 0, sizeof(ccv)); + + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + + brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); +} + +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_cc_vp +}; + + +static void upload_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_state cc; + + memset(&cc, 0, sizeof(cc)); + + /* BRW_NEW_DEPTH_STENCIL */ + if (brw->attribs.DepthStencil->stencil[0].enabled) { + cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled; + cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func); + cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op); + cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zfail_op); + cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zpass_op); + cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; + cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].write_mask; + cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].value_mask; + + if (brw->attribs.DepthStencil->stencil[1].enabled) { + cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; + cc.cc0.bf_stencil_func = brw_translate_compare_func( + brw->attribs.DepthStencil->stencil[1].func); + cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].fail_op); + cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zfail_op); + cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zpass_op); + cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; + cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].write_mask; + cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].value_mask; + } + + /* Not really sure about this: + */ + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + cc.cc0.stencil_write_enable = 1; + } + + /* BRW_NEW_BLEND */ + if (brw->attribs.Blend->logicop_enable) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func ); + } + else if (brw->attribs.Blend->blend_enable) { + int eqRGB = brw->attribs.Blend->rgb_func; + int eqA = brw->attribs.Blend->alpha_func; + int srcRGB = brw->attribs.Blend->rgb_src_factor; + int dstRGB = brw->attribs.Blend->rgb_dst_factor; + int srcA = brw->attribs.Blend->alpha_src_factor; + int dstA = brw->attribs.Blend->alpha_dst_factor; + + if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) { + srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE; + } + + if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) { + srcA = dstA = PIPE_BLENDFACTOR_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + /* BRW_NEW_ALPHATEST + */ + if (brw->attribs.DepthStencil->alpha.enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = + brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], + brw->attribs.DepthStencil->alpha.ref); + + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + } + + if (brw->attribs.Blend->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + + if (brw->attribs.DepthStencil->depth.enabled) { + cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled; + cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func); + cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask; + } + + /* CACHE_NEW_CC_VP */ + cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; + + if (BRW_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; + + brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); +} + +const struct brw_tracked_state brw_cc_unit = { + .dirty = { + .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST, + .cache = CACHE_NEW_CC_VP + }, + .update = upload_cc_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c new file mode 100644 index 0000000000..268124cc53 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.c @@ -0,0 +1,206 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_clip.h" + +#define FRONT_UNFILLED_BIT 0x1 +#define BACK_UNFILLED_BIT 0x2 + + +static void compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + struct brw_clip_compile c; + const unsigned *program; + unsigned program_size; + unsigned delta; + unsigned i; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + c.key = *key; + + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.header_position_offset = ATTR_SIZE; + + for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++) + if (c.key.attrs & (1<primitive) { + case PIPE_PRIM_TRIANGLES: +#if 0 + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else +#endif + brw_emit_tri_clip( &c ); + break; + case PIPE_PRIM_LINES: + brw_emit_line_clip( &c ); + break; + case PIPE_PRIM_POINTS: + brw_emit_point_clip( &c ); + break; + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->clip.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_CLIP_PROG], + key, sizeof(*key), + &brw->clip.prog_data, + &brw->clip.prog_gs_offset); +} + + + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_clip_prog(struct brw_context *brw) +{ + struct brw_clip_prog_key key; + + memset(&key, 0, sizeof(key)); + + /* Populate the key: + */ + /* BRW_NEW_REDUCED_PRIMITIVE */ + key.primitive = brw->reduced_primitive; + /* CACHE_NEW_VS_PROG */ + key.attrs = brw->vs.prog_data->outputs_written; + /* BRW_NEW_RASTER */ + key.do_flat_shading = (brw->attribs.Raster->flatshade); + /* BRW_NEW_CLIP */ + key.nr_userclip = brw->attribs.Clip.nr; /* XXX */ + +#if 0 + key.clip_mode = BRW_CLIPMODE_NORMAL; + + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH) + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL) + key.do_unfilled = 1; + + /* Most cases the fixed function units will handle. Cases where + * one or more polygon faces are unfilled will require help: + */ + if (key.do_unfilled) { + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + + if (brw->attribs.Raster->offset_cw || + brw->attribs.Raster->offset_ccw) { + key.offset_units = brw->attribs.Raster->offset_units; + key.offset_factor = brw->attribs.Raster->offset_scale; + } + key.fill_ccw = brw->attribs.Raster->fill_ccw; + key.fill_cw = brw->attribs.Raster->fill_cw; + key.offset_ccw = brw->attribs.Raster->offset_ccw; + key.offset_cw = brw->attribs.Raster->offset_cw; + if (brw->attribs.Raster->light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + } + } + } +#else + key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL; +#endif + + if (!search_cache(brw, &key)) + compile_clip_prog( brw, &key ); +} + +const struct brw_tracked_state brw_clip_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_CLIP | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_clip_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h new file mode 100644 index 0000000000..a89d08b791 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.h @@ -0,0 +1,170 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6) + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { + unsigned attrs:32; + unsigned primitive:4; + unsigned nr_userclip:3; + unsigned do_flat_shading:1; + unsigned do_unfilled:1; + unsigned fill_cw:2; /* includes cull information */ + unsigned fill_ccw:2; /* includes cull information */ + unsigned offset_cw:1; + unsigned offset_ccw:1; + unsigned pad0:17; + + unsigned copy_bfc_cw:1; + unsigned copy_bfc_ccw:1; + unsigned clip_mode:3; + unsigned pad1:27; + + float offset_factor; + float offset_units; +}; + + +#define CLIP_LINE 0 +#define CLIP_POINT 1 +#define CLIP_FILL 2 +#define CLIP_CULL 3 + + +#define PRIM_MASK (0x1f) + +struct brw_clip_compile { + struct brw_compile func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; + + unsigned first_tmp; + unsigned last_tmp; + + boolean need_direction; + + unsigned last_mrf; + + unsigned header_position_offset; + unsigned offset[PIPE_ATTRIB_MAX]; +}; + +#define ATTR_SIZE (4*4) + +/* Points are only culled, so no need for a clip routine, however it + * works out easier to have a dummy one. + */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ); + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c new file mode 100644 index 0000000000..75d9e5fcda --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_line.c @@ -0,0 +1,245 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < 4; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.t0 = brw_vec1_grf(i, 1); + c->reg.t1 = brw_vec1_grf(i, 2); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp1 = brw_vec1_grf(i, 4); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +/* Line clipping, more or less following the following algorithm: + * + * for (p=0;p t1) t1 = t; + * } else { + * float t = dp0 / (dp0 - dp1); + * if (t > t0) t0 = t; + * } + * + * if (t0 + t1 >= 1.0) + * return; + * } + * } + * + * interp( ctx, newvtx0, vtx0, vtx1, t0 ); + * interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx0 = brw_indirect(0, 0); + struct brw_indirect vtx1 = brw_indirect(1, 0); + struct brw_indirect newvtx0 = brw_indirect(2, 0); + struct brw_indirect newvtx1 = brw_indirect(3, 0); + struct brw_indirect plane_ptr = brw_indirect(4, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *is_negative; + struct brw_instruction *is_neg2; + struct brw_instruction *not_culled; + struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); + brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + + /* Note: init t0, t1 together: + */ + brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + + brw_clip_init_planes(c); + brw_clip_init_clipmask(c); + + /* -ve rhw workaround */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + +#if 0 + /* dp = DP4(vtx->position, plane) + */ + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + + /* if (IS_NEGATIVE(dp1)) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +#else + #warning "disabled" +#endif + is_negative = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + is_negative = brw_ELSE(p, is_negative); + { + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_neg2); + } + brw_ENDIF(p, is_negative); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* while (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); + + brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + not_culled = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); + + brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, not_culled); + brw_clip_kill_thread(c); +} + + + +void brw_emit_line_clip( struct brw_clip_compile *c ) +{ + brw_clip_line_alloc_regs(c); + + if (c->key.do_flat_shading) + brw_clip_copy_colors(c, 0, 1); + + clip_and_emit_line(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c new file mode 100644 index 0000000000..6fce7210d1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_point.c @@ -0,0 +1,47 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping, nothing to do? + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ + /* Send an empty message to kill the thread: + */ + brw_clip_tri_alloc_regs(c, 0); + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c new file mode 100644 index 0000000000..ea5c05a279 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -0,0 +1,92 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "pipe/p_util.h" + + +static void upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_state clip; + + memset(&clip, 0, sizeof(clip)); + + /* CACHE_NEW_CLIP_PROG */ + clip.thread0.grf_reg_count = + align(brw->clip.prog_data->total_grf, 16) / 16 - 1; + clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; + clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; + clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; + clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + + /* BRW_NEW_CURBE_OFFSETS */ + clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + + /* BRW_NEW_URB_FENCE */ + clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; + clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + clip.thread4.max_threads = 1; /* 2 threads */ + + if (BRW_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + + /* CONSTANT */ + clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip.thread1.single_program_flow = 1; + clip.thread3.dispatch_grf_start_reg = 1; + clip.thread3.urb_entry_read_offset = 0; + clip.clip5.userclip_enable_flags = 0x7f; + clip.clip5.userclip_must_clip = 1; + clip.clip5.guard_band_enable = 0; + clip.clip5.viewport_z_clip_enable = 1; + clip.clip5.viewport_xy_clip_enable = 1; + clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip6.clipper_viewport_state_ptr = 0; + clip.viewport_xmin = -1; + clip.viewport_xmax = 1; + clip.viewport_ymin = -1; + clip.viewport_ymax = 1; + + brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); +} + + +const struct brw_tracked_state brw_clip_unit = { + .dirty = { + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_CLIP_PROG + }, + .update = upload_clip_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c new file mode 100644 index 0000000000..c5da7b825e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_tri.c @@ -0,0 +1,566 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + if (c->nr_attrs & 1) { + for (j = 0; j < 3; j++) { + unsigned delta = c->nr_attrs*16 + 32; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); + } + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); + c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp = brw_vec1_grf(i, 4); + i++; + + c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->key.do_unfilled) { + c->reg.dir = brw_vec4_grf(i, 0); + c->reg.offset = brw_vec4_grf(i, 4); + i++; + c->reg.tmp0 = brw_vec4_grf(i, 0); + c->reg.tmp1 = brw_vec4_grf(i, 4); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + struct brw_instruction *is_rev; + + /* Initial list of indices for incoming vertexes: + */ + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* XXX: Is there an easier way to do this? Need to reverse every + * second tristrip element: Can ignore sometimes? + */ + is_rev = brw_IF(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(-1)); + } + is_rev = brw_ELSE(p, is_rev); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(1)); + } + brw_ENDIF(p, is_rev); + + brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + is_poly = brw_ELSE(p, is_poly); + { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } + brw_ENDIF(p, is_poly); +} + + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly. + */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_indirect vtx = brw_indirect(0, 0); + struct brw_indirect vtxPrev = brw_indirect(1, 0); + struct brw_indirect vtxOut = brw_indirect(2, 0); + struct brw_indirect plane_ptr = brw_indirect(3, 0); + struct brw_indirect inlist_ptr = brw_indirect(4, 0); + struct brw_indirect outlist_ptr = brw_indirect(5, 0); + struct brw_indirect freelist_ptr = brw_indirect(6, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *vertex_loop; + struct brw_instruction *next_test; + struct brw_instruction *prev_test; + + brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + + brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + /* vtxOut = freelist_ptr++ + */ + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); + brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + + vertex_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* vtx = *input_ptr; + */ + brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + + /* IS_NEGATIVE(prev) */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + prev_test = brw_IF(p, BRW_EXECUTE_1); + { + /* IS_POSITIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + + /* Coming back in. + */ + brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); + + /* If (vtxOut == 0) vtxOut = vtxPrev + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + + } + prev_test = brw_ELSE(p, prev_test); + { + /* *outlist_ptr++ = vtxPrev; + * nr_verts++; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + + /* IS_NEGATIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + /* Going out of bounds. Avoid division by zero as we + * know dp != dpPrev from DIFFERENT_SIGNS, above. + */ + brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); + + /* If (vtxOut == 0) vtxOut = vtx + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + } + brw_ENDIF(p, prev_test); + + /* vtxPrev = vtx; + * inlist_ptr++; + */ + brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); + brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + + /* while (--loopcount != 0) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, vertex_loop); + + /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] + * inlist = outlist + * inlist_ptr = &inlist[0] + * outlist_ptr = &outlist[0] + */ + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); + brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); + brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* nr_verts >= 3 + */ + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + c->reg.nr_verts, + brw_imm_ud(3)); + + /* && (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop, *if_insn; + + /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, + c->reg.loopcount, + c->reg.nr_verts, + brw_imm_d(-2)); + + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect vptr = brw_indirect(1, 0); + + brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + + brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); + } + brw_ENDIF(p, if_insn); +} + +static void do_clip_tri( struct brw_clip_compile *c ) +{ + brw_clip_init_planes(c); + + brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + do_clip_tri(c); + } + brw_ENDIF(p, do_clip); +} + +static void brw_clip_test( struct brw_clip_compile *c ) +{ +#if 0 + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + + /* test nearz, xmin, ymin plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +#else + #warning "disabled" +#endif +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + + /* if -ve rhw workaround bit is set, + do cliptest */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c new file mode 100644 index 0000000000..b774a76dd6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_unfilled.c @@ -0,0 +1,477 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0, negate(v2)); + brw_ADD(p, f, v1, negate(v2)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + /* Do we have any colors to copy? + */ + if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && + !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) + return; + + /* In some wierd degnerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting wierd GL state. + */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + unsigned i; + + for (i = 0; i < 3; i++) { + if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + float iz = 1.0 / dir.z; + float ac = dir.x * iz; + float bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); + struct brw_reg z = get_element(pos, 2); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + boolean do_offset) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a seperate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, + boolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + unsigned mode, + boolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. + */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + + assert(c->offset[VERT_RESULT_EDGE]); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c new file mode 100644 index 0000000000..6d58ceafff --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_util.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +#else + #warning "disabled" +#endif +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + unsigned i; + + /* Just copy the vertex header: + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) + */ + for (i = 0; i < c->nr_attrs; i++) { + unsigned delta = i*16 + 32; + + if (delta == c->offset[VERT_RESULT_EDGE]) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + unsigned delta = i*16 + 32; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +#else + #warning "disabled" +#endif +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header) +{ + struct brw_compile *p = &c->func; + unsigned start = c->last_mrf; + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a seperate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a seperate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 0, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. + */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ) +{ +#if 0 + struct brw_compile *p = &c->func; + + if (c->offset[VERT_RESULT_COL0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + + if (c->offset[VERT_RESULT_COL1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + + if (c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. + */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c new file mode 100644 index 0000000000..5e58701e91 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -0,0 +1,245 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_vs.h" +#include "brw_tex_layout.h" +#include "brw_winsys.h" + +#include "pipe/p_winsys.h" +#include "pipe/p_context.h" +#include "pipe/p_util.h" + +/*************************************** + * Mesa's Driver Functions + ***************************************/ + +#ifndef BRW_DEBUG +int BRW_DEBUG = (0); +#endif + +static void brw_destroy(struct pipe_context *pipe) +{ + struct brw_context *brw = brw_context(pipe); + + FREE(brw); +} + +static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + int x, y, w, h; + /* FIXME: corny... */ + + x = 0; + y = 0; + w = ps->width; + h = ps->height; + + pipe->surface_fill(pipe, ps, x, y, w, h, clearValue); +} + + +static int +brw_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +brw_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + +static boolean +brw_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ +#if 0 + /* XXX: This is broken -- rewrite if still needed. */ + static const unsigned tex_supported[] = { + PIPE_FORMAT_U_R8_G8_B8_A8, + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_L8_A8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8_Z24, + }; + + + /* Actually a lot more than this - add later: + */ + static const unsigned render_supported[] = { + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + }; + + /* + */ + static const unsigned z_stencil_supported[] = { + PIPE_FORMAT_U_Z16, + PIPE_FORMAT_U_Z32, + PIPE_FORMAT_S8_Z24, + }; + + switch (type) { + case PIPE_RENDER_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + case PIPE_TEX_FORMAT: + *numFormats = Elements(tex_supported); + return render_supported; + + case PIPE_Z_STENCIL_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + default: + *numFormats = 0; + return NULL; + } +#else + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + return TRUE; + default: + return FALSE; + }; + return FALSE; +#endif +} + + + + +struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, + struct brw_winsys *brw_winsys, + unsigned pci_id) +{ + struct brw_context *brw; + + pipe_winsys->printf(pipe_winsys, + "%s: creating brw_context with pci id 0x%x\n", + __FUNCTION__, pci_id); + + brw = CALLOC_STRUCT(brw_context); + if (brw == NULL) + return NULL; + + brw->winsys = brw_winsys; + brw->pipe.winsys = pipe_winsys; + + brw->pipe.destroy = brw_destroy; + brw->pipe.is_format_supported = brw_is_format_supported; + brw->pipe.get_param = brw_get_param; + brw->pipe.get_paramf = brw_get_paramf; + brw->pipe.clear = brw_clear; + brw->pipe.texture_create = brw_texture_create; + brw->pipe.texture_release = brw_texture_release; + + brw_init_surface_functions(brw); + brw_init_state_functions(brw); + brw_init_flush_functions(brw); + brw_init_string_functions(brw); + brw_init_draw_functions( brw ); + + + brw_init_state( brw ); + + brw->pci_id = pci_id; + brw->dirty = ~0; + brw->hardware_dirty = ~0; + + memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); + + return &brw->pipe; +} + diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h new file mode 100644 index 0000000000..65664d853d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -0,0 +1,690 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contigous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_shader_info { + int nr_regs[8]; /* TGSI_FILE_* */ +}; + + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct brw_shader_info info; + int id; +}; + + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct brw_shader_info info; + + boolean UsesDepth; + boolean UsesKill; + boolean ComputesDepth; + int id; +}; + + + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? */ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1< 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + + struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c new file mode 100644 index 0000000000..52bbd525c1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -0,0 +1,368 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_batch.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "pipe/p_state.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#define FILE_DEBUG_FLAG DEBUG_FALLBACKS + +/* Partition the CURBE between the various users of constant values: + */ +static void calculate_curbe_offsets( struct brw_context *brw ) +{ + /* CACHE_NEW_WM_PROG */ + unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); + + /* BRW_NEW_VERTEX_PROGRAM */ + unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); + unsigned nr_clip_regs = 0; + unsigned total_regs; + +#if 0 + /* BRW_NEW_CLIP ? */ + if (brw->attribs.Transform->ClipPlanesEnabled) { + unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + nr_clip_regs = align(nr_planes * 4, 16); + } +#endif + + + total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + + /* This can happen - what to do? Probably rather than falling + * back, the best thing to do is emit programs which code the + * constants as immediate values. Could do this either as a static + * cap on WM and VS, or adaptively. + * + * Unfortunately, this is currently dependent on the results of the + * program generation process (in the case of wm), so this would + * introduce the need to re-generate programs in the event of a + * curbe allocation failure. + */ + /* Max size is 32 - just large enough to + * hold the 128 parameters allowed by + * the fragment and vertex program + * api's. It's not clear what happens + * when both VP and FP want to use 128 + * parameters, though. + */ + assert(total_regs <= 32); + + /* Lazy resize: + */ + if (nr_fp_regs > brw->curbe.wm_size || + nr_vp_regs > brw->curbe.vs_size || + nr_clip_regs != brw->curbe.clip_size || + (total_regs < brw->curbe.total_size / 4 && + brw->curbe.total_size > 16)) { + + unsigned reg = 0; + + /* Calculate a new layout: + */ + reg = 0; + brw->curbe.wm_start = reg; + brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; + brw->curbe.clip_start = reg; + brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; + brw->curbe.vs_start = reg; + brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; + brw->curbe.total_size = reg; + +#if 0 + if (0) + DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); +#endif + + brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; + } +} + + +const struct brw_tracked_state brw_curbe_offsets = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_VS), + .cache = CACHE_NEW_WM_PROG + }, + .update = calculate_curbe_offsets +}; + + + +/* Define the number of curbes within CS's urb allocation. Multiple + * urb entries -> multiple curbes. These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. + */ +void brw_upload_constant_buffer_state(struct brw_context *brw) +{ + struct brw_constant_buffer_state cbs; + memset(&cbs, 0, sizeof(cbs)); + + /* It appears that this is the state packet for the CS unit, ie. the + * urb entries detailed here are housed in the CS range from the + * URB_FENCE command. + */ + cbs.header.opcode = CMD_CONST_BUFFER_STATE; + cbs.header.length = sizeof(cbs)/4 - 2; + + /* BRW_NEW_URB_FENCE */ + cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cbs.bits0.urb_entry_size = brw->urb.csize - 1; + + assert(brw->urb.nr_cs_entries); + BRW_CACHED_BATCH_STRUCT(brw, &cbs); +} + + +static float fixed_plane[6][4] = { + { 0, 0, -1, 1 }, + { 0, 0, 1, 1 }, + { 0, -1, 0, 1 }, + { 0, 1, 0, 1 }, + {-1, 0, 0, 1 }, + { 1, 0, 0, 1 } +}; + +/* Upload a new set of constants. Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. + */ +static void upload_constant_buffer(struct brw_context *brw) +{ + struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; + unsigned sz = brw->curbe.total_size; + unsigned bufsz = sz * sizeof(float); + float *buf; + unsigned i; + + + if (sz == 0) { + struct brw_constant_buffer cb; + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 0; + cb.bits0.buffer_length = 0; + cb.bits0.buffer_address = 0; + BRW_BATCH_STRUCT(brw, &cb); + + if (brw->curbe.last_buf) { + free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + brw->curbe.last_bufsz = 0; + } + + return; + } + + buf = (float *)malloc(bufsz); + + memset(buf, 0, bufsz); + + if (brw->curbe.wm_size) { + unsigned offset = brw->curbe.wm_start * 16; + + /* First the constant buffer constants: + */ + + /* Then any internally generated constants: + */ + for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) + buf[offset + i] = brw->wm.prog_data->internal_const[i]; + + assert(brw->wm.prog_data->max_const == + brw->wm.prog_data->nr_internal_consts); + } + + + /* The clipplanes are actually delivered to both CLIP and VS units. + * VS uses them to calculate the outcode bitmasks. + */ + if (brw->curbe.clip_size) { + unsigned offset = brw->curbe.clip_start * 16; + unsigned j; + + /* If any planes are going this way, send them all this way: + */ + for (i = 0; i < 6; i++) { + buf[offset + i * 4 + 0] = fixed_plane[i][0]; + buf[offset + i * 4 + 1] = fixed_plane[i][1]; + buf[offset + i * 4 + 2] = fixed_plane[i][2]; + buf[offset + i * 4 + 3] = fixed_plane[i][3]; + } + + /* Clip planes: BRW_NEW_CLIP: + */ + for (j = 0; j < brw->attribs.Clip.nr; j++) { + buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; + i++; + } + } + + + if (brw->curbe.vs_size) { + unsigned offset = brw->curbe.vs_start * 16; + /*unsigned nr = vp->max_const;*/ + const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; + struct pipe_winsys *ws = brw->pipe.winsys; + /* FIXME: buffer size is num_consts + num_immediates */ + if (brw->vs.prog_data->num_consts) { + /* map the vertex constant buffer and copy to curbe: */ + void *data = ws->buffer_map(ws, cbuffer->buffer, 0); + /* FIXME: this is wrong. the cbuffer->size currently + * represents size of consts + immediates. so if we'll + * have both we'll copy over the end of the buffer + * with the subsequent memcpy */ + memcpy(&buf[offset], data, cbuffer->size); + ws->buffer_unmap(ws, cbuffer->buffer); + offset += cbuffer->size; + } + /*immediates*/ + if (brw->vs.prog_data->num_imm) { + memcpy(&buf[offset], brw->vs.prog_data->imm_buf, + brw->vs.prog_data->num_imm * 4 * sizeof(float)); + } + } + + if (1) { + for (i = 0; i < sz; i+=4) + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + } + + if (brw->curbe.last_buf && + bufsz == brw->curbe.last_bufsz && + memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { + free(buf); +/* return; */ + } + else { + if (brw->curbe.last_buf) + free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; + brw->curbe.last_bufsz = bufsz; + + + if (!brw_pool_alloc(pool, + bufsz, + 1 << 6, + &brw->curbe.gs_offset)) { + debug_printf("out of GS memory for curbe\n"); + assert(0); + return; + } + + + /* Copy data to the buffer: + */ + brw->winsys->buffer_subdata_typed(brw->winsys, + pool->buffer, + brw->curbe.gs_offset, + bufsz, + buf, + BRW_CONSTANT_BUFFER ); + } + + /* TODO: only emit the constant_buffer packet when necessary, ie: + - contents have changed + - offset has changed + - hw requirements due to other packets emitted. + */ + { + struct brw_constant_buffer cb; + + memset(&cb, 0, sizeof(cb)); + + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 1; + cb.bits0.buffer_length = sz - 1; + cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ + BRW_BATCH_STRUCT(brw, &cb); + } +} + +/* This tracked state is unique in that the state it monitors varies + * dynamically depending on the parameters tracked by the fragment and + * vertex programs. This is the template used as a starting point, + * each context will maintain a copy of this internally and update as + * required. + */ +const struct brw_tracked_state brw_constant_buffer = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CONSTANTS | + BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ + BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ + BRW_NEW_CURBE_OFFSETS), + .cache = (CACHE_NEW_WM_PROG) + }, + .update = upload_constant_buffer +}; + diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h new file mode 100644 index 0000000000..9379a397f6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_defines.h @@ -0,0 +1,852 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* + */ +#define MI_NOOP 0x00 +#define MI_USER_INTERRUPT 0x02 +#define MI_WAIT_FOR_EVENT 0x03 +#define MI_FLUSH 0x04 +#define MI_REPORT_HEAD 0x07 +#define MI_ARB_ON_OFF 0x08 +#define MI_BATCH_BUFFER_END 0x0A +#define MI_OVERLAY_FLIP 0x11 +#define MI_LOAD_SCAN_LINES_INCL 0x12 +#define MI_LOAD_SCAN_LINES_EXCL 0x13 +#define MI_DISPLAY_BUFFER_INFO 0x14 +#define MI_SET_CONTEXT 0x18 +#define MI_STORE_DATA_IMM 0x20 +#define MI_STORE_DATA_INDEX 0x21 +#define MI_LOAD_REGISTER_IMM 0x22 +#define MI_STORE_REGISTER_MEM 0x24 +#define MI_BATCH_BUFFER_START 0x31 + +#define MI_SYNCHRONOUS_FLIP 0x0 +#define MI_ASYNCHRONOUS_FLIP 0x1 + +#define MI_BUFFER_SECURE 0x0 +#define MI_BUFFER_NONSECURE 0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 +#define MI_ARBITRATE_BETWEEN_INSTS 0x1 +#define MI_NO_ARBITRATION 0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 +#define MI_CONDITION_CODE_WAIT_0 0x1 +#define MI_CONDITION_CODE_WAIT_1 0x2 +#define MI_CONDITION_CODE_WAIT_2 0x3 +#define MI_CONDITION_CODE_WAIT_3 0x4 +#define MI_CONDITION_CODE_WAIT_4 0x5 + +#define MI_DISPLAY_PIPE_A 0x0 +#define MI_DISPLAY_PIPE_B 0x1 + +#define MI_DISPLAY_PLANE_A 0x0 +#define MI_DISPLAY_PLANE_B 0x1 +#define MI_DISPLAY_PLANE_C 0x2 + +#define MI_STANDARD_FLIP 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 + +#define MI_PHYSICAL_ADDRESS 0x0 +#define MI_VIRTUAL_ADDRESS 0x1 + +#define MI_BUFFER_MEMORY_MAIN 0x0 +#define MI_BUFFER_MEMORY_GTT 0x2 +#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 + +#define MI_FLIP_CONTINUE 0x0 +#define MI_FLIP_ON 0x1 +#define MI_FLIP_OFF 0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE 0x0 +#define MI_TRUSTED_REGISTER_SPACE 0x1 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c new file mode 100644 index 0000000000..7598e3dc8a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + + +static const int reduced_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. + */ +static void brw_set_prim(struct brw_context *brw, int prim) +{ + PRINT("PRIM: %d\n", prim); + + /* Slight optimization to avoid the GS program when not needed: + */ + if (prim == PIPE_PRIM_QUAD_STRIP && + brw->attribs.Raster->flatshade && + brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL && + brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL) + prim = PIPE_PRIM_TRIANGLE_STRIP; + + if (prim != brw->primitive) { + brw->primitive = prim; + brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + + if (reduced_prim[prim] != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim[prim]; + brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; + } + + brw_validate_state(brw); + } + +} + + +static unsigned trim(int prim, unsigned length) +{ + if (prim == PIPE_PRIM_QUAD_STRIP) + return length > 3 ? (length - length % 2) : 0; + else if (prim == PIPE_PRIM_QUADS) + return length - length % 4; + else + return length; +} + + + +static boolean brw_emit_prim( struct brw_context *brw, + boolean indexed, + unsigned start, + unsigned count ) + +{ + struct brw_3d_primitive prim_packet; + + if (BRW_DEBUG & DEBUG_PRIMS) + PRINT("PRIM: %d %d %d\n", brw->primitive, start, count); + + prim_packet.header.opcode = CMD_3D_PRIM; + prim_packet.header.length = sizeof(prim_packet)/4 - 2; + prim_packet.header.pad = 0; + prim_packet.header.topology = hw_prim[brw->primitive]; + prim_packet.header.indexed = indexed; + + prim_packet.verts_per_instance = trim(brw->primitive, count); + prim_packet.start_vert_location = start; + prim_packet.instance_count = 1; + prim_packet.start_instance_location = 0; + prim_packet.base_vert_location = 0; + + if (prim_packet.verts_per_instance == 0) + return TRUE; + + return brw_batchbuffer_data( brw->winsys, + &prim_packet, + sizeof(prim_packet) ); +} + + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. + */ +static boolean brw_try_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned mode, + unsigned start, + unsigned count ) +{ + struct brw_context *brw = brw_context(pipe); + + /* Set the first primitive ahead of validate_state: + */ + brw_set_prim(brw, mode); + + /* Upload index, vertex data: + */ + if (index_buffer && + !brw_upload_indices( brw, index_buffer, index_size, start, count )) + return FALSE; + + if (!brw_upload_vertex_buffers(brw)) + return FALSE; + + if (!brw_upload_vertex_elements( brw )) + return FALSE; + + /* XXX: Need to separate validate and upload of state. + */ + if (brw->state.dirty.brw) + brw_validate_state( brw ); + + if (!brw_emit_prim(brw, index_buffer != NULL, + start, count)) + return FALSE; + + return TRUE; +} + + + +static boolean brw_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, count )) + { + /* flush ? */ + + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, + count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + + +static boolean brw_draw_arrays( struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + /* flush ? */ + + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + + +void brw_init_draw_functions( struct brw_context *brw ) +{ + brw->pipe.draw_arrays = brw_draw_arrays; + brw->pipe.draw_elements = brw_draw_elements; +} + + diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/i965simple/brw_draw.h new file mode 100644 index 0000000000..62fe0d5d0e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.h @@ -0,0 +1,55 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "pipe/p_context.h" + +struct brw_context; + + + +void brw_init_draw_functions( struct brw_context *brw ); + + +boolean brw_upload_vertices( struct brw_context *brw, + unsigned min_index, + unsigned max_index ); + +boolean brw_upload_indices(struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count); + +boolean brw_upload_vertex_buffers( struct brw_context *brw ); +boolean brw_upload_vertex_elements( struct brw_context *brw ); + +unsigned brw_translate_surface_format( unsigned id ); + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c new file mode 100644 index 0000000000..aa85d93866 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + +struct brw_array_state { + union header_union header; + + struct { + union { + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } bits; + unsigned dword; + } vb0; + + struct pipe_buffer *buffer; + unsigned offset; + + unsigned max_index; + unsigned instance_data_step_rate; + + } vb[BRW_VBP_MAX]; +}; + + + +unsigned brw_translate_surface_format( unsigned id ) +{ + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; + } +} + +static unsigned get_index_type(int type) +{ + switch (type) { + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + + +boolean brw_upload_vertex_buffers( struct brw_context *brw ) +{ + struct brw_array_state vbp; + unsigned nr_enabled = 0; + unsigned i; + + memset(&vbp, 0, sizeof(vbp)); + + /* This is a hardware limit: + */ + + for (i = 0; i < BRW_VEP_MAX; i++) + { + if (brw->vb.vbo_array[i] == NULL) { + nr_enabled = i; + break; + } + + vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch; + vbp.vb[i].vb0.bits.pad = 0; + vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; + vbp.vb[i].vb0.bits.vb_index = i; + vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; + vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; + vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; + } + + + vbp.header.bits.length = (1 + nr_enabled * 4) - 2; + vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + + BEGIN_BATCH(vbp.header.bits.length+2, 0); + OUT_BATCH( vbp.header.dword ); + + for (i = 0; i < nr_enabled; i++) { + OUT_BATCH( vbp.vb[i].vb0.dword ); + OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_USAGE_GPU_READ, + vbp.vb[i].offset); + OUT_BATCH( vbp.vb[i].max_index ); + OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + } + ADVANCE_BATCH(); + return TRUE; +} + + + +boolean brw_upload_vertex_elements( struct brw_context *brw ) +{ + struct brw_vertex_element_packet vep; + + unsigned i; + unsigned nr_enabled = brw->attribs.VertexProgram->program.num_inputs; + + memset(&vep, 0, sizeof(vep)); + + for (i = 0; i < nr_enabled; i++) + vep.ve[i] = brw->vb.inputs[i]; + + + vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; + vep.header.opcode = CMD_VERTEX_ELEMENT; + brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); + + return TRUE; +} + +boolean brw_upload_indices( struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count) +{ + /* Emit the indexbuffer packet: + */ + { + struct brw_indexbuffer ib; + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(ib_size); + ib.header.bits.cut_index_enable = 0; + + + BEGIN_BATCH(4, 0); + OUT_BATCH( ib.header.dword ); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count); + OUT_BATCH( 0 ); + ADVANCE_BATCH(); + } + return TRUE; +} diff --git a/src/gallium/drivers/i965simple/brw_eu.c b/src/gallium/drivers/i965simple/brw_eu.c new file mode 100644 index 0000000000..e2002d1821 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.c @@ -0,0 +1,130 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, boolean compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, unsigned value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, unsigned value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + + +/*********************************************************************** + */ +void brw_init_compile( struct brw_compile *p ) +{ + p->nr_insn = 0; + p->current = p->stack; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); +} + + +const unsigned *brw_get_program( struct brw_compile *p, + unsigned *sz ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + brw_NOP(p); + + *sz = p->nr_insn * sizeof(struct brw_instruction); + return (const unsigned *)p->store; +} + diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h new file mode 100644 index 0000000000..23151ae9ed --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.h @@ -0,0 +1,888 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "brw_structs.h" +#include "brw_defines.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* src only, align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! */ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 1200 + +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + unsigned nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + unsigned flag_value; + boolean single_program_flow; +}; + + + +static __inline int type_sz( unsigned type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +static __inline struct brw_reg brw_reg( unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + + struct brw_reg reg; + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? + */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +static __inline struct brw_reg brw_vec16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + +static __inline struct brw_reg brw_vec8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec4_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec2_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + TGSI_WRITEMASK_XY); +} + +static __inline struct brw_reg brw_vec1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); +} + + +static __inline struct brw_reg retype( struct brw_reg reg, + unsigned type ) +{ + reg.type = type; + return reg; +} + +static __inline struct brw_reg suboffset( struct brw_reg reg, + unsigned delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static __inline struct brw_reg offset( struct brw_reg reg, + unsigned delta ) +{ + reg.nr += delta; + return reg; +} + + +static __inline struct brw_reg byte_offset( struct brw_reg reg, + unsigned bytes ) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +static __inline struct brw_reg brw_uw16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_imm_reg( unsigned type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +static __inline struct brw_reg brw_imm_f( float f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +static __inline struct brw_reg brw_imm_d( int d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +static __inline struct brw_reg brw_imm_ud( unsigned ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +static __inline struct brw_reg brw_imm_uw( ushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw; + return imm; +} + +static __inline struct brw_reg brw_imm_w( short w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w; + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/* Vector of eight signed half-byte values: + */ +static __inline struct brw_reg brw_imm_v( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/* Vector of four 8-bit float values: + */ +static __inline struct brw_reg brw_imm_vf( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static __inline struct brw_reg brw_imm_vf4( unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static __inline struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + + +static __inline struct brw_reg brw_vec1_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec4_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static __inline struct brw_reg brw_vec2_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_uw8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static __inline struct brw_reg brw_address_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static __inline struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + TGSI_WRITEMASK_XYZW); /* NOTE! */ +} + +static __inline struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static __inline struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static __inline struct brw_reg brw_mask_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static __inline struct brw_reg brw_message_reg( unsigned nr ) +{ + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static __inline unsigned cvt( unsigned val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static __inline struct brw_reg stride( struct brw_reg reg, + unsigned vstride, + unsigned width, + unsigned hstride ) +{ + + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static __inline struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static __inline struct brw_reg vec8( struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static __inline struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static __inline struct brw_reg vec2( struct brw_reg reg ) +{ + return stride(reg, 2,2,1); +} + +static __inline struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + +static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static __inline struct brw_reg brw_swizzle( struct brw_reg reg, + unsigned x, + unsigned y, + unsigned z, + unsigned w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, + unsigned x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static __inline struct brw_reg brw_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static __inline struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static __inline struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static __inline struct brw_reg brw_vec4_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg brw_vec1_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +static __inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, unsigned value ); +void brw_set_saturate( struct brw_compile *p, unsigned value ); +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ); +void brw_set_compression_control( struct brw_compile *p, boolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ); +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ); + +void brw_init_compile( struct brw_compile *p ); +const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); + + +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ); + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ); + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c new file mode 100644 index 0000000000..4a94ddefa6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_debug.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_debug.h" + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + debug_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else { + debug_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1< + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, + struct brw_reg dest ) +{ + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +static void brw_set_src0( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } + else + { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } + else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } + else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_instruction *insn, + unsigned msg_length, + unsigned response_length, + unsigned function, + unsigned integer_type, + boolean low_precision, + boolean saturate, + unsigned dataType ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; +} + +static void brw_set_urb_message( struct brw_instruction *insn, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean end_of_thread, + boolean complete, + unsigned offset, + unsigned swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_write_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + unsigned pixel_scoreboard_clear, + unsigned response_length, + unsigned end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_read_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length, + unsigned end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_read.binding_table_index = binding_table_index; + insn->bits3.dp_read.msg_control = msg_control; + insn->bits3.dp_read.msg_type = msg_type; + insn->bits3.dp_read.target_cache = target_cache; + insn->bits3.dp_read.response_length = response_length; + insn->bits3.dp_read.msg_length = msg_length; + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits3.dp_read.end_of_thread = end_of_thread; +} + +static void brw_set_sampler_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned sampler, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; +} + + + +static struct brw_instruction *next_insn( struct brw_compile *p, + unsigned opcode ) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: + */ + + if (p->current->header.destreg__conditonalmod) { + p->current->header.destreg__conditonalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + + +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + return insn; +} + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). + */ +struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. + */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + +/* insn->header.mask_control = BRW_MASK_ENABLE; */ + + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = (landing - jmp_insn) - 1; +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditonalmod = conditional; + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/* Invert 8 values + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); +} + +/* Use 2 send instructions to invert 16 elements + */ +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ) +{ + struct brw_instruction *insn; + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditonalmod = msg_reg_nr+1; + + brw_set_dest(insn, offset(dest,1)); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + + + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + unsigned msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + + brw_set_dp_write_message(insn, + 255, /* bti */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } + +} + + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); /* UW? */ + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(insn, + 255, /* bti */ + 3, /* msg_control */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_dp_write_message(insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot) +{ + boolean need_stall = 0; + + if(writemask == 0) { +/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. + */ + if (writemask != TGSI_WRITEMASK_XYZW) { + unsigned dst_offset = 0; + unsigned i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_sampler_message(insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot); + } + + if (need_stall) + { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, FALSE); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_urb_message(insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c new file mode 100644 index 0000000000..3a65b141f0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count) +{ + unsigned i; + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c new file mode 100644 index 0000000000..5216c680cf --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_flush.c @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_batch.h" + + +/** + * In future we may want a fence-like interface instead of finish. + */ +static void brw_flush( struct pipe_context *pipe, + unsigned flags ) +{ + struct brw_context *brw = brw_context(pipe); + struct pipe_fence_handle *fence; + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush.flags |= BRW_FLUSH_READ_CACHE; + + BRW_BATCH_STRUCT(brw, &flush); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH( &fence ); + + if (flags & PIPE_FLUSH_WAIT) { +// brw->winsys->wait_fence(brw->winsys, fence); + } +} + + + +void brw_init_flush_functions( struct brw_context *brw ) +{ + brw->pipe.flush = brw_flush; +} diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c new file mode 100644 index 0000000000..de60868ccc --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.c @@ -0,0 +1,196 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +static void compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + struct brw_gs_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Note that primitives which don't require a GS program have + * already been weeded out by this stage: + */ + switch (key->primitive) { + case PIPE_PRIM_QUADS: + brw_gs_quads( &c ); + break; + case PIPE_PRIM_QUAD_STRIP: + brw_gs_quad_strip( &c ); + break; + case PIPE_PRIM_LINE_LOOP: + brw_gs_lines( &c ); + break; + case PIPE_PRIM_LINES: + if (key->hint_gs_always) + brw_gs_lines( &c ); + else { + return; + } + break; + case PIPE_PRIM_TRIANGLES: + if (key->hint_gs_always) + brw_gs_tris( &c ); + else { + return; + } + break; + case PIPE_PRIM_POINTS: + if (key->hint_gs_always) + brw_gs_points( &c ); + else { + return; + } + break; + default: + return; + } + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->gs.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_GS_PROG], + key, sizeof(*key), + &brw->gs.prog_data, + &brw->gs.prog_gs_offset); +} + + +static const int gs_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES +}; + +static void populate_key( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG */ + key->attrs = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = gs_prim[brw->primitive]; + + key->hint_gs_always = 0; /* debug code? */ + + key->need_gs_prog = (key->hint_gs_always || + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_gs_prog( struct brw_context *brw ) +{ + struct brw_gs_prog_key key; + + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_GS_PROG; + brw->gs.prog_active = key.need_gs_prog; + } + + if (brw->gs.prog_active) { + if (!search_cache(brw, &key)) + compile_gs_prog( brw, &key ); + } +} + + +const struct brw_tracked_state brw_gs_prog = { + .dirty = { + .brw = BRW_NEW_PRIMITIVE, + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_gs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h new file mode 100644 index 0000000000..f09141c6aa --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.h @@ -0,0 +1,75 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4) + +struct brw_gs_prog_key { + unsigned attrs:32; + unsigned primitive:4; + unsigned hint_gs_always:1; + unsigned need_gs_prog:1; + unsigned pad:26; +}; + +struct brw_gs_compile { + struct brw_compile func; + struct brw_gs_prog_key key; + struct brw_gs_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_GS_VERTS]; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; +}; + +#define ATTR_SIZE (4*4) + +void brw_gs_quads( struct brw_gs_compile *c ); +void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_tris( struct brw_gs_compile *c ); +void brw_gs_lines( struct brw_gs_compile *c ); +void brw_gs_points( struct brw_gs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c new file mode 100644 index 0000000000..c3cc90b10f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_emit.c @@ -0,0 +1,148 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +static void brw_gs_alloc_regs( struct brw_gs_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + boolean last, + unsigned header) +{ + struct brw_compile *p = &c->func; + boolean allocate = !last; + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + + /* Send each vertex as a seperate write to the urb. This is + * different to the concept in brw_sf_emit.c, where subsequent + * writes are used to build up a single urb entry. Each of these + * writes instantiates a seperate urb entry, and a new one must be + * allocated each time. + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response length */ + allocate ? 0 : 1, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + /* Use polygons for correct edgeflag behaviour. Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_quad_strip( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_tris( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 3); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +void brw_gs_lines( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 2); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +void brw_gs_points( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c new file mode 100644 index 0000000000..3932e9e939 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -0,0 +1,89 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "pipe/p_util.h" + + + +static void upload_gs_unit( struct brw_context *brw ) +{ + struct brw_gs_unit_state gs; + + memset(&gs, 0, sizeof(gs)); + + /* CACHE_NEW_GS_PROG */ + if (brw->gs.prog_active) { + gs.thread0.grf_reg_count = + align(brw->gs.prog_data->total_grf, 16) / 16 - 1; + gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; + gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } + else { + gs.thread0.grf_reg_count = 0; + gs.thread0.kernel_start_pointer = 0; + gs.thread3.urb_entry_read_length = 1; + } + + /* BRW_NEW_URB_FENCE */ + gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; + gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + + gs.thread4.max_threads = 0; /* Hardware requirement */ + + if (BRW_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + /* CONSTANT */ + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + + + brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); +} + + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .update = upload_gs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c new file mode 100644 index 0000000000..925049ecc1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -0,0 +1,486 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_batch.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + + + + +/*********************************************************************** + * Blend color + */ + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0]; + bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1]; + bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2]; + bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .brw = BRW_NEW_BLEND, + .cache = 0 + }, + .update = upload_blend_constant_color +}; + + +/*********************************************************************** + * Drawing rectangle + */ +static void upload_drawing_rect(struct brw_context *brw) +{ + struct brw_drawrect bdr; + + memset(&bdr, 0, sizeof(bdr)); + bdr.header.opcode = CMD_DRAW_RECT; + bdr.header.length = sizeof(bdr)/4 - 2; + bdr.xmin = 0; + bdr.ymin = 0; + bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width; + bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height; + bdr.xorg = 0; + bdr.yorg = 0; + + /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted + * uncached in brw_draw.c: + */ + BRW_BATCH_STRUCT(brw, &bdr); +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_drawing_rect +}; + +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is the BRW_SS_POOL cache buffer. + */ +static void upload_binding_table_pointers(struct brw_context *brw) +{ + struct brw_binding_table_pointers btp; + memset(&btp, 0, sizeof(btp)); + + btp.header.opcode = CMD_BINDING_TABLE_PTRS; + btp.header.length = sizeof(btp)/4 - 2; + btp.vs = 0; + btp.gs = 0; + btp.clp = 0; + btp.sf = 0; + btp.wm = brw->wm.bind_ss_offset; + + BRW_CACHED_BATCH_STRUCT(brw, &btp); +} + +const struct brw_tracked_state brw_binding_table_pointers = { + .dirty = { + .brw = 0, + .cache = CACHE_NEW_SURF_BIND + }, + .update = upload_binding_table_pointers, +}; + + +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. + */ +static void upload_pipelined_state_pointers(struct brw_context *brw ) +{ + struct brw_pipelined_state_pointers psp; + memset(&psp, 0, sizeof(psp)); + + psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; + psp.header.length = sizeof(psp)/4 - 2; + + psp.vs.offset = brw->vs.state_gs_offset >> 5; + psp.sf.offset = brw->sf.state_gs_offset >> 5; + psp.wm.offset = brw->wm.state_gs_offset >> 5; + psp.cc.offset = brw->cc.state_gs_offset >> 5; + + /* GS gets turned on and off regularly. Need to re-emit URB fence + * after this occurs. + */ + if (brw->gs.prog_active) { + psp.gs.offset = brw->gs.state_gs_offset >> 5; + psp.gs.enable = 1; + } + + if (0) { + psp.clp.offset = brw->clip.state_gs_offset >> 5; + psp.clp.enable = 1; + } + + + if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) + brw->state.dirty.brw |= BRW_NEW_PSP; +} + +const struct brw_tracked_state brw_pipelined_state_pointers = { + .dirty = { + .brw = 0, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_pipelined_state_pointers +}; + +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ + upload_pipelined_state_pointers(brw); + brw_upload_urb_fence(brw); + brw_upload_constant_buffer_state(brw); +} + + +const struct brw_tracked_state brw_psp_urb_cbs = { + .dirty = { + .brw = BRW_NEW_URB_FENCE, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_psp_urb_cbs +}; + +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. + */ +static void upload_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf; + + BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); + if (depth_surface == NULL) { + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + unsigned int format; + + switch (depth_surface->cpp) { + case 2: + format = BRW_DEPTHFORMAT_D16_UNORM; + break; + case 4: + if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT) + format = BRW_DEPTHFORMAT_D32_FLOAT; + else + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + break; + default: + assert(0); + return; + } + + OUT_BATCH(((depth_surface->pitch * depth_surface->cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | +// (depth_surface->region->tiled << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(depth_surface->buffer, + PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((depth_surface->pitch - 1) << 6) | + ((depth_surface->height - 1) << 19)); + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_depthbuffer = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_depthbuffer, +}; + + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static void upload_polygon_stipple(struct brw_context *brw) +{ + struct brw_polygon_stipple bps; + unsigned i; + + memset(&bps, 0, sizeof(bps)); + bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps.header.length = sizeof(bps)/4-2; + + /* XXX: state tracker should send *all* state down initially! + */ + if (brw->attribs.PolygonStipple) + for (i = 0; i < 32; i++) + bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */ + + BRW_CACHED_BATCH_STRUCT(brw, &bps); +} + +const struct brw_tracked_state brw_polygon_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_polygon_stipple +}; + + +/*********************************************************************** + * Line stipple packet + */ + +static void upload_line_stipple(struct brw_context *brw) +{ + struct brw_line_stipple bls; + float tmp; + int tmpi; + + memset(&bls, 0, sizeof(bls)); + bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.length = sizeof(bls)/4 - 2; + + bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern; + bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor; + + tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor; + tmpi = tmp * (1<<13); + + + bls.bits1.inverse_repeat_count = tmpi; + + BRW_CACHED_BATCH_STRUCT(brw, &bls); +} + +const struct brw_tracked_state brw_line_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_line_stipple +}; + + +/*********************************************************************** + * Misc constant state packets + */ + +static void upload_pipe_control(struct brw_context *brw) +{ + struct brw_pipe_control pc; + + return; + + memset(&pc, 0, sizeof(pc)); + + pc.header.opcode = CMD_PIPE_CONTROL; + pc.header.length = sizeof(pc)/4 - 2; + pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; + + pc.header.instruction_state_cache_flush_enable = 1; + + pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; + + BRW_BATCH_STRUCT(brw, &pc); +} + +const struct brw_tracked_state brw_pipe_control = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_pipe_control +}; + + +/*********************************************************************** + * Misc invarient state packets + */ + +static void upload_invarient_state( struct brw_context *brw ) +{ + { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE; + BRW_BATCH_STRUCT(brw, &flush); + } + + { + /* 0x61040000 Pipeline Select */ + /* PipelineSelect : 0 */ + struct brw_pipeline_select ps; + + memset(&ps, 0, sizeof(ps)); + ps.header.opcode = CMD_PIPELINE_SELECT; + ps.header.pipeline_select = 0; + BRW_BATCH_STRUCT(brw, &ps); + } + + { + struct brw_global_depth_offset_clamp gdo; + memset(&gdo, 0, sizeof(gdo)); + + /* Disable depth offset clamping. + */ + gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.length = sizeof(gdo)/4 - 2; + gdo.depth_offset_clamp = 0.0; + + BRW_BATCH_STRUCT(brw, &gdo); + } + + + /* 0x61020000 State Instruction Pointer */ + { + struct brw_system_instruction_pointer sip; + memset(&sip, 0, sizeof(sip)); + + sip.header.opcode = CMD_STATE_INSN_POINTER; + sip.header.length = 0; + sip.bits0.pad = 0; + sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); + } + + + { + struct brw_vf_statistics vfs; + memset(&vfs, 0, sizeof(vfs)); + + vfs.opcode = CMD_VF_STATISTICS; + if (BRW_DEBUG & DEBUG_STATS) + vfs.statistics_enable = 1; + + BRW_BATCH_STRUCT(brw, &vfs); + } + + + { + struct brw_polygon_stipple_offset bpso; + + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + bpso.bits0.x_offset = 0; + bpso.bits0.y_offset = 0; + + BRW_BATCH_STRUCT(brw, &bpso); + } +} + +const struct brw_tracked_state brw_invarient_state = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_invarient_state +}; + +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. + */ +static void upload_state_base_address( struct brw_context *brw ) +{ + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, + PIPE_BUFFER_USAGE_GPU_READ, + 1); /* General state base address */ + OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, + PIPE_BUFFER_USAGE_GPU_READ, + 1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); +} + + +const struct brw_tracked_state brw_state_base_address = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_state_base_address +}; diff --git a/src/gallium/drivers/i965simple/brw_reg.h b/src/gallium/drivers/i965simple/brw_reg.h new file mode 100644 index 0000000000..9e885c3b3b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_reg.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) + +/* Stalls command execution waiting for the given events to have occurred. */ +#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +/* Primitive dispatch on 830-945 */ +#define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) + +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) + +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c new file mode 100644 index 0000000000..7c83b81c85 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" +#include "brw_state.h" +#include "tgsi/util/tgsi_parse.h" + + +static void compile_sf_prog( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + struct brw_sf_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.key = *key; + + + c.nr_attrs = c.key.vp_output_count; + c.nr_attr_regs = (c.nr_attrs+1)/2; + + c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ + c.nr_setup_regs = (c.nr_setup_attrs+1)/2; + + c.prog_data.urb_read_length = c.nr_attr_regs; + c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + + /* Which primitive? Or all three? + */ + switch (key->primitive) { + case SF_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c ); + break; + case SF_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c ); + break; + case SF_POINTS: + c.nr_verts = 1; + brw_emit_point_setup( &c ); + break; + + case SF_UNFILLED_TRIS: + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->sf.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_SF_PROG], + key, sizeof(*key), + &brw->sf.prog_data, + &brw->sf.prog_gs_offset); +} + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_sf_prog( struct brw_context *brw ) +{ + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct brw_sf_prog_key key; + struct tgsi_parse_context parse; + int i, done = 0; + + + memset(&key, 0, sizeof(key)); + + /* Populate the key, noting state dependencies: + */ + /* CACHE_NEW_VS_PROG */ + key.vp_output_count = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_FS */ + key.fp_input_count = brw->attribs.FragmentProgram->info.nr_regs[TGSI_FILE_INPUT]; + + + /* BRW_NEW_REDUCED_PRIMITIVE */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_TRIANGLES: +// if (key.attrs & (1<program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + int interp_mode = parse.FullToken.FullDeclaration.Interpolation.Interpolate; + //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; + //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + + debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); + + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + for (i = first; i <= last; i++) + key.const_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_LINEAR: + for (i = first; i <= last; i++) + key.linear_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + for (i = first; i <= last; i++) + key.persp_mask |= (1 << i); + break; + default: + break; + } + + /* Also need stuff for flat shading, twosided color. + */ + + } + break; + default: + done = 1; + break; + } + } + + /* Hack: Adjust for position. Optimize away when not required (ie + * for perspective interpolation). + */ + key.persp_mask <<= 1; + key.linear_mask <<= 1; + key.linear_mask |= 1; + key.const_mask <<= 1; + + debug_printf("key.persp_mask: %x\n", key.persp_mask); + debug_printf("key.linear_mask: %x\n", key.linear_mask); + debug_printf("key.const_mask: %x\n", key.const_mask); + + +// key.do_point_sprite = brw->attribs.Point->PointSprite; +// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + +// key.do_flat_shading = (brw->attribs.Raster->flatshade); +// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + +// if (key.do_twoside_color) +// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); + + + if (!search_cache(brw, &key)) + compile_sf_prog( brw, &key ); +} + + +const struct brw_tracked_state brw_sf_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = upload_sf_prog +}; + + + +#if 0 +/* Build a struct like the one we'd like the state tracker to pass to + * us. + */ +static void update_sf_linkage( struct brw_context *brw ) +{ + const struct brw_vertex_program *vs = brw->attribs.VertexProgram; + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct pipe_setup_linkage state; + struct tgsi_parse_context parse; + + int i, j; + int nr_vp_outputs = 0; + int done = 0; + + struct { + unsigned semantic:8; + unsigned semantic_index:16; + } fp_semantic[32], vp_semantic[32]; + + memset(&state, 0, sizeof(state)); + + state.fp_input_count = 0; + + + + + + + assert(state.fp_input_count == fs->program.num_inputs); + + + /* Then scan vp outputs + */ + done = 0; + tgsi_parse_init( &parse, vs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + + for (i = first; i < last; i++) { + vp_semantic[i].semantic = + parse.FullToken.FullDeclaration.Semantic.SemanticName; + vp_semantic[i].semantic_index = + parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + } + + assert(last > nr_vp_outputs); + nr_vp_outputs = last; + } + break; + default: + done = 1; + break; + } + } + + + /* Now match based on semantic information. + */ + for (i = 0; i< state.fp_input_count; i++) { + for (j = 0; j < nr_vp_outputs; j++) { + if (fp_semantic[i].semantic == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].vp_output = j; + } + } + if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { + for (j = 0; j < nr_vp_outputs; j++) { + if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].bf_vp_output = j; + } + } + } + } + + if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { + brw->sf.linkage = state; + brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; + } +} + + +const struct brw_tracked_state brw_sf_linkage = { + .dirty = { + .brw = (BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = update_sf_linkage +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf.h b/src/gallium/drivers/i965simple/brw_sf.h new file mode 100644 index 0000000000..b7ada47560 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.h @@ -0,0 +1,122 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_SF_H +#define BRW_SF_H + +#include "brw_context.h" +#include "brw_eu.h" + + +#define SF_POINTS 0 +#define SF_LINES 1 +#define SF_TRIANGLES 2 +#define SF_UNFILLED_TRIS 3 + + + +struct brw_sf_prog_key { + unsigned vp_output_count:5; + unsigned fp_input_count:5; + + unsigned primitive:2; + unsigned do_twoside_color:1; + unsigned do_flat_shading:1; + unsigned frontface_ccw:1; + unsigned do_point_sprite:1; + + /* Interpolation masks; + */ + unsigned linear_mask; + unsigned persp_mask; + unsigned const_mask; + + +// int SpriteOrigin; +}; + +struct brw_sf_point_tex { + boolean CoordReplace; +}; + +struct brw_sf_compile { + struct brw_compile func; + struct brw_sf_prog_key key; + struct brw_sf_prog_data prog_data; + + struct brw_reg pv; + struct brw_reg det; + struct brw_reg dx0; + struct brw_reg dx2; + struct brw_reg dy0; + struct brw_reg dy2; + + /* z and 1/w passed in seperately: + */ + struct brw_reg z[3]; + struct brw_reg inv_w[3]; + + /* The vertices: + */ + struct brw_reg vert[3]; + + /* Temporaries, allocated after last vertex reg. + */ + struct brw_reg inv_det; + struct brw_reg a1_sub_a0; + struct brw_reg a2_sub_a0; + struct brw_reg tmp; + + struct brw_reg m1Cx; + struct brw_reg m2Cy; + struct brw_reg m3C0; + + unsigned nr_verts; + unsigned nr_attrs; + unsigned nr_attr_regs; + unsigned nr_setup_attrs; + unsigned nr_setup_regs; +#if 0 + ubyte attr_to_idx[VERT_RESULT_MAX]; + ubyte idx_to_attr[VERT_RESULT_MAX]; + struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; +#endif +}; + + +void brw_emit_tri_setup( struct brw_sf_compile *c ); +void brw_emit_line_setup( struct brw_sf_compile *c ); +void brw_emit_point_setup( struct brw_sf_compile *c ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c ); +void brw_emit_anyprim_setup( struct brw_sf_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf_emit.c b/src/gallium/drivers/i965simple/brw_sf_emit.c new file mode 100644 index 0000000000..78d6fa5e9e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_emit.c @@ -0,0 +1,382 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" + + + +/*********************************************************************** + * Triangle setup. + */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + unsigned reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. + */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + +#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ + FRAG_BIT_COL0 | \ + FRAG_BIT_COL1) + +static boolean calculate_masks( struct brw_sf_compile *c, + unsigned reg, + ushort *pc, + ushort *pc_persp, + ushort *pc_linear) +{ + boolean is_last_attr = (reg == c->nr_setup_regs - 1); + unsigned persp_mask = c->key.persp_mask; + unsigned linear_mask = c->key.linear_mask; + + debug_printf("persp_mask: %x\n", persp_mask); + debug_printf("linear_mask: %x\n", linear_mask); + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << (reg*2))) + *pc_persp = 0xf; + + if (linear_mask & (1 << (reg*2))) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << (reg*2+1))) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << (reg*2+1))) + *pc_linear |= 0xf0; + } + + debug_printf("pc: %x\n", *pc); + debug_printf("pc_persp: %x\n", *pc_persp); + debug_printf("pc_linear: %x\n", *pc_linear); + + + return is_last_attr; +} + + + +void brw_emit_tri_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + debug_printf("%s START ==============\n", __FUNCTION__); + + c->nr_verts = 3; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + ushort pc = 0, pc_persp = 0, pc_linear = 0; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } + + debug_printf("%s DONE ==============\n", __FUNCTION__); + +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + + c->nr_verts = 2; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) + */ +void brw_emit_point_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + c->nr_verts = 1; + alloc_regs(c); + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c new file mode 100644 index 0000000000..9acd3ea61b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -0,0 +1,180 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "pipe/p_util.h" + +static void upload_sf_vp(struct brw_context *brw) +{ + struct brw_sf_viewport sfv; + + memset(&sfv, 0, sizeof(sfv)); + + + /* BRW_NEW_VIEWPORT */ + { + const float *scale = brw->attribs.Viewport.scale; + const float *trans = brw->attribs.Viewport.translate; + + sfv.viewport.m00 = scale[0]; + sfv.viewport.m11 = scale[1]; + sfv.viewport.m22 = scale[2]; + sfv.viewport.m30 = trans[0]; + sfv.viewport.m31 = trans[1]; + sfv.viewport.m32 = trans[2]; + } + + /* _NEW_SCISSOR */ + sfv.scissor.xmin = brw->attribs.Scissor.minx; + sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1; + sfv.scissor.ymin = brw->attribs.Scissor.miny; + sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1; + + brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); +} + +const struct brw_tracked_state brw_sf_vp = { + .dirty = { + .brw = (BRW_NEW_SCISSOR | + BRW_NEW_VIEWPORT), + .cache = 0 + }, + .update = upload_sf_vp +}; + +static void upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_state sf; + memset(&sf, 0, sizeof(sf)); + + /* CACHE_NEW_SF_PROG */ + sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; + sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; + sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf.thread3.dispatch_grf_start_reg = 3; + sf.thread3.urb_entry_read_offset = 1; + + /* BRW_NEW_URB_FENCE */ + sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; + sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; + sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + sf.thread4.max_threads = 0; + + if (BRW_DEBUG & DEBUG_STATS) + sf.thread4.stats_enable = 1; + + /* CACHE_NEW_SF_VP */ + sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; + sf.sf5.viewport_transform = 1; + + /* BRW_NEW_RASTER */ + if (brw->attribs.Raster->scissor) + sf.sf6.scissor = 1; + +#if 0 + if (brw->attribs.Polygon->FrontFace == GL_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + + if (brw->attribs.Polygon->CullFlag) { + switch (brw->attribs.Polygon->CullFaceMode) { + case GL_FRONT: + sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + break; + case GL_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BACK; + break; + case GL_FRONT_AND_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + default: + assert(0); + break; + } + } + else + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#else + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#endif + + sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); + + sf.sf6.line_endcap_aa_region_width = 1; + if (brw->attribs.Raster->line_smooth) + sf.sf6.aa_enable = 1; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; + + sf.sf6.point_rast_rule = 1; /* opengl conventions */ + + sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; + sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); + sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex; + + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + */ + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + sf.sf7.line_last_pixel_enable = 0; + + /* Set bias for OpenGL rasterization rules: + */ + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + + brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); +} + + +const struct brw_tracked_state brw_sf_unit = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_URB_FENCE), + .cache = (CACHE_NEW_SF_VP | + CACHE_NEW_SF_PROG) + }, + .update = upload_sf_unit +}; + + diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c new file mode 100644 index 0000000000..431b45466a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -0,0 +1,49 @@ + +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + + + + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ) +{ + struct tgsi_parse_context parse; + int done = 0; + + tgsi_parse_init( &parse, tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned last = decl->u.DeclarationRange.Last; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + // Broken by crazy wpos init: + //assert( info->nr_regs[decl->Declaration.File] <= last); + + info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], + last+1); + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c new file mode 100644 index 0000000000..95dfce88e4 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -0,0 +1,424 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin + * Keith Whitwell + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_dump.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Samplers[unit] = sampler; + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_fp->program = *shader; + brw_fp->id = brw_context(pipe)->program_id++; + + brw_shader_info(shader->tokens, + &brw_fp->info); + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_vp->program = *shader; + brw_vp->id = brw_context(pipe)->program_id++; + brw_shader_info(shader->tokens, + &brw_vp->info); + + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffer( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer ) +{ + struct brw_context *brw = brw_context(pipe); + brw->vb.vbo_array[index] = buffer; +} + +static void brw_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *element) +{ + /* flush ? */ + struct brw_context *brw = brw_context(pipe); + + assert(index < PIPE_ATTRIB_MAX); + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = element->src_offset; + el.ve0.src_format = brw_translate_surface_format(element->src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = element->vertex_buffer_index; + + el.ve1.dst_offset = index * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (element->nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[index] = el; +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_texture(struct pipe_context *pipe, + unsigned unit, + struct pipe_texture *texture) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Texture[unit] = (struct brw_texture*)texture; /* ptr, not struct */ + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + brw->pipe.bind_sampler_state = brw_bind_sampler_state; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_texture = brw_set_sampler_texture; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; + brw->pipe.set_vertex_element = brw_set_vertex_element; +} diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h new file mode 100644 index 0000000000..258e9a556e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.h @@ -0,0 +1,158 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_STATE_H +#define BRW_STATE_H + +#include "brw_context.h" +#include "brw_winsys.h" + + +const struct brw_tracked_state brw_blend_constant_color; +const struct brw_tracked_state brw_cc_unit; +const struct brw_tracked_state brw_cc_vp; +const struct brw_tracked_state brw_clip_prog; +const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_constant_buffer_state; +const struct brw_tracked_state brw_constant_buffer; +const struct brw_tracked_state brw_curbe_offsets; +const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_gs_prog; +const struct brw_tracked_state brw_gs_unit; +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_pipelined_state_pointers; +const struct brw_tracked_state brw_binding_table_pointers; +const struct brw_tracked_state brw_depthbuffer; +const struct brw_tracked_state brw_polygon_stipple_offset; +const struct brw_tracked_state brw_polygon_stipple; +const struct brw_tracked_state brw_program_parameters; +const struct brw_tracked_state brw_recalculate_urb_fence; +const struct brw_tracked_state brw_sf_prog; +const struct brw_tracked_state brw_sf_unit; +const struct brw_tracked_state brw_sf_vp; +const struct brw_tracked_state brw_state_base_address; +const struct brw_tracked_state brw_urb_fence; +const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_prog; +const struct brw_tracked_state brw_vs_unit; +const struct brw_tracked_state brw_wm_prog; +const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_unit; + +const struct brw_tracked_state brw_psp_urb_cbs; + +const struct brw_tracked_state brw_active_vertprog; +const struct brw_tracked_state brw_tnl_vertprog; +const struct brw_tracked_state brw_pipe_control; + +const struct brw_tracked_state brw_clear_surface_cache; +const struct brw_tracked_state brw_clear_batch_cache; + +/*********************************************************************** + * brw_state_cache.c + */ +unsigned brw_cache_data(struct brw_cache *cache, + const void *data ); + +unsigned brw_cache_data_sz(struct brw_cache *cache, + const void *data, + unsigned data_sz); + +unsigned brw_upload_cache( struct brw_cache *cache, + const void *key, + unsigned key_sz, + const void *data, + unsigned data_sz, + const void *aux, + void *aux_return ); + +boolean brw_search_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + void *aux_return, + unsigned *offset_return); + +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); + +static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw, + enum brw_cache_id id) +{ + return brw->cache[id].pool->buffer; +} + +/*********************************************************************** + * brw_state_batch.c + */ +#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) + +boolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + unsigned sz ); + +void brw_destroy_batch_cache( struct brw_context *brw ); + + +/*********************************************************************** + * brw_state_pool.c + */ +void brw_init_pools( struct brw_context *brw ); +void brw_destroy_pools( struct brw_context *brw ); + +boolean brw_pool_alloc( struct brw_mem_pool *pool, + unsigned size, + unsigned alignment, + unsigned *offset_return); + +void brw_pool_fence( struct brw_context *brw, + struct brw_mem_pool *pool, + unsigned fence ); + + +void brw_pool_check_wrap( struct brw_context *brw, + struct brw_mem_pool *pool ); + +void brw_clear_all_caches( struct brw_context *brw ); +void brw_invalidate_pools( struct brw_context *brw ); +void brw_clear_batch_cache_flush( struct brw_context *brw ); + + +/* brw_shader_info.c + */ + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c new file mode 100644 index 0000000000..35db76b594 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -0,0 +1,113 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_state.h" +#include "brw_winsys.h" + +#include "pipe/p_util.h" + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. + */ +boolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + unsigned sz ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + struct header *newheader = (struct header *)data; + + if (brw->emit_state_always) { + brw_batchbuffer_data(brw->winsys, data, sz); + return TRUE; + } + + while (item) { + if (item->header->opcode == newheader->opcode) { + if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) + return FALSE; + if (item->sz != sz) { + FREE(item->header); + item->header = MALLOC(sz); + item->sz = sz; + } + goto emit; + } + item = item->next; + } + + assert(!item); + item = CALLOC_STRUCT(brw_cached_batch_item); + item->header = MALLOC(sz); + item->sz = sz; + item->next = brw->cached_batch_items; + brw->cached_batch_items = item; + +emit: + memcpy(item->header, newheader, sz); + brw_batchbuffer_data(brw->winsys, data, sz); + return TRUE; +} + +static void clear_batch_cache( struct brw_context *brw ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + + while (item) { + struct brw_cached_batch_item *next = item->next; + free((void *)item->header); + free(item); + item = next; + } + + brw->cached_batch_items = NULL; + + + brw_clear_all_caches(brw); + + brw_invalidate_pools(brw); +} + +void brw_clear_batch_cache_flush( struct brw_context *brw ) +{ + clear_batch_cache(brw); + +/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ + + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + +void brw_destroy_batch_cache( struct brw_context *brw ) +{ + clear_batch_cache(brw); +} diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c new file mode 100644 index 0000000000..b3a5124461 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_cache.c @@ -0,0 +1,443 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_state.h" + +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + +#include "pipe/p_util.h" + + + +/*********************************************************************** + * Check cache for uploaded version of struct, else upload new one. + * Fail when memory is exhausted. + * + * XXX: FIXME: Currently search is so slow it would be quicker to + * regenerate the data every time... + */ + +static unsigned hash_key( const void *key, unsigned key_size ) +{ + unsigned *ikey = (unsigned *)key; + unsigned hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} + +static struct brw_cache_item *search_cache( struct brw_cache *cache, + unsigned hash, + const void *key, + unsigned key_size) +{ + struct brw_cache_item *c; + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->hash == hash && + c->key_size == key_size && + memcmp(c->key, key, key_size) == 0) + return c; + } + + return NULL; +} + + +static void rehash( struct brw_cache *cache ) +{ + struct brw_cache_item **items; + struct brw_cache_item *c, *next; + unsigned size, i; + + size = cache->size * 3; + items = (struct brw_cache_item**) MALLOC(size * sizeof(*items)); + memset(items, 0, size * sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + FREE(cache->items); + cache->items = items; + cache->size = size; +} + + +boolean brw_search_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + void *aux_return, + unsigned *offset_return) +{ + struct brw_cache_item *item; + unsigned addr = 0; + unsigned hash = hash_key(key, key_size); + + item = search_cache(cache, hash, key, key_size); + + if (item) { + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + *offset_return = addr = item->offset; + } + + if (item == NULL || addr != cache->last_addr) { + cache->brw->state.dirty.cache |= 1<id; + cache->last_addr = addr; + } + + return item != NULL; +} + +unsigned brw_upload_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + const void *data, + unsigned data_size, + const void *aux, + void *aux_return ) +{ + unsigned offset; + struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + unsigned hash = hash_key(key, key_size); + void *tmp = MALLOC(key_size + cache->aux_size); + + if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { + /* Should not be possible: + */ + debug_printf("brw_pool_alloc failed\n"); + exit(1); + } + + memcpy(tmp, key, key_size); + + if (cache->aux_size) + memcpy(tmp+key_size, aux, cache->aux_size); + + item->key = tmp; + item->hash = hash; + item->key_size = key_size; + item->offset = offset; + item->data_size = data_size; + + if (++cache->n_items > cache->size * 1.5) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + + if (aux_return) { + assert(cache->aux_size); + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + } + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n", + cache->name, + data_size, + (void*)cache->pool->buffer, + offset); + + /* Copy data to the buffer: + */ + cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys, + cache->pool->buffer, + offset, + data_size, + data, + cache->id); + + cache->brw->state.dirty.cache |= 1<id; + cache->last_addr = offset; + + return offset; +} + +/* This doesn't really work with aux data. Use search/upload instead + */ +unsigned brw_cache_data_sz(struct brw_cache *cache, + const void *data, + unsigned data_size) +{ + unsigned addr; + + if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { + addr = brw_upload_cache(cache, + data, data_size, + data, data_size, + NULL, NULL); + } + + return addr; +} + +unsigned brw_cache_data(struct brw_cache *cache, + const void *data) +{ + return brw_cache_data_sz(cache, data, cache->key_size); +} + +enum pool_type { + DW_SURFACE_STATE, + DW_GENERAL_STATE +}; + +static void brw_init_cache( struct brw_context *brw, + const char *name, + unsigned id, + unsigned key_size, + unsigned aux_size, + enum pool_type pool_type) +{ + struct brw_cache *cache = &brw->cache[id]; + cache->brw = brw; + cache->id = id; + cache->name = name; + cache->items = NULL; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + CALLOC(cache->size, sizeof(struct brw_cache_item)); + + + cache->key_size = key_size; + cache->aux_size = aux_size; + switch (pool_type) { + case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; + case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; + default: assert(0); break; + } +} + +void brw_init_caches( struct brw_context *brw ) +{ + + brw_init_cache(brw, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0, + DW_SURFACE_STATE); + + brw_init_cache(brw, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + sizeof(struct brw_surface_binding_table), + 0, + DW_SURFACE_STATE); +} + + +/* When we lose hardware context, need to invalidate the surface cache + * as these structs must be explicitly re-uploaded. They are subject + * to fixup by the memory manager as they contain absolute agp + * offsets, so we need to ensure there is a fresh version of the + * struct available to receive the fixup. + * + * XXX: Need to ensure that there aren't two versions of a surface or + * bufferobj with different backing data active in the same buffer at + * once? Otherwise the cache could confuse them. Maybe better not to + * cache at all? + * + * --> Isn't this the same as saying need to ensure batch is flushed + * before new data is uploaded to an existing buffer? We + * already try to make sure of that. + */ +static void clear_cache( struct brw_cache *cache ) +{ + struct brw_cache_item *c, *next; + unsigned i; + + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { + next = c->next; + free((void *)c->key); + free(c); + } + cache->items[i] = NULL; + } + + cache->n_items = 0; +} + +void brw_clear_all_caches( struct brw_context *brw ) +{ + int i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); + + if (brw->curbe.last_buf) { + FREE(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + } + + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + + + +void brw_destroy_caches( struct brw_context *brw ) +{ + unsigned i; + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c new file mode 100644 index 0000000000..f3174bfe0a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -0,0 +1,137 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +/** @file brw_state_pool.c + * Implements the state pool allocator. + * + * For the 965, we create two state pools for state cache entries. Objects + * will be allocated into the pools depending on which state base address + * their pointer is relative to in other 965 state. + * + * The state pools are relatively simple: As objects are allocated, increment + * the offset to allocate space. When the pool is "full" (rather, close to + * full), we reset the pool and reset the state cache entries that point into + * the pool. + */ + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "brw_context.h" +#include "brw_state.h" + +boolean brw_pool_alloc( struct brw_mem_pool *pool, + unsigned size, + unsigned alignment, + unsigned *offset_return) +{ + unsigned fixup = align(pool->offset, alignment) - pool->offset; + + size = align(size, 4); + + if (pool->offset + fixup + size >= pool->size) { + debug_printf("%s failed\n", __FUNCTION__); + assert(0); + exit(0); + } + + pool->offset += fixup; + *offset_return = pool->offset; + pool->offset += size; + + return TRUE; +} + +static +void brw_invalidate_pool( struct brw_mem_pool *pool ) +{ + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("\n\n\n %s \n\n\n", __FUNCTION__); + + pool->offset = 0; + + brw_clear_all_caches(pool->brw); +} + + +static void brw_init_pool( struct brw_context *brw, + unsigned pool_id, + unsigned size ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + pool->size = size; + pool->brw = brw; + + pool->buffer = brw->pipe.winsys->buffer_create(brw->pipe.winsys, + 4096, + 0 /* DRM_BO_FLAG_MEM_TT */, + size); +} + +static void brw_destroy_pool( struct brw_context *brw, + unsigned pool_id ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + pipe_buffer_reference( pool->brw->pipe.winsys, + &pool->buffer, + NULL ); +} + + +void brw_pool_check_wrap( struct brw_context *brw, + struct brw_mem_pool *pool ) +{ + if (pool->offset > (pool->size * 3) / 4) { + brw->state.dirty.brw |= BRW_NEW_SCENE; + } + +} + +void brw_init_pools( struct brw_context *brw ) +{ + brw_init_pool(brw, BRW_GS_POOL, 0x80000); + brw_init_pool(brw, BRW_SS_POOL, 0x80000); +} + +void brw_destroy_pools( struct brw_context *brw ) +{ + brw_destroy_pool(brw, BRW_GS_POOL); + brw_destroy_pool(brw, BRW_SS_POOL); +} + + +void brw_invalidate_pools( struct brw_context *brw ) +{ + brw_invalidate_pool(&brw->pool[BRW_GS_POOL]); + brw_invalidate_pool(&brw->pool[BRW_SS_POOL]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c new file mode 100644 index 0000000000..e727601e1e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -0,0 +1,202 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_state.h" + +#include "pipe/p_util.h" + +/* This is used to initialize brw->state.atoms[]. We could use this + * list directly except for a single atom, brw_constant_buffer, which + * has a .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. + */ +const struct brw_tracked_state *atoms[] = +{ + &brw_vs_prog, + &brw_gs_prog, + &brw_clip_prog, + &brw_sf_prog, + &brw_wm_prog, + + /* Once all the programs are done, we know how large urb entry + * sizes need to be and can decide if we need to change the urb + * layout. + */ + &brw_curbe_offsets, + &brw_recalculate_urb_fence, + + + &brw_cc_vp, + &brw_cc_unit, + + &brw_wm_surfaces, /* must do before samplers */ + &brw_wm_samplers, + + &brw_wm_unit, + &brw_sf_vp, + &brw_sf_unit, + &brw_vs_unit, /* always required, enabled or not */ + &brw_clip_unit, + &brw_gs_unit, + + /* Command packets: + */ + &brw_invarient_state, + &brw_state_base_address, + &brw_pipe_control, + + &brw_binding_table_pointers, + &brw_blend_constant_color, + + &brw_drawing_rect, + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_line_stipple, + + &brw_psp_urb_cbs, + + &brw_constant_buffer +}; + + +void brw_init_state( struct brw_context *brw ) +{ + brw_init_pools(brw); + brw_init_caches(brw); + + brw->state.dirty.brw = ~0; + brw->emit_state_always = 0; +} + + +void brw_destroy_state( struct brw_context *brw ) +{ + brw_destroy_caches(brw); + brw_destroy_batch_cache(brw); + brw_destroy_pools(brw); +} + +/*********************************************************************** + */ + +static boolean check_state( const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + return ((a->brw & b->brw) || + (a->cache & b->cache)); +} + +static void accumulate_state( struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + a->brw |= b->brw; + a->cache |= b->cache; +} + + +static void xor_states( struct brw_state_flags *result, + const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + result->brw = a->brw ^ b->brw; + result->cache = a->cache ^ b->cache; +} + + +/*********************************************************************** + * Emit all state: + */ +void brw_validate_state( struct brw_context *brw ) +{ + struct brw_state_flags *state = &brw->state.dirty; + unsigned i; + + if (brw->emit_state_always) + state->brw |= ~0; + + if (state->cache == 0 && + state->brw == 0) + return; + + if (brw->state.dirty.brw & BRW_NEW_SCENE) + brw_clear_batch_cache_flush(brw); + + if (BRW_DEBUG) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. + */ + struct brw_state_flags examined, prev; + memset(&examined, 0, sizeof(examined)); + prev = *state; + + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + struct brw_state_flags generated; + + assert(atom->dirty.brw || + atom->dirty.cache); + assert(atom->update); + + if (check_state(state, &atom->dirty)) { + atom->update( brw ); + } + + accumulate_state(&examined, &atom->dirty); + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, &prev, state); + assert(!check_state(&examined, &generated)); + prev = *state; + } + } + else { + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + + assert(atom->dirty.brw || + atom->dirty.cache); + assert(atom->update); + + if (check_state(state, &atom->dirty)) + atom->update( brw ); + } + } + + memset(state, 0, sizeof(*state)); +} diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c new file mode 100644 index 0000000000..29a41ed1e9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_strings.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "brw_context.h" +#include "brw_reg.h" + + +static const char *brw_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char *brw_get_name( struct pipe_context *pipe ) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_context(pipe)->pci_id) { + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; + break; + case PCI_CHIP_I965_GM: + chipset = "Intel(R) 965GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "Intel(R) 965GME/GLE"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "pipe/i965 (chipset: %s)", chipset); + return buffer; +} + + +void +brw_init_string_functions(struct brw_context *brw) +{ + brw->pipe.get_name = brw_get_name; + brw->pipe.get_vendor = brw_get_vendor; +} diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h new file mode 100644 index 0000000000..bbb087e95d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_structs.h @@ -0,0 +1,1348 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include "pipe/p_compiler.h" + +/* Command packets: + */ +struct header +{ + unsigned length:16; + unsigned opcode:16; +}; + + +union header_union +{ + struct header bits; + unsigned dword; +}; + +struct brw_3d_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:3; + unsigned wc_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned operation:2; + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } dest; + + unsigned dword2; + unsigned dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + unsigned length:8; + unsigned pad:2; + unsigned topology:5; + unsigned indexed:1; + unsigned opcode:16; + } header; + + unsigned verts_per_instance; + unsigned start_vert_location; + unsigned instance_count; + unsigned start_instance_location; + unsigned base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush +{ + unsigned flags:4; + unsigned pad:12; + unsigned opcode:16; +}; + +struct brw_vf_statistics +{ + unsigned statistics_enable:1; + unsigned pad:15; + unsigned opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct header header; + unsigned vs; + unsigned gs; + unsigned clp; + unsigned sf; + unsigned wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + float blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + unsigned pitch:18; + unsigned format:3; + unsigned pad:4; + unsigned depth_offset_disable:1; + unsigned tile_walk:1; + unsigned tiled_surface:1; + unsigned pad2:1; + unsigned surface_type:3; + } bits; + unsigned dword; + } dword1; + + unsigned dword2_base_addr; + + union { + struct { + unsigned pad:1; + unsigned mipmap_layout:1; + unsigned lod:4; + unsigned width:13; + unsigned height:13; + } bits; + unsigned dword; + } dword3; + + union { + struct { + unsigned pad:12; + unsigned min_array_element:9; + unsigned depth:11; + } bits; + unsigned dword; + } dword4; +}; + +struct brw_drawrect +{ + struct header header; + unsigned xmin:16; + unsigned ymin:16; + unsigned xmax:16; + unsigned ymax:16; + unsigned xorg:16; + unsigned yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + float depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + unsigned length:8; + unsigned index_format:2; + unsigned cut_index_enable:1; + unsigned pad:5; + unsigned opcode:16; + } bits; + unsigned dword; + + } header; + + unsigned buffer_start; + unsigned buffer_end; +}; + + +struct brw_line_stipple +{ + struct header header; + + struct + { + unsigned pattern:16; + unsigned pad:16; + } bits0; + + struct + { + unsigned repeat_count:9; + unsigned pad:7; + unsigned inverse_repeat_count:16; + } bits1; +}; + + +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + unsigned pad:5; + unsigned offset:27; + } vs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } gs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } clp; + + struct + { + unsigned pad:5; + unsigned offset:27; + } sf; + + struct + { + unsigned pad:5; + unsigned offset:27; + } wm; + + struct + { + unsigned pad:5; + unsigned offset:27; /* KW: check me! */ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + unsigned y_offset:5; + unsigned pad:3; + unsigned x_offset:5; + unsigned pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + unsigned stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + unsigned pipeline_select:1; + unsigned pad:15; + unsigned opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:2; + unsigned instruction_state_cache_flush_enable:1; + unsigned write_cache_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned post_sync_operation:2; + + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } bits1; + + unsigned data0; + unsigned data1; +}; + + +struct brw_urb_fence +{ + struct + { + unsigned length:8; + unsigned vs_realloc:1; + unsigned gs_realloc:1; + unsigned clp_realloc:1; + unsigned sf_realloc:1; + unsigned vfe_realloc:1; + unsigned cs_realloc:1; + unsigned pad:2; + unsigned opcode:16; + } header; + + struct + { + unsigned vs_fence:10; + unsigned gs_fence:10; + unsigned clp_fence:10; + unsigned pad:2; + } bits0; + + struct + { + unsigned sf_fence:10; + unsigned vf_fence:10; + unsigned cs_fence:10; + unsigned pad:2; + } bits1; +}; + +struct brw_constant_buffer_state /* previously brw_command_streamer */ +{ + struct header header; + + struct + { + unsigned nr_urb_entries:3; + unsigned pad:1; + unsigned urb_entry_size:5; + unsigned pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + unsigned length:8; + unsigned valid:1; + unsigned pad:7; + unsigned opcode:16; + } header; + + struct + { + unsigned buffer_length:6; + unsigned buffer_address:26; + } bits0; +}; + +struct brw_state_base_address +{ + struct header header; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned general_state_address:27; + } bits0; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned surface_state_address:27; + } bits1; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned indirect_object_state_address:27; + } bits2; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned general_state_upper_bound:20; + } bits3; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + unsigned prefetch_count:3; + unsigned pad:3; + unsigned prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + unsigned pad:4; + unsigned system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + unsigned pad0:1; + unsigned grf_reg_count:3; + unsigned pad1:2; + unsigned kernel_start_pointer:26; +}; + +struct thread1 +{ + unsigned ext_halt_exception_enable:1; + unsigned sw_exception_enable:1; + unsigned mask_stack_exception_enable:1; + unsigned timeout_exception_enable:1; + unsigned illegal_op_exception_enable:1; + unsigned pad0:3; + unsigned depth_coef_urb_read_offset:6; /* WM only */ + unsigned pad1:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad3:5; + unsigned single_program_flow:1; +}; + +struct thread2 +{ + unsigned per_thread_scratch_space:4; + unsigned pad0:6; + unsigned scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + unsigned dispatch_grf_start_reg:4; + unsigned urb_entry_read_offset:6; + unsigned pad0:1; + unsigned urb_entry_read_length:6; + unsigned pad1:1; + unsigned const_urb_entry_read_offset:6; + unsigned pad2:1; + unsigned const_urb_entry_read_length:6; + unsigned pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct + { + unsigned pad0:7; + unsigned sw_exception_enable:1; + unsigned pad1:3; + unsigned mask_stack_exception_enable:1; + unsigned pad2:1; + unsigned illegal_op_exception_enable:1; + unsigned pad3:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad4:5; + unsigned single_program_flow:1; + } thread1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:9; + unsigned gs_output_stats:1; /* not always */ + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; /* may be less */ + unsigned pad3:6; + } thread4; + + struct + { + unsigned pad0:13; + unsigned clip_mode:3; + unsigned userclip_enable_flags:8; + unsigned userclip_must_clip:1; + unsigned pad1:1; + unsigned guard_band_enable:1; + unsigned viewport_z_clip_enable:1; + unsigned viewport_xy_clip_enable:1; + unsigned vertex_position_space:1; + unsigned api_mode:1; + unsigned pad2:1; + } clip5; + + struct + { + unsigned pad0:5; + unsigned clipper_viewport_state_ptr:27; + } clip6; + + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ + struct + { + unsigned pad0:3; + unsigned bf_stencil_pass_depth_pass_op:3; + unsigned bf_stencil_pass_depth_fail_op:3; + unsigned bf_stencil_fail_op:3; + unsigned bf_stencil_func:3; + unsigned bf_stencil_enable:1; + unsigned pad1:2; + unsigned stencil_write_enable:1; + unsigned stencil_pass_depth_pass_op:3; + unsigned stencil_pass_depth_fail_op:3; + unsigned stencil_fail_op:3; + unsigned stencil_func:3; + unsigned stencil_enable:1; + } cc0; + + + struct + { + unsigned bf_stencil_ref:8; + unsigned stencil_write_mask:8; + unsigned stencil_test_mask:8; + unsigned stencil_ref:8; + } cc1; + + + struct + { + unsigned logicop_enable:1; + unsigned pad0:10; + unsigned depth_write_enable:1; + unsigned depth_test_function:3; + unsigned depth_test:1; + unsigned bf_stencil_write_mask:8; + unsigned bf_stencil_test_mask:8; + } cc2; + + + struct + { + unsigned pad0:8; + unsigned alpha_test_func:3; + unsigned alpha_test:1; + unsigned blend_enable:1; + unsigned ia_blend_enable:1; + unsigned pad1:1; + unsigned alpha_test_format:1; + unsigned pad2:16; + } cc3; + + struct + { + unsigned pad0:5; + unsigned cc_viewport_state_offset:27; + } cc4; + + struct + { + unsigned pad0:2; + unsigned ia_dest_blend_factor:5; + unsigned ia_src_blend_factor:5; + unsigned ia_blend_function:3; + unsigned statistics_enable:1; + unsigned logicop_func:4; + unsigned pad1:11; + unsigned dither_enable:1; + } cc5; + + struct + { + unsigned clamp_post_alpha_blend:1; + unsigned clamp_pre_alpha_blend:1; + unsigned clamp_range:2; + unsigned pad0:11; + unsigned y_dither_offset:2; + unsigned x_dither_offset:2; + unsigned dest_blend_factor:5; + unsigned src_blend_factor:5; + unsigned blend_function:3; + } cc6; + + struct { + union { + float f; + ubyte ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:6; + unsigned pad3:1; + } thread4; + + struct + { + unsigned front_winding:1; + unsigned viewport_transform:1; + unsigned pad0:3; + unsigned sf_viewport_state_offset:27; + } sf5; + + struct + { + unsigned pad0:9; + unsigned dest_org_vbias:4; + unsigned dest_org_hbias:4; + unsigned scissor:1; + unsigned disable_2x2_trifilter:1; + unsigned disable_zero_pix_trifilter:1; + unsigned point_rast_rule:2; + unsigned line_endcap_aa_region_width:2; + unsigned line_width:4; + unsigned fast_scissor_disable:1; + unsigned cull_mode:2; + unsigned aa_enable:1; + } sf6; + + struct + { + unsigned point_size:11; + unsigned use_point_size_state:1; + unsigned subpixel_precision:1; + unsigned sprite_point:1; + unsigned pad0:11; + unsigned trifan_pv:2; + unsigned linestrip_pv:2; + unsigned tristrip_pv:2; + unsigned line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; + unsigned pad3:6; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } gs5; + + + struct + { + unsigned max_vp_index:4; + unsigned pad0:26; + unsigned reorder_enable:1; + unsigned pad1:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:4; + unsigned pad3:3; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } vs5; + + struct + { + unsigned vs_enable:1; + unsigned vert_cache_disable:1; + unsigned pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned stats_enable:1; + unsigned pad0:1; + unsigned sampler_count:3; + unsigned sampler_state_pointer:27; + } wm4; + + struct + { + unsigned enable_8_pix:1; + unsigned enable_16_pix:1; + unsigned enable_32_pix:1; + unsigned pad0:7; + unsigned legacy_global_depth_bias:1; + unsigned line_stipple:1; + unsigned depth_offset:1; + unsigned polygon_stipple:1; + unsigned line_aa_region_width:2; + unsigned line_endcap_aa_region_width:2; + unsigned early_depth_test:1; + unsigned thread_dispatch_enable:1; + unsigned program_uses_depth:1; + unsigned program_computes_depth:1; + unsigned program_uses_killpixel:1; + unsigned legacy_line_rast: 1; + unsigned pad1:1; + unsigned max_threads:6; + unsigned pad2:1; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; +}; + +struct brw_sampler_default_color { + float color[4]; +}; + +struct brw_sampler_state +{ + + struct + { + unsigned shadow_function:3; + unsigned lod_bias:11; + unsigned min_filter:3; + unsigned mag_filter:3; + unsigned mip_filter:2; + unsigned base_level:5; + unsigned pad:1; + unsigned lod_preclamp:1; + unsigned default_color_mode:1; + unsigned pad0:1; + unsigned disable:1; + } ss0; + + struct + { + unsigned r_wrap_mode:3; + unsigned t_wrap_mode:3; + unsigned s_wrap_mode:3; + unsigned pad:3; + unsigned max_lod:10; + unsigned min_lod:10; + } ss1; + + + struct + { + unsigned pad:5; + unsigned default_color_pointer:27; + } ss2; + + struct + { + unsigned pad:19; + unsigned max_aniso:3; + unsigned chroma_key_mode:1; + unsigned chroma_key_index:2; + unsigned chroma_key_enable:1; + unsigned monochrome_filter_width:3; + unsigned monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct brw_cc_viewport +{ + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport +{ + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... + */ +struct brw_surface_state +{ + struct { + unsigned cube_pos_z:1; + unsigned cube_neg_z:1; + unsigned cube_pos_y:1; + unsigned cube_neg_y:1; + unsigned cube_pos_x:1; + unsigned cube_neg_x:1; + unsigned pad:4; + unsigned mipmap_layout_mode:1; + unsigned vert_line_stride_ofs:1; + unsigned vert_line_stride:1; + unsigned color_blend:1; + unsigned writedisable_blue:1; + unsigned writedisable_green:1; + unsigned writedisable_red:1; + unsigned writedisable_alpha:1; + unsigned surface_format:9; + unsigned data_return_format:1; + unsigned pad0:1; + unsigned surface_type:3; + } ss0; + + struct { + unsigned base_addr; + } ss1; + + struct { + unsigned pad:2; + unsigned mip_count:4; + unsigned width:13; + unsigned height:13; + } ss2; + + struct { + unsigned tile_walk:1; + unsigned tiled_surface:1; + unsigned pad:1; + unsigned pitch:18; + unsigned depth:11; + } ss3; + + struct { + unsigned pad:19; + unsigned min_array_elt:9; + unsigned min_lod:4; + } ss4; +}; + + + +struct brw_vertex_buffer_state +{ + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } vb0; + + unsigned start_addr; + unsigned max_index; +#if 1 + unsigned instance_data_step_rate; /* not included for sequential/random vertices? */ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + unsigned src_offset:11; + unsigned pad:5; + unsigned src_format:9; + unsigned pad0:1; + unsigned valid:1; + unsigned vertex_buffer_index:5; + } ve0; + + struct + { + unsigned dst_offset:8; + unsigned pad:8; + unsigned vfcomponent3:4; + unsigned vfcomponent2:4; + unsigned vfcomponent1:4; + unsigned vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned pad0:2; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad0:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned pad1:2; + unsigned dest_address_mode:1; + } da16; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned pad1:2; + unsigned dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } da1; + + struct + { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } ia1; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } da16; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia16; + + } bits2; + + union + { + struct + { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad2:6; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + struct brw_urb_immediate urb; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned pixel_scoreboard_clear:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + struct { + unsigned pad:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + int d; + unsigned ud; + } bits3; +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c new file mode 100644 index 0000000000..518845e4b2 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" + + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +brw_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct brw_texture *tex = (struct brw_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->format); + assert(ps->refcount); + pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + +/* Upload data to a rectangular sub-region. Lots of choices how to do this: + * + * - memcpy by span to current destination + * - upload data as new buffer and blit + * + * Currently always memcpy. + */ +static void +brw_surface_data(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + const void *src, unsigned src_pitch, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + pipe_copy_rect(pipe_surface_map(dst) + dst->offset, + dst->cpp, dst->pitch, + dstx, dsty, width, height, src, src_pitch, srcx, srcy); + + pipe_surface_unmap(dst); +} + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +brw_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert(dst != src); + assert(dst->cpp == src->cpp); + + if (0) { + pipe_copy_rect(pipe_surface_map(dst) + dst->offset, + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src) + src->offset, + do_flip ? -src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); + } + else { + brw_copy_blit(brw_context(pipe), + do_flip, + dst->cpp, + (short) src->pitch, src->buffer, src->offset, FALSE, + (short) dst->pitch, dst->buffer, dst->offset, FALSE, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, + (short) width, (short) height, PIPE_LOGICOP_COPY); + } +} + +/* Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +static void +brw_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + if (0) { + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + switch (dst->cpp) { + case 1: { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); + } + else { + brw_fill_blit(brw_context(pipe), + dst->cpp, + (short) dst->pitch, + dst->buffer, dst->offset, FALSE, + (short) dstx, (short) dsty, + (short) width, (short) height, + value); + } +} + +void +brw_init_surface_functions(struct brw_context *brw) +{ + brw->pipe.get_tex_surface = brw_get_tex_surface; + brw->pipe.surface_copy = brw_surface_copy; + brw->pipe.surface_fill = brw_surface_fill; +} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c new file mode 100644 index 0000000000..90561f1307 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -0,0 +1,353 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +/* Code to layout images in a mipmap tree for i965. + */ + +#include "brw_tex_layout.h" + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "brw_context.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +#if 0 +unsigned intel_compressed_alignment(unsigned internalFormat) +{ + unsigned alignment = 4; + + switch (internalFormat) { + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + alignment = 8; + break; + + default: + break; + } + + return alignment; +} +#endif + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static boolean brw_miptree_layout(struct pipe_context *, struct brw_texture *); + +static void intel_miptree_set_image_offset(struct brw_texture *tex, + unsigned level, + unsigned img, + unsigned x, unsigned y) +{ + struct pipe_texture *pt = &tex->base; + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = (x + y * tex->pitch) * pt->cpp; +} + +static void intel_miptree_set_level_info(struct brw_texture *tex, + unsigned level, + unsigned nr_images, + unsigned x, unsigned y, + unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp; + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + +static void i945_miptree_layout_2d(struct brw_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned align_h = 2, align_w = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + + tex->pitch = pt->width[0]; + +#if 0 + if (pt->compressed) { + align_w = intel_compressed_alignment(pt->internal_format); + tex->pitch = ALIGN(pt->width[0], align_w); + } +#endif + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. + */ + if (pt->last_level > 0) { + unsigned mip1_width; + + if (pt->compressed) { + mip1_width = align(minify(pt->width[0]), align_w) + + align(minify(minify(pt->width[0])), align_w); + } else { + mip1_width = align(minify(pt->width[0]), align_w) + + minify(minify(pt->width[0])); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. + */ + tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->total_height = 0; + + for (level = 0; level <= pt->last_level; level++) { + unsigned img_height; + + intel_miptree_set_level_info(tex, level, 1, x, y, width, + height, 1); + + if (pt->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_height = MAX2(tex->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } +} + +static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + /* XXX: these vary depending on image format: + */ +/* int align_w = 4; */ + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_3D: { + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + unsigned level; + unsigned align_h = 2; + unsigned align_w = 4; + + tex->total_height = 0; +#if 0 + if (pt->compressed) { + align_w = intel_compressed_alignment(pt->internal_format); + pt->pitch = align(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else +#endif + { + tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp; + pack_y_pitch = align(pt->height[0], align_h); + } + + pack_x_pitch = tex->pitch; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; + int x = 0; + int y = 0; + uint q, j; + + intel_miptree_set_level_info(tex, level, nr_images, + 0, tex->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + intel_miptree_set_image_offset(tex, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_height += y; + width = minify(width); + height = minify(height); + depth = minify(depth); + + if (pt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= tex->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = align(pack_y_pitch, align_h); + } + } + + } + break; + } + + default: + i945_miptree_layout_2d(tex); + break; + } +#if 0 + PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + pt->pitch, + pt->total_height, + pt->cpp, + pt->pitch * pt->total_height * pt->cpp ); +#endif + + return TRUE; +} + + +struct pipe_texture * +brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +{ + struct brw_texture *tex = CALLOC_STRUCT(brw_texture); + + if (tex) { + tex->base = *templat; + + if (brw_miptree_layout(pipe, tex)) + tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + return &tex->base; +} + +void +brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct brw_texture *tex = (struct brw_texture *)*pt; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + free(tex->image_offset[i]); + + free(tex); + } + *pt = NULL; +} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h new file mode 100644 index 0000000000..cfd6b1ef3a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -0,0 +1,15 @@ +#ifndef BRW_TEX_LAYOUT_H +#define BRW_TEX_LAYOUT_H + +#include "pipe/p_compiler.h" + +struct pipe_context; +struct pipe_texture; + +extern struct pipe_texture * +brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat); + +extern void +brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c new file mode 100644 index 0000000000..101a4367b9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_urb.c @@ -0,0 +1,186 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +//#include "brw_state.h" +#include "brw_batch.h" +#include "brw_defines.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/* XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. + */ +static const struct { + unsigned min_nr_entries; + unsigned preferred_nr_entries; + unsigned min_entry_size; + unsigned max_entry_size; +} limits[CS+1] = { + { 8, 32, 1, 5 }, /* vs */ + { 4, 8, 1, 5 }, /* gs */ + { 6, 8, 1, 5 }, /* clp */ + { 1, 8, 1, 12 }, /* sf */ + { 1, 4, 1, 32 } /* cs */ +}; + + +static boolean check_urb_layout( struct brw_context *brw ) +{ + brw->urb.vs_start = 0; + brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; + brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; + brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; + brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; + + return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; +} + +/* Most minimal update, forces re-emit of URB fence packet after GS + * unit turned on/off. + */ +static void recalculate_urb_fence( struct brw_context *brw ) +{ + unsigned csize = brw->curbe.total_size; + unsigned vsize = brw->vs.prog_data->urb_entry_size; + unsigned sfsize = brw->sf.prog_data->urb_entry_size; + + if (csize < limits[CS].min_entry_size) + csize = limits[CS].min_entry_size; + + if (vsize < limits[VS].min_entry_size) + vsize = limits[VS].min_entry_size; + + if (sfsize < limits[SF].min_entry_size) + sfsize = limits[SF].min_entry_size; + + if (brw->urb.vsize < vsize || + brw->urb.sfsize < sfsize || + brw->urb.csize < csize || + (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || + brw->urb.sfsize > brw->urb.sfsize || + brw->urb.csize > brw->urb.csize))) { + + + brw->urb.csize = csize; + brw->urb.sfsize = sfsize; + brw->urb.vsize = vsize; + + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; + + if (!check_urb_layout(brw)) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + + brw->urb.constrained = 1; + + if (!check_urb_layout(brw)) { + /* This is impossible, given the maximal sizes of urb + * entries and the values for minimum nr of entries + * provided above. + */ + debug_printf("couldn't calculate URB layout!\n"); + exit(1); + } + + if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + debug_printf("URB CONSTRAINED\n"); + } + else + brw->urb.constrained = 0; + + if (BRW_DEBUG & DEBUG_URB) + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + brw->urb.vs_start, + brw->urb.gs_start, + brw->urb.clip_start, + brw->urb.sf_start, + brw->urb.cs_start, + 256); + + brw->state.dirty.brw |= BRW_NEW_URB_FENCE; + } +} + + +const struct brw_tracked_state brw_recalculate_urb_fence = { + .dirty = { + .brw = BRW_NEW_CURBE_OFFSETS, + .cache = (CACHE_NEW_VS_PROG | + CACHE_NEW_SF_PROG) + }, + .update = recalculate_urb_fence +}; + + + + + +void brw_upload_urb_fence(struct brw_context *brw) +{ + struct brw_urb_fence uf; + memset(&uf, 0, sizeof(uf)); + + uf.header.opcode = CMD_URB_FENCE; + uf.header.length = sizeof(uf)/4-2; + uf.header.vs_realloc = 1; + uf.header.gs_realloc = 1; + uf.header.clp_realloc = 1; + uf.header.sf_realloc = 1; + uf.header.vfe_realloc = 1; + uf.header.cs_realloc = 1; + + /* The ordering below is correct, not the layout in the + * instruction. + * + * There are 256 urb reg pairs in total. + */ + uf.bits0.vs_fence = brw->urb.gs_start; + uf.bits0.gs_fence = brw->urb.clip_start; + uf.bits0.clp_fence = brw->urb.sf_start; + uf.bits1.sf_fence = brw->urb.cs_start; + uf.bits1.cs_fence = 256; + + BRW_BATCH_STRUCT(brw, &uf); +} diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c new file mode 100644 index 0000000000..42391d7c8c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.c @@ -0,0 +1,104 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_util.h" +#include "brw_defines.h" + +#include "pipe/p_defines.h" + +unsigned brw_count_bits( unsigned val ) +{ + unsigned i; + for (i = 0; val ; val >>= 1) + if (val & 1) + i++; + return i; +} + + +unsigned brw_translate_blend_equation( int mode ) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_MIN: + return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: + return BRW_BLENDFUNCTION_MAX; + case PIPE_BLEND_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +unsigned brw_translate_blend_factor( int factor ) +{ + switch(factor) { + case PIPE_BLENDFACTOR_ZERO: + return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} diff --git a/src/gallium/drivers/i965simple/brw_util.h b/src/gallium/drivers/i965simple/brw_util.h new file mode 100644 index 0000000000..d60e5934db --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.h @@ -0,0 +1,43 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_UTIL_H +#define BRW_UTIL_H + +#include "pipe/p_state.h" + +extern unsigned brw_count_bits( unsigned val ); +extern unsigned brw_translate_blend_factor( int factor ); +extern unsigned brw_translate_blend_equation( int mode ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c new file mode 100644 index 0000000000..738c6346d5 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.c @@ -0,0 +1,120 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" + + +static void do_vs_prog( struct brw_context *brw, + const struct brw_vertex_program *vp, + struct brw_vs_prog_key *key ) +{ + unsigned program_size; + const unsigned *program; + struct brw_vs_compile c; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + brw_init_compile(&c.func); + c.vp = vp; + + c.prog_data.outputs_written = vp->program.num_outputs; + c.prog_data.inputs_read = vp->program.num_inputs; + +#if 0 + if (c.key.copy_edgeflag) { + c.prog_data.outputs_written |= 1<vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->vs.prog_data); +} + + +static void brw_upload_vs_prog( struct brw_context *brw ) +{ + struct brw_vs_prog_key key; + const struct brw_vertex_program *vp = brw->attribs.VertexProgram; + + assert(vp); + + memset(&key, 0, sizeof(key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key.program_string_id = vp->id; + key.nr_userclip = brw->attribs.Clip.nr; + key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache[BRW_VS_PROG], + &key, sizeof(key), + &brw->vs.prog_data, + &brw->vs.prog_gs_offset)) + return; + + do_vs_prog(brw, vp, &key); +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { + .dirty = { + .brw = BRW_NEW_VS, + .cache = 0 + }, + .update = brw_upload_vs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h new file mode 100644 index 0000000000..0e58f043b0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.h @@ -0,0 +1,82 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_VS_H +#define BRW_VS_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +struct brw_vs_prog_key { + unsigned program_string_id; + unsigned nr_userclip:4; + unsigned copy_edgeflag:1; + unsigned know_w_is_one:1; + unsigned pad:26; +}; + + +struct brw_vs_compile { + struct brw_compile func; + struct brw_vs_prog_key key; + struct brw_vs_prog_data prog_data; + + struct brw_vertex_program *vp; + + unsigned nr_inputs; + + unsigned first_output; + unsigned nr_outputs; + + unsigned first_tmp; + unsigned last_tmp; + + struct brw_reg r0; + struct brw_reg r1; + struct brw_reg regs[12][128]; + struct brw_reg tmp; + struct brw_reg stack; + + struct { + boolean used_in_src; + struct brw_reg reg; + } output_regs[128]; + + struct brw_reg userplane[6]; + +}; + +void brw_vs_emit( struct brw_vs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c new file mode 100644 index 0000000000..98915ba101 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -0,0 +1,1332 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_context.h" +#include "brw_vs.h" + +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +struct brw_prog_info { + unsigned num_temps; + unsigned num_addrs; + unsigned num_consts; + + unsigned writes_psize; + + unsigned pos_idx; + unsigned result_edge_idx; + unsigned edge_flag_idx; + unsigned psize_idx; +}; + +/* Do things as simply as possible. Allocate and populate all regs + * ahead of time. + */ +static void brw_vs_alloc_regs( struct brw_vs_compile *c, + struct brw_prog_info *info ) +{ + unsigned i, reg = 0, mrf; + unsigned nr_params; + + /* r0 -- reserved as usual + */ + c->r0 = brw_vec8_grf(reg, 0); reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); + } + + /* Deal with curbe alignment: + */ + reg += ((6+c->key.nr_userclip+3)/4)*2; + } + + /* Vertex program parameters from curbe: + */ + nr_params = c->prog_data.max_const; + for (i = 0; i < nr_params; i++) { + c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params+1)/2; + c->prog_data.curb_read_length = reg - 1; + + + + /* Allocate input regs: + */ + c->nr_inputs = c->vp->program.num_inputs; + for (i = 0; i < c->nr_inputs; i++) { + c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + + + /* Allocate outputs: TODO: could organize the non-position outputs + * to go straight into message regs. + */ + c->nr_outputs = 0; + c->first_output = reg; + mrf = 4; + for (i = 0; i < c->vp->program.num_outputs; i++) { + c->nr_outputs++; +#if 0 + if (i == VERT_RESULT_HPOS) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + else if (i == VERT_RESULT_PSIZ) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#else + /*treat pos differently for now */ + if (i == info->pos_idx) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#endif + } + + /* Allocate program temporaries: + */ + for (i = 0; i < info->num_temps; i++) { + c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); + reg++; + } + + /* Address reg(s). Don't try to use the internal address reg until + * deref time. + */ + for (i = 0; i < info->num_addrs; i++) { + c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, + reg, + 0, + BRW_REGISTER_TYPE_D, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); + reg++; + } + + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + + + /* Some opcodes need an internal temporary: + */ + c->first_tmp = reg; + c->last_tmp = reg; /* for allocation purposes */ + + /* Each input reg holds data from two vertices. The + * urb_read_length is the number of registers read from *each* + * vertex urb, so is half the amount: + */ + c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + + c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.total_grf = reg; +} + + +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +static void unalias1( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg )) +{ + if (dst.file == arg0.file && dst.nr == arg0.nr) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0); + } +} + +static void unalias2( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg )) +{ + if ((dst.file == arg0.file && dst.nr == arg0.nr) || + (dst.file == arg1.file && dst.nr == arg1.nr)) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0, arg1); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0, arg1); + } +} + +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned cond) +{ + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_pop_insn_state(p); +} + +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} + +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} +static void emit_slt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} + +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); +} + +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} + +static void emit_sge( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); +} + +static void emit_max( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg1, arg0); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void emit_min( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +static void emit_math1( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + unsigned precision) +{ + /* There are various odd behaviours with SEND on the simulator. In + * addition there are documented issues with the fact that the GEN4 + * processor doesn't do dependency control properly on SEND + * results. So, on balance, this kludge to get around failures + * with writemasked math results looks like it might be necessary + * whether that turns out to be a simulator bug or not: + */ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + +static void emit_math2( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_message_reg(3), arg1); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +static void emit_exp_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + + /* tmp_d = floor(arg0.x) */ + brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); + + /* result[0] = 2.0 ^ tmp */ + + /* Adjust exponent for floating point: + * exp += 127 + */ + brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127)); + + /* Install exponent and sign. + * Excess drops off the edge: + */ + brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X), + tmp_d, brw_imm_d(23)); + + release_tmp(c, tmp); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) { + /* result[1] = arg0.x - floor(arg0.x) */ + brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { + /* As with the LOG instruction, we might be better off just + * doing a taylor expansion here, seeing as we have to do all + * the prep work. + * + * If mathbox partial precision is too low, consider also: + * result[3] = result[0] * EXP(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_EXP, + brw_writemask(dst, TGSI_WRITEMASK_Z), + brw_swizzle1(arg0, 0), + BRW_MATH_PRECISION_PARTIAL); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1)); + } +} + + +static void emit_log_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); + struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) { + tmp = get_tmp(c); + tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); + } + + /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt + * according to spec: + * + * These almost look likey they could be joined up, but not really + * practical: + * + * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 + * result[1].i = (x.i & ((1<<23)-1) + (127<<23) + */ + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) { + brw_AND(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_X), + brw_swizzle1(arg0_ud, 0), + brw_imm_ud((1U<<31)-1)); + + brw_SHR(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_X), + tmp_ud, + brw_imm_ud(23)); + + brw_ADD(p, + brw_writemask(tmp, TGSI_WRITEMASK_X), + retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ + brw_imm_d(-127)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) { + brw_AND(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), + brw_swizzle1(arg0_ud, 0), + brw_imm_ud((1<<23)-1)); + + brw_OR(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), + tmp_ud, + brw_imm_ud(127<<23)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { + /* result[2] = result[0] + LOG2(result[1]); */ + + /* Why bother? The above is just a hint how to do this with a + * taylor series. Maybe we *should* use a taylor series as by + * the time all the above has been done it's almost certainly + * quicker than calling the mathbox, even with low precision. + * + * Options are: + * - result[0] + mathbox.LOG2(result[1]) + * - mathbox.LOG2(arg0.x) + * - result[0] + inline_taylor_approx(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_LOG, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 1), + BRW_MATH_PRECISION_FULL); + + brw_ADD(p, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(tmp, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1)); + } + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + + +/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 + */ +static void emit_dst_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1) +{ + struct brw_compile *p = &c->func; + + /* There must be a better way to do this: + */ + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0)); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) + brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1); +} + +static void emit_xpd( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg t, + struct brw_reg u) +{ + brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); + brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); +} + + + +static void emit_lit_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0)); + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1)); + + /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); + if_insn = brw_IF(p, BRW_EXECUTE_8); + { + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0)); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + emit_math2(c, + BRW_MATH_FUNCTION_POW, + brw_writemask(dst, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(arg0, 3), + BRW_MATH_PRECISION_PARTIAL); + } + + brw_ENDIF(p, if_insn); +} + + + + + +/* TODO: relative addressing! + */ +static struct brw_reg get_reg( struct brw_vs_compile *c, + unsigned file, + unsigned index ) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + case TGSI_FILE_CONSTANT: + assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm]; + case TGSI_FILE_IMMEDIATE: + assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index]; + case TGSI_FILE_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case TGSI_FILE_NULL: /* undef values */ + return brw_null_reg(); + + default: + assert(0); + return brw_null_reg(); + } +} + + + +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + int offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. + */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + return vec8(tmp); +} + + +static void emit_arl( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_RNDD(p, tmp, arg0); + brw_MUL(p, dst, tmp, brw_imm_d(16)); + + if (need_tmp) + release_tmp(c, tmp); +} + + +/* Will return mangled results for SWZ op. The emit_swz() function + * ignores this result and recalculates taking extended swizzles into + * account. + */ +static struct brw_reg get_arg( struct brw_vs_compile *c, + struct tgsi_src_register *src ) +{ + struct brw_reg reg; + + if (src->File == TGSI_FILE_NULL) + return brw_null_reg(); + +#if 0 + if (src->RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); + else +#endif + reg = get_reg(c, src->File, src->Index); + + /* Convert 3-bit swizzle to 2-bit. + */ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX, + src->SwizzleY, + src->SwizzleZ, + src->SwizzleW); + + /* Note this is ok for non-swizzle instructions: + */ + reg.negate = src->Negate ? 1 : 0; + + return reg; +} + + +static struct brw_reg get_dst( struct brw_vs_compile *c, + const struct tgsi_dst_register *dst ) +{ + struct brw_reg reg = get_reg(c, dst->File, dst->Index); + + reg.dw1.bits.writemask = dst->WriteMask; + + return reg; +} + + + + +static void emit_swz( struct brw_vs_compile *c, + struct brw_reg dst, + struct tgsi_src_register src ) +{ + struct brw_compile *p = &c->func; + unsigned zeros_mask = 0; + unsigned ones_mask = 0; + unsigned src_mask = 0; + ubyte src_swz[4]; + boolean need_tmp = (src.Negate && + dst.file != BRW_GENERAL_REGISTER_FILE); + struct brw_reg tmp = dst; + unsigned i; + + if (need_tmp) + tmp = get_tmp(c); + + for (i = 0; i < 4; i++) { + if (dst.dw1.bits.writemask & (1<regs[PROGRAM_STATE_VAR][0], src.Index); + else +#endif + arg0 = get_reg(c, src.File, src.Index); + + arg0 = brw_swizzle(arg0, + src_swz[0], src_swz[1], + src_swz[2], src_swz[3]); + + brw_MOV(p, brw_writemask(tmp, src_mask), arg0); + } + + if (zeros_mask) + brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); + + if (ones_mask) + brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); + + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +/* Post-vertex-program processing. Send the results to the URB. + */ +static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info) +{ + struct brw_compile *p = &c->func; + struct brw_reg m0 = brw_message_reg(0); + struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx]; + struct brw_reg ndc; + + if (c->key.copy_edgeflag) { + brw_MOV(p, + get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx), + get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx)); + } + + + /* Build ndc coords? TODO: Shortcircuit when w is known to be one. + */ + if (!c->key.know_w_is_one) { + ndc = get_tmp(c); + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc); + } + else { + ndc = pos; + } + + /* This includes the workaround for -ve rhw, so is no longer an + * optional step: + */ + if (info->writes_psize || + c->key.nr_userclip || + !c->key.know_w_is_one) + { + struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + unsigned i; + + brw_MOV(p, header1, brw_imm_ud(0)); + + brw_set_access_mode(p, BRW_ALIGN_16); + + if (info->writes_psize) { + struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx]; + brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W), + brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, + brw_imm_ud(0x7ff<<8)); + } + + + for (i = 0; i < c->key.nr_userclip; i++) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<key.know_w_is_one) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); + brw_set_access_mode(p, BRW_ALIGN_16); + + release_tmp(c, header1); + } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } + + + /* Emit the (interleaved) headers for the two vertices - an 8-reg + * of zeros followed by two sets of NDC coordinates: + */ + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 0, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + c->nr_outputs + 3, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + +} + +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ + struct tgsi_parse_context parse; + const struct tgsi_token *tokens = c->vp->program.tokens; + tgsi_parse_init(&parse, tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { +#if 0 + struct brw_instruction *brw_inst1, *brw_inst2; + const struct tgsi_full_instruction *inst1, *inst2; + int offset; + inst1 = &parse.FullToken.FullInstruction; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BRA: + target_insn = inst1->BranchTarget; + inst2 = &c->vp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + case TGSI_OPCODE_END: + offset = end_inst - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } +#endif + } + } + tgsi_parse_free(&parse); +} + +static void process_declaration(const struct tgsi_full_declaration *decl, + struct brw_prog_info *info) +{ + int first = decl->u.DeclarationRange.First; + int last = decl->u.DeclarationRange.Last; + + assert (decl->Declaration.Declare != TGSI_DECLARE_MASK); + + switch(decl->Declaration.File) { + case TGSI_FILE_CONSTANT: + info->num_consts += last - first + 1; + break; + case TGSI_FILE_INPUT: { + } + break; + case TGSI_FILE_OUTPUT: { + assert(last == first); /* for now */ + if (decl->Declaration.Semantic) { + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: { + info->pos_idx = first; + } + break; + case TGSI_SEMANTIC_COLOR: + break; + case TGSI_SEMANTIC_BCOLOR: + break; + case TGSI_SEMANTIC_FOG: + break; + case TGSI_SEMANTIC_PSIZE: { + info->writes_psize = TRUE; + info->psize_idx = first; + } + break; + case TGSI_SEMANTIC_GENERIC: + break; + } + } + } + break; + case TGSI_FILE_TEMPORARY: { + info->num_temps += (last - first) + 1; + } + break; + case TGSI_FILE_SAMPLER: { + } + break; + case TGSI_FILE_ADDRESS: { + info->num_addrs += (last - first) + 1; + } + break; + case TGSI_FILE_IMMEDIATE: { + } + break; + case TGSI_FILE_NULL: { + } + break; + } +} + +static void process_instruction(struct brw_vs_compile *c, + struct tgsi_full_instruction *inst, + struct brw_prog_info *info) +{ + struct brw_reg args[3], dst; + struct brw_compile *p = &c->func; + struct brw_indirect stack_index = brw_indirect(0, 0); + unsigned i; + unsigned index; + unsigned file; + /*FIXME: might not be the only one*/ + const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister; + /* + struct brw_instruction *if_inst[MAX_IFSN]; + unsigned insn, if_insn = 0; + */ + + for (i = 0; i < 3; i++) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + index = src->SrcRegister.Index; + file = src->SrcRegister.File; + if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, &src->SrcRegister); + } + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. + */ + index = dst_reg->Index; + file = dst_reg->File; + if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, dst_reg); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case TGSI_OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case TGSI_OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case TGSI_OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case TGSI_OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case TGSI_OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case TGSI_OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MOV: +#if 0 + case TGSI_OPCODE_SWZ: + /* The args[0] value can't be used here as it won't have + * correctly encoded the full swizzle: + */ + emit_swz(c, dst, inst->SrcReg[0] ); +#endif + brw_MOV(p, dst, args[0]); + break; + case TGSI_OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + + case TGSI_OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case TGSI_OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; +#if 0 + case TGSI_OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case TGSI_OPCODE_BRA: + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control_flag_value(p, 0xff); + break; + case TGSI_OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; +#endif + case TGSI_OPCODE_RET: +#if 0 + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); +#else + /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/ +#endif + break; + case TGSI_OPCODE_END: + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + default: + debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); + break; + } + + if (dst_reg->File == TGSI_FILE_OUTPUT + && dst_reg->Index != info->pos_idx + && c->output_regs[dst_reg->Index].used_in_src) + brw_MOV(p, get_dst(c, dst_reg), dst); + + release_tmps(c); +} + +/* Emit the fragment program instructions here. + */ +void brw_vs_emit(struct brw_vs_compile *c) +{ +#define MAX_IFSN 32 + struct brw_compile *p = &c->func; + struct brw_instruction *end_inst; + struct tgsi_parse_context parse; + struct brw_indirect stack_index = brw_indirect(0, 0); + const struct tgsi_token *tokens = c->vp->program.tokens; + struct brw_prog_info prog_info; + unsigned allocated_registers = 0; + memset(&prog_info, 0, sizeof(struct brw_prog_info)); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_access_mode(p, BRW_ALIGN_16); + + tgsi_parse_init(&parse, tokens); + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + unsigned i; + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: { + const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + for (i = 0; i < 3; ++i) { + const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister; + unsigned index = src->Index; + unsigned file = src->File; + if (file == TGSI_FILE_OUTPUT) + c->output_regs[index].used_in_src = TRUE; + } + } + break; + default: + /* nothing */ + break; + } + } + tgsi_parse_free(&parse); + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: { + struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + process_declaration(decl, &prog_info); + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: { + struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; + /*assert(imm->Immediate.Size == 4);*/ + c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float; + c->prog_data.num_imm++; + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: { + struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + if (!allocated_registers) { + /* first instruction (declerations finished). + * now that we know what vars are being used allocate + * registers for them.*/ + c->prog_data.num_consts = prog_info.num_consts; + c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm; + brw_vs_alloc_regs(c, &prog_info); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + brw_set_access_mode(p, BRW_ALIGN_16); + allocated_registers = 1; + } + process_instruction(c, inst, &prog_info); + } + break; + } + } + + end_inst = &p->store[p->nr_insn]; + emit_vertex_write(c, &prog_info); + post_vs_emit(c, end_inst); + tgsi_parse_free(&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c new file mode 100644 index 0000000000..c73469929c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -0,0 +1,102 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "pipe/p_util.h" + +static void upload_vs_unit( struct brw_context *brw ) +{ + struct brw_vs_unit_state vs; + + memset(&vs, 0, sizeof(vs)); + + /* CACHE_NEW_VS_PROG */ + vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; + vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; + vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs.thread3.dispatch_grf_start_reg = 1; + + + /* BRW_NEW_URB_FENCE */ + vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; + vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + vs.thread4.max_threads = MIN2( + MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), + 15); + + + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; + + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + } + else { + vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + } + + vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + vs.thread3.urb_entry_read_offset = 0; + + /* No samplers for ARB_vp programs: + */ + vs.vs5.sampler_count = 0; + + if (BRW_DEBUG & DEBUG_STATS) + vs.thread4.stats_enable = 1; + + /* Vertex program always enabled: + */ + vs.vs6.vs_enable = 1; + + brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); +} + + +const struct brw_tracked_state brw_vs_unit = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_vs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h new file mode 100644 index 0000000000..3523a58614 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -0,0 +1,205 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i965simple requires any window system + * hosting it to implement. This is the only include file in i965simple + * which is public. + * + */ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + + +#include "pipe/p_defines.h" + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct pipe_buffer; +struct pipe_fence_handle; +struct pipe_winsys; + +/* The pipe driver currently understands the following chipsets: + */ +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + + +/* These are the names of all the state caches managed by the driver. + * + * When data is uploaded to a buffer with buffer_subdata, we use the + * special version of that function below so that information about + * what type of data this is can be passed to the winsys backend. + * That in turn allows the correct flags to be set in the aub file + * dump to allow human-readable file dumps later on. + */ + +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, + + BRW_MAX_CACHE +}; + +#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE + +/** + * Additional winsys interface for i965simple. + * + * It is an over-simple batchbuffer mechanism. Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct brw_winsys { + + /** + * Reserve space on batch buffer. + * + * Returns a null pointer if there is insufficient space in the batch buffer + * to hold the requested number of dwords and relocations. + * + * The number of dwords should also include the number of relocations. + */ + unsigned *(*batch_start)(struct brw_winsys *sws, + unsigned dwords, + unsigned relocs); + + void (*batch_dword)(struct brw_winsys *sws, + unsigned dword); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. + * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. + */ + void (*batch_reloc)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta); + + + /* Not used yet, but really want this: + */ + void (*batch_end)( struct brw_winsys *sws ); + + /** + * Flush the batch buffer. + * + * Fence argument must point to NULL or to a previous fence, and the caller + * must call fence_reference when done with the fence. + */ + void (*batch_flush)(struct brw_winsys *sws, + struct pipe_fence_handle **fence); + + + /* A version of buffer_subdata that includes information for the + * simulator: + */ + void (*buffer_subdata_typed)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type); + + + /* A cheat so we don't have to think about relocations in a couple + * of places yet: + */ + unsigned (*get_buffer_offset)( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned flags ); + +}; + +#define BRW_BUFFER_ACCESS_WRITE 0x1 +#define BRW_BUFFER_ACCESS_READ 0x2 + +#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *brw_create(struct pipe_winsys *, + struct brw_winsys *, + unsigned pci_id); + +static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys, + const void *data, + unsigned bytes) +{ + static const unsigned incr = sizeof(unsigned); + uint i; + const unsigned *udata = (const unsigned*)(data); + unsigned size = bytes/incr; + + winsys->batch_start(winsys, size, 0); + for (i = 0; i < size; ++i) { + winsys->batch_dword(winsys, udata[i]); + } + winsys->batch_end(winsys); + + return (i == size); +} +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c new file mode 100644 index 0000000000..539b170744 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -0,0 +1,210 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_eu.h" +#include "brw_state.h" +#include "pipe/p_util.h" + + + +static void do_wm_prog( struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); + const unsigned *program; + unsigned program_size; + + c->key = *key; + c->fp = fp; + + c->delta_xy[0] = brw_null_reg(); + c->delta_xy[1] = brw_null_reg(); + c->pixel_xy[0] = brw_null_reg(); + c->pixel_xy[1] = brw_null_reg(); + c->pixel_w = brw_null_reg(); + + + debug_printf("XXXXXXXX FP\n"); + + brw_wm_glsl_emit(c); + + /* get the program + */ + program = brw_get_program(&c->func, &program_size); + + /* + */ + brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], + &c->key, + sizeof(c->key), + program, + program_size, + &c->prog_data, + &brw->wm.prog_data ); + + FREE(c); +} + + + +static void brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) +{ + /* BRW_NEW_FRAGMENT_PROGRAM */ + struct brw_fragment_program *fp = + (struct brw_fragment_program *)brw->attribs.FragmentProgram; + unsigned lookup = 0; + unsigned line_aa; + + memset(key, 0, sizeof(*key)); + + /* Build the index for table lookup + */ + /* BRW_NEW_DEPTH_STENCIL */ + if (fp->UsesKill || + brw->attribs.DepthStencil->alpha.enabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->ComputesDepth) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + if (brw->attribs.DepthStencil->depth.enabled) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->depth.enabled && + brw->attribs.DepthStencil->depth.writemask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + + /* XXX: when should this be disabled? + */ + if (1) + lookup |= IZ_EARLY_DEPTH_TEST_BIT; + + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (brw->attribs.Raster->line_smooth) { + if (brw->reduced_primitive == PIPE_PRIM_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE || + (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW)) + line_aa = AA_ALWAYS; + } + else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW) + line_aa = AA_ALWAYS; + } + } + } + + brw_wm_lookup_iz(line_aa, + lookup, + key); + + +#if 0 + /* BRW_NEW_SAMPLER + * + * Not doing any of this at the moment: + */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; + + if (unit) { + + if (unit->compare && + unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + key->shadowtex_mask |= 1<Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + key->yuvtex_mask |= 1<program_string_id = fp->id; + +} + + +static void brw_upload_wm_prog( struct brw_context *brw ) +{ + struct brw_wm_prog_key key; + struct brw_fragment_program *fp = (struct brw_fragment_program *) + brw->attribs.FragmentProgram; + + brw_wm_populate_key(brw, &key); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache[BRW_WM_PROG], + &key, sizeof(key), + &brw->wm.prog_data, + &brw->wm.prog_gs_offset)) + return; + + do_wm_prog(brw, fp, &key); +} + + +const struct brw_tracked_state brw_wm_prog = { + .dirty = { + .brw = (BRW_NEW_FS | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = 0 + }, + .update = brw_upload_wm_prog +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h new file mode 100644 index 0000000000..a1ac0f504a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.h @@ -0,0 +1,142 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "brw_context.h" +#include "brw_eu.h" + +/* A big lookup table is used to figure out which and how many + * additional regs will inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_EARLY_DEPTH_TEST_BIT 0x40 +#define IZ_BIT_MAX 0x80 + +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 + +struct brw_wm_prog_key { + unsigned source_depth_reg:3; + unsigned aa_dest_stencil_reg:3; + unsigned dest_depth_reg:3; + unsigned nr_depth_regs:3; + unsigned shadowtex_mask:8; + unsigned computes_depth:1; /* could be derived from program string */ + unsigned source_depth_to_render_target:1; + unsigned runtime_check_aads_emit:1; + + unsigned yuvtex_mask:8; + + unsigned program_string_id; +}; + + + + + +#define PROGRAM_INTERNAL_PARAM +#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_ATTRIB_MAX + 3) +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS + +#define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX) + +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; /* result */ + + struct brw_fragment_program *fp; + + unsigned grf_limit; + unsigned max_wm_grf; + + + struct brw_reg pixel_xy[2]; + struct brw_reg delta_xy[2]; + struct brw_reg pixel_w; + + + struct brw_reg wm_regs[8][32][4]; + + struct brw_reg payload_depth[4]; + struct brw_reg payload_coef[16]; + + struct brw_reg emit_mask_reg; + + struct brw_instruction *if_inst[MAX_IFSN]; + int if_insn; + + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + int loop_insn; + + struct brw_instruction *inst0; + struct brw_instruction *inst1; + + struct brw_reg stack; + struct brw_indirect stack_index; + + unsigned reg_index; + + unsigned tmp_start; + unsigned tmp_index; +}; + + + +void brw_wm_lookup_iz( unsigned line_aa, + unsigned lookup, + struct brw_wm_prog_key *key ); + +void brw_wm_glsl_emit(struct brw_wm_compile *c); +void brw_wm_emit_decls(struct brw_wm_compile *c); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c new file mode 100644 index 0000000000..b45a333a2e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -0,0 +1,383 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + + +static int is_null( struct brw_reg reg ) +{ + return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL); +} + +static void emit_pixel_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_xy[0])) { + + struct brw_compile *p = &c->func; + struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + + c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + brw_ADD(p, + c->pixel_xy[0], + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + + brw_ADD(p, + c->pixel_xy[1], + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} + + + + + + +static void emit_delta_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->delta_xy[0])) { + struct brw_compile *p = &c->func; + struct brw_reg r1 = brw_vec1_grf(1, 0); + + emit_pixel_xy(c); + + c->delta_xy[0] = alloc_tmp(c); + c->delta_xy[1] = alloc_tmp(c); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + brw_ADD(p, + c->delta_xy[0], + retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + + brw_ADD(p, + c->delta_xy[1], + retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + } +} + + + +#if 0 +static void emit_pixel_w( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_w)) { + struct brw_compile *p = &c->func; + + struct brw_reg interp_wpos = c->coef_wpos; + + c->pixel_w = alloc_tmp(c); + + emit_delta_xy(c); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); + brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); + + /* Calc w */ + brw_math_16( p, + c->pixel_w, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} +#endif + + +static void emit_cinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_linterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + emit_delta_xy(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + } + } +} + +#if 0 +static void emit_pinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + get_delta_xy(c); + get_pixel_w(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + brw_MUL(p, dst, dst, c->pixel_w); + } + } +} +#endif + + + +#if 0 +static void emit_wpos( ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct tgsi_full_src_register deltas = get_delta_xy(c); + struct tgsi_full_src_register arg2; + unsigned opcode; + + opcode = WM_LINTERP; + arg2 = src_undef(); + + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, 0, 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + arg2); +} +#endif + + + + +/* Perform register allocation: + * + * -- r0??? + * -- passthrough depth regs (and stencil/aa??) + * -- curbe ?? + * -- inputs (coefficients) + * + * Use a totally static register allocation. This will perform poorly + * but is an easy way to get started (again). + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + int nr_curbe_regs = 0; + + /* R0, then some depth related regs: + */ + for (i = 0; i < c->key.nr_depth_regs; i++) { + c->payload_depth[i] = brw_vec8_grf(i*2, 0); + c->reg_index += 2; + } + + + /* Then a copy of our part of the CURBE entry: + */ + { + int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT]; + int index = 0; + + c->prog_data.max_const = 4*nr_constants; + for (i = 0; i < nr_constants; i++) { + for (j = 0; j < 4; j++, index++) + c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, + index%8); + } + + nr_curbe_regs = 2*((4*nr_constants+15)/16); + c->reg_index += nr_curbe_regs; + } + + /* Adjust for parameter coefficients for position, which are + * currently always provided. + */ +// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + + /* Next we receive the plane coefficients for parameter + * interpolation: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) { + c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = (c->fp->program.num_inputs + 1) * 2; + c->prog_data.curb_read_length = nr_curbe_regs; + + /* That's the end of the payload, now we can start allocating registers. + */ + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; + + /* Now allocate room for the interpolated inputs and staging + * registers for the outputs: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + /* Beyond this we should only need registers for internal temporaries: + */ + c->tmp_start = c->reg_index; +} + + + + + +/* Need to interpolate fragment program inputs in as a preamble to the + * shader. A more sophisticated compiler would do this on demand, but + * we'll do it up front: + */ +void brw_wm_emit_decls(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + int done = 0; + + prealloc_reg(c); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned first = decl->u.DeclarationRange.First; + unsigned last = decl->u.DeclarationRange.Last; + unsigned mask = decl->Declaration.UsageMask; /* ? */ + unsigned i; + + if (decl->Declaration.File != TGSI_FILE_INPUT) + break; + + assert(decl->Declaration.Interpolate); + + for( i = first; i <= last; i++ ) { + switch (decl->Interpolation.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + emit_cinterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_linterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + //emit_pinterp(c, i, mask); + emit_linterp(c, i, mask); + break; + } + } + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + + release_tmps(c); +} diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c new file mode 100644 index 0000000000..d95645d108 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -0,0 +1,1079 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + + + +static int get_scalar_dst_index(struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; + int i; + for (i = 0; i < 4; i++) + if (dst.WriteMask & (1<tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component ) +{ + switch (file) { + case TGSI_FILE_NULL: + return brw_null_reg(); + + case TGSI_FILE_SAMPLER: + /* Should never get here: + */ + assert (0); + return brw_null_reg(); + + case TGSI_FILE_IMMEDIATE: + /* These need a different path: + */ + assert(0); + return brw_null_reg(); + + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + return c->wm_regs[file][index][component]; + + default: + assert(0); + return brw_null_reg(); + } +} + + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + int component) +{ + return get_reg(c, + inst->FullDstRegisters[0].DstRegister.File, + inst->FullDstRegisters[0].DstRegister.Index, + component); +} + +static int get_swz( struct tgsi_src_register src, int index ) +{ + switch (index & 3) { + case 0: return src.SwizzleX; + case 1: return src.SwizzleY; + case 2: return src.SwizzleZ; + case 3: return src.SwizzleW; + default: return 0; + } +} + +static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) +{ + switch (index & 3) { + case 0: return src.ExtSwizzleX; + case 1: return src.ExtSwizzleY; + case 2: return src.ExtSwizzleZ; + case 3: return src.ExtSwizzleW; + default: return 0; + } +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct tgsi_full_src_register *src, + int index) +{ + struct brw_reg reg; + int component = index; + int neg = 0; + int abs = 0; + + if (src->SrcRegister.Negate) + neg = 1; + + component = get_swz(src->SrcRegister, component); + + /* Yes, there are multiple negates: + */ + switch (component & 3) { + case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; + case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; + case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; + case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; + } + + /* And multiple swizzles, fun isn't it: + */ + component = get_ext_swz(src->SrcRegisterExtSwz, component); + + /* Can't handle this, don't know if we need to: + */ + assert(src->SrcRegisterExtSwz.ExtDivide == TGSI_EXTSWIZZLE_ONE); + + /* Not handling indirect lookups yet: + */ + assert(src->SrcRegister.Indirect == 0); + + /* Don't know what dimension means: + */ + assert(src->SrcRegister.Dimension == 0); + + /* Will never handle any of this stuff: + */ + assert(src->SrcRegisterExtMod.Complement == 0); + assert(src->SrcRegisterExtMod.Bias == 0); + assert(src->SrcRegisterExtMod.Scale2X == 0); + + if (src->SrcRegisterExtMod.Absolute) + abs = 1; + + /* Another negate! This is a post-absolute negate, which we + * can't do. Need to clean the crap out of tgsi somehow. + */ + assert(src->SrcRegisterExtMod.Negate == 0); + + switch( component ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + reg = get_reg(c, + src->SrcRegister.File, + src->SrcRegister.Index, + component ); + + if (neg) + reg = negate(reg); + + if (abs) + reg = brw_abs(reg); + + break; + + /* XXX: this won't really work in the general case, but we know + * that the extended swizzle is only allowed in the SWZ + * instruction (right??), in which case using an immediate + * directly will work. + */ + case TGSI_EXTSWIZZLE_ZERO: + reg = brw_imm_f(0); + break; + + case TGSI_EXTSWIZZLE_ONE: + if (neg && !abs) + reg = brw_imm_f(-1.0); + else + reg = brw_imm_f(1.0); + break; + + default: + assert(0); + break; + } + + + return reg; +} + +static void emit_abs( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + brw_MOV(p, dst, brw_abs(src)); /* NOTE */ + } + } + brw_set_saturate(p, 0); +} + + +static void emit_xpd(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + for (i = 0; i < 4; i++) { + unsigned i2 = (i+2)%3; + unsigned i1 = (i+1)%3; + if (mask & (1<FullSrcRegisters[0], i2)); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + ((inst->Instruction.Saturate != TGSI_SAT_NONE) + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + + +static void emit_alu2(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_alu2(p, opcode, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + + +static void emit_alu1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + brw_alu1(p, opcode, dst, src0); + } + } + if (inst->Instruction.Saturate != TGSI_SAT_NONE) + brw_set_saturate(p, 0); +} + + +static void emit_max(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->Instruction.Saturate != TGSI_SAT_NONE + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } +} + +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned cond) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); +} + + +static void emit_ddx(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + nr = src0.nr; + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned i; + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } +#else + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +#endif + + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +#endif +} + +static void emit_tex(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned msg_len; + unsigned i, nr; + unsigned emit; + boolean shadow = (c->key.shadowtex_mask & (1<TexSrcUnit)) ? 1 : 0; + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } +#else + emit = WRITEMASK_XY; + nr = 2; +#endif + + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const unsigned swz[4] = {0,1,2,2}; + if (emit & (1<TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +#endif +} + + + + + + + + +static void emit_fb_write(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + int base_reg = 0; + + // src0 = output color + // src1 = payload_depth[0] + // src2 = output depth + // dst = ??? + + + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; + + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); + +} + + +static void brw_wm_emit_instruction( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst ) +{ + struct brw_compile *p = &c->func; + +#if 0 + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#endif + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + emit_abs(c, inst); + break; + case TGSI_OPCODE_ADD: + emit_alu2(c, inst, BRW_OPCODE_ADD); + break; + case TGSI_OPCODE_SUB: + assert(0); +// emit_alu2(c, inst, BRW_OPCODE_SUB); + break; + case TGSI_OPCODE_FRC: + emit_alu1(c, inst, BRW_OPCODE_FRC); + break; + case TGSI_OPCODE_FLR: + assert(0); +// emit_alu1(c, inst, BRW_OPCODE_FLR); + break; + case TGSI_OPCODE_LRP: + emit_lrp(c, inst); + break; + case TGSI_OPCODE_INT: + emit_alu1(c, inst, BRW_OPCODE_RNDD); + break; + case TGSI_OPCODE_MOV: + emit_alu1(c, inst, BRW_OPCODE_MOV); + break; + case TGSI_OPCODE_DP3: + emit_dp3(c, inst); + break; + case TGSI_OPCODE_DP4: + emit_dp4(c, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(c, inst); + break; + case TGSI_OPCODE_DPH: + emit_dph(c, inst); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); + break; + case TGSI_OPCODE_COS: + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); + break; + case TGSI_OPCODE_MAX: + emit_max(c, inst); + break; + case TGSI_OPCODE_MIN: + emit_min(c, inst); + break; + case TGSI_OPCODE_DDX: + emit_ddx(c, inst); + break; + case TGSI_OPCODE_DDY: + emit_ddy(c, inst); + break; + case TGSI_OPCODE_SLT: + emit_sop(c, inst, BRW_CONDITIONAL_L); + break; + case TGSI_OPCODE_SLE: + emit_sop(c, inst, BRW_CONDITIONAL_LE); + break; + case TGSI_OPCODE_SGT: + emit_sop(c, inst, BRW_CONDITIONAL_G); + break; + case TGSI_OPCODE_SGE: + emit_sop(c, inst, BRW_CONDITIONAL_GE); + break; + case TGSI_OPCODE_SEQ: + emit_sop(c, inst, BRW_CONDITIONAL_EQ); + break; + case TGSI_OPCODE_SNE: + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + break; + case TGSI_OPCODE_MUL: + emit_alu2(c, inst, BRW_OPCODE_MUL); + break; + case TGSI_OPCODE_POW: + emit_pow(c, inst); + break; + case TGSI_OPCODE_MAD: + emit_mad(c, inst); + break; + case TGSI_OPCODE_TEX: + emit_tex(c, inst); + break; + case TGSI_OPCODE_TXB: + emit_txb(c, inst); + break; + case TGSI_OPCODE_TEXKILL: + emit_kil(c); + break; + case TGSI_OPCODE_IF: + assert(c->if_insn < MAX_IFSN); + c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(c->if_insn > 0); + brw_ENDIF(p, c->if_inst[--c->if_insn]); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + case TGSI_OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); +// orig_inst = inst->Data; +// orig_inst->Data = &p->store[p->nr_insn]; + assert(0); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case TGSI_OPCODE_RET: +#if 0 + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); +#else + emit_fb_write(c, inst); +#endif + + break; + case TGSI_OPCODE_LOOP: + c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + c->loop_insn--; + c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (c->inst0 > c->loop_inst[c->loop_insn]) { + c->inst0--; + if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; + c->inst0->bits3.if_else.pop_count = 0; + } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; + c->inst0->bits3.if_else.pop_count = 0; + } + } + break; + case TGSI_OPCODE_END: + emit_fb_write(c, inst); + break; + + default: + debug_printf("unsupported IR in fragment shader %d\n", + inst->Instruction.Opcode); + } +#if 0 + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif +} + + + + + + +void brw_wm_glsl_emit(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + struct brw_compile *p = &c->func; + + brw_init_compile(&c->func); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + c->reg_index = 0; + c->if_insn = 0; + c->loop_insn = 0; + c->stack_index = brw_indirect(0,0); + + /* Do static register allocation and parameter interpolation: + */ + brw_wm_emit_decls( c ); + + /* Emit the actual program. All done with very direct translation, + * hopefully we can improve on this shortly... + */ + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* already done */ + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* not handled yet */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); + + /* Fix up call targets: + */ +#if 0 + { + unsigned nr_insns = c->fp->program.Base.NumInstructions; + unsigned insn, target_insn; + struct tgsi_full_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } + } +#endif + + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c new file mode 100644 index 0000000000..6c5f25bf39 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_iz.c @@ -0,0 +1,214 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_wm.h" + + +#undef P /* prompted depth */ +#undef C /* computed */ +#undef N /* non-promoted? */ + +#define P 0 +#define C 1 +#define N 2 + +const struct { + unsigned mode:2; + unsigned sd_present:1; + unsigned sd_to_rt:1; + unsigned dd_present:1; + unsigned ds_present:1; +} wm_iz_table[IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 0, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 } +}; + +void brw_wm_lookup_iz( unsigned line_aa, + unsigned lookup, + struct brw_wm_prog_key *key ) +{ + unsigned reg = 2; + + assert (lookup < IZ_BIT_MAX); + + if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) + key->computes_depth = 1; + + if (wm_iz_table[lookup].sd_present) { + key->source_depth_reg = reg; + reg += 2; + } + + if (wm_iz_table[lookup].sd_to_rt) + key->source_depth_to_render_target = 1; + + if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { + key->aa_dest_stencil_reg = reg; + key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + line_aa == AA_SOMETIMES); + reg++; + } + + if (wm_iz_table[lookup].dd_present) { + key->dest_depth_reg = reg; + reg+=2; + } + + key->nr_depth_regs = (reg+1)/2; +} + diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c new file mode 100644 index 0000000000..de42ffc5b1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -0,0 +1,273 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "pipe/p_util.h" + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. + */ + +static int intel_translate_shadow_compare_func(unsigned func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_ALWAYS; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LESS; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_NEVER; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned translate_wrap_mode( int wrap ) +{ + switch( wrap ) { + case PIPE_TEX_WRAP_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return BRW_TEXCOORDMODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + + +static unsigned U_FIXED(float value, unsigned frac_bits) +{ + value *= (1<cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); +} + + +/* + */ +static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler, + unsigned sdc_gs_offset, + struct brw_sampler_state *sampler) +{ + memset(sampler, 0, sizeof(*sampler)); + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + default: + break; + } + /* Set Anisotropy: + */ + if (pipe_sampler->max_anisotropy > 1.0) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (pipe_sampler->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); + } + } + else { + switch (pipe_sampler->mag_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + } + + sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); + sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r); + sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t); + + /* Fulsim complains if I don't do this. Hardware doesn't mind: + */ +#if 0 + if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } +#endif + + /* Set shadow function: + */ + if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func); + } + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set BaseMipLevel, MaxLOD, MinLOD: + * + * XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. Probably have to subtract firstLevel from each of + * these: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6); + + sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; +} + + + +/* All samplers must be uploaded in a single contiguous array, which + * complicates various things. However, this is still too confusing - + * FIXME: simplify all the different new texture state flags. + */ +static void upload_wm_samplers(struct brw_context *brw) +{ + unsigned unit; + unsigned sampler_count = 0; + + /* BRW_NEW_SAMPLER */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + /* determine unit enable/disable by looking for a bound texture */ + if (brw->attribs.Texture[unit]) { + const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; + unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color); + + brw_update_sampler_state(sampler, + sdc_gs_offset, + &brw->wm.sampler[unit]); + + sampler_count = unit + 1; + } + } + + if (brw->wm.sampler_count != sampler_count) { + brw->wm.sampler_count = sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + brw->wm.sampler_gs_offset = 0; + + if (brw->wm.sampler_count) + brw->wm.sampler_gs_offset = + brw_cache_data_sz(&brw->cache[BRW_SAMPLER], + brw->wm.sampler, + sizeof(struct brw_sampler_state) * brw->wm.sampler_count); +} + +const struct brw_tracked_state brw_wm_samplers = { + .dirty = { + .brw = BRW_NEW_SAMPLER, + .cache = 0 + }, + .update = upload_wm_samplers +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c new file mode 100644 index 0000000000..5ccd488842 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -0,0 +1,194 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_wm.h" +#include "pipe/p_util.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ +static void upload_wm_unit(struct brw_context *brw ) +{ + struct brw_wm_unit_state wm; + unsigned max_threads; + unsigned per_thread; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + max_threads = 0; + else + max_threads = 31; + + + memset(&wm, 0, sizeof(wm)); + + /* CACHE_NEW_WM_PROG */ + wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; + wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; + wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; + + wm.wm5.max_threads = max_threads; + + per_thread = align(brw->wm.prog_data->total_scratch, 1024); + assert(per_thread <= 12 * 1024); + +#if 0 + if (brw->wm.prog_data->total_scratch) { + unsigned total = per_thread * (max_threads + 1); + + /* Scratch space -- just have to make sure there is sufficient + * allocated for the active program and current number of threads. + */ + brw->wm.scratch_buffer_size = total; + if (brw->wm.scratch_buffer && + brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { + dri_bo_unreference(brw->wm.scratch_buffer); + brw->wm.scratch_buffer = NULL; + } + if (!brw->wm.scratch_buffer) { + brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr, + "wm scratch", + brw->wm.scratch_buffer_size, + 4096, DRM_BO_FLAG_MEM_TT); + } + } + /* XXX: Scratch buffers are not implemented correectly. + * + * The scratch offset to be programmed into wm is relative to the general + * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or + * the previous bmGenBuffers scheme), we get an offset relative to the + * start of framebuffer. Even before then, it was broken in other ways, + * so just fail for now if we hit that path. + */ + assert(brw->wm.prog_data->total_scratch == 0); +#endif + + /* CACHE_NEW_SURFACE */ + wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + + /* BRW_NEW_CURBE_OFFSETS */ + wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + + wm.thread3.urb_entry_read_offset = 0; + wm.thread1.depth_coef_urb_read_offset = 1; + wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + /* CACHE_NEW_SAMPLER */ + wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; + wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + { + const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; + + if (fp->UsesDepth) + wm.wm5.program_uses_depth = 1; /* as far as we can tell */ + + if (fp->ComputesDepth) + wm.wm5.program_computes_depth = 1; + + /* BRW_NEW_ALPHA_TEST */ + if (fp->UsesKill || + brw->attribs.DepthStencil->alpha.enabled) + wm.wm5.program_uses_killpixel = 1; + + wm.wm5.enable_8_pix = 1; + } + + wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ + wm.wm5.legacy_line_rast = 0; + wm.wm5.legacy_global_depth_bias = 0; + wm.wm5.early_depth_test = 1; /* never need to disable */ + wm.wm5.line_aa_region_width = 0; + wm.wm5.line_endcap_aa_region_width = 1; + + /* BRW_NEW_RASTERIZER */ + if (brw->attribs.Raster->poly_stipple_enable) + wm.wm5.polygon_stipple = 1; + +#if 0 + if (brw->attribs.Polygon->OffsetFill) { + wm.wm5.depth_offset = 1; + /* Something wierd going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewere (eg. the + * quad-offset-units test). + */ + wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + + /* This is the only value that passes glean: + */ + wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; + } +#endif + + if (brw->attribs.Raster->line_stipple_enable) { + wm.wm5.line_stipple = 1; + } + + if (BRW_DEBUG & DEBUG_STATS) + wm.wm4.stats_enable = 1; + + brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + + if (brw->wm.prog_data->total_scratch) { + /* + dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + (per_thread / 1024) - 1, + brw->wm.state_gs_offset + + ((char *)&wm.thread2 - (char *)&wm), + brw->wm.scratch_buffer); + */ + } else { + wm.thread2.scratch_space_base_pointer = 0; + } +} + +const struct brw_tracked_state brw_wm_unit = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_ALPHA_TEST | + BRW_NEW_FS | + BRW_NEW_CURBE_OFFSETS), + + .cache = (CACHE_NEW_SURFACE | + CACHE_NEW_WM_PROG | + CACHE_NEW_SAMPLER) + }, + .update = upload_wm_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c new file mode 100644 index 0000000000..d16d919bce --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -0,0 +1,304 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static unsigned translate_tex_target( enum pipe_texture_target target ) +{ + switch (target) { + case PIPE_TEXTURE_1D: + return BRW_SURFACE_1D; + + case PIPE_TEXTURE_2D: + return BRW_SURFACE_2D; + + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + + case PIPE_TEXTURE_CUBE: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return 0; + } +} + +static unsigned translate_tex_format( enum pipe_format pipe_format ) +{ + switch( pipe_format ) { + case PIPE_FORMAT_U_L8: + return BRW_SURFACEFORMAT_L8_UNORM; + + case PIPE_FORMAT_U_I8: + return BRW_SURFACEFORMAT_I8_UNORM; + + case PIPE_FORMAT_U_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + + case PIPE_FORMAT_U_A8_L8: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case PIPE_FORMAT_R8G8B8_UNORM: + assert(0); /* not supported for sampling */ + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R5G6B5_UNORM: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case PIPE_FORMAT_A1R5G5B5_UNORM: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case PIPE_FORMAT_A4R4G4B4_UNORM: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + case PIPE_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case PIPE_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return BRW_SURFACEFORMAT_FXT1; +#endif + + case PIPE_FORMAT_Z16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; +#if 0 + case PIPE_FORMAT_RGB_DXT1: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case PIPE_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case PIPE_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case PIPE_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case PIPE_FORMAT_SRGBA8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case PIPE_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; +#endif + + default: + assert(0); + return 0; + } +} + +static unsigned brw_buffer_offset(struct brw_context *brw, + struct pipe_buffer *buffer) +{ + return brw->winsys->get_buffer_offset(brw->winsys, + buffer, + 0); +} + +static +void brw_update_texture_surface( struct brw_context *brw, + unsigned unit ) +{ + const struct brw_texture *tObj = brw->attribs.Texture[unit]; + struct brw_surface_state surf; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(tObj->base.target); + surf.ss0.surface_format = translate_tex_format(tObj->base.format); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + /* Updated in emit_reloc */ + surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); + + surf.ss2.mip_count = tObj->base.last_level; + surf.ss2.width = tObj->base.width[0] - 1; + surf.ss2.height = tObj->base.height[0] - 1; + + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; /* always zero */ + surf.ss3.pitch = tObj->pitch - 1; + surf.ss3.depth = tObj->base.depth[0] - 1; + + surf.ss4.min_lod = 0; + + if (tObj->base.target == PIPE_TEXTURE_CUBE) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; + } + + brw->wm.bind.surf_ss_offset[unit + 1] = + brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); +} + + + +#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) ) + + +static void upload_wm_surfaces(struct brw_context *brw ) +{ + unsigned i; + + { + struct brw_surface_state surf; + + /* BRW_NEW_FRAMEBUFFER + */ + struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ + + memset(&surf, 0, sizeof(surf)); + + if (pipe_surface != NULL) { + if (pipe_surface->cpp == 4) + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + else + surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + + surf.ss0.surface_type = BRW_SURFACE_2D; + + surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer ); + + surf.ss2.width = pipe_surface->width - 1; + surf.ss2.height = pipe_surface->height - 1; + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; + surf.ss3.pitch = (pipe_surface->pitch * pipe_surface->cpp) - 1; + } else { + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + surf.ss0.surface_type = BRW_SURFACE_NULL; + } + + /* BRW_NEW_BLEND */ + surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && + brw->attribs.Blend->blend_enable); + + + surf.ss0.writedisable_red = !(brw->attribs.Blend->colormask & PIPE_MASK_R); + surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G); + surf.ss0.writedisable_blue = !(brw->attribs.Blend->colormask & PIPE_MASK_B); + surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A); + + + + + brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + + brw->wm.nr_surfaces = 1; + } + + + /* BRW_NEW_TEXTURE + */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct brw_texture *texUnit = brw->attribs.Texture[i]; + + if (texUnit && + texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) { + + brw_update_texture_surface(brw, i); + + brw->wm.nr_surfaces = i+2; + } + else { + brw->wm.bind.surf_ss_offset[i+1] = 0; + } + } + + brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], + &brw->wm.bind ); +} + + +/* KW: Will find a different way to acheive this, see for example the + * state caches with relocs in the i915 swz driver. + */ +#if 0 +static void emit_reloc_wm_surfaces(struct brw_context *brw) +{ + int unit; + + if (brw->state.draw_region != NULL) { + /* Emit framebuffer relocation */ + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, + brw->wm.bind.surf_ss_offset[0] + + offsetof(struct brw_surface_state, ss1), + brw->state.draw_region->buffer); + } + + /* Emit relocations for texture buffers */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + brw->wm.bind.surf_ss_offset[unit + 1] + + offsetof(struct brw_surface_state, ss1), + intelObj->mt->region->buffer); + } + } +} +#endif + +const struct brw_tracked_state brw_wm_surfaces = { + .dirty = { + .brw = (BRW_NEW_FRAMEBUFFER | + BRW_NEW_BLEND | + BRW_NEW_TEXTURE), + .cache = 0 + }, + .update = upload_wm_surfaces, +}; diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile new file mode 100644 index 0000000000..31438a882e --- /dev/null +++ b/src/gallium/drivers/softpipe/Makefile @@ -0,0 +1,50 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = softpipe + +DRIVER_SOURCES = \ + sp_clear.c \ + sp_flush.c \ + sp_query.c \ + sp_context.c \ + sp_draw_arrays.c \ + sp_prim_setup.c \ + sp_prim_vbuf.c \ + sp_quad.c \ + sp_quad_alpha_test.c \ + sp_quad_blend.c \ + sp_quad_bufloop.c \ + sp_quad_colormask.c \ + sp_quad_coverage.c \ + sp_quad_depth_test.c \ + sp_quad_earlyz.c \ + sp_quad_fs.c \ + sp_quad_occlusion.c \ + sp_quad_output.c \ + sp_quad_stencil.c \ + sp_quad_stipple.c \ + sp_state_blend.c \ + sp_state_clip.c \ + sp_state_derived.c \ + sp_state_fs.c \ + sp_state_sampler.c \ + sp_state_rasterizer.c \ + sp_state_surface.c \ + sp_state_vertex.c \ + sp_texture.c \ + sp_tex_sample.c \ + sp_tile_cache.c \ + sp_surface.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript new file mode 100644 index 0000000000..d581ee8d3c --- /dev/null +++ b/src/gallium/drivers/softpipe/SConscript @@ -0,0 +1,42 @@ +Import('*') + +env = env.Clone() + +softpipe = env.ConvenienceLibrary( + target = 'softpipe', + source = [ + 'sp_clear.c', + 'sp_context.c', + 'sp_draw_arrays.c', + 'sp_flush.c', + 'sp_prim_setup.c', + 'sp_prim_vbuf.c', + 'sp_quad_alpha_test.c', + 'sp_quad_blend.c', + 'sp_quad_bufloop.c', + 'sp_quad.c', + 'sp_quad_colormask.c', + 'sp_quad_coverage.c', + 'sp_quad_depth_test.c', + 'sp_quad_earlyz.c', + 'sp_quad_fs.c', + 'sp_quad_occlusion.c', + 'sp_quad_output.c', + 'sp_quad_stencil.c', + 'sp_quad_stipple.c', + 'sp_query.c', + 'sp_state_blend.c', + 'sp_state_clip.c', + 'sp_state_derived.c', + 'sp_state_fs.c', + 'sp_state_rasterizer.c', + 'sp_state_sampler.c', + 'sp_state_surface.c', + 'sp_state_vertex.c', + 'sp_surface.c', + 'sp_tex_sample.c', + 'sp_texture.c', + 'sp_tile_cache.c', + ]) + +Export('softpipe') \ No newline at end of file diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c new file mode 100644 index 0000000000..8d295a30ca --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + +#if 0 + softpipe_update_derived(softpipe); /* not needed?? */ +#endif + + if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) { + sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue); +#if TILE_CLEAR_OPTIMIZATION + return; +#endif + } + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { + sp_tile_cache_clear(softpipe->cbuf_cache[i], clearValue); + } + } + +#if !TILE_CLEAR_OPTIMIZATION + /* non-cached surface */ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +#endif +} diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h new file mode 100644 index 0000000000..a8ed1c4ecc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + +#ifndef SP_CLEAR_H +#define SP_CLEAR_H + +#include "pipe/p_state.h" +struct pipe_context; + +extern void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +#endif /* SP_CLEAR_H */ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c new file mode 100644 index 0000000000..cea6b90104 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -0,0 +1,333 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + +#include "pipe/draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +softpipe_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ + switch (type) { + case PIPE_TEXTURE: + /* softpipe supports all texture formats */ + return TRUE; + case PIPE_SURFACE: + /* softpipe supports all (off-screen) surface formats */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ + unsigned i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + } + + sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ + uint i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + } + sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct pipe_winsys *ws = pipe->winsys; + uint i; + + draw_destroy( softpipe->draw ); + + softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); + softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); + softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); + softpipe->quad.coverage->destroy( softpipe->quad.coverage ); + softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.colormask->destroy( softpipe->quad.colormask ); + softpipe->quad.output->destroy( softpipe->quad.output ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + sp_destroy_tile_cache(softpipe->zsbuf_cache); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + sp_destroy_tile_cache(softpipe->tex_cache[i]); + + for (i = 0; i < Elements(softpipe->constants); i++) { + if (softpipe->constants[i].buffer) { + pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + } + } + + FREE( softpipe ); +} + + +static const char *softpipe_get_name( struct pipe_context *pipe ) +{ + return "softpipe"; +} + +static const char *softpipe_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + +static int softpipe_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + +static float softpipe_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + +struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, + struct softpipe_winsys *softpipe_winsys ) +{ + struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); + uint i; + +#if defined(__i386__) || defined(__386__) + softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; +#else + softpipe->use_sse = FALSE; +#endif + + softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + + softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.destroy = softpipe_destroy; + + /* queries */ + softpipe->pipe.is_format_supported = softpipe_is_format_supported; + softpipe->pipe.get_name = softpipe_get_name; + softpipe->pipe.get_vendor = softpipe_get_vendor; + softpipe->pipe.get_param = softpipe_get_param; + softpipe->pipe.get_paramf = softpipe_get_paramf; + + /* state setters */ + softpipe->pipe.create_blend_state = softpipe_create_blend_state; + softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; + softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + + softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; + softpipe->pipe.bind_sampler_state = softpipe_bind_sampler_state; + softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + + softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; + softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; + softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + + softpipe->pipe.create_fs_state = softpipe_create_fs_state; + softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; + softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + + softpipe->pipe.create_vs_state = softpipe_create_vs_state; + softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; + softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + + softpipe->pipe.set_blend_color = softpipe_set_blend_color; + softpipe->pipe.set_clip_state = softpipe_set_clip_state; + softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; + softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; + softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; + softpipe->pipe.set_sampler_texture = softpipe_set_sampler_texture; + softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + + softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; + softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; + + softpipe->pipe.draw_arrays = softpipe_draw_arrays; + softpipe->pipe.draw_elements = softpipe_draw_elements; + + softpipe->pipe.clear = softpipe_clear; + softpipe->pipe.flush = softpipe_flush; + + softpipe_init_query_funcs( softpipe ); + + /* textures */ + softpipe->pipe.texture_create = softpipe_texture_create; + softpipe->pipe.texture_release = softpipe_texture_release; + softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; + + /* + * Alloc caches for accessing drawing surfaces and textures. + * Must be before quad stage setup! + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + softpipe->cbuf_cache[i] = sp_create_tile_cache(); + softpipe->zsbuf_cache = sp_create_tile_cache(); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + softpipe->tex_cache[i] = sp_create_tile_cache(); + + + /* setup quad rendering stages */ + softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad.output = sp_quad_output_stage(softpipe); + + softpipe->winsys = softpipe_winsys; + + /* + * Create drawing context and plug our rendering stage into it. + */ + softpipe->draw = draw_create(); + assert(softpipe->draw); + softpipe->setup = sp_draw_render_stage(softpipe); + + if (GETENV( "SP_VBUF" ) != NULL) { + sp_init_vbuf(softpipe); + } + else { + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + } + + sp_init_surface_functions(softpipe); + + return &softpipe->pipe; +} diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h new file mode 100644 index 0000000000..aff8c2cc5d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef SP_CONTEXT_H +#define SP_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +#include "pipe/draw/draw_vertex.h" + +#include "sp_quad.h" + + +struct softpipe_winsys; +struct softpipe_vbuf_render; +struct draw_context; +struct draw_stage; +struct softpipe_tile_cache; +struct sp_fragment_shader_state; +struct sp_vertex_shader_state; + + +struct softpipe_context { + struct pipe_context pipe; /**< base class */ + struct softpipe_winsys *winsys; /**< window system interface */ + + + /* The most recent drawing state as set by the driver: + */ + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct sp_fragment_shader_state *fs; + const struct sp_vertex_shader_state *vs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct softpipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + unsigned dirty; + + /* Counter for occlusion queries. Note this supports overlapping + * queries. + */ + uint64 occlusion_count; + + /* + * Mapped vertex buffers + */ + ubyte *mapped_vbuffer[PIPE_ATTRIB_MAX]; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + /** Vertex format */ + struct vertex_info vertex_info; + struct vertex_info vertex_info_vbuf; + + int psize_slot; + +#if 0 + /* Stipple derived state: + */ + ubyte stipple_masks[16][16]; +#endif + + /** Derived from scissor and surface bounds: */ + struct pipe_scissor_state cliprect; + + unsigned line_stipple_counter; + + /** Software quad rendering pipeline */ + struct { + struct quad_stage *polygon_stipple; + struct quad_stage *earlyz; + struct quad_stage *shade; + struct quad_stage *alpha_test; + struct quad_stage *stencil_test; + struct quad_stage *depth_test; + struct quad_stage *occlusion; + struct quad_stage *coverage; + struct quad_stage *bufloop; + struct quad_stage *blend; + struct quad_stage *colormask; + struct quad_stage *output; + + struct quad_stage *first; /**< points to one of the above stages */ + } quad; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *setup; + struct draw_stage *vbuf; + struct softpipe_vbuf_render *vbuf_render; + + uint current_cbuf; /**< current color buffer being written to */ + + struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; + struct softpipe_tile_cache *zsbuf_cache; + + struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + + int use_sse : 1; + int dump_fs : 1; +}; + + + + +static INLINE struct softpipe_context * +softpipe_context( struct pipe_context *pipe ) +{ + return (struct softpipe_context *)pipe; +} + + +#endif /* SP_CONTEXT_H */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c new file mode 100644 index 0000000000..71a303a8b5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "sp_context.h" +#include "sp_state.h" + +#include "pipe/draw/draw_context.h" + + + +static void +softpipe_map_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX]); +} + +static void +softpipe_unmap_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + +boolean +softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ +boolean +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + /* first, check that the primitive is not malformed. It is the + * state tracker's responsibility to do send only correctly formed + * primitives down. It currently isn't doing that though... + */ +#if 1 + count = draw_trim_prim( mode, count ); +#else + if (!draw_validate_prim( mode, count )) + assert(0); +#endif + + + if (sp->dirty) + softpipe_update_derived( sp ); + + softpipe_map_surfaces(sp); + softpipe_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* draw! */ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + } + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + } + + + /* Note: leave drawing surfaces mapped */ + softpipe_unmap_constant_buffers(sp); + + return TRUE; +} diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c new file mode 100644 index 0000000000..ced0d5d098 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/draw/draw_context.h" +#include "sp_flush.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" +#include "sp_winsys.h" + + +/* There will be actual work to do here. In future we may want a + * fence-like interface instead of finish, and perhaps flush will take + * flags to indicate what type of flush is required. + */ +void +softpipe_flush( struct pipe_context *pipe, + unsigned flags ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + draw_flush(softpipe->draw); + + /* - flush the quad pipeline + * - flush the texture cache + * - flush the render cache + */ + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. + */ + softpipe_unmap_surfaces(softpipe); +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h new file mode 100644 index 0000000000..34ec617866 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_FLUSH_H +#define SP_FLUSH_H + +struct pipe_context; + +void softpipe_flush(struct pipe_context *pipe, unsigned flags ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h new file mode 100644 index 0000000000..0ae31d8796 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef SP_HEADERS_H +#define SP_HEADERS_H + +#include "pipe/tgsi/exec/tgsi_exec.h" + +#define PRIM_POINT 1 +#define PRIM_LINE 2 +#define PRIM_TRI 3 + + +/* The rasterizer generates 2x2 quads of fragment and feeds them to + * the current fp_machine (see below). + * Remember that Y=0=top with Y increasing down the window. + */ +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +/** + * Encodes everything we need to know about a 2x2 pixel block. Uses + * "Channel-Serial" or "SoA" layout. + */ +struct quad_header { + int x0; + int y0; + unsigned mask:4; + unsigned facing:1; /**< Front (0) or back (1) facing? */ + unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ + + struct { + float color[NUM_CHANNELS][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ + float depth[QUAD_SIZE]; + } outputs; + + float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ + + const struct tgsi_interp_coef *coef; + const struct tgsi_interp_coef *posCoef; + + unsigned nr_attrs; +}; + + +#endif /* SP_HEADERS_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c new file mode 100644 index 0000000000..2772048661 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -0,0 +1,1247 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Primitive rasterization/rendering (points, lines, triangles) + * + * \author Keith Whitwell + * \author Brian Paul + */ + + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#define DEBUG_VERTS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct softpipe_context *softpipe; + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const struct vertex_header *vmax; + const struct vertex_header *vmid; + const struct vertex_header *vmin; + const struct vertex_header *vprovoke; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ + struct quad_header quad; + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +/** + * Clip setup->quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_stage *setup) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup->quad.x0 >= maxx || + setup->quad.y0 >= maxy || + setup->quad.x0 + 1 < minx || + setup->quad.y0 + 1 < miny) { + /* totally clipped */ + setup->quad.mask = 0x0; + return; + } + if (setup->quad.x0 < minx) + setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup->quad.y0 < miny) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup->quad.x0 == maxx - 1) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup->quad.y0 == maxy - 1) + setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_stage *setup) +{ + quad_clip(setup); + if (setup->quad.mask) { + struct softpipe_context *sp = setup->softpipe; + sp->quad.first->run(sp->quad.first, &setup->quad); + } +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + */ +static INLINE void +emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) +{ + struct softpipe_context *sp = setup->softpipe; + setup->quad.x0 = x; + setup->quad.y0 = y; + setup->quad.mask = mask; + sp->quad.first->run(sp->quad.first, &setup->quad); +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * + * this is pretty nasty... may need to rework flush_spans again to + * fix it, if possible. + */ +static unsigned calculate_mask( struct setup_stage *setup, int x ) +{ + unsigned mask = 0x0; + + if (x >= setup->span.left[0] && x < setup->span.right[0]) + mask |= MASK_TOP_LEFT; + + if (x >= setup->span.left[1] && x < setup->span.right[1]) + mask |= MASK_BOTTOM_LEFT; + + if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) + mask |= MASK_TOP_RIGHT; + + if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) + mask |= MASK_BOTTOM_RIGHT; + + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( struct setup_stage *setup ) +{ + int minleft, maxright; + int x; + + switch (setup->span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup->span.left[0], setup->span.left[1]); + maxright = MAX2(setup->span.right[0], setup->span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup->span.left[0]; + maxright = setup->span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup->span.left[1]; + maxright = setup->span.right[1]; + break; + + default: + return; + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { + emit_quad( setup, x, setup->span.y, + calculate_mask( setup, x ) ); + } + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct setup_stage *setup, + const struct vertex_header *v) +{ + int i; + debug_printf("Vertex: (%p)\n", v); + for (i = 0; i < setup->quad.nr_attrs; i++) { + debug_printf(" %d: %f %f %f %f\n", i, + v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + } +} +#endif + +static boolean setup_sort_vertices( struct setup_stage *setup, + const struct prim_header *prim ) +{ + const struct vertex_header *v0 = prim->v[0]; + const struct vertex_header *v1 = prim->v[1]; + const struct vertex_header *v2 = prim->v[2]; + +#if DEBUG_VERTS + debug_printf("Triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + + setup->vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = v0->data[0][1]; + float y1 = v1->data[0][1]; + float y2 = v2->data[0][1]; + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup->vmin = v0; + setup->vmid = v1; + setup->vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup->vmin = v2; + setup->vmid = v0; + setup->vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup->vmin = v0; + setup->vmid = v2; + setup->vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup->vmin = v1; + setup->vmid = v0; + setup->vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup->vmin = v2; + setup->vmid = v1; + setup->vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup->vmin = v1; + setup->vmid = v2; + setup->vmax = v0; + } + } + } + + setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0]; + setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1]; + setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; + setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; + setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0]; + setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1]; + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup->emaj.dx * setup->ebot.dy - + setup->ebot.dx * setup->emaj.dy); + + setup->oneoverarea = 1.0f / area; + /* + debug_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup->oneoverarea, area, prim->det ); + */ + } + + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex->data[slot][i]. + * The result will be put into setup->coef[slot].a0[i]. + * \param slot which attribute slot + * \param i which component of the slot (0..3) + */ +static void const_coeff( struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + + coef->dadx[i] = 0; + coef->dady[i] = 0; + + /* need provoking vertex info! + */ + coef->a0[i] = setup->vprovoke->data[vertSlot][i]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void tri_linear_coeff( struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); + + /* + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup->coef[slot].a0[i], + setup->coef[slot].dadx[i], + setup->coef[slot].dady[i]); + */ +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* premultiply by 1/w (v->data[0][3] is always W): + */ + float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3]; + float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; + float botda = mida - mina; + float majda = maxa - mina; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + /* + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin->data[vertSlot][i], + setup->vmid->data[vertSlot][i], + setup->vmax->data[vertSlot][i] + ); + */ + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_stage *setup) +{ + /*X*/ + setup->coef[0].a0[0] = 0; + setup->coef[0].dadx[0] = 1.0; + setup->coef[0].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height; + setup->coef[0].a0[1] = (float) (winHeight - 1); + setup->coef[0].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[0].a0[1] = 0.0; + setup->coef[0].dady[1] = 1.0; + } + setup->coef[0].dadx[1] = 0.0; + /*Z*/ + setup->coef[0].a0[2] = setup->posCoef.a0[2]; + setup->coef[0].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[0].dady[2] = setup->posCoef.dady[2]; + /*W*/ + setup->coef[0].a0[3] = setup->posCoef.a0[3]; + setup->coef[0].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[0].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients( struct setup_stage *setup ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* z and w are done by linear interpolation: + */ + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + assert(fragSlot == 0); + setup_fragcoord_coeff(setup); + break; + default: + assert(0); + } + + if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + + +static void setup_tri_edges( struct setup_stage *setup ) +{ + float vmin_x = setup->vmin->data[0][0] + 0.5f; + float vmid_x = setup->vmid->data[0][0] + 0.5f; + + float vmin_y = setup->vmin->data[0][1] - 0.5f; + float vmid_y = setup->vmid->data[0][1] - 0.5f; + float vmax_y = setup->vmax->data[0][1] - 0.5f; + + setup->emaj.sy = CEILF(vmin_y); + setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); + setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; + setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + + setup->etop.sy = CEILF(vmid_y); + setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); + setup->etop.dxdy = setup->etop.dx / setup->etop.dy; + setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + + setup->ebot.sy = CEILF(vmin_y); + setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); + setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; + setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct setup_stage *setup, + struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + assert((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup->span.y) { + flush_spans(setup); + setup->span.y = block(_y); + } + + setup->span.left[_y&1] = left; + setup->span.right[_y&1] = right; + setup->span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. + */ +static void setup_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct setup_stage *setup = setup_stage( stage ); + + /* + debug_printf("%s\n", __FUNCTION__ ); + */ + + setup_sort_vertices( setup, prim ); + setup_tri_coefficients( setup ); + setup_tri_edges( setup ); + + setup->quad.prim = PRIM_TRI; + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; + /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup->oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); + subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); + subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + } + + flush_spans( setup ); +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. + */ +static void +line_linear_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* XXX double-check/verify this arithmetic */ + const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; + const float da = a1 - a0; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. + */ +static INLINE void +setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* use setup->vmin, vmax to point to vertices */ + setup->vprovoke = prim->v[1]; + setup->vmin = prim->v[0]; + setup->vmax = prim->v[1]; + + setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; + setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; + /* NOTE: this is not really 1/area */ + setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + + setup->emaj.dy * setup->emaj.dy); + + /* z and w are done by linear interpolation: + */ + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + assert(fragSlot == 0); + assert(0); /* XXX fix this: */ + setup_fragcoord_coeff(setup); + break; + default: + assert(0); + } + + if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_stage *setup, int x, int y) +{ + const int iy = y & 1; + const int ix = x & 1; + const int quadX = x - ix; + const int quadY = y - iy; + const int mask = (1 << ix) << (2 * iy); + + if (quadX != setup->quad.x0 || + quadY != setup->quad.y0) + { + /* flush prev quad, start new quad */ + + if (setup->quad.x0 != -1) + clip_emit_quad(setup); + + setup->quad.x0 = quadX; + setup->quad.y0 = quadY; + setup->quad.mask = 0x0; + } + + setup->quad.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc. We rely on the 'draw' module + * to handle stippling and wide lines. + */ +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ + const struct vertex_header *v0 = prim->v[0]; + const struct vertex_header *v1 = prim->v[1]; + struct setup_stage *setup = setup_stage( stage ); + int x0 = (int) v0->data[0][0]; + int x1 = (int) v1->data[0][0]; + int y0 = (int) v0->data[0][1]; + int y1 = (int) v1->data[0][1]; + int dx = x1 - x0; + int dy = y1 - y0; + int xstep, ystep; + + if (dx == 0 && dy == 0) + return; + + setup_line_coefficients(setup, prim); + + if (dx < 0) { + dx = -dx; /* make positive */ + xstep = -1; + } + else { + xstep = 1; + } + + if (dy < 0) { + dy = -dy; /* make positive */ + ystep = -1; + } + else { + ystep = 1; + } + + assert(dx >= 0); + assert(dy >= 0); + + setup->quad.x0 = setup->quad.y0 = -1; + setup->quad.mask = 0x0; + setup->quad.prim = PRIM_LINE; + /* XXX temporary: set coverage to 1.0 so the line appears + * if AA mode happens to be enabled. + */ + setup->quad.coverage[0] = + setup->quad.coverage[1] = + setup->quad.coverage[2] = + setup->quad.coverage[3] = 1.0; + + if (dx > dy) { + /*** X-major line ***/ + int i; + const int errorInc = dy + dy; + int error = errorInc - dx; + const int errorDec = error - dx; + + for (i = 0; i < dx; i++) { + plot(setup, x0, y0); + + x0 += xstep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + y0 += ystep; + } + } + } + else { + /*** Y-major line ***/ + int i; + const int errorInc = dx + dx; + int error = errorInc - dy; + const int errorDec = error - dy; + + for (i = 0; i < dy; i++) { + plot(setup, x0, y0); + + y0 += ystep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + x0 += xstep; + } + } + } + + /* draw final quad */ + if (setup->quad.mask) { + clip_emit_quad(setup); + } +} + + +static void +point_persp_coeff(struct setup_stage *setup, + const struct vertex_header *vert, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + struct softpipe_context *softpipe = setup->softpipe; + const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct vertex_header *v0 = prim->v[0]; + const int sizeAttr = setup->softpipe->psize_slot; + const float size + = sizeAttr > 0 ? v0->data[sizeAttr][0] + : setup->softpipe->rasterizer->point_size; + const float halfSize = 0.5F * size; + const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; + const float x = v0->data[0][0]; /* Note: data[0] is always position */ + const float y = v0->data[0][1]; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* For points, all interpolants are constant-valued. + * However, for point sprites, we'll need to setup texcoords appropriately. + * XXX: which coefficients are the texcoords??? + * We may do point sprites as textured quads... + * + * KW: We don't know which coefficients are texcoords - ultimately + * the choice of what interpolation mode to use for each attribute + * should be determined by the fragment program, using + * per-attribute declaration statements that include interpolation + * mode as a parameter. So either the fragment program will have + * to be adjusted for pointsprite vs normal point behaviour, or + * otherwise a special interpolation mode will have to be defined + * which matches the required behaviour for point sprites. But - + * the latter is not a feature of normal hardware, and as such + * probably should be ruled out on that basis. + */ + setup->vprovoke = prim->v[0]; + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + assert(fragSlot == 0); + assert(0); /* XXX fix this: */ + setup_fragcoord_coeff(setup); + break; + default: + assert(0); + } + + if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + + setup->quad.prim = PRIM_POINT; + + if (halfSize <= 0.5 && !round) { + /* special case for 1-pixel points */ + const int ix = ((int) x) & 1; + const int iy = ((int) y) & 1; + setup->quad.x0 = (int) x - ix; + setup->quad.y0 = (int) y - iy; + setup->quad.mask = (1 << ix) << (2 * iy); + clip_emit_quad(setup); + } + else { + if (round) { + /* rounded points */ + const int ixmin = block((int) (x - halfSize)); + const int ixmax = block((int) (x + halfSize)); + const int iymin = block((int) (y - halfSize)); + const int iymax = block((int) (y + halfSize)); + const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ + const float rmax = halfSize + 0.7071F; + const float rmin2 = MAX2(0.0F, rmin * rmin); + const float rmax2 = rmax * rmax; + const float cscale = 1.0F / (rmax2 - rmin2); + int ix, iy; + + for (iy = iymin; iy <= iymax; iy += 2) { + for (ix = ixmin; ix <= ixmax; ix += 2) { + float dx, dy, dist2, cover; + + setup->quad.mask = 0x0; + + dx = (ix + 0.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_RIGHT; + } + + dx = (ix + 0.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_RIGHT; + } + + if (setup->quad.mask) { + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } + else { + /* square points */ + const int xmin = (int) (x + 0.75 - halfSize); + const int ymin = (int) (y + 0.25 - halfSize); + const int xmax = xmin + (int) size; + const int ymax = ymin + (int) size; + /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ + const int ixmin = block(xmin); + const int ixmax = block(xmax - 1); + const int iymin = block(ymin); + const int iymax = block(ymax - 1); + int ix, iy; + + /* + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + */ + for (iy = iymin; iy <= iymax; iy += 2) { + uint rowMask = 0xf; + if (iy < ymin) { + /* above the top edge */ + rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + } + if (iy + 1 >= ymax) { + /* below the bottom edge */ + rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + } + + for (ix = ixmin; ix <= ixmax; ix += 2) { + uint mask = rowMask; + + if (ix < xmin) { + /* fragment is past left edge of point, turn off left bits */ + mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + } + if (ix + 1 >= xmax) { + /* past the right edge */ + mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + } + + setup->quad.mask = mask; + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } +} + + + +static void setup_begin( struct draw_stage *stage ) +{ + struct setup_stage *setup = setup_stage(stage); + struct softpipe_context *sp = setup->softpipe; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + + setup->quad.nr_attrs = fs->num_inputs; + + sp->quad.first->begin(sp->quad.first); + + stage->point = setup_point; + stage->line = setup_line; + stage->tri = setup_tri; +} + + +static void setup_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->point( stage, header ); +} + +static void setup_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->line( stage, header ); +} + + +static void setup_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->tri( stage, header ); +} + + + +static void setup_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->point = setup_first_point; + stage->line = setup_first_line; + stage->tri = setup_first_tri; +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. + */ +struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) +{ + struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + + setup->softpipe = softpipe; + setup->stage.draw = softpipe->draw; + setup->stage.point = setup_first_point; + setup->stage.line = setup_first_line; + setup->stage.tri = setup_first_tri; + setup->stage.flush = setup_flush; + setup->stage.reset_stipple_counter = reset_stipple_counter; + setup->stage.destroy = render_destroy; + + setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; + + return &setup->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h new file mode 100644 index 0000000000..f3e8a79dd9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SP_PRIM_SETUP_H +#define SP_PRIM_SETUP_H + + +/** + * vbuf is a special stage to gather the stream of triangles, lines, points + * together and reconstruct vertex buffers for hardware upload. + * + * First attempt, work in progress. + * + * TODO: + * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. + * - tell vbuf stage how to build hw vertices directly + * - pass vbuf stage a buffer pointer for direct emit to agp/vram. + * + * + * + * Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + + +struct draw_stage; +struct softpipe_context; + + +typedef void (*vbuf_draw_func)( struct pipe_context *pipe, + unsigned prim, + const ushort *elements, + unsigned nr_elements, + const void *vertex_buffer, + unsigned nr_vertices ); + + +extern struct draw_stage * +sp_draw_render_stage( struct softpipe_context *softpipe ); + + +extern struct draw_stage * +sp_draw_vbuf_stage( struct draw_context *draw_context, + struct pipe_context *pipe, + vbuf_draw_func draw ); + + +#endif /* SP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c new file mode 100644 index 0000000000..7f71fdb6a9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -0,0 +1,221 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Post-transform vertex buffering. This is an optional part of the + * softpipe rendering pipeline. + * Probably not desired in general, but useful for testing/debuggin. + * Enabled/Disabled with SP_VBUF env var. + * + * Authors + * Brian Paul + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_prim_vbuf.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_vbuf.h" + + +#define SP_MAX_VBUF_INDEXES 1024 +#define SP_MAX_VBUF_SIZE 4096 + + +/** + * Subclass of vbuf_render. + */ +struct softpipe_vbuf_render +{ + struct vbuf_render base; + struct softpipe_context *softpipe; + uint prim; + uint vertex_size; + void *vertex_buffer; +}; + + +/** cast wrapper */ +static struct softpipe_vbuf_render * +softpipe_vbuf_render(struct vbuf_render *vbr) +{ + return (struct softpipe_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +sp_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + return softpipe_get_vbuf_vertex_info(cvbr->softpipe); +} + + +static void * +sp_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + assert(!cvbr->vertex_buffer); + cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + align_free(vertices); + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + +static void +sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + cvbr->prim = prim; +} + + +/** + * Recalculate prim's determinant. + * XXX is this needed? + */ +static void +calc_det(struct prim_header *header) +{ + /* Window coords: */ + const float *v0 = header->v[0]->data[0]; + const float *v1 = header->v[1]->data[0]; + const float *v2 = header->v[2]->data[0]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + header->det = ex * fy - ey * fx; +} + + +static void +sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + struct draw_stage *setup = softpipe->setup; + struct prim_header prim; + unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); + unsigned i, j; + void *vertex_buffer = cvbr->vertex_buffer; + + prim.det = 0; + prim.reset_line_stipple = 0; + prim.edgeflags = 0; + prim.pad = 0; + + switch (cvbr->prim) { + case PIPE_PRIM_TRIANGLES: + for (i = 0; i < nr_indices; i += 3) { + for (j = 0; j < 3; j++) + prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i < nr_indices; i += 2) { + for (j = 0; j < 2; j++) + prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup->line( setup, &prim ); + } + break; + + case PIPE_PRIM_POINTS: + for (i = 0; i < nr_indices; i++) { + prim.v[0] = (struct vertex_header *)((char *)vertex_buffer + + indices[i] * vertex_size); + setup->point( setup, &prim ); + } + break; + } + + setup->flush( setup, 0 ); +} + + +static void +sp_vbuf_destroy(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + cvbr->softpipe->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +sp_init_vbuf(struct softpipe_context *sp) +{ + assert(sp->draw); + + sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render); + + sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES; + sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; + + sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; + sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; + sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; + sp->vbuf_render->base.draw = sp_vbuf_draw; + sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; + sp->vbuf_render->base.destroy = sp_vbuf_destroy; + + sp->vbuf_render->softpipe = sp; + + sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); + + draw_set_rasterize_stage(sp->draw, sp->vbuf); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h new file mode 100644 index 0000000000..1de9cc2a89 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_VBUF_H +#define SP_VBUF_H + + +struct softpipe_context; + +extern void +sp_init_vbuf(struct softpipe_context *softpipe); + + +#endif /* SP_VBUF_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c new file mode 100644 index 0000000000..6bd468a51c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/p_shader_tokens.h" + +static void +sp_push_quad_first( + struct softpipe_context *sp, + struct quad_stage *quad ) +{ + quad->next = sp->quad.first; + sp->quad.first = quad; +} + +static void +sp_build_depth_stencil( + struct softpipe_context *sp ) +{ + if (sp->depth_stencil->stencil[0].enabled || + sp->depth_stencil->stencil[1].enabled) { + sp_push_quad_first( sp, sp->quad.stencil_test ); + } + else if (sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf) { + sp_push_quad_first( sp, sp->quad.depth_test ); + } +} + +void +sp_build_quad_pipeline(struct softpipe_context *sp) +{ + boolean early_depth_test = + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION; + + /* build up the pipeline in reverse order... */ + + sp->quad.first = sp->quad.output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad.colormask ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad.blend ); + } + + if (sp->framebuffer.num_cbufs == 1) { + /* the usual case: write to exactly one colorbuf */ + sp->current_cbuf = 0; + } + else { + /* insert bufloop stage */ + sp_push_quad_first( sp, sp->quad.bufloop ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad.occlusion ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad.coverage ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad.alpha_test ); + } + + /* XXX always enable shader? */ + if (1) { + sp_push_quad_first( sp, sp->quad.shade ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp ); + sp_push_quad_first( sp, sp->quad.earlyz ); + } + + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad.polygon_stipple ); + } +} diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h new file mode 100644 index 0000000000..f1e0281764 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef SP_QUAD_H +#define SP_QUAD_H + + +struct softpipe_context; +struct quad_header; + + +struct quad_stage { + struct softpipe_context *softpipe; + + struct quad_stage *next; + + void (*begin)(struct quad_stage *qs); + + /** the stage action */ + void (*run)(struct quad_stage *qs, struct quad_header *quad); + + void (*destroy)(struct quad_stage *qs); +}; + + +struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); + +void sp_build_quad_pipeline(struct softpipe_context *sp); + +void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); + +#endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c new file mode 100644 index 0000000000..4ffeac35e1 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -0,0 +1,108 @@ + +/** + * quad alpha test + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + + +static void +alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + const float ref = softpipe->depth_stencil->alpha.ref; + unsigned passMask = 0x0, j; + const float *aaaa = quad->outputs.color[3]; + + switch (softpipe->depth_stencil->alpha.func) { + case PIPE_FUNC_NEVER: + quad->mask = 0x0; + break; + case PIPE_FUNC_LESS: + /* + * If mask were an array [4] we could do this SIMD-style: + * passMask = (quad->outputs.color[3] <= vec4(ref)); + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] < ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] == ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] <= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] > ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] != ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] >= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + abort(); + } + + quad->mask &= passMask; + + if (quad->mask) + qs->next->run(qs->next, quad); +} + + +static void alpha_test_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void alpha_test_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = alpha_test_begin; + stage->run = alpha_test_quad; + stage->destroy = alpha_test_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c new file mode 100644 index 0000000000..17f3ecd0b8 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -0,0 +1,749 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * quad blending + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" + + +#define VEC4_COPY(DST, SRC) \ +do { \ + DST[0] = SRC[0]; \ + DST[1] = SRC[1]; \ + DST[2] = SRC[2]; \ + DST[3] = SRC[3]; \ +} while(0) + +#define VEC4_SCALAR(DST, SRC) \ +do { \ + DST[0] = SRC; \ + DST[1] = SRC; \ + DST[2] = SRC; \ + DST[3] = SRC; \ +} while(0) + +#define VEC4_ADD(R, A, B) \ +do { \ + R[0] = A[0] + B[0]; \ + R[1] = A[1] + B[1]; \ + R[2] = A[2] + B[2]; \ + R[3] = A[3] + B[3]; \ +} while (0) + +#define VEC4_SUB(R, A, B) \ +do { \ + R[0] = A[0] - B[0]; \ + R[1] = A[1] - B[1]; \ + R[2] = A[2] - B[2]; \ + R[3] = A[3] - B[3]; \ +} while (0) + +#define VEC4_MUL(R, A, B) \ +do { \ + R[0] = A[0] * B[0]; \ + R[1] = A[1] * B[1]; \ + R[2] = A[2] * B[2]; \ + R[3] = A[3] * B[3]; \ +} while (0) + +#define VEC4_MIN(R, A, B) \ +do { \ + R[0] = (A[0] < B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] < B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] < B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] < B[3]) ? A[3] : B[3]; \ +} while (0) + +#define VEC4_MAX(R, A, B) \ +do { \ + R[0] = (A[0] > B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] > B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] > B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] > B[3]) ? A[3] : B[3]; \ +} while (0) + + + +static void +logicop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float dest[4][QUAD_SIZE]; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ + + UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ + } + + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } + + for (j = 0; j < 4; j++) { + quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); + quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); + quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); + quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + } + + /* pass quad to next stage */ + qs->next->run(qs->next, quad); +} + + + + +static void +blend_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + static const float zero[4] = { 0, 0, 0, 0 }; + static const float one[4] = { 1, 1, 1, 1 }; + float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color; + uint i, j; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quad); + return; + } + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[0], alpha, diff); /* R */ + VEC4_MIN(source[1], alpha, diff); /* G */ + VEC4_MIN(source[2], alpha, diff); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + abort(); + } + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[3], alpha, diff); /* A */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + abort(); + } + + + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); + } + + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); + } + + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + abort(); + } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); +} + + +static void blend_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void blend_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = blend_begin; + stage->run = blend_quad; + stage->destroy = blend_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c new file mode 100644 index 0000000000..2ae4e22a7d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -0,0 +1,72 @@ + +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + + +/** + * Loop over colorbuffers, passing quad to next stage each time. + */ +static void +cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float tmp[4][QUAD_SIZE]; + unsigned i; + + assert(sizeof(quad->outputs.color) == sizeof(tmp)); + assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS); + + /* make copy of original colors since they can get modified + * by blending and masking. + * XXX we won't have to do this if the fragment program actually emits + * N separate colors and we're drawing to N color buffers (MRT). + * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is + * in effect, we need to save/restore colors like this. + */ + memcpy(tmp, quad->outputs.color, sizeof(tmp)); + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + /* set current cbuffer */ + softpipe->current_cbuf = i; + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); + + /* restore quad's colors for next buffer */ + memcpy(quad->outputs.color, tmp, sizeof(tmp)); + } +} + + +static void cbuf_loop_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void cbuf_loop_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +/** + * Create the colorbuffer loop stage. + * This is used to implement multiple render targets and GL_FRONT_AND_BACK + * rendering. + */ +struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = cbuf_loop_begin; + stage->run = cbuf_loop_quad; + stage->destroy = cbuf_loop_destroy; + + return stage; +} + diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c new file mode 100644 index 0000000000..1f09d900ca --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief quad colormask stage + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + + +/** + * XXX colormask could be rolled into blending... + */ +static void +colormask_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* R */ + if (!(softpipe->blend->colormask & PIPE_MASK_R)) + COPY_4V(quadColor[0], dest[0]); + + /* G */ + if (!(softpipe->blend->colormask & PIPE_MASK_G)) + COPY_4V(quadColor[1], dest[1]); + + /* B */ + if (!(softpipe->blend->colormask & PIPE_MASK_B)) + COPY_4V(quadColor[2], dest[2]); + + /* A */ + if (!(softpipe->blend->colormask & PIPE_MASK_A)) + COPY_4V(quadColor[3], dest[3]); + + /* pass quad to next stage */ + qs->next->run(qs->next, quad); +} + + +static void colormask_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void colormask_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = colormask_begin; + stage->run = colormask_quad; + stage->destroy = colormask_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c new file mode 100644 index 0000000000..b3d3fae22f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -0,0 +1,88 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \brief Apply AA coverage to quad alpha valus + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" + + +/** + * Multiply quad's alpha values by the fragment coverage. + */ +static void +coverage_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + + if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) || + (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) || + (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) { + float (*quadColor)[4] = quad->outputs.color; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->coverage[j] >= 0.0); + assert(quad->coverage[j] <= 1.0); + quadColor[3][j] *= quad->coverage[j]; + } + } + + qs->next->run(qs->next, quad); +} + + +static void coverage_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void coverage_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = coverage_begin; + stage->run = coverage_quad; + stage->destroy = coverage_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c new file mode 100644 index 0000000000..a9a0754f27 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Quad depth testing + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Do depth testing for a quad. + * Not static since it's used by the stencil code. + */ + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... + */ + +void +sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct pipe_surface *ps = softpipe->framebuffer.zsbuf; + const enum pipe_format format = ps->format; + unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ + unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ + unsigned zmask = 0; + unsigned j; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + + assert(ps); /* shouldn't get here if there's no zbuffer */ + + /* + * Convert quad's float depth values to int depth values (qzzzz). + * If the Z buffer stores integer values, we _have_ to do the depth + * compares with integers (not floats). Otherwise, the float->int->float + * conversion of Z values (which isn't an identity function) will cause + * Z-fighting errors. + * + * Also, get the zbuffer values (bzzzz) from the cached tile. + */ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + { + float scale = 65535.0; + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth16[y][x]; + } + } + break; + case PIPE_FORMAT_Z32_UNORM: + { + double scale = (double) (uint) ~0UL; + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x]; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + { + float scale = (float) ((1 << 24) - 1); + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + } + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + { + float scale = (float) ((1 << 24) - 1); + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x] >> 8; + } + } + break; + default: + assert(0); + } + + switch (softpipe->depth_stencil->depth.func) { + case PIPE_FUNC_NEVER: + /* zmask = 0 */ + break; + case PIPE_FUNC_LESS: + /* Note this is pretty much a single sse or cell instruction. + * Like this: quad->mask &= (quad->outputs.depth < zzzz); + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] < bzzzz[j]) + zmask |= 1 << j; + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] == bzzzz[j]) + zmask |= 1 << j; + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] <= bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] > bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] != bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] >= bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_ALWAYS: + zmask = MASK_ALL; + break; + default: + abort(); + } + + quad->mask &= zmask; + + if (softpipe->depth_stencil->depth.writemask) { + + /* This is also efficient with sse / spe instructions: + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->mask & (1 << j)) { + bzzzz[j] = qzzzz[j]; + } + } + + /* put updated Z values back into cached tile */ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + tile->data.depth16[y][x] = (ushort) bzzzz[j]; + } + break; + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = bzzzz[j]; + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 24); + tile->data.depth32[y][x] = z24s8; + } + break; + default: + assert(0); + } + } +} + + +static void +depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + sp_depth_test_quad(qs, quad); + + if (quad->mask) + qs->next->run(qs->next, quad); +} + + +static void depth_test_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void depth_test_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = depth_test_begin; + stage->run = depth_test_quad; + stage->destroy = depth_test_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c new file mode 100644 index 0000000000..22ea99049f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -0,0 +1,88 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Quad early-z testing + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_headers.h" +#include "sp_quad.h" + + +/** + * All this stage does is compute the quad's Z values (which is normally + * done by the shading stage). + * The next stage will do the actual depth test. + */ +static void +earlyz_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->outputs.depth[0] = z0; + quad->outputs.depth[1] = z0 + dzdx; + quad->outputs.depth[2] = z0 + dzdy; + quad->outputs.depth[3] = z0 + dzdx + dzdy; + + qs->next->run( qs->next, quad ); +} + +static void +earlyz_begin( + struct quad_stage *qs ) +{ + qs->next->begin( qs->next ); +} + +static void +earlyz_destroy( + struct quad_stage *qs ) +{ + FREE( qs ); +} + +struct quad_stage * +sp_quad_earlyz_stage( + struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); + + stage->softpipe = softpipe; + stage->begin = earlyz_begin; + stage->run = earlyz_quad; + stage->destroy = earlyz_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c new file mode 100644 index 0000000000..3316858413 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -0,0 +1,390 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "x86/rtasm/x86sse.h" + +#ifdef MESA_LLVM +#include "pipe/llvm/gallivm.h" +#endif + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ + struct quad_stage stage; + struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct tgsi_exec_machine machine; + struct tgsi_exec_vector *inputs, *outputs; + int colorOutSlot, depthOutSlot; +#ifdef MESA_LLVM + struct gallivm_prog *llvm_prog; +#endif +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ + return (struct quad_shade_stage *) qs; +} + + +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * Note that we only need to "compute" X and Y for the upper-left fragment. + * We could do less work if we're not depth testing, or there's no + * perspective-corrected attributes, but that's seldom. + */ +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + /* do Y */ + quadpos->xyzw[1].f[0] = y; + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + +typedef void (XSTDCALL *codegen_function)( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], + struct tgsi_exec_vector *temporary, + const struct tgsi_interp_coef *coef +#if 0 + ,const struct tgsi_exec_vector *quadPos +#endif + ); + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = &qss->machine; + + /* Consts do not require 16 byte alignment. */ + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + machine->InterpCoefs = quad->coef; + + /* Compute X, Y, Z, W vals for this quad */ + setup_pos_vector(quad->posCoef, (float) quad->x0, (float) quad->y0, &machine->QuadPos); + + /* run shader */ +#if defined(__i386__) || defined(__386__) + if( softpipe->use_sse ) { + codegen_function func = (codegen_function) x86_get_func( &softpipe->fs->sse2_program ); + func( + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps, + machine->InterpCoefs +#if 0 + ,machine->QuadPos +#endif + ); + quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); + } + else +#endif + { + quad->mask &= tgsi_exec_machine_run( machine ); + } + + /* store result color */ + if (qss->colorOutSlot >= 0) { + /* XXX need to handle multiple color outputs someday */ + assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + memcpy( + quad->outputs.color, + &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], + sizeof( quad->outputs.color ) ); + } + + /* + * XXX the following code for updating quad->outputs.depth + * isn't really needed if we did early z testing. + */ + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; + /* XXX not sure the above line is always correct. The following + * might be better: + quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; + */ + } + } + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} + +#if 0 +#ifdef MESA_LLVM +#define DLLVM 0 +static void +shade_quad_llvm(struct quad_stage *qs, + struct quad_header *quad) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + float dests[4][16][4] ALIGN16_ATTRIB; + float inputs[4][16][4] ALIGN16_ATTRIB; + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + struct gallivm_prog *llvm = qss->llvm_prog; + + inputs[0][0][0] = fx; + inputs[1][0][0] = fx + 1.0f; + inputs[2][0][0] = fx; + inputs[3][0][0] = fx + 1.0f; + + inputs[0][0][1] = fy; + inputs[1][0][1] = fy; + inputs[2][0][1] = fy + 1.0f; + inputs[3][0][1] = fy + 1.0f; +#if DLLVM + debug_printf("MASK = %d\n", quad->mask); +#endif + gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); +#if DLLVM + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 2; ++j) { + debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, + inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); + } + } +#endif + + quad->mask &= + gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, + softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], + qss->samplers); +#if DLLVM + debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", + dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], + dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); +#endif + + /* store result color */ + if (qss->colorOutSlot >= 0) { + unsigned i; + /* XXX need to handle multiple color outputs someday */ + assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + for (i = 0; i < QUAD_SIZE; ++i) { + quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0]; + quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1]; + quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2]; + quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3]; + } + } +#if DLLVM + for (int i = 0; i < QUAD_SIZE; ++i) { + debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, + quad->outputs.color[0][i], + quad->outputs.color[1][i], + quad->outputs.color[2][i], + quad->outputs.color[3][i]); + } +#endif + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = dests[i][0][2]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = inputs[i][0][2]; + } + } +#if DLLVM + debug_printf("D [%f, %f, %f, %f] mask = %d\n", + quad->outputs.depth[0], + quad->outputs.depth[1], + quad->outputs.depth[2], + quad->outputs.depth[3], quad->mask); +#endif + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} +#endif /*MESA_LLVM*/ +#endif + +/** + * Per-primitive (or per-begin?) setup + */ +static void shade_begin(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + unsigned i; + + /* set TGSI sampler state that varies */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + qss->samplers[i].state = softpipe->sampler[i]; + qss->samplers[i].texture = &softpipe->texture[i]->base; + } + +#ifdef MESA_LLVM + qss->llvm_prog = softpipe->fs->llvm_prog; +#endif + /* XXX only do this if the fragment shader changes... */ + tgsi_exec_machine_init(&qss->machine, + softpipe->fs->shader.tokens, + PIPE_MAX_SAMPLERS, + qss->samplers ); + + /* find output slots for depth, color */ + qss->colorOutSlot = -1; + qss->depthOutSlot = -1; + for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) { + switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + qss->depthOutSlot = i; + break; + case TGSI_SEMANTIC_COLOR: + qss->colorOutSlot = i; + break; + } + } + + qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + + tgsi_exec_machine_free_data(&qss->machine); + FREE( qss->inputs ); + FREE( qss->outputs ); + FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ + struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + uint i; + + /* allocate storage for program inputs/outputs, aligned to 16 bytes */ + qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); + qss->machine.Inputs = align16(qss->inputs); + qss->machine.Outputs = align16(qss->outputs); + + qss->stage.softpipe = softpipe; + qss->stage.begin = shade_begin; +#ifdef MESA_LLVM + /* disable until ported to accept + * x/y and soa layout + qss->stage.run = shade_quad_llvm; + */ + softpipe->use_sse = FALSE; + qss->stage.run = shade_quad; +#else + qss->stage.run = shade_quad; +#endif + qss->stage.destroy = shade_destroy; + + /* set TGSI sampler state that's constant */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + assert(softpipe->tex_cache[i]); + qss->samplers[i].get_samples = sp_get_samples; + qss->samplers[i].pipe = &softpipe->pipe; + qss->samplers[i].cache = softpipe->tex_cache[i]; + } + + return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c new file mode 100644 index 0000000000..54254df1f1 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \brief Quad occlusion counter stage + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + +static unsigned count_bits( unsigned val ) +{ + unsigned i; + + for (i = 0; val ; val >>= 1) + i += (val & 1); + + return i; +} + +static void +occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + + softpipe->occlusion_count += count_bits(quad->mask); + + qs->next->run(qs->next, quad); +} + + +static void occlusion_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void occlusion_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = occlusion_begin; + stage->run = occlusion_count_quad; + stage->destroy = occlusion_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c new file mode 100644 index 0000000000..cfe8f11808 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -0,0 +1,90 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Write quad to framebuffer, taking mask into account. + * + * Note that surfaces support only full quad reads and writes. + */ +static void +output_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + /* in-tile pos: */ + const int itx = quad->x0 % TILE_SIZE; + const int ity = quad->y0 % TILE_SIZE; + float (*quadColor)[4] = quad->outputs.color; + int i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } + } +} + + +static void output_begin(struct quad_stage *qs) +{ + assert(qs->next == NULL); +} + + +static void output_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = output_begin; + stage->run = output_quad; + stage->destroy = output_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c new file mode 100644 index 0000000000..92a0da0083 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -0,0 +1,352 @@ + +/** + * \brief Quad stencil testing + */ + + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. + * + * \param stencilVals the stencil values from the stencil buffer + * \param func the stencil func (PIPE_FUNC_x) + * \param ref the stencil reference value + * \param valMask the stencil value mask indicating which bits of the stencil + * values and ref value are to be used. + * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, + unsigned ref, unsigned valMask) +{ + unsigned passMask = 0x0; + unsigned j; + + ref &= valMask; + + switch (func) { + case PIPE_FUNC_NEVER: + /* passMask = 0x0 */ + break; + case PIPE_FUNC_LESS: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref < (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref == (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref <= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref > (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref != (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref >= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + return passMask; +} + + +/** + * Apply the stencil operator to stencil values. + * + * \param stencilVals the stencil buffer values (read and written) + * \param mask indicates which pixels to update + * \param op the stencil operator (PIPE_STENCIL_OP_x) + * \param ref the stencil reference value + * \param wrtMask writemask controlling which bits are changed in the + * stencil values + */ +static void +apply_stencil_op(ubyte stencilVals[QUAD_SIZE], + unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ + unsigned j; + ubyte newstencil[QUAD_SIZE]; + + for (j = 0; j < QUAD_SIZE; j++) { + newstencil[j] = stencilVals[j]; + } + + switch (op) { + case PIPE_STENCIL_OP_KEEP: + /* no-op */ + break; + case PIPE_STENCIL_OP_ZERO: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = 0; + } + } + break; + case PIPE_STENCIL_OP_REPLACE: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ref; + } + } + break; + case PIPE_STENCIL_OP_INCR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] < STENCIL_MAX) { + newstencil[j] = stencilVals[j] + 1; + } + } + } + break; + case PIPE_STENCIL_OP_DECR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] > 0) { + newstencil[j] = stencilVals[j] - 1; + } + } + } + break; + case PIPE_STENCIL_OP_INCR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] + 1; + } + } + break; + case PIPE_STENCIL_OP_DECR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] - 1; + } + } + break; + case PIPE_STENCIL_OP_INVERT: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ~stencilVals[j]; + } + } + break; + default: + assert(0); + } + + /* + * update the stencil values + */ + if (wrtMask != STENCIL_MAX) { + /* apply bit-wise stencil buffer writemask */ + for (j = 0; j < QUAD_SIZE; j++) { + stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); + } + } + else { + for (j = 0; j < QUAD_SIZE; j++) { + stencilVals[j] = newstencil[j]; + } + } +} + + +/** + * Do stencil (and depth) testing. Stenciling depends on the outcome of + * depth testing. + */ +static void +stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct pipe_surface *ps = softpipe->framebuffer.zsbuf; + unsigned func, zFailOp, zPassOp, failOp; + ubyte ref, wrtMask, valMask; + ubyte stencilVals[QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + uint j; + uint face = quad->facing; + + if (!softpipe->depth_stencil->stencil[1].enabled) { + /* single-sided stencil test, use front (face=0) state */ + face = 0; + } + + /* choose front or back face function, operator, etc */ + /* XXX we could do these initializations once per primitive */ + func = softpipe->depth_stencil->stencil[face].func; + failOp = softpipe->depth_stencil->stencil[face].fail_op; + zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; + zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; + ref = softpipe->depth_stencil->stencil[face].ref_value; + wrtMask = softpipe->depth_stencil->stencil[face].write_mask; + valMask = softpipe->depth_stencil->stencil[face].value_mask; + + assert(ps); /* shouldn't get here if there's no stencil buffer */ + + /* get stencil values from cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + case PIPE_FORMAT_U_S8: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.stencil8[y][x]; + } + break; + default: + assert(0); + } + + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(stencilVals, func, ref, valMask); + failMask = quad->mask & ~passMask; + quad->mask &= passMask; + + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + } + } + + if (quad->mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->mask; + + sp_depth_test_quad(qs, quad); /* quad->mask is updated */ + + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->mask; + apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); + } + + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->mask; + apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + } + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(stencilVals, quad->mask, zPassOp, ref, wrtMask); + } + + } + + /* put new stencil values into cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; + tile->data.depth32[y][x] = z24s8; + } + break; + case PIPE_FORMAT_U_S8: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = stencilVals[j]; + } + break; + default: + assert(0); + } + + if (quad->mask) + qs->next->run(qs->next, quad); +} + + +static void stencil_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stencil_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stencil_begin; + stage->run = stencil_test_quad; + stage->destroy = stencil_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c new file mode 100644 index 0000000000..8660432259 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -0,0 +1,94 @@ + +/** + * quad polygon stipple stage + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + + +/** + * Apply polygon stipple to quads produced by triangle rasterization + */ +static void +stipple_quad(struct quad_stage *qs, struct quad_header *quad) +{ + static const uint bit31 = 1 << 31; + static const uint bit30 = 1 << 30; + + if (quad->prim == PRIM_TRI) { + struct softpipe_context *softpipe = qs->softpipe; + /* need to invert Y to index into OpenGL's stipple pattern */ + int y0, y1; + uint stipple0, stipple1; + if (softpipe->rasterizer->origin_lower_left) { + y0 = softpipe->framebuffer.cbufs[0]->height - 1 - quad->y0; + y1 = y0 - 1; + } + else { + y0 = quad->y0; + y1 = y0 + 1; + } + stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + +#if 1 + { + const int col0 = quad->x0 % 32; + if ((stipple0 & (bit31 >> col0)) == 0) + quad->mask &= ~MASK_TOP_LEFT; + + if ((stipple0 & (bit30 >> col0)) == 0) + quad->mask &= ~MASK_TOP_RIGHT; + + if ((stipple1 & (bit31 >> col0)) == 0) + quad->mask &= ~MASK_BOTTOM_LEFT; + + if ((stipple1 & (bit30 >> col0)) == 0) + quad->mask &= ~MASK_BOTTOM_RIGHT; + } +#else + /* We'd like to use this code, but we'd need to redefine + * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0), + * and similarly for the BOTTOM bits. But that may have undesirable + * side effects elsewhere. + */ + const int col0 = 30 - (quad->x0 % 32); + quad->mask &= (((stipple0 >> col0) & 0x3) | + (((stipple1 >> col0) & 0x3) << 2)); +#endif + if (!quad->mask) + return; + } + + qs->next->run(qs->next, quad); +} + + +static void stipple_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stipple_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stipple_begin; + stage->run = stipple_quad; + stage->destroy = stipple_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c new file mode 100644 index 0000000000..6a8a43aeda --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + +#include "pipe/draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_query.h" + +struct softpipe_query { + uint64 start; + uint64 end; +}; + + +static struct softpipe_query *softpipe_query( struct pipe_query *p ) +{ + return (struct softpipe_query *)p; +} + +static struct pipe_query * +softpipe_create_query(struct pipe_context *pipe, + unsigned type) +{ + assert(type == PIPE_QUERY_OCCLUSION_COUNTER); + return (struct pipe_query *)CALLOC_STRUCT( softpipe_query ); +} + + +static void +softpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ + FREE(q); +} + + +static void +softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->start = softpipe->occlusion_count; +} + + +static void +softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->end = softpipe->occlusion_count; +} + + +static boolean +softpipe_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64 *result ) +{ + struct softpipe_query *sq = softpipe_query(q); + *result = sq->end - sq->start; + return TRUE; +} + + +void softpipe_init_query_funcs(struct softpipe_context *softpipe ) +{ + softpipe->pipe.create_query = softpipe_create_query; + softpipe->pipe.destroy_query = softpipe_destroy_query; + softpipe->pipe.begin_query = softpipe_begin_query; + softpipe->pipe.end_query = softpipe_end_query; + softpipe->pipe.get_query_result = softpipe_get_query_result; +} + + diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h new file mode 100644 index 0000000000..05060a4575 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + +#ifndef SP_QUERY_H +#define SP_QUERY_H + +struct softpipe_context; +extern void softpipe_init_query_funcs(struct softpipe_context * ); + + +#endif /* SP_QUERY_H */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h new file mode 100644 index 0000000000..b79db0d1f1 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -0,0 +1,187 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef SP_STATE_H +#define SP_STATE_H + +#include "pipe/p_state.h" + +#include "x86/rtasm/x86sse.h" + + +#define SP_NEW_VIEWPORT 0x1 +#define SP_NEW_RASTERIZER 0x2 +#define SP_NEW_FS 0x4 +#define SP_NEW_BLEND 0x8 +#define SP_NEW_CLIP 0x10 +#define SP_NEW_SCISSOR 0x20 +#define SP_NEW_STIPPLE 0x40 +#define SP_NEW_FRAMEBUFFER 0x80 +#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define SP_NEW_CONSTANTS 0x200 +#define SP_NEW_SAMPLER 0x400 +#define SP_NEW_TEXTURE 0x800 +#define SP_NEW_VERTEX 0x1000 +#define SP_NEW_VS 0x2000 +#define SP_NEW_QUERY 0x4000 + + + +#ifdef MESA_LLVM +struct gallivm_prog; +#endif + + +struct vertex_info; + + +/** Subclass of pipe_shader_state */ +struct sp_fragment_shader_state { + struct pipe_shader_state shader; +#if defined(__i386__) || defined(__386__) + struct x86_function sse2_program; +#endif +#ifdef MESA_LLVM + struct gallivm_prog *llvm_prog; +#endif +}; + + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader_state { + struct pipe_shader_state shader; + struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, + const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, + void *); +void softpipe_delete_blend_state(struct pipe_context *, + void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); +void softpipe_bind_sampler_state(struct pipe_context *, unsigned, void *); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + +void softpipe_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void softpipe_set_sampler_texture( struct pipe_context *, + unsigned unit, + struct pipe_texture * ); + +void softpipe_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + +void softpipe_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + +void +softpipe_map_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c new file mode 100644 index 0000000000..2d40d6bd8f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -0,0 +1,98 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_state.h" + + +void * +softpipe_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + +void softpipe_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend = (const struct pipe_blend_state *)blend; + + softpipe->dirty |= SP_NEW_BLEND; +} + +void softpipe_delete_blend_state(struct pipe_context *pipe, + void *blend) +{ + FREE( blend ); +} + + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend_color = *blend_color; + + softpipe->dirty |= SP_NEW_BLEND; +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. + */ + + +void * +softpipe_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + +void +softpipe_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; +} + +void +softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + FREE( depth ); +} diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c new file mode 100644 index 0000000000..08c5f06d05 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/draw/draw_context.h" + + +void softpipe_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(softpipe->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +void softpipe_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->viewport = *viewport; /* struct copy */ + softpipe->dirty |= SP_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(softpipe->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. + */ +} + + +void softpipe_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + memcpy( &softpipe->scissor, scissor, sizeof(*scissor) ); + softpipe->dirty |= SP_NEW_SCISSOR; +} + + +void softpipe_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + memcpy( &softpipe->poly_stipple, stipple, sizeof(*stipple) ); + softpipe->dirty |= SP_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c new file mode 100644 index 0000000000..372597869f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -0,0 +1,235 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/draw/draw_private.h" +#include "sp_context.h" +#include "sp_state.h" + + +/** + * Search vertex program's outputs to find a match for the given + * semantic name/index. Return the index of the output slot. + * + * Return 0 if not found. This will cause the fragment program to use + * vertex attrib 0 (position) in the cases where the fragment program + * attempts to use a missing vertex program output. This is an undefined + * condition that users shouldn't hit anyway. + */ +static int +find_vs_output(const struct pipe_shader_state *vs, + uint semantic_name, + uint semantic_index) +{ + uint i; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == semantic_name && + vs->output_semantic_index[i] == semantic_index) + return i; + } + return 0; +} + + +/** + * Mark the current vertex layout as "invalid". + * We'll validate the vertex layout later, when we start to actually + * render a point or line or tri. + */ +static void +invalidate_vertex_layout(struct softpipe_context *softpipe) +{ + softpipe->vertex_info.num_attribs = 0; +} + + +/** + * The vertex info describes how to convert the post-transformed vertices + * (simple float[][4]) used by the 'draw' module into vertices for + * rasterization. + * + * This function validates the vertex layout and returns a pointer to a + * vertex_info object. + */ +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe) +{ + struct vertex_info *vinfo = &softpipe->vertex_info; + + if (vinfo->num_attribs == 0) { + /* compute vertex layout now */ + const struct pipe_shader_state *vs = &softpipe->vs->shader; + const struct pipe_shader_state *fs = &softpipe->fs->shader; + const enum interp_mode colorInterp + = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + uint i; + + if (softpipe->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; + vinfo_vbuf->num_attribs = 0; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); + vinfo_vbuf->size = 4 * vs->num_outputs + + sizeof(struct vertex_header) / 4; + } + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + vinfo->num_attribs = 0; + for (i = 0; i < fs->num_inputs; i++) { + int src; + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + break; + + case TGSI_SEMANTIC_COLOR: + src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, + fs->input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + fs->input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + softpipe->psize_slot = find_vs_output(vs, TGSI_SEMANTIC_PSIZE, 0); + if (softpipe->psize_slot > 0) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, + softpipe->psize_slot); + } + + draw_compute_vertex_size(vinfo); + } + + return vinfo; +} + + +/** + * Called from vbuf module. + * + * Note that there's actually two different vertex layouts in softpipe. + * + * The normal one is computed in softpipe_get_vertex_info() above and is + * used by the point/line/tri "setup" code. + * + * The other one (this one) is only used by the vbuf module (which is + * not normally used by default but used in testing). For the vbuf module, + * we basically want to pass-through the draw module's vertex layout as-is. + * When the softpipe vbuf code begins drawing, the normal vertex layout + * will come into play again. + */ +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) +{ + (void) softpipe_get_vertex_info(softpipe); + return &softpipe->vertex_info_vbuf; +} + + +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct softpipe_context *sp) +{ + unsigned surfWidth, surfHeight; + + if (sp->framebuffer.num_cbufs > 0) { + surfWidth = sp->framebuffer.cbufs[0]->width; + surfHeight = sp->framebuffer.cbufs[0]->height; + } + else { + /* no surface? */ + surfWidth = sp->scissor.maxx; + surfHeight = sp->scissor.maxy; + } + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void softpipe_update_derived( struct softpipe_context *softpipe ) +{ + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_FS | + SP_NEW_VS)) + invalidate_vertex_layout( softpipe ); + + if (softpipe->dirty & (SP_NEW_SCISSOR | + SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_FRAMEBUFFER)) + compute_cliprect(softpipe); + + if (softpipe->dirty & (SP_NEW_BLEND | + SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_FRAMEBUFFER | + SP_NEW_RASTERIZER | + SP_NEW_FS | + SP_NEW_QUERY)) + sp_build_quad_pipeline(softpipe); + + softpipe->dirty = 0; +} diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c new file mode 100644 index 0000000000..0b814fc284 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "sp_context.h" +#include "sp_state.h" + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/draw/draw_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/llvm/gallivm.h" +#include "pipe/tgsi/util/tgsi_dump.h" +#include "pipe/tgsi/exec/tgsi_sse2.h" + + +void * +softpipe_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader_state *state; + + /* Decide whether we'll be codegenerating this shader and if so do + * that now. + */ + + state = CALLOC_STRUCT(sp_fragment_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + + if (softpipe->dump_fs) { + tgsi_dump(state->shader.tokens, 0); + } + +#ifdef MESA_LLVM + state->llvm_prog = 0; + +#if 0 + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +#endif + +#elif defined(__i386__) || defined(__386__) + if (softpipe->use_sse) { + x86_init_func( &state->sse2_program ); + tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); + } +#endif + + return state; +} + + +void +softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->fs = (struct sp_fragment_shader_state *) fs; + + softpipe->dirty |= SP_NEW_FS; +} + + +void +softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct sp_fragment_shader_state *state = fs; + +#if defined(__i386__) || defined(__386__) + x86_release_func( &state->sse2_program ); +#endif + + FREE( state ); +} + + +void * +softpipe_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_vertex_shader_state *state; + + state = CALLOC_STRUCT(sp_vertex_shader_state); + if (state == NULL ) { + return NULL; + } + + state->shader = *templ; + + state->draw_data = draw_create_vertex_shader(softpipe->draw, + &state->shader); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +void +softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->vs = (const struct sp_vertex_shader_state *)vs; + + draw_bind_vertex_shader(softpipe->draw, softpipe->vs->draw_data); + + softpipe->dirty |= SP_NEW_VS; +} + + +void +softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_vertex_shader_state *state = + (struct sp_vertex_shader_state *)vs; + + draw_delete_vertex_shader(softpipe->draw, state->draw_data); + FREE( state ); +} + + + +void +softpipe_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + pipe_buffer_reference(ws, + &softpipe->constants[shader].buffer, + buf->buffer); + softpipe->constants[shader].size = buf->size; + + softpipe->dirty |= SP_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c new file mode 100644 index 0000000000..53755099dd --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/draw/draw_context.h" + + + +void * +softpipe_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rast) +{ + return mem_dup(rast, sizeof(*rast)); +} + +void softpipe_bind_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(softpipe->draw, setup); + + softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + + softpipe->dirty |= SP_NEW_RASTERIZER; +} + +void softpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) +{ + FREE( rasterizer ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c new file mode 100644 index 0000000000..ea348c7e95 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "pipe/p_util.h" + +#include "pipe/draw/draw_context.h" + +#include "sp_context.h" +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "pipe/draw/draw_context.h" + + + +void * +softpipe_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + +void +softpipe_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + softpipe->sampler[unit] = (struct pipe_sampler_state *)sampler; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + + +void +softpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + +void +softpipe_set_sampler_texture(struct pipe_context *pipe, + unsigned unit, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + softpipe->texture[unit] = softpipe_texture(texture); /* ptr, not struct */ + + sp_tile_cache_set_texture(softpipe->tex_cache[unit], texture); + + softpipe->dirty |= SP_NEW_TEXTURE; +} + + + diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c new file mode 100644 index 0000000000..e2c6893e9f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ +#include "p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" + + +/** + * XXX this might get moved someday + * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. + * Here, we flush the old surfaces and update the tile cache to point to the new + * surfaces. + */ +void +softpipe_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct softpipe_context *sp = softpipe_context(pipe); + uint i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + /* check if changing cbuf */ + if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { + /* flush old */ + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + + /* assign new */ + sp->framebuffer.cbufs[i] = fb->cbufs[i]; + + /* update cache */ + sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); + } + } + + sp->framebuffer.num_cbufs = fb->num_cbufs; + + /* zbuf changing? */ + if (sp->framebuffer.zsbuf != fb->zsbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + /* assign new */ + sp->framebuffer.zsbuf = fb->zsbuf; + + /* update cache */ + sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); + } + +#if 0 + /* XXX combined depth/stencil here */ + + /* sbuf changing? */ + if (sp->framebuffer.sbuf != fb->sbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->sbuf_cache_sep); + + /* assign new */ + sp->framebuffer.sbuf = fb->sbuf; + + /* update cache */ + if (fb->sbuf != fb->zbuf) { + /* separate stencil buf */ + sp->sbuf_cache = sp->sbuf_cache_sep; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + else { + /* combined depth/stencil */ + sp->sbuf_cache = sp->zbuf_cache; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + } +#endif + + sp->dirty |= SP_NEW_FRAMEBUFFER; +} + + + + diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c new file mode 100644 index 0000000000..09ff540ccf --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -0,0 +1,64 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" + +#include "pipe/draw/draw_context.h" + + +void +softpipe_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *attrib) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + softpipe->vertex_element[index] = *attrib; /* struct copy */ + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_element(softpipe->draw, index, attrib); +} + + +void +softpipe_set_vertex_buffer(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + softpipe->vertex_buffer[index] = *buffer; /* struct copy */ + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_buffer(softpipe->draw, index, buffer); +} diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c new file mode 100644 index 0000000000..8802ced187 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -0,0 +1,159 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" +#include "sp_context.h" +#include "sp_surface.h" + + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +sp_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert( dst->cpp == src->cpp ); + + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src), + do_flip ? -(int) src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); +} + + +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void +sp_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + assert(dst->pitch > 0); + assert(width <= dst->pitch); + + + switch (dst->cpp) { + case 1: + { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: + { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: + { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst->pitch * 4; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); +} + + +void +sp_init_surface_functions(struct softpipe_context *sp) +{ + sp->pipe.surface_copy = sp_surface_copy; + sp->pipe.surface_fill = sp_surface_fill; +} diff --git a/src/gallium/drivers/softpipe/sp_surface.h b/src/gallium/drivers/softpipe/sp_surface.h new file mode 100644 index 0000000000..22de3ba43f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef SP_SURFACE_H +#define SP_SURFACE_H + + +struct softpipe_context; + + +extern void +sp_init_surface_functions(struct softpipe_context *sp); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c new file mode 100644 index 0000000000..325bdb86da --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -0,0 +1,916 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling + * + * Authors: + * Brian Paul + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tex_sample.h" +#include "sp_tile_cache.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/tgsi/exec/tgsi_exec.h" + + +/* + * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes + * see 1-pixel bands of improperly weighted linear-filtered textures. + * The tests/texwrap.c demo is a good test. + * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. + * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). + */ +#define FRAC(f) ((f) - ifloor(f)) + + +/** + * Linear interpolation macro + */ +#define LERP(T, A, B) ( (A) + (T) * ((B) - (A)) ) + + +/** + * Do 2D/biliner interpolation of float values. + * v00, v10, v01 and v11 are typically four texture samples in a square/box. + * a and b are the horizontal and vertical interpolants. + * It's important that this function is inlined when compiled with + * optimization! If we find that's not true on some systems, convert + * to a macro. + */ +static INLINE float +lerp_2d(float a, float b, + float v00, float v10, float v01, float v11) +{ + const float temp0 = LERP(a, v00, v10); + const float temp1 = LERP(a, v01, v11); + return LERP(b, temp0, temp1); +} + + +/** + * If A is a signed integer, A % B doesn't give the right value for A < 0 + * (in terms of texture repeat). Just casting to unsigned fixes that. + */ +#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) + + +/** + * Apply texture coord wrapping mode and return integer texture index. + * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoord + * \param size the texture image size + * \return integer texture index + */ +static INLINE int +nearest_texcoord(unsigned wrapMode, float s, unsigned size) +{ + int i; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + /* s limited to [0,1) */ + /* i limited to [0,size-1] */ + i = ifloor(s * size); + i = REMAINDER(i, size); + return i; + case PIPE_TEX_WRAP_CLAMP: + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + if (s <= 0.0F) + i = 0; + else if (s >= 1.0F) + i = size - 1; + else + i = ifloor(s * size); + return i; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s < min) + i = 0; + else if (s > max) + i = size - 1; + else + i = ifloor(s * size); + } + return i; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [-1, size] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s <= min) + i = -1; + else if (s >= max) + i = size; + else + i = ifloor(s * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + const int flr = ifloor(s); + float u; + if (flr & 1) + u = 1.0F - (s - (float) flr); + else + u = s - (float) flr; + if (u < min) + i = 0; + else if (u > max) + i = size - 1; + else + i = ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + { + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + const float u = FABSF(s); + if (u <= 0.0F) + i = 0; + else if (u >= 1.0F) + i = size - 1; + else + i = ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + const float u = FABSF(s); + if (u < min) + i = 0; + else if (u > max) + i = size - 1; + else + i = ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + const float u = FABSF(s); + if (u < min) + i = -1; + else if (u > max) + i = size; + else + i = ifloor(u * size); + } + return i; + default: + assert(0); + return 0; + } +} + + +/** + * Used to compute texel locations for linear sampling. + * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoord + * \param size the texture image size + * \param i0 returns first texture index + * \param i1 returns second texture index (usually *i0 + 1) + * \param a returns blend factor/weight between texture indexes + */ +static INLINE void +linear_texcoord(unsigned wrapMode, float s, unsigned size, + int *i0, int *i1, float *a) +{ + float u; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + u = s * size - 0.5F; + *i0 = REMAINDER(ifloor(u), size); + *i1 = REMAINDER(*i0 + 1, size); + break; + case PIPE_TEX_WRAP_CLAMP: + if (s <= 0.0F) + u = 0.0F; + else if (s >= 1.0F) + u = (float) size; + else + u = s * size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + if (s <= 0.0F) + u = 0.0F; + else if (s >= 1.0F) + u = (float) size; + else + u = s * size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s <= min) + u = min * size; + else if (s >= max) + u = max * size; + else + u = s * size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + } + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const int flr = ifloor(s); + if (flr & 1) + u = 1.0F - (s - (float) flr); + else + u = s - (float) flr; + u = (u * size) - 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + } + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + u = FABSF(s); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + u = FABSF(s); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + u = FABSF(s); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + } + break; + default: + assert(0); + } + *a = FRAC(u); +} + + +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = FABSF(rx), ary = FABSF(ry), arz = FABSF(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = ( sc / ma + 1.0F ) * 0.5F; + *newT = ( tc / ma + 1.0F ) * 0.5F; + + return face; +} + + +/** + * Examine the quad's texture coordinates to compute the partial + * derivatives w.r.t X and Y, then compute lambda (level of detail). + * + * This is only done for fragment shaders, not vertex shaders. + */ +static float +compute_lambda(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + float rho, lambda; + + assert(s); + { + float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; + float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; + dsdx = FABSF(dsdx); + dsdy = FABSF(dsdy); + rho = MAX2(dsdx, dsdy); + if (sampler->state->normalized_coords) + rho *= sampler->texture->width[0]; + } + if (t) { + float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; + float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; + float max; + dtdx = FABSF(dtdx); + dtdy = FABSF(dtdy); + max = MAX2(dtdx, dtdy); + if (sampler->state->normalized_coords) + max *= sampler->texture->height[0]; + rho = MAX2(rho, max); + } + if (p) { + float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; + float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; + float max; + dpdx = FABSF(dpdx); + dpdy = FABSF(dpdy); + max = MAX2(dpdx, dpdy); + if (sampler->state->normalized_coords) + max *= sampler->texture->depth[0]; + rho = MAX2(rho, max); + } + + lambda = LOG2(rho); + lambda += lodbias + sampler->state->lod_bias; + lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); + + return lambda; +} + + +/** + * Do several things here: + * 1. Compute lambda from the texcoords, if needed + * 2. Determine if we're minifying or magnifying + * 3. If minifying, choose mipmap levels + * 4. Return image filter to use within mipmap images + */ +static void +choose_mipmap_levels(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + unsigned *level0, unsigned *level1, float *levelBlend, + unsigned *imgFilter) +{ + if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* no mipmap selection needed */ + *imgFilter = sampler->state->mag_img_filter; + *level0 = *level1 = (int) sampler->state->min_lod; + } + else { + float lambda; + + if (1) + /* fragment shader */ + lambda = compute_lambda(sampler, s, t, p, lodbias); + else + /* vertex shader */ + lambda = lodbias; /* not really a bias, but absolute LOD */ + + if (lambda < 0.0) { /* XXX threshold depends on the filter */ + /* magnifying */ + *imgFilter = sampler->state->mag_img_filter; + *level0 = *level1 = 0; + } + else { + /* minifying */ + *imgFilter = sampler->state->min_img_filter; + + /* choose mipmap level(s) and compute the blend factor between them */ + if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + /* Nearest mipmap level */ + const int lvl = (int) (lambda + 0.5); + *level0 = + *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + } + else { + /* Linear interpolation between mipmap levels */ + const int lvl = (int) lambda; + *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level); + *levelBlend = FRAC(lambda); /* blending weight between levels */ + } + } + } +} + + +/** + * Get a texel from a texture, using the texture tile cache. + * + * \param face the cube face in 0..5 + * \param level the mipmap level + * \param x the x coord of texel within 2D image + * \param y the y coord of texel within 2D image + * \param z which slice of a 3D texture + * \param rgba the quad to put the texel/color into + * \param j which element of the rgba quad to write to + * + * XXX maybe move this into sp_tile_cache.c and merge with the + * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... + */ +static void +get_texel(struct tgsi_sampler *sampler, + unsigned face, unsigned level, int x, int y, int z, + float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +{ + const int tx = x % TILE_SIZE; + const int ty = y % TILE_SIZE; + const struct softpipe_cached_tile *tile + = sp_get_cached_tile_tex(sampler->pipe, sampler->cache, + x, y, z, face, level); + rgba[0][j] = tile->data.color[ty][tx][0]; + rgba[1][j] = tile->data.color[ty][tx][1]; + rgba[2][j] = tile->data.color[ty][tx][2]; + rgba[3][j] = tile->data.color[ty][tx][3]; +} + + +/** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels. We look at the red channel here. + */ +static INLINE void +shadow_compare(uint compare_func, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const float p[QUAD_SIZE], + uint j) +{ + int k; + switch (compare_func) { + case PIPE_FUNC_LESS: + k = p[j] < rgba[0][j]; + break; + case PIPE_FUNC_LEQUAL: + k = p[j] <= rgba[0][j]; + break; + case PIPE_FUNC_GREATER: + k = p[j] > rgba[0][j]; + break; + case PIPE_FUNC_GEQUAL: + k = p[j] >= rgba[0][j]; + break; + case PIPE_FUNC_EQUAL: + k = p[j] == rgba[0][j]; + break; + case PIPE_FUNC_NOTEQUAL: + k = p[j] != rgba[0][j]; + break; + case PIPE_FUNC_ALWAYS: + k = 1; + break; + case PIPE_FUNC_NEVER: + k = 0; + break; + default: + assert(0); + } + + rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; +} + + +/** + * Common code for sampling 1D/2D/cube textures. + * Could probably extend for 3D... + */ +static void +sp_get_samples_2d_common(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const unsigned faces[4]) +{ + const uint compare_func = sampler->state->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + if (sampler->state->normalized_coords) { + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + } + else { + width = height = 1; + } + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); + get_texel(sampler, faces[j], level0, x, y, 0, rgba, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x = x / 2; + y = y / 2; + get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, rgba2, p, j); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4], a, b; + int x0, y0, x1, y1, c; + linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0); + get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1); + get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2); + get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0 = x0 / 2; + y0 = y0 / 2; + x1 = x1 / 2; + y1 = y1 / 2; + get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0); + get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1); + get_texel(sampler, faces[j], level1, x0, y1, 0, tx, 2); + get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_2d(a, b, + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + default: + assert(0); + } +} + + +static void +sp_get_samples_1d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + static const float tzero[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, tzero, NULL, lodbias, rgba, faces); +} + + +static void +sp_get_samples_2d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); +} + + +static void +sp_get_samples_3d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + /* get/map pipe_surfaces corresponding to 3D tex slices */ + unsigned level0, level1, j, imgFilter; + int width, height, depth; + float levelBlend; + const uint face = 0; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + if (sampler->state->normalized_coords) { + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + depth = sampler->texture->depth[level0]; + } + else { + width = height = depth = 1; + } + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); + int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth); + get_texel(sampler, face, level0, x, y, z, rgba, j); + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x /= 2; + y /= 2; + z /= 2; + get_texel(sampler, face, level1, x, y, z, rgba2, j); + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + for (j = 0; j < QUAD_SIZE; j++) { + float texel0[4][4], texel1[4][4]; + float xw, yw, zw; /* interpolation weights */ + int x0, x1, y0, y1, z0, z1, c; + linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &xw); + linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw); + linear_texcoord(sampler->state->wrap_r, p[j], depth, &z0, &z1, &zw); + get_texel(sampler, face, level0, x0, y0, z0, texel0, 0); + get_texel(sampler, face, level0, x1, y0, z0, texel0, 1); + get_texel(sampler, face, level0, x0, y1, z0, texel0, 2); + get_texel(sampler, face, level0, x1, y1, z0, texel0, 3); + get_texel(sampler, face, level0, x0, y0, z1, texel1, 0); + get_texel(sampler, face, level0, x1, y0, z1, texel1, 1); + get_texel(sampler, face, level0, x0, y1, z1, texel1, 2); + get_texel(sampler, face, level0, x1, y1, z1, texel1, 3); + + /* 3D lerp */ + for (c = 0; c < 4; c++) { + float ctemp0[4][4], ctemp1[4][4]; + ctemp0[c][j] = lerp_2d(xw, yw, + texel0[c][0], texel0[c][1], + texel0[c][2], texel0[c][3]); + ctemp1[c][j] = lerp_2d(xw, yw, + texel1[c][0], texel1[c][1], + texel1[c][2], texel1[c][3]); + rgba[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0 /= 2; + y0 /= 2; + z0 /= 2; + x1 /= 2; + y1 /= 2; + z1 /= 2; + get_texel(sampler, face, level1, x0, y0, z0, texel0, 0); + get_texel(sampler, face, level1, x1, y0, z0, texel0, 1); + get_texel(sampler, face, level1, x0, y1, z0, texel0, 2); + get_texel(sampler, face, level1, x1, y1, z0, texel0, 3); + get_texel(sampler, face, level1, x0, y0, z1, texel1, 0); + get_texel(sampler, face, level1, x1, y0, z1, texel1, 1); + get_texel(sampler, face, level1, x0, y1, z1, texel1, 2); + get_texel(sampler, face, level1, x1, y1, z1, texel1, 3); + + /* 3D lerp */ + for (c = 0; c < 4; c++) { + float ctemp0[4][4], ctemp1[4][4]; + ctemp0[c][j] = lerp_2d(xw, yw, + texel0[c][0], texel0[c][1], + texel0[c][2], texel0[c][3]); + ctemp1[c][j] = lerp_2d(xw, yw, + texel1[c][0], texel1[c][1], + texel1[c][2], texel1[c][3]); + rgba2[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); + } + + /* blend mipmap levels */ + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + default: + assert(0); + } +} + + +static void +sp_get_samples_cube(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + unsigned faces[QUAD_SIZE], j; + float ssss[4], tttt[4]; + for (j = 0; j < QUAD_SIZE; j++) { + faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + } + sp_get_samples_2d_common(sampler, ssss, tttt, NULL, lodbias, rgba, faces); +} + + +/** + * Called via tgsi_sampler::get_samples() + * Use the sampler's state setting to get a filtered RGBA value + * from the sampler's texture. + * + * XXX we can implement many versions of this function, each + * tightly coded for a specific combination of sampler state + * (nearest + repeat), (bilinear mipmap + clamp), etc. + * + * The update_samplers() function in st_atom_sampler.c could create + * a new tgsi_sampler object for each state combo it finds.... + */ +void +sp_get_samples(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + if (!sampler->texture) + return; + + switch (sampler->texture->target) { + case PIPE_TEXTURE_1D: + sp_get_samples_1d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_2D: + sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_3D: + sp_get_samples_3d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_CUBE: + sp_get_samples_cube(sampler, s, t, p, lodbias, rgba); + break; + default: + assert(0); + } +} + diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h new file mode 100644 index 0000000000..404bfd0c36 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -0,0 +1,17 @@ +#ifndef SP_TEX_SAMPLE_H +#define SP_TEX_SAMPLE_H + + +struct tgsi_sampler; + + +extern void +sp_get_samples(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +#endif /* SP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c new file mode 100644 index 0000000000..6de7a9b543 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -0,0 +1,166 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +softpipe_texture_layout(struct softpipe_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +struct pipe_texture * +softpipe_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) +{ + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *templat; + + softpipe_texture_layout(spt); + + spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + return &spt->base; +} + + +void +softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct softpipe_texture *spt = softpipe_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +/** + * Called via pipe->get_tex_surface() + */ +struct pipe_surface * +softpipe_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct softpipe_texture *spt = softpipe_texture(pt); + struct pipe_surface *ps; + + assert(level <= pt->last_level); + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h new file mode 100644 index 0000000000..fa646c0de9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TEXTURE_H +#define SP_TEXTURE_H + + +struct pipe_context; +struct pipe_texture; + + +struct softpipe_texture +{ + struct pipe_texture base; + + unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + unsigned long buffer_size; +}; + + +/** cast wrapper */ +static INLINE struct softpipe_texture * +softpipe_texture(struct pipe_texture *pt) +{ + return (struct softpipe_texture *) pt; +} + + + +extern struct pipe_texture * +softpipe_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); + +extern void +softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + +extern struct pipe_surface * +softpipe_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice); + + +#endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c new file mode 100644 index 0000000000..1597361b82 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -0,0 +1,585 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Framebuffer/surface tile caching. + * + * Author: + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/util/p_tile.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" + +#define NUM_ENTRIES 30 + + +/** XXX move these */ +#define MAX_WIDTH 2048 +#define MAX_HEIGHT 2048 + + +struct softpipe_tile_cache +{ + struct pipe_surface *surface; /**< the surface we're caching */ + void *surface_map; + struct pipe_texture *texture; /**< if caching a texture */ + struct softpipe_cached_tile entries[NUM_ENTRIES]; + uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; + float clear_color[4]; + uint clear_val; + boolean depth_stencil; /** Is the surface a depth/stencil format? */ + + struct pipe_surface *tex_surf; + void *tex_surf_map; + int tex_face, tex_level, tex_z; + + struct softpipe_cached_tile tile; /**< scratch tile for clears */ +}; + + +/** + * Return the position in the cache for the tile that contains win pos (x,y). + * We currently use a direct mapped cache so this is like a hack key. + * At some point we should investige something more sophisticated, like + * a LRU replacement policy. + */ +#define CACHE_POS(x, y) \ + (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES) + + + +/** + * Is the tile at (x,y) in cleared state? + */ +static INLINE uint +is_clear_flag_set(const uint *bitvec, int x, int y) +{ + int pos, bit; + x /= TILE_SIZE; + y /= TILE_SIZE; + pos = y * (MAX_WIDTH / TILE_SIZE) + x; + assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); + bit = bitvec[pos / 32] & (1 << (pos & 31)); + return bit; +} + + +/** + * Mark the tile at (x,y) as not cleared. + */ +static INLINE void +clear_clear_flag(uint *bitvec, int x, int y) +{ + int pos; + x /= TILE_SIZE; + y /= TILE_SIZE; + pos = y * (MAX_WIDTH / TILE_SIZE) + x; + assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); + bitvec[pos / 32] &= ~(1 << (pos & 31)); +} + + +struct softpipe_tile_cache * +sp_create_tile_cache(void) +{ + struct softpipe_tile_cache *tc; + uint pos; + + tc = CALLOC_STRUCT( softpipe_tile_cache ); + if (tc) { + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = + tc->entries[pos].y = -1; + } + } + return tc; +} + + +void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc) +{ + uint pos; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + //assert(tc->entries[pos].x < 0); + } + if (tc->surface) { + pipe_surface_reference(&tc->surface, NULL); + } + if (tc->tex_surf) { + pipe_surface_reference(&tc->tex_surf, NULL); + } + + FREE( tc ); +} + + +/** + * Specify the surface to cache. + */ +void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *ps) +{ + assert(!tc->texture); + + if (tc->surface_map) { + /*assert(tc->surface != ps);*/ + pipe_surface_unmap(tc->surface); + } + + pipe_surface_reference(&tc->surface, ps); + + if (ps) { + if (tc->surface_map) + tc->surface_map = pipe_surface_map(ps); + + tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM || + ps->format == PIPE_FORMAT_Z32_UNORM || + ps->format == PIPE_FORMAT_U_S8); + } +} + + +/** + * Return the surface being cached. + */ +struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc) +{ + return tc->surface; +} + + +void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface && !tc->surface_map) + tc->surface_map = pipe_surface_map(tc->surface); + + if (tc->tex_surf && !tc->tex_surf_map) + tc->tex_surf_map = pipe_surface_map(tc->tex_surf); +} + + +void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface_map) { + pipe_surface_unmap(tc->surface); + tc->surface_map = NULL; + } + + if (tc->tex_surf_map) { + pipe_surface_unmap(tc->tex_surf); + tc->tex_surf_map = NULL; + } +} + + +/** + * Specify the texture to cache. + */ +void +sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, + struct pipe_texture *texture) +{ + uint i; + + assert(!tc->surface); + + tc->texture = texture; + + if (tc->tex_surf_map) { + pipe_surface_unmap(tc->tex_surf); + tc->tex_surf_map = NULL; + } + pipe_surface_reference(&tc->tex_surf, NULL); + + /* mark as entries as invalid/empty */ + /* XXX we should try to avoid this when the teximage hasn't changed */ + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].x = -1; + } + + tc->tex_face = -1; /* any invalid value here */ +} + + +/** + * Set pixels in a tile to the given clear color/value, float. + */ +static void +clear_tile_rgba(struct softpipe_cached_tile *tile, + enum pipe_format format, + const float clear_value[4]) +{ + if (clear_value[0] == 0.0 && + clear_value[1] == 0.0 && + clear_value[2] == 0.0 && + clear_value[3] == 0.0) { + memset(tile->data.color, 0, sizeof(tile->data.color)); + } + else { + uint i, j; + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color[i][j][0] = clear_value[0]; + tile->data.color[i][j][1] = clear_value[1]; + tile->data.color[i][j][2] = clear_value[2]; + tile->data.color[i][j][3] = clear_value[3]; + } + } + } +} + + +/** + * Set a tile to a solid value/color. + */ +static void +clear_tile(struct softpipe_cached_tile *tile, + enum pipe_format format, + uint clear_value) +{ + uint i, j; + + switch (pf_get_size(format)) { + case 1: + memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE); + break; + case 2: + if (clear_value == 0) { + memset(tile->data.any, 0, 2 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.depth16[i][j] = (ushort) clear_value; + } + } + } + break; + case 4: + if (clear_value == 0) { + memset(tile->data.any, 0, 4 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color32[i][j] = clear_value; + } + } + } + break; + default: + assert(0); + } +} + + +/** + * Actually clear the tiles which were flagged as being in a clear state. + */ +static void +sp_tile_cache_flush_clear(struct pipe_context *pipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_surface *ps = tc->surface; + const uint w = tc->surface->width; + const uint h = tc->surface->height; + uint x, y; + uint numCleared = 0; + + /* clear the scratch tile to the clear value */ + clear_tile(&tc->tile, ps->format, tc->clear_val); + + /* push the tile to all positions marked as clear */ + for (y = 0; y < h; y += TILE_SIZE) { + for (x = 0; x < w; x += TILE_SIZE) { + if (is_clear_flag_set(tc->clear_flags, x, y)) { + pipe_put_tile_raw(pipe, ps, + x, y, TILE_SIZE, TILE_SIZE, + tc->tile.data.color32, 0/*STRIDE*/); + + /* do this? */ + clear_clear_flag(tc->clear_flags, x, y); + + numCleared++; + } + } + } +#if 0 + debug_printf("num cleared: %u\n", numCleared); +#endif +} + + +/** + * Flush the tile cache: write all dirty tiles back to the surface. + * any tiles "flagged" as cleared will be "really" cleared. + */ +void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_context *pipe = &softpipe->pipe; + struct pipe_surface *ps = tc->surface; + int inuse = 0, pos; + + if (!ps || !ps->buffer) + return; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + if (tile->x >= 0) { + if (tc->depth_stencil) { + pipe_put_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + tile->x = tile->y = -1; /* mark as empty */ + inuse++; + } + } + +#if TILE_CLEAR_OPTIMIZATION + sp_tile_cache_flush_clear(&softpipe->pipe, tc); +#endif + +#if 0 + debug_printf("flushed tiles in use: %d\n", inuse); +#endif +} + + +/** + * Get a tile from the cache. + * \param x, y position of tile, in pixels + */ +struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y) +{ + struct pipe_context *pipe = &softpipe->pipe; + struct pipe_surface *ps = tc->surface; + + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + + /* cache pos/entry: */ + const int pos = CACHE_POS(x, y); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tile_x != tile->x || + tile_y != tile->y) { + + if (tile->x != -1) { + /* put dirty tile back in framebuffer */ + if (tc->depth_stencil) { + pipe_put_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + + tile->x = tile_x; + tile->y = tile_y; + + if (is_clear_flag_set(tc->clear_flags, x, y)) { + /* don't get tile from framebuffer, just clear it */ + if (tc->depth_stencil) { + clear_tile(tile, ps->format, tc->clear_val); + } + else { + clear_tile_rgba(tile, ps->format, tc->clear_color); + } + clear_clear_flag(tc->clear_flags, x, y); + } + else { + /* get new tile data from surface */ + if (tc->depth_stencil) { + pipe_get_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_get_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + } + + return tile; +} + + +/** + * Given the texture face, level, zslice, x and y values, compute + * the cache entry position/index where we'd hope to find the + * cached texture tile. + * This is basically a direct-map cache. + * XXX There's probably lots of ways in which we can improve this. + */ +static INLINE uint +tex_cache_pos(int x, int y, int z, int face, int level) +{ + uint entry = x + y * 5 + z * 4 + face + level; + return entry % NUM_ENTRIES; +} + + +/** + * Similar to sp_get_cached_tile() but for textures. + * Tiles are read-only and indexed with more params. + */ +const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level) +{ + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + /* cache pos/entry: */ + const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z, + face, level); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tile_x != tile->x || + tile_y != tile->y || + z != tile->z || + face != tile->face || + level != tile->level) { + /* cache miss */ + + /* check if we need to get a new surface */ + if (!tc->tex_surf || + tc->tex_face != face || + tc->tex_level != level || + tc->tex_z != z) { + /* get new surface (view into texture) */ + + if (tc->tex_surf_map) + pipe_surface_unmap(tc->tex_surf); + + tc->tex_surf = pipe->get_tex_surface(pipe, tc->texture, face, level, z); + tc->tex_surf_map = pipe_surface_map(tc->tex_surf); + + tc->tex_face = face; + tc->tex_level = level; + tc->tex_z = z; + } + + /* get tile from the surface (view into texture) */ + pipe_get_tile_rgba(pipe, tc->tex_surf, + tile_x, tile_y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + tile->x = tile_x; + tile->y = tile_y; + tile->z = z; + tile->face = face; + tile->level = level; + } + + return tile; +} + + +/** + * When a whole surface is being cleared to a value we can avoid + * fetching tiles above. + * Save the color and set a 'clearflag' for each tile of the screen. + */ +void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue) +{ + uint r, g, b, a; + uint pos; + + tc->clear_val = clearValue; + + switch (tc->surface->format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + r = (clearValue >> 24) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 8) & 0xff; + a = (clearValue ) & 0xff; + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + r = (clearValue >> 16) & 0xff; + g = (clearValue >> 8) & 0xff; + b = (clearValue ) & 0xff; + a = (clearValue >> 24) & 0xff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + r = (clearValue >> 8) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 24) & 0xff; + a = (clearValue ) & 0xff; + break; + default: + r = g = b = a = 0; + } + + tc->clear_color[0] = r / 255.0f; + tc->clear_color[1] = g / 255.0f; + tc->clear_color[2] = b / 255.0f; + tc->clear_color[3] = a / 255.0f; + +#if TILE_CLEAR_OPTIMIZATION + /* set flags to indicate all the tiles are cleared */ + memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); +#else + /* disable the optimization */ + memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); +#endif + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + tile->x = tile->y = -1; + } +} diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h new file mode 100644 index 0000000000..7fd1081286 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TILE_CACHE_H +#define SP_TILE_CACHE_H + +#define TILE_CLEAR_OPTIMIZATION 1 + + +#include "pipe/p_compiler.h" + + +struct softpipe_context; +struct softpipe_tile_cache; + + +/** + * Cache tile size (width and height). This needs to be a power of two. + */ +#define TILE_SIZE 64 + + + +struct softpipe_cached_tile +{ + int x, y; /**< pos of tile in window coords */ + int z, face, level; /**< Extra texture indexes */ + union { + float color[TILE_SIZE][TILE_SIZE][4]; + uint color32[TILE_SIZE][TILE_SIZE]; + uint depth32[TILE_SIZE][TILE_SIZE]; + ushort depth16[TILE_SIZE][TILE_SIZE]; + ubyte stencil8[TILE_SIZE][TILE_SIZE]; + ubyte any[1]; + } data; +}; + + +extern struct softpipe_tile_cache * +sp_create_tile_cache(void); + +extern void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *sps); + +extern struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, + struct pipe_texture *texture); + +extern void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue); + +extern struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y); + +extern const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level); + + +#endif /* SP_TILE_CACHE_H */ + diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h new file mode 100644 index 0000000000..d6b379f58c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* This is the interface that softpipe requires any window system + * hosting it to implement. This is the only include file in softpipe + * which is public. + */ + + +#ifndef SP_WINSYS_H +#define SP_WINSYS_H + + +#include "pipe/p_compiler.h" /* for boolean */ + +enum pipe_format; + +struct softpipe_winsys { + /** test if the given format is supported for front/back color bufs */ + boolean (*is_format_supported)( struct softpipe_winsys *sws, + enum pipe_format format ); + +}; + +struct pipe_winsys; +struct pipe_context; + + +struct pipe_context *softpipe_create( struct pipe_winsys *, + struct softpipe_winsys * ); + + +#endif /* SP_WINSYS_H */ -- cgit v1.2.3 From 6acd63a4980951727939c0dd545a0324965b3834 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 17:50:12 +0900 Subject: Code reorganization: update build. Update the Makefiles and includes for the new paths. Note that there hasn't been no separation of the Makefiles yet, and make is jumping all over the place. That will be taken care shortly. But for now, make should work. It was tested with linux and linux-dri. Linux-cell and linux-llvm might require some minor tweaks. --- configs/beos | 2 +- configs/darwin | 2 +- configs/darwin-x86ppc | 2 +- configs/default | 2 +- configs/freebsd-dri | 2 +- configs/linux-cell | 2 +- configs/linux-directfb | 2 +- configs/linux-dri | 6 +- configs/linux-dri-xcb | 4 +- configs/linux-fbdev | 2 +- configs/linux-osmesa | 2 +- configs/linux-osmesa16 | 2 +- configs/linux-osmesa16-static | 2 +- configs/linux-osmesa32 | 2 +- configs/linux-solo | 2 +- src/gallium/Makefile | 12 +-- src/gallium/Makefile.template | 7 +- src/gallium/aux/Makefile | 24 +++++ src/gallium/aux/draw/draw_private.h | 2 +- src/gallium/aux/draw/draw_vertex.c | 4 +- src/gallium/aux/draw/draw_vertex_shader.c | 4 +- src/gallium/aux/llvm/Makefile | 4 +- src/gallium/aux/llvm/gallivm.cpp | 4 +- src/gallium/aux/llvm/gallivm_cpu.cpp | 4 +- src/gallium/aux/llvm/tgsitollvm.cpp | 10 +- src/gallium/aux/pipebuffer/Makefile | 2 +- src/gallium/aux/tgsi/exec/tgsi_exec.c | 4 +- src/gallium/aux/tgsi/exec/tgsi_sse2.c | 4 +- src/gallium/aux/tgsi/util/tgsi_transform.h | 4 +- src/gallium/drivers/Makefile | 24 +++++ src/gallium/drivers/cell/ppu/Makefile | 7 +- src/gallium/drivers/cell/ppu/cell_clear.c | 2 +- src/gallium/drivers/cell/ppu/cell_context.c | 6 +- src/gallium/drivers/cell/ppu/cell_context.h | 6 +- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 2 +- src/gallium/drivers/cell/ppu/cell_flush.c | 2 +- src/gallium/drivers/cell/ppu/cell_render.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_blend.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_clip.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_derived.c | 4 +- src/gallium/drivers/cell/ppu/cell_state_fs.c | 8 +- .../drivers/cell/ppu/cell_state_rasterizer.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_sampler.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 +- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 6 +- src/gallium/drivers/cell/spu/Makefile | 6 +- src/gallium/drivers/cell/spu/spu_exec.c | 4 +- src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_main.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 4 +- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_render.h | 2 +- src/gallium/drivers/cell/spu/spu_tile.h | 2 +- src/gallium/drivers/cell/spu/spu_util.c | 4 +- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 6 +- src/gallium/drivers/failover/Makefile | 2 +- src/gallium/drivers/i915simple/Makefile | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.h | 2 +- .../drivers/i915simple/i915_fpc_translate.c | 4 +- src/gallium/drivers/i915simple/i915_prim_emit.c | 2 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 2 +- .../drivers/i915simple/i915_state_derived.c | 4 +- src/gallium/drivers/i915simple/i915_strings.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 2 +- src/gallium/drivers/i965simple/Makefile | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_strings.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 2 +- src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 2 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 2 +- src/gallium/drivers/softpipe/Makefile | 2 +- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 2 +- src/gallium/drivers/softpipe/sp_flush.c | 2 +- src/gallium/drivers/softpipe/sp_headers.h | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 4 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 6 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 2 +- src/gallium/drivers/softpipe/sp_query.c | 2 +- src/gallium/drivers/softpipe/sp_state_clip.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 6 +- src/gallium/drivers/softpipe/sp_state_fs.c | 8 +- src/gallium/drivers/softpipe/sp_state_rasterizer.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 4 +- src/gallium/drivers/softpipe/sp_state_vertex.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- src/gallium/winsys/dri/Makefile | 38 +++++++ src/gallium/winsys/dri/Makefile.template | 113 +++++++++++++++++++++ src/gallium/winsys/dri/intel/Makefile | 8 +- src/gallium/winsys/dri/intel/intel_winsys_i915.c | 2 +- .../winsys/dri/intel/intel_winsys_softpipe.c | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 6 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/Makefile | 16 ++- src/mesa/drivers/x11/xm_api.c | 2 +- src/mesa/drivers/x11/xm_dd.c | 2 +- src/mesa/drivers/x11/xm_surface.c | 8 +- src/mesa/drivers/x11/xm_winsys.c | 2 +- src/mesa/drivers/x11/xmesaP.h | 4 +- src/mesa/sources | 83 +++++++-------- src/mesa/state_tracker/st_atom_shader.c | 2 +- src/mesa/state_tracker/st_cache.c | 4 +- src/mesa/state_tracker/st_cache.h | 2 +- src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_feedback.c | 6 +- src/mesa/state_tracker/st_cb_program.c | 4 +- src/mesa/state_tracker/st_cb_rasterpos.c | 4 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_context.c | 4 +- src/mesa/state_tracker/st_debug.c | 4 +- src/mesa/state_tracker/st_draw.c | 4 +- src/mesa/state_tracker/st_gen_mipmap.c | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +- src/mesa/state_tracker/st_program.c | 4 +- 127 files changed, 445 insertions(+), 241 deletions(-) create mode 100644 src/gallium/aux/Makefile create mode 100644 src/gallium/drivers/Makefile create mode 100644 src/gallium/winsys/dri/Makefile create mode 100644 src/gallium/winsys/dri/Makefile.template (limited to 'src/gallium/drivers') diff --git a/configs/beos b/configs/beos index f07973d0c7..2b74af739d 100644 --- a/configs/beos +++ b/configs/beos @@ -86,7 +86,7 @@ else endif # Directories -SRC_DIRS = mesa glu glut/beos +SRC_DIRS = gallium mesa glu glut/beos GLU_DIRS = sgi DRIVER_DIRS = beos PROGRAM_DIRS = beos samples redbook demos tests diff --git a/configs/darwin b/configs/darwin index 7826ecc605..bba7802696 100644 --- a/configs/darwin +++ b/configs/darwin @@ -25,5 +25,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/darwin-x86ppc b/configs/darwin-x86ppc index 13172327a7..ebeb25051f 100644 --- a/configs/darwin-x86ppc +++ b/configs/darwin-x86ppc @@ -29,5 +29,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/default b/configs/default index 166205a1d3..25a87e66a1 100644 --- a/configs/default +++ b/configs/default @@ -60,7 +60,7 @@ GLW_SOURCES = GLwDrawA.c # Directories to build LIB_DIR = lib -SRC_DIRS = mesa glu glut/glx glw +SRC_DIRS = gallium mesa glu glut/glx glw GLU_DIRS = sgi DRIVER_DIRS = x11 osmesa # Which subdirs under $(TOP)/progs/ to enter: diff --git a/configs/freebsd-dri b/configs/freebsd-dri index 402883d1de..67d253b869 100644 --- a/configs/freebsd-dri +++ b/configs/freebsd-dri @@ -36,7 +36,7 @@ GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/X11R6/lib -lGL -lXt -lX11 # Directories -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw DRIVER_DIRS = dri PROGRAM_DIRS = WINDOW_SYSTEM=dri diff --git a/configs/linux-cell b/configs/linux-cell index 3d874491e4..fdf20deeeb 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -21,7 +21,7 @@ CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$( CXXFLAGS = $(CFLAGS) # Omitting glw here: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx MKDEP_OPTIONS = -fdepend -Y diff --git a/configs/linux-directfb b/configs/linux-directfb index 09332f4808..dff27f7850 100644 --- a/configs/linux-directfb +++ b/configs/linux-directfb @@ -22,7 +22,7 @@ ifeq ($(HAVE_X86), yes) endif # Directories -SRC_DIRS = mesa glu glut/directfb +SRC_DIRS = gallium mesa glu glut/directfb GLU_DIRS = sgi DRIVER_DIRS = directfb PROGRAM_DIRS = demos directfb diff --git a/configs/linux-dri b/configs/linux-dri index 936fce9982..e6135856fc 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -54,10 +54,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif @@ -66,4 +66,4 @@ WINDOW_SYSTEM=dri # gamma are missing because they have not been converted to use the new # interface. -DRI_DIRS = intel_winsys +DRI_DIRS = intel diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb index aa292a13ec..ea4bdf1864 100644 --- a/configs/linux-dri-xcb +++ b/configs/linux-dri-xcb @@ -53,10 +53,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif diff --git a/configs/linux-fbdev b/configs/linux-fbdev index e36d20a702..1ddccb3f52 100644 --- a/configs/linux-fbdev +++ b/configs/linux-fbdev @@ -6,7 +6,7 @@ CONFIG_NAME = linux-fbdev CFLAGS = -O3 -ffast-math -ansi -pedantic -fPIC -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DPTHREADS -DUSE_GLFBDEV_DRIVER -SRC_DIRS = mesa glu glut/fbdev +SRC_DIRS = gallium mesa glu glut/fbdev DRIVER_DIRS = fbdev osmesa PROGRAM_DIRS = fbdev demos redbook samples diff --git a/configs/linux-osmesa b/configs/linux-osmesa index cc1fbbd109..0382a19553 100644 --- a/configs/linux-osmesa +++ b/configs/linux-osmesa @@ -14,7 +14,7 @@ CXXFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOUR # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = osdemos diff --git a/configs/linux-osmesa16 b/configs/linux-osmesa16 index 1fb0186d31..9a527592f1 100644 --- a/configs/linux-osmesa16 +++ b/configs/linux-osmesa16 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa16.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa16-static b/configs/linux-osmesa16-static index 6645504478..1e6380b02e 100644 --- a/configs/linux-osmesa16-static +++ b/configs/linux-osmesa16-static @@ -18,7 +18,7 @@ OSMESA_LIB_NAME = libOSMesa16.a # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa32 b/configs/linux-osmesa32 index a1e5a358d6..f0ef1831b0 100644 --- a/configs/linux-osmesa32 +++ b/configs/linux-osmesa32 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa32.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-solo b/configs/linux-solo index 220fe58b9a..d49b972228 100644 --- a/configs/linux-solo +++ b/configs/linux-solo @@ -43,7 +43,7 @@ GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -lm APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm -lpthread # Directories -SRC_DIRS = glx/mini mesa glu glut/mini +SRC_DIRS = glx/mini gallium mesa glu glut/mini DRIVER_DIRS = dri PROGRAM_DIRS = miniglx diff --git a/src/gallium/Makefile b/src/gallium/Makefile index d880d090c1..a13b9a52d3 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -1,16 +1,8 @@ -TOP = ../../.. +TOP = ../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-cell) -CELL_DIR = cell -endif - -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) $(LLVM_DIR) +SUBDIRS = aux drivers default: subdirs diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 8e84f8eb2d..0717ed8dd2 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -15,7 +15,10 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/include/pipe \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/include \ $(DRIVER_INCLUDES) @@ -38,7 +41,7 @@ INCLUDES = \ default: depend symlinks $(LIBNAME) -$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/mesa/pipe/Makefile.template +$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) diff --git a/src/gallium/aux/Makefile b/src/gallium/aux/Makefile new file mode 100644 index 0000000000..da68498aa1 --- /dev/null +++ b/src/gallium/aux/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-llvm) +LLVM_DIR = llvm +endif + +SUBDIRS = pipebuffer $(LLVM_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/aux/draw/draw_private.h b/src/gallium/aux/draw/draw_private.h index b17eaaed65..3d09aef87c 100644 --- a/src/gallium/aux/draw/draw_private.h +++ b/src/gallium/aux/draw/draw_private.h @@ -45,7 +45,7 @@ #include "pipe/p_defines.h" #include "x86/rtasm/x86sse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" struct gallivm_prog; diff --git a/src/gallium/aux/draw/draw_vertex.c b/src/gallium/aux/draw/draw_vertex.c index 2d6592150f..daf1ef4b80 100644 --- a/src/gallium/aux/draw/draw_vertex.c +++ b/src/gallium/aux/draw/draw_vertex.c @@ -34,8 +34,8 @@ */ -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/aux/draw/draw_vertex_shader.c b/src/gallium/aux/draw/draw_vertex_shader.c index c824c1407e..377ecbb931 100644 --- a/src/gallium/aux/draw/draw_vertex_shader.c +++ b/src/gallium/aux/draw/draw_vertex_shader.c @@ -34,13 +34,13 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #if defined(__i386__) || defined(__386__) -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "draw_private.h" #include "draw_context.h" #include "x86/rtasm/x86sse.h" -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #define DBG_VS 0 diff --git a/src/gallium/aux/llvm/Makefile b/src/gallium/aux/llvm/Makefile index 9c6e16d86b..e6ac399d08 100644 --- a/src/gallium/aux/llvm/Makefile +++ b/src/gallium/aux/llvm/Makefile @@ -30,7 +30,9 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/drivers + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -I$(TOP)/include diff --git a/src/gallium/aux/llvm/gallivm.cpp b/src/gallium/aux/llvm/gallivm.cpp index da0105c2c9..d14bb3b99a 100644 --- a/src/gallium/aux/llvm/gallivm.cpp +++ b/src/gallium/aux/llvm/gallivm.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include #include diff --git a/src/gallium/aux/llvm/gallivm_cpu.cpp b/src/gallium/aux/llvm/gallivm_cpu.cpp index dc4d92a72a..8f9830d0b1 100644 --- a/src/gallium/aux/llvm/gallivm_cpu.cpp +++ b/src/gallium/aux/llvm/gallivm_cpu.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include #include diff --git a/src/gallium/aux/llvm/tgsitollvm.cpp b/src/gallium/aux/llvm/tgsitollvm.cpp index 0de595e678..2cb4acce32 100644 --- a/src/gallium/aux/llvm/tgsitollvm.cpp +++ b/src/gallium/aux/llvm/tgsitollvm.cpp @@ -10,11 +10,11 @@ #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_util.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" #include diff --git a/src/gallium/aux/pipebuffer/Makefile b/src/gallium/aux/pipebuffer/Makefile index 75764a9a18..588629e870 100644 --- a/src/gallium/aux/pipebuffer/Makefile +++ b/src/gallium/aux/pipebuffer/Makefile @@ -17,7 +17,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/aux/tgsi/exec/tgsi_exec.c b/src/gallium/aux/tgsi/exec/tgsi_exec.c index 37e6007068..a8f64c2287 100644 --- a/src/gallium/aux/tgsi/exec/tgsi_exec.c +++ b/src/gallium/aux/tgsi/exec/tgsi_exec.c @@ -54,8 +54,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #define TILE_TOP_LEFT 0 diff --git a/src/gallium/aux/tgsi/exec/tgsi_sse2.c b/src/gallium/aux/tgsi/exec/tgsi_sse2.c index 1e56e4afb6..593464db3e 100755 --- a/src/gallium/aux/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/aux/tgsi/exec/tgsi_sse2.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #include "tgsi_sse2.h" diff --git a/src/gallium/aux/tgsi/util/tgsi_transform.h b/src/gallium/aux/tgsi/util/tgsi_transform.h index 365d8c298c..fcf85d603b 100644 --- a/src/gallium/aux/tgsi/util/tgsi_transform.h +++ b/src/gallium/aux/tgsi/util/tgsi_transform.h @@ -31,8 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile new file mode 100644 index 0000000000..c0345a9cb5 --- /dev/null +++ b/src/gallium/drivers/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-cell) +CELL_DIR = cell +endif + +SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 50060f5cd3..011863c11e 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -40,8 +40,11 @@ SOURCES = \ OBJECTS = $(SOURCES:.c=.o) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa - +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 07b908eec5..e588a30d5b 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -35,7 +35,7 @@ #include #include "pipe/p_inlines.h" #include "pipe/p_util.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_batch.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index bbe1fd7a11..e1eb22f468 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,9 +37,9 @@ #include "pipe/p_format.h" #include "pipe/p_util.h" #include "pipe/p_winsys.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 3b63419b5e..6196c0c72f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -32,10 +32,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vbuf.h" #include "cell_winsys.h" -#include "pipe/cell/common.h" +#include "cell/common.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 717cd8370f..f12613649b 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -39,7 +39,7 @@ #include "cell_draw_arrays.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index f62bc4650c..20f27531fc 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -31,7 +31,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_render.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index 4ab277a4b2..b663b37622 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -34,7 +34,7 @@ #include "cell_render.h" #include "cell_spu.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" struct render_stage { diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 7c83a47e57..419e74dc40 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -31,7 +31,7 @@ #include "cell_spu.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/cell/common.h" +#include "cell/common.h" /* diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 19eff94f96..137f26612e 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -31,7 +31,7 @@ #include #include -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c index 4fc60548c8..b6d6d71f0c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_blend.c +++ b/src/gallium/drivers/cell/ppu/cell_state_blend.c @@ -29,7 +29,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c index 4f43665941..0482f87e88 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_clip.c +++ b/src/gallium/drivers/cell/ppu/cell_state_clip.c @@ -30,7 +30,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void cell_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 56daf5dfde..0c46829258 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "cell_context.h" #include "cell_batch.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c index 3f46a87d18..b2ed699a5b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -29,12 +29,12 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #if 0 #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c index d8128ece54..7eca5b5765 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c +++ b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c @@ -27,7 +27,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c index ade6cc8338..a33421a4ad 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_sampler.c +++ b/src/gallium/drivers/cell/ppu/cell_state_sampler.c @@ -30,7 +30,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" #include "cell_texture.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 0f01e920f9..563831b62d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -32,7 +32,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index fca93e4742..a35db0ef99 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "cell_context.h" #include "cell_surface.h" diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index e9fafe492e..cc727ff4ed 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -36,7 +36,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_vbuf.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" /** Allow vertex data to be inlined after RENDER command */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 80dd500b34..0ba4506505 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -38,9 +38,9 @@ #include "cell_spu.h" #include "cell_batch.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** * Run the vertex shader on all vertices in the vertex queue. diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index f202971d73..7aa947299e 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -31,7 +31,11 @@ SPU_OBJECTS = $(SOURCES:.c=.o) \ SPU_ASM_OUT = $(SOURCES:.c=.s) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index e51008b9b3..109540b1f7 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -67,8 +67,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index b4c7661ef6..3e17c490d2 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -29,7 +29,7 @@ #define SPU_EXEC_H #include "pipe/p_compiler.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #if defined __cplusplus extern "C" { diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index e375197fe6..1e7243b863 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -38,7 +38,7 @@ #include "spu_tile.h" //#include "spu_test.h" #include "spu_vertex_shader.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 1710a17512..5c95d112ac 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -31,8 +31,8 @@ #include -#include "pipe/cell/common.h" -#include "pipe/draw/draw_vertex.h" +#include "cell/common.h" +#include "draw/draw_vertex.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 932fb500b3..20e77aa2e6 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -34,7 +34,7 @@ #include "spu_render.h" #include "spu_tri.h" #include "spu_tile.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h index fbcdc5ec31..493434f087 100644 --- a/src/gallium/drivers/cell/spu/spu_render.h +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -29,7 +29,7 @@ #ifndef SPU_RENDER_H #define SPU_RENDER_H -#include "pipe/cell/common.h" +#include "cell/common.h" extern void cmd_render(const struct cell_command_render *render, uint *pos_incr); diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index e53340a55a..3105b848fd 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -32,7 +32,7 @@ #include #include #include "spu_main.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index ac373240c1..ea4274a0a7 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,8 +1,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" //#include "tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_util.h" unsigned tgsi_util_get_src_register_swizzle( diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index c1cbbb6d1e..3f5bf41aa2 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -39,9 +39,9 @@ #include "pipe/p_shader_tokens.h" #include "spu_vertex_shader.h" #include "spu_exec.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cell/common.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" +#include "cell/common.h" #include "spu_main.h" static INLINE unsigned diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index 72d0895c74..14389bd055 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -15,7 +15,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index 2f91de3afc..ee22ba86f9 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -32,7 +32,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 497623a700..7f71f8fd4f 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -32,7 +32,7 @@ #include "i915_texture.h" #include "i915_reg.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index b4ea63c3e7..2d876925b2 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #define I915_TEX_UNITS 8 diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 868f0c7e04..6c1524c768 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,9 +33,9 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index c4a706c37d..44c4325936 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" #include "pipe/p_util.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index e069773fd4..c5bf6174f6 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -38,7 +38,7 @@ */ -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index abd5571b88..294e6fad03 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -29,7 +29,7 @@ */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 653983e4a9..4767584fc6 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c index c713bf7208..301fedea19 100644 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -70,7 +70,7 @@ static const char *i915_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i915 (chipset: %s)", chipset); + sprintf(buffer, "i915 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index de0cc5fe06..17fd27895a 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -33,7 +33,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 48c00ab50b..1dec1f9749 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -61,6 +61,6 @@ ASM_SOURCES = DRIVER_DEFINES = -I. -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index 431b45466a..a762a870fe 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -3,7 +3,7 @@ #include "brw_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 95dfce88e4..f746d1cc57 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -33,7 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c index 29a41ed1e9..3d9c50961f 100644 --- a/src/gallium/drivers/i965simple/brw_strings.c +++ b/src/gallium/drivers/i965simple/brw_strings.c @@ -59,7 +59,7 @@ static const char *brw_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i965 (chipset: %s)", chipset); + sprintf(buffer, "i965 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 518845e4b2..376a42b1a6 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -32,7 +32,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 98915ba101..05df4860ed 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "brw_vs.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" struct brw_prog_info { unsigned num_temps; diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index b45a333a2e..97418a52e7 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index d95645d108..44f946ea74 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 31438a882e..2304ea4246 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -44,7 +44,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index cea6b90104..5e98f190bb 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index aff8c2cc5d..8c79cb3ce4 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 71a303a8b5..2049afda34 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,7 +38,7 @@ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index ced0d5d098..2cbd0d7cab 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_flush.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 0ae31d8796..9cf8222133 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -31,7 +31,7 @@ #ifndef SP_HEADERS_H #define SP_HEADERS_H -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #define PRIM_POINT 1 #define PRIM_LINE 2 diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 2772048661..d73521ccbe 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -38,8 +38,8 @@ #include "sp_quad.h" #include "sp_state.h" #include "sp_prim_setup.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 7f71fdb6a9..69bea8a8f5 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -39,9 +39,9 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_prim_vbuf.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" #define SP_MAX_VBUF_INDEXES 1024 diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 3316858413..b4c01a7ea8 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -42,7 +42,7 @@ #include "x86/rtasm/x86sse.h" #ifdef MESA_LLVM -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #endif #include "sp_context.h" diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 6a8a43aeda..adf9ccf64c 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index 08c5f06d05..c797c0dd3b 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -29,7 +29,7 @@ */ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void softpipe_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 372597869f..9d8fd8b750 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -27,9 +27,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 0b814fc284..1e3cadd43d 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -32,11 +32,11 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" void * diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 53755099dd..98e04352db 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index ea348c7e95..460adccec4 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -31,14 +31,14 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 09ff540ccf..f01a10de3b 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -33,7 +33,7 @@ #include "sp_state.h" #include "sp_surface.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 8802ced187..653449c4f1 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 325bdb86da..2f82fd6abe 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" /* diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1597361b82..dde3fabc81 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -34,7 +34,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_tile_cache.h" diff --git a/src/gallium/winsys/dri/Makefile b/src/gallium/winsys/dri/Makefile new file mode 100644 index 0000000000..f466ce6c3c --- /dev/null +++ b/src/gallium/winsys/dri/Makefile @@ -0,0 +1,38 @@ +# src/mesa/drivers/dri/Makefile + +TOP = ../../../.. + +include $(TOP)/configs/current + + + +default: $(TOP)/$(LIB_DIR) subdirs + + +$(TOP)/$(LIB_DIR): + -mkdir $(TOP)/$(LIB_DIR) + + +subdirs: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +install: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) install) || exit 1 ; \ + fi \ + done + + +clean: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) clean) ; \ + fi \ + done + -rm -f common/*.o diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template new file mode 100644 index 0000000000..b96305c094 --- /dev/null +++ b/src/gallium/winsys/dri/Makefile.template @@ -0,0 +1,113 @@ +# -*-makefile-*- + +MESA_MODULES = $(TOP)/src/mesa/libmesa.a + +COMMON_GALLIUM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/utils.c \ + $(TOP)/src/mesa/drivers/dri/common/vblank.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_util.c \ + $(TOP)/src/mesa/drivers/dri/common/xmlconfig.c + +COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ + $(TOP)/src/mesa/drivers/common/driverfuncs.c \ + $(TOP)/src/mesa/drivers/dri/common/texmem.c \ + $(TOP)/src/mesa/drivers/dri/common/drirenderbuffer.c + +COMMON_BM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/dri_bufmgr.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_drmpool.c + + +ifeq ($(WINDOW_SYSTEM),dri) +WINOBJ= +WINLIB= +INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) + +else +# miniglx +WINOBJ= +WINLIB=-L$(MESA)/src/glx/mini +MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini +INCLUDES = $(MINIGLX_INCLUDES) \ + $(SHARED_INCLUDES) \ + $(PCIACCESS_CFLAGS) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(MINIGLX_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) +endif + + +### Include directories +SHARED_INCLUDES = \ + -I. \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -Iserver \ + -I$(TOP)/include \ + -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/math \ + -I$(TOP)/src/mesa/transform \ + -I$(TOP)/src/mesa/shader \ + -I$(TOP)/src/mesa/swrast \ + -I$(TOP)/src/mesa/swrast_setup \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/egl/drivers/dri \ + $(LIBDRM_CFLAGS) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) + + +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template + $(TOP)/bin/mklib -noprefix -o $@ \ + $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) + + +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) + $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) + + +depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +install: $(LIBNAME) + $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + + +include depend diff --git a/src/gallium/winsys/dri/intel/Makefile b/src/gallium/winsys/dri/intel/Makefile index 9ae0f01325..40654bb2ac 100644 --- a/src/gallium/winsys/dri/intel/Makefile +++ b/src/gallium/winsys/dri/intel/Makefile @@ -7,8 +7,8 @@ LIBNAME = i915tex_dri.so MINIGLX_SOURCES = server/intel_dri.c PIPE_DRIVERS = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i915simple/libi915simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i915simple/libi915simple.a DRIVER_SOURCES = \ intel_winsys_pipe.c \ @@ -28,11 +28,11 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ +DRIVER_DEFINES = -I$(TOP)/src/mesa/drivers/dri/intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") include ../Makefile.template -intel_tex_layout.o: ../intel/intel_tex_layout.c +intel_tex_layout.o: $(TOP)/src/mesa/drivers/dri/intel/intel_tex_layout.c symlinks: diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 1ba6a9e1b2..0ed3890e93 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -39,7 +39,7 @@ #include "intel_winsys.h" #include "pipe/p_util.h" -#include "pipe/i915simple/i915_winsys.h" +#include "i915simple/i915_winsys.h" struct intel_i915_winsys { diff --git a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c index cec3437831..9e483bdc9f 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c @@ -34,7 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_format.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" struct intel_softpipe_winsys { diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index c3cd22eea3..8da596d419 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -41,11 +41,11 @@ #include "pipe/p_context.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" #ifdef GALLIUM_CELL -#include "pipe/cell/ppu/cell_context.h" -#include "pipe/cell/ppu/cell_winsys.h" +#include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_winsys.h" #else #define TILE_SIZE 32 /* avoid compilation errors */ #endif diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index bf41570257..dbfd37bda2 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -39,7 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/i965simple/brw_winsys.h" +#include "i965simple/brw_winsys.h" #include "brw_aub.h" #include "xm_winsys_aub.h" diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 720f1b2e02..561608fedd 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,16 +12,16 @@ GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) PIPE_LIB = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i965simple/libi965simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i965simple/libi965simple.a ifeq ($(CONFIG_NAME), linux-cell) -CELL_LIB = $(TOP)/src/mesa/pipe/cell/ppu/libcell.a -CELL_LIB_SPU = $(TOP)/src/mesa/pipe/cell/spu/g3d_spu.a +CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a +CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/mesa/pipe/llvm/libgallivm.a +LLVM_LIB = $(TOP)/src/gallium/aux/llvm/libgallivm.a endif .SUFFIXES : .cpp @@ -71,7 +71,7 @@ libmesa.a: $(SOLO_OBJECTS) fi linux-solo: depend subdirs libmesa.a - cd drivers/dri ; $(MAKE) + cd $(TOP)/src/gallium/winsys/dri ; $(MAKE) ##################################################################### @@ -165,7 +165,6 @@ depend: $(ALL_SOURCES) subdirs: @ (cd x86 ; $(MAKE)) @ (cd x86-64 ; $(MAKE)) - (cd pipe ; $(MAKE)) install: default $(INSTALL) -d $(INSTALL_DIR)/include/GL @@ -178,7 +177,7 @@ install: default $(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* $(INSTALL_DIR)/$(LIB_DIR); \ fi @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - cd drivers/dri ; $(MAKE) install ; \ + cd $(TOP)/gallium/winsys/dri ; $(MAKE) install ; \ fi ## NOT INSTALLED YET: @@ -198,7 +197,6 @@ clean: (cd drivers/dri && $(MAKE) clean) (cd x86 && $(MAKE) clean) (cd x86-64 && $(MAKE) clean) - (cd pipe ; $(MAKE) clean ) include depend diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 08c98eab48..18b033666f 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -85,7 +85,7 @@ #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "pipe/p_defines.h" /** diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 8ae243ae66..34287effe1 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -53,7 +53,7 @@ #include "tnl/tnl.h" #include "tnl/t_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" #include "state_tracker/st_draw.h" diff --git a/src/mesa/drivers/x11/xm_surface.c b/src/mesa/drivers/x11/xm_surface.c index 5533158ece..81616b92d9 100644 --- a/src/mesa/drivers/x11/xm_surface.c +++ b/src/mesa/drivers/x11/xm_surface.c @@ -45,10 +45,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_clear.h" -#include "pipe/softpipe/sp_tile_cache.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_clear.h" +#include "softpipe/sp_tile_cache.h" +#include "softpipe/sp_surface.h" #include "state_tracker/st_context.h" diff --git a/src/mesa/drivers/x11/xm_winsys.c b/src/mesa/drivers/x11/xm_winsys.c index a690df2772..2edc697693 100644 --- a/src/mesa/drivers/x11/xm_winsys.c +++ b/src/mesa/drivers/x11/xm_winsys.c @@ -38,7 +38,7 @@ #include "main/macros.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" /** diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 4709d63394..fd2dfcd79a 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -37,8 +37,8 @@ #include "xm_image.h" #endif #include "state_tracker/st_cb_fbo.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_surface.h" extern _glthread_Mutex _xmesa_lock; diff --git a/src/mesa/sources b/src/mesa/sources index 1165425183..2d07738210 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -158,45 +158,45 @@ VF_SOURCES = \ DRAW_SOURCES = \ - pipe/draw/draw_clip.c \ - pipe/draw/draw_context.c\ - pipe/draw/draw_cull.c \ - pipe/draw/draw_debug.c \ - pipe/draw/draw_flatshade.c \ - pipe/draw/draw_offset.c \ - pipe/draw/draw_prim.c \ - pipe/draw/draw_stipple.c \ - pipe/draw/draw_twoside.c \ - pipe/draw/draw_unfilled.c \ - pipe/draw/draw_validate.c \ - pipe/draw/draw_vbuf.c \ - pipe/draw/draw_vertex.c \ - pipe/draw/draw_vertex_cache.c \ - pipe/draw/draw_vertex_fetch.c \ - pipe/draw/draw_vertex_shader.c \ - pipe/draw/draw_vf.c \ - pipe/draw/draw_vf_generic.c \ - pipe/draw/draw_vf_sse.c \ - pipe/draw/draw_wide_prims.c + $(TOP)/src/gallium/aux/draw/draw_clip.c \ + $(TOP)/src/gallium/aux/draw/draw_context.c\ + $(TOP)/src/gallium/aux/draw/draw_cull.c \ + $(TOP)/src/gallium/aux/draw/draw_debug.c \ + $(TOP)/src/gallium/aux/draw/draw_flatshade.c \ + $(TOP)/src/gallium/aux/draw/draw_offset.c \ + $(TOP)/src/gallium/aux/draw/draw_prim.c \ + $(TOP)/src/gallium/aux/draw/draw_stipple.c \ + $(TOP)/src/gallium/aux/draw/draw_twoside.c \ + $(TOP)/src/gallium/aux/draw/draw_unfilled.c \ + $(TOP)/src/gallium/aux/draw/draw_validate.c \ + $(TOP)/src/gallium/aux/draw/draw_vbuf.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_cache.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_fetch.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_shader.c \ + $(TOP)/src/gallium/aux/draw/draw_vf.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_generic.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_sse.c \ + $(TOP)/src/gallium/aux/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ - pipe/tgsi/exec/tgsi_exec.c \ - pipe/tgsi/exec/tgsi_sse2.c + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_exec.c \ + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_sse2.c TGSIUTIL_SOURCES = \ - pipe/tgsi/util/tgsi_build.c \ - pipe/tgsi/util/tgsi_dump.c \ - pipe/tgsi/util/tgsi_parse.c \ - pipe/tgsi/util/tgsi_util.c + $(TOP)/src/gallium/aux/tgsi/util/tgsi_build.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_dump.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_parse.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_util.c STATECACHE_SOURCES = \ - pipe/cso_cache/cso_hash.c \ - pipe/cso_cache/cso_cache.c + $(TOP)/src/gallium/aux/cso_cache/cso_hash.c \ + $(TOP)/src/gallium/aux/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ - pipe/util/p_debug.c \ - pipe/util/p_tile.c \ - pipe/util/p_util.c + $(TOP)/src/gallium/aux/util/p_debug.c \ + $(TOP)/src/gallium/aux/util/p_tile.c \ + $(TOP)/src/gallium/aux/util/p_util.c STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ @@ -331,13 +331,13 @@ __COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c X11_DRIVER_SOURCES = \ - pipe/xlib/glxapi.c \ - pipe/xlib/fakeglx.c \ - pipe/xlib/xfonts.c \ - pipe/xlib/xm_api.c \ - pipe/xlib/xm_winsys.c \ - pipe/xlib/xm_winsys_aub.c \ - pipe/xlib/brw_aub.c + $(TOP)/src/gallium/winsys/xlib/glxapi.c \ + $(TOP)/src/gallium/winsys/xlib/fakeglx.c \ + $(TOP)/src/gallium/winsys/xlib/xfonts.c \ + $(TOP)/src/gallium/winsys/xlib/xm_api.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys_aub.c \ + $(TOP)/src/gallium/winsys/xlib/brw_aub.c OSMESA_DRIVER_SOURCES = \ drivers/osmesa/osmesa.c @@ -425,7 +425,10 @@ FBDEV_DRIVER_OBJECTS = $(FBDEV_DRIVER_SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/aux OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/tnl \ @@ -435,4 +438,4 @@ OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/shader \ -I$(TOP)/src/mesa/shader/grammar \ -I$(TOP)/src/mesa/shader/slang \ - -I$(TOP)/src/mesa/pipe/tgsi + -I$(TOP)/s$(TOP)/src/gallium/aux/tgsi diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 2c6ec8421b..b67b620eaa 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_cache.h" diff --git a/src/mesa/state_tracker/st_cache.c b/src/mesa/state_tracker/st_cache.c index e0965b217a..2979e7fae5 100644 --- a/src/mesa/state_tracker/st_cache.c +++ b/src/mesa/state_tracker/st_cache.c @@ -36,8 +36,8 @@ #include "pipe/p_state.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/cso_cache/cso_hash.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" /* Those function will either find the state of the given template diff --git a/src/mesa/state_tracker/st_cache.h b/src/mesa/state_tracker/st_cache.h index e0c176b0ff..b81de316ec 100644 --- a/src/mesa/state_tracker/st_cache.h +++ b/src/mesa/state_tracker/st_cache.h @@ -33,7 +33,7 @@ #ifndef ST_CACHE_H #define ST_CACHE_H -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" struct pipe_blend_state; struct pipe_sampler_state; diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 3a3bf9016d..663c4f205d 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f13199a3c0..e2d4e06da1 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -56,7 +56,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index 31744151f1..5315294c07 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -53,10 +53,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index af3ee65504..61d4f4c41c 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -39,8 +39,8 @@ #include "shader/programopt.h" #include "shader/shader_api.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" +#include "cso_cache/cso_cache.h" +#include "draw/draw_context.h" #include "st_context.h" #include "st_program.h" diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 7e347c4893..5b0eb6022b 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -44,8 +44,8 @@ #include "st_draw.h" #include "st_cb_rasterpos.h" #include "st_draw.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "shader/prog_instruction.h" #include "vbo/vbo.h" diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 868c5f3c5f..c89c74229e 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "st_context.h" #include "st_cb_readpixels.h" #include "st_cb_fbo.h" diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 91a40288cc..03dbb30b0f 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -47,7 +47,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define DBG if (0) printf diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index bf4618bed8..09e389f9dc 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -54,8 +54,8 @@ #include "pipe/p_context.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cso_cache/cso_cache.h" +#include "draw/draw_context.h" +#include "cso_cache/cso_cache.h" /** diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 57450e52bf..5888bcb98a 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -31,9 +31,9 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_debug.h" diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index ae9f5c8b11..1c0fa8c6aa 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -47,8 +47,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" static GLuint double_types[4] = { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 459941cca8..6c09b86033 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -37,7 +37,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_draw.h" diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 325aa20173..97206752af 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -32,9 +32,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_util.h" #include "st_mesa_to_tgsi.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index c8297baded..dc992ee9c2 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -38,8 +38,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "draw/draw_context.h" +#include "tgsi/util/tgsi_dump.h" #include "st_context.h" #include "st_cache.h" -- cgit v1.2.3