summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c4
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.h2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c90
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c249
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h35
-rw-r--r--src/gallium/auxiliary/util/u_debug.h14
-rw-r--r--src/gallium/auxiliary/util/u_fifo.h94
-rw-r--r--src/gallium/auxiliary/util/u_format.csv10
-rw-r--r--src/gallium/auxiliary/util/u_math.h42
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.c29
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c10
-rw-r--r--src/gallium/auxiliary/util/u_tile.c54
-rw-r--r--src/gallium/auxiliary/vl/Makefile12
-rw-r--r--src/gallium/auxiliary/vl/SConscript12
-rw-r--r--src/gallium/auxiliary/vl/vl_bitstream_parser.c140
-rw-r--r--src/gallium/auxiliary/vl/vl_bitstream_parser.h36
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.c590
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.h47
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c1627
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h93
-rw-r--r--src/gallium/auxiliary/vl/vl_shader_build.c215
-rw-r--r--src/gallium/auxiliary/vl/vl_shader_build.h61
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.c4
-rw-r--r--src/gallium/drivers/i915simple/i915_context.c7
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c129
-rw-r--r--src/gallium/drivers/i915simple/i915_screen.c4
-rw-r--r--src/gallium/drivers/i915simple/i915_state.c10
-rw-r--r--src/gallium/drivers/i915simple/i915_texture.c49
-rw-r--r--src/gallium/drivers/i915simple/intel_winsys.h11
-rw-r--r--src/gallium/drivers/i965simple/brw_screen.c4
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile5
-rw-r--r--src/gallium/drivers/llvmpipe/README23
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c270
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.h50
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_const.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_const.h32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.c46
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.h14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.c17
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.c410
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.h42
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format.h64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_soa.c208
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.c19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.h135
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c416
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_struct.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_struct.h12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi.h39
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c466
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.c28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.h123
-rw-r--r--src/gallium/drivers/llvmpipe/lp_clear.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c44
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c25
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c238
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_surface.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c34
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c76
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c33
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_cache.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.h23
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_c.c (renamed from src/gallium/drivers/llvmpipe/lp_tex_sample.c)133
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c196
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_cache.c48
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_cache.h37
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.c2
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.c2
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h3
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c1
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c1728
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h35
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c28
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c4
-rw-r--r--src/gallium/drivers/r300/Makefile1
-rw-r--r--src/gallium/drivers/r300/r300_context.c5
-rw-r--r--src/gallium/drivers/r300/r300_context.h55
-rw-r--r--src/gallium/drivers/r300/r300_cs.h49
-rw-r--r--src/gallium/drivers/r300/r300_debug.c88
-rw-r--r--src/gallium/drivers/r300/r300_emit.c8
-rw-r--r--src/gallium/drivers/r300/r300_emit.h5
-rw-r--r--src/gallium/drivers/r300/r300_fs.c2
-rw-r--r--src/gallium/drivers/r300/r300_query.c2
-rw-r--r--src/gallium/drivers/r300/r300_reg.h1
-rw-r--r--src/gallium/drivers/r300/r300_render.c70
-rw-r--r--src/gallium/drivers/r300/r300_screen.c15
-rw-r--r--src/gallium/drivers/r300/r300_state.c6
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c60
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.h6
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c6
-rw-r--r--src/gallium/drivers/r300/r300_surface.c6
-rw-r--r--src/gallium/drivers/r300/r300_texture.c67
-rw-r--r--src/gallium/drivers/r300/r300_texture.h9
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c1
-rw-r--r--src/gallium/drivers/r300/r300_vs.c2
-rw-r--r--src/gallium/drivers/softpipe/Makefile17
-rw-r--r--src/gallium/drivers/softpipe/SConscript13
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.h1
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c123
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h66
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c25
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c85
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_llvm.c205
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c77
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_setup.c190
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_setup.h85
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c189
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.h4
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.h6
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_alpha_test.c108
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c1401
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_bufloop.c74
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_colormask.c116
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_coverage.c94
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c903
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_earlyz.c88
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_fs.c105
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_occlusion.c85
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_output.c103
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_pipe.c89
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_pipe.h4
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stencil.c352
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stipple.c22
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c487
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h1
-rw-r--r--src/gallium/drivers/softpipe/sp_state_blend.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c103
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c17
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c127
-rw-r--r--src/gallium/drivers/softpipe/sp_state_surface.c58
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c2506
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h123
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.c273
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.h155
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c97
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.h23
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c276
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.h88
-rw-r--r--src/gallium/drivers/softpipe/sp_video_context.c268
-rw-r--r--src/gallium/drivers/softpipe/sp_video_context.h30
-rw-r--r--src/gallium/drivers/trace/tr_context.c10
-rw-r--r--src/gallium/include/pipe/p_defines.h26
-rw-r--r--src/gallium/include/pipe/p_format.h18
-rw-r--r--src/gallium/include/pipe/p_inlines.h28
-rw-r--r--src/gallium/include/pipe/p_screen.h16
-rw-r--r--src/gallium/include/pipe/p_state.h24
-rw-r--r--src/gallium/include/pipe/p_video_context.h92
-rw-r--r--src/gallium/include/pipe/p_video_state.h158
-rw-r--r--src/gallium/state_trackers/egl/egl_surface.c7
-rw-r--r--src/gallium/state_trackers/egl/egl_tracker.h1
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_basic_csc.c12
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc14
-rw-r--r--src/gallium/state_trackers/wgl/SConscript25
-rw-r--r--src/gallium/state_trackers/wgl/opengl32.def1
-rw-r--r--src/gallium/state_trackers/wgl/opengl32.mingw.def1
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_context.c382
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_public.h73
-rw-r--r--src/gallium/state_trackers/wgl/stw_context.c (renamed from src/gallium/state_trackers/wgl/icd/stw_icd.c)478
-rw-r--r--src/gallium/state_trackers/wgl/stw_context.h (renamed from src/gallium/state_trackers/wgl/shared/stw_context.h)8
-rw-r--r--src/gallium/state_trackers/wgl/stw_device.c (renamed from src/gallium/state_trackers/wgl/shared/stw_device.c)85
-rw-r--r--src/gallium/state_trackers/wgl/stw_device.h (renamed from src/gallium/state_trackers/wgl/shared/stw_device.h)9
-rw-r--r--src/gallium/state_trackers/wgl/stw_ext_extensionsstring.c (renamed from src/gallium/state_trackers/wgl/shared/stw_extensionsstring.c)0
-rw-r--r--src/gallium/state_trackers/wgl/stw_ext_gallium.c (renamed from src/gallium/state_trackers/wgl/shared/stw_extgallium.c)2
-rw-r--r--src/gallium/state_trackers/wgl/stw_ext_gallium.h (renamed from src/gallium/state_trackers/wgl/shared/stw_extgallium.h)0
-rw-r--r--src/gallium/state_trackers/wgl/stw_ext_pixelformat.c (renamed from src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c)1
-rw-r--r--src/gallium/state_trackers/wgl/stw_ext_swapinterval.c (renamed from src/gallium/state_trackers/wgl/shared/stw_extswapinterval.c)0
-rw-r--r--src/gallium/state_trackers/wgl/stw_framebuffer.c (renamed from src/gallium/state_trackers/wgl/shared/stw_framebuffer.c)208
-rw-r--r--src/gallium/state_trackers/wgl/stw_framebuffer.h (renamed from src/gallium/state_trackers/wgl/shared/stw_framebuffer.h)16
-rw-r--r--src/gallium/state_trackers/wgl/stw_getprocaddress.c (renamed from src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c)7
-rw-r--r--src/gallium/state_trackers/wgl/stw_icd.h (renamed from src/gallium/state_trackers/wgl/icd/stw_icd.h)114
-rw-r--r--src/gallium/state_trackers/wgl/stw_pixelformat.c (renamed from src/gallium/state_trackers/wgl/shared/stw_pixelformat.c)67
-rw-r--r--src/gallium/state_trackers/wgl/stw_pixelformat.h (renamed from src/gallium/state_trackers/wgl/shared/stw_pixelformat.h)11
-rw-r--r--src/gallium/state_trackers/wgl/stw_tls.c (renamed from src/gallium/state_trackers/wgl/shared/stw_tls.c)0
-rw-r--r--src/gallium/state_trackers/wgl/stw_tls.h (renamed from src/gallium/state_trackers/wgl/shared/stw_tls.h)0
-rw-r--r--src/gallium/state_trackers/wgl/stw_wgl.c (renamed from src/gallium/state_trackers/wgl/wgl/stw_wgl.c)63
-rw-r--r--src/gallium/state_trackers/wgl/stw_wgl.h (renamed from src/gallium/state_trackers/wgl/wgl/stw_wgl.h)0
-rw-r--r--src/gallium/state_trackers/wgl/stw_winsys.h (renamed from src/gallium/state_trackers/wgl/shared/stw_winsys.h)54
-rw-r--r--src/gallium/state_trackers/xorg/xorg_composite.c636
-rw-r--r--src/gallium/state_trackers/xorg/xorg_composite.h12
-rw-r--r--src/gallium/state_trackers/xorg/xorg_crtc.c4
-rw-r--r--src/gallium/state_trackers/xorg/xorg_dri2.c1
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa.c269
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa.h18
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa_tgsi.c55
-rw-r--r--src/gallium/state_trackers/xorg/xorg_output.c4
-rw-r--r--src/gallium/state_trackers/xorg/xorg_tracker.h6
-rw-r--r--src/gallium/state_trackers/xorg/xorg_xv.c212
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/Makefile16
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/SConscript27
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/attributes.c19
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/block.c61
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/context.c214
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/subpicture.c168
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/surface.c382
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/.gitignore5
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/Makefile28
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c84
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/test_context.c92
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c290
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c71
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/testlib.c119
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/testlib.h42
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c273
-rw-r--r--src/gallium/state_trackers/xorg/xvmc/xvmc_private.h31
-rw-r--r--src/gallium/winsys/drm/intel/dri/SConscript3
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_drm_api.c9
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c21
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c22
-rw-r--r--src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c2
-rw-r--r--src/gallium/winsys/drm/nouveau/xorg/Makefile61
-rw-r--r--src/gallium/winsys/drm/nouveau/xorg/nouveau_xorg.c149
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_buffer.c11
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_drm.c28
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_r300.c2
-rw-r--r--src/gallium/winsys/g3dvl/Makefile11
-rw-r--r--src/gallium/winsys/g3dvl/vl_winsys.h21
-rw-r--r--src/gallium/winsys/g3dvl/xlib/Makefile74
-rw-r--r--src/gallium/winsys/g3dvl/xlib/xsp_winsys.c307
-rw-r--r--src/gallium/winsys/g3dvl/xsp_winsys.c1
-rw-r--r--src/gallium/winsys/gdi/SConscript60
-rw-r--r--src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c288
-rw-r--r--src/gallium/winsys/gdi/gdi_softpipe_winsys.c15
-rw-r--r--src/gallium/winsys/xlib/xlib_softpipe.c98
261 files changed, 18097 insertions, 7089 deletions
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index 89c69d7205..8be84cddbe 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -23,6 +23,7 @@ SConscript([
'auxiliary/pipebuffer/SConscript',
'auxiliary/indices/SConscript',
'auxiliary/rbug/SConscript',
+ 'auxiliary/vl/SConscript',
])
for driver in env['drivers']:
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 00d7197b13..e25f16c354 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -104,7 +104,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
unsigned clipped = 0;
unsigned j;
- if (0) debug_printf("%s\n");
+ if (0) debug_printf("%s\n", __FUNCTION__);
for (j = 0; j < count; j++) {
float *position = out->data[pos];
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 109ac7c9d6..d01f866622 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -542,7 +542,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
debug_printf("%10p %7u %7u\n",
fenced_buf,
fenced_buf->base.base.size,
- fenced_buf->base.base.reference.count);
+ p_atomic_read(&fenced_buf->base.base.reference.count));
curr = next;
next = curr->next;
}
@@ -556,7 +556,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
debug_printf("%10p %7u %7u %10p %s\n",
fenced_buf,
fenced_buf->base.base.size,
- fenced_buf->base.base.reference.count,
+ p_atomic_read(&fenced_buf->base.base.reference.count),
fenced_buf->fence,
signaled == 0 ? "y" : "n");
curr = next;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 1b4df28c70..6e3214ca9c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -350,7 +350,7 @@ pb_debug_manager_dump(struct pb_debug_manager *mgr)
buf = LIST_ENTRY(struct pb_debug_buffer, curr, head);
debug_printf("buffer = %p\n", buf);
- debug_printf(" .size = %p\n", buf->base.base.size);
+ debug_printf(" .size = 0x%x\n", buf->base.base.size);
debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE);
curr = next;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 8a13885da9..53e13b30e6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -358,7 +358,7 @@ epilog(
boolean
tgsi_sanity_check(
- struct tgsi_token *tokens )
+ const struct tgsi_token *tokens )
{
struct sanity_check_ctx ctx;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
index ca45e94c7a..52263ff883 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
@@ -40,7 +40,7 @@ extern "C" {
*/
boolean
tgsi_sanity_check(
- struct tgsi_token *tokens );
+ const struct tgsi_token *tokens );
#if defined __cplusplus
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index c535788819..0db4481a3d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -132,6 +132,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
if (file == TGSI_FILE_INPUT) {
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
+ info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
info->num_inputs++;
}
else if (file == TGSI_FILE_OUTPUT) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 2c1a75bc81..8a7ee0c7e4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -45,6 +45,7 @@ struct tgsi_shader_info
ubyte num_outputs;
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+ ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 1e719940ec..5f6b83b236 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -39,8 +39,9 @@
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
-#include "tgsi_exec.h"
-#include "tgsi_sse2.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_sse2.h"
#include "rtasm/rtasm_x86sse.h"
@@ -1360,6 +1361,32 @@ emit_store(
const struct tgsi_full_instruction *inst,
unsigned chan_index )
{
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+ sse_maxps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ) );
+
+ sse_minps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C ) );
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ assert( 0 );
+ break;
+ }
+
+
switch( reg->DstRegister.File ) {
case TGSI_FILE_OUTPUT:
emit_output(
@@ -1388,19 +1415,6 @@ emit_store(
default:
assert( 0 );
}
-
- switch( inst->Instruction.Saturate ) {
- case TGSI_SAT_NONE:
- break;
-
- case TGSI_SAT_ZERO_ONE:
- /* assert( 0 ); */
- break;
-
- case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
- break;
- }
}
#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
@@ -1747,14 +1761,6 @@ emit_instruction(
if (indirect_temp_reference(inst))
return FALSE;
- /* we don't handle saturation/clamping yet */
- if (inst->Instruction.Saturate != TGSI_SAT_NONE)
- return FALSE;
-
- /* need to use extra temps to fix SOA dependencies : */
- if (tgsi_check_soa_dependencies(inst))
- return FALSE;
-
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1768,8 +1774,10 @@ emit_instruction(
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- STORE( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 4 + chan_index, 0, chan_index );
+ }
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 4 + chan_index, 0, chan_index );
}
break;
@@ -1847,7 +1855,6 @@ emit_instruction(
break;
case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
FETCH( func, *inst, 0, 0, CHAN_X );
emit_rcp( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1856,7 +1863,6 @@ emit_instruction(
break;
case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
FETCH( func, *inst, 0, 0, CHAN_X );
emit_abs( func, 0 );
emit_rsqrt( func, 1, 0 );
@@ -1954,7 +1960,6 @@ emit_instruction(
break;
case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
emit_mul( func, 0, 1 );
@@ -1972,7 +1977,6 @@ emit_instruction(
break;
case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
emit_mul( func, 0, 1 );
@@ -2043,17 +2047,14 @@ emit_instruction(
break;
case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
emit_setcc( func, inst, cc_LessThan );
break;
case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
emit_setcc( func, inst, cc_NotLessThan );
break;
case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
@@ -2283,7 +2284,7 @@ emit_instruction(
break;
case TGSI_OPCODE_SEQ:
- return 0;
+ emit_setcc( func, inst, cc_Equal );
break;
case TGSI_OPCODE_SFL:
@@ -2291,7 +2292,7 @@ emit_instruction(
break;
case TGSI_OPCODE_SGT:
- return 0;
+ emit_setcc( func, inst, cc_NotLessThanEqual );
break;
case TGSI_OPCODE_SIN:
@@ -2303,11 +2304,11 @@ emit_instruction(
break;
case TGSI_OPCODE_SLE:
- return 0;
+ emit_setcc( func, inst, cc_LessThanEqual );
break;
case TGSI_OPCODE_SNE:
- return 0;
+ emit_setcc( func, inst, cc_NotEqual );
break;
case TGSI_OPCODE_STR:
@@ -2371,7 +2372,6 @@ emit_instruction(
break;
case TGSI_OPCODE_SSG:
- /* TGSI_OPCODE_SGN */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_sgn( func, 0, 0 );
@@ -2929,6 +2929,22 @@ tgsi_emit_sse2(
parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
"vertex shader" : "fragment shader");
}
+
+ if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
+ uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+
+ /* XXX: we only handle src/dst aliasing in a few opcodes
+ * currently. Need to use an additional temporay to hold
+ * the result in the cases where the code is too opaque to
+ * fix.
+ */
+ if (opcode != TGSI_OPCODE_MOV &&
+ opcode != TGSI_OPCODE_SWZ) {
+ debug_printf("Warning: src/dst aliasing in instruction"
+ " is not handled:\n");
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1);
+ }
+ }
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index f7096bd8e2..654426a903 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -31,6 +31,7 @@
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_sanity.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@@ -70,6 +71,7 @@ struct ureg_tokens {
#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
+#define UREG_MAX_CONSTANT_RANGE 32
#define UREG_MAX_IMMEDIATE 32
#define UREG_MAX_TEMP 256
#define UREG_MAX_ADDR 2
@@ -86,8 +88,10 @@ struct ureg_program
unsigned semantic_name;
unsigned semantic_index;
unsigned interp;
- } input[UREG_MAX_INPUT];
- unsigned nr_inputs;
+ } fs_input[UREG_MAX_INPUT];
+ unsigned nr_fs_inputs;
+
+ unsigned vs_inputs[UREG_MAX_INPUT/32];
struct {
unsigned semantic_name;
@@ -107,9 +111,13 @@ struct ureg_program
unsigned temps_active[UREG_MAX_TEMP / 32];
unsigned nr_temps;
- unsigned nr_addrs;
+ struct {
+ unsigned first;
+ unsigned last;
+ } constant_range[UREG_MAX_CONSTANT_RANGE];
+ unsigned nr_constant_ranges;
- unsigned nr_constants;
+ unsigned nr_addrs;
unsigned nr_instructions;
struct ureg_tokens domain[2];
@@ -119,6 +127,9 @@ static union tgsi_any_token error_tokens[32];
static void tokens_error( struct ureg_tokens *tokens )
{
+ if (tokens->tokens && tokens->tokens != error_tokens)
+ FREE(tokens->tokens);
+
tokens->tokens = error_tokens;
tokens->size = Elements(error_tokens);
tokens->count = 0;
@@ -228,25 +239,25 @@ ureg_src_register( unsigned file,
-static struct ureg_src
-ureg_DECL_input( struct ureg_program *ureg,
- unsigned name,
- unsigned index,
- unsigned interp_mode )
+struct ureg_src
+ureg_DECL_fs_input( struct ureg_program *ureg,
+ unsigned name,
+ unsigned index,
+ unsigned interp_mode )
{
unsigned i;
- for (i = 0; i < ureg->nr_inputs; i++) {
- if (ureg->input[i].semantic_name == name &&
- ureg->input[i].semantic_index == index)
+ for (i = 0; i < ureg->nr_fs_inputs; i++) {
+ if (ureg->fs_input[i].semantic_name == name &&
+ ureg->fs_input[i].semantic_index == index)
goto out;
}
- if (ureg->nr_inputs < UREG_MAX_INPUT) {
- ureg->input[i].semantic_name = name;
- ureg->input[i].semantic_index = index;
- ureg->input[i].interp = interp_mode;
- ureg->nr_inputs++;
+ if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
+ ureg->fs_input[i].semantic_name = name;
+ ureg->fs_input[i].semantic_index = index;
+ ureg->fs_input[i].interp = interp_mode;
+ ureg->nr_fs_inputs++;
}
else {
set_bad( ureg );
@@ -257,25 +268,14 @@ out:
}
-
-struct ureg_src
-ureg_DECL_fs_input( struct ureg_program *ureg,
- unsigned name,
- unsigned index,
- unsigned interp )
-{
- assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
- return ureg_DECL_input( ureg, name, index, interp );
-}
-
-
struct ureg_src
ureg_DECL_vs_input( struct ureg_program *ureg,
- unsigned name,
unsigned index )
{
assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
- return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT );
+
+ ureg->vs_inputs[index/32] |= 1 << (index % 32);
+ return ureg_src_register( TGSI_FILE_INPUT, index );
}
@@ -313,9 +313,57 @@ out:
* value or manage any constant_buffer contents -- that's the
* resposibility of the calling code.
*/
-struct ureg_src ureg_DECL_constant(struct ureg_program *ureg )
+struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
+ unsigned index )
{
- return ureg_src_register( TGSI_FILE_CONSTANT, ureg->nr_constants++ );
+ unsigned minconst = index, maxconst = index;
+ unsigned i;
+
+ /* Inside existing range?
+ */
+ for (i = 0; i < ureg->nr_constant_ranges; i++) {
+ if (ureg->constant_range[i].first <= index &&
+ ureg->constant_range[i].last >= index)
+ goto out;
+ }
+
+ /* Extend existing range?
+ */
+ for (i = 0; i < ureg->nr_constant_ranges; i++) {
+ if (ureg->constant_range[i].last == index - 1) {
+ ureg->constant_range[i].last = index;
+ goto out;
+ }
+
+ if (ureg->constant_range[i].first == index + 1) {
+ ureg->constant_range[i].first = index;
+ goto out;
+ }
+
+ minconst = MIN2(minconst, ureg->constant_range[i].first);
+ maxconst = MAX2(maxconst, ureg->constant_range[i].last);
+ }
+
+ /* Create new range?
+ */
+ if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
+ i = ureg->nr_constant_ranges++;
+ ureg->constant_range[i].first = index;
+ ureg->constant_range[i].last = index;
+ }
+
+ /* Collapse all ranges down to one:
+ */
+ i = 0;
+ ureg->constant_range[0].first = minconst;
+ ureg->constant_range[0].last = maxconst;
+ ureg->nr_constant_ranges = 1;
+
+out:
+ assert(i < ureg->nr_constant_ranges);
+ assert(ureg->constant_range[i].first <= index);
+ assert(ureg->constant_range[i].last >= index);
+ return ureg_src_register( TGSI_FILE_CONSTANT, index );
}
@@ -566,6 +614,19 @@ ureg_emit_dst( struct ureg_program *ureg,
}
+static void validate( unsigned opcode,
+ unsigned nr_dst,
+ unsigned nr_src )
+{
+#ifdef DEBUG
+ const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
+ assert(info);
+ if(info) {
+ assert(nr_dst == info->num_dst);
+ assert(nr_src == info->num_src);
+ }
+#endif
+}
unsigned
ureg_emit_insn(struct ureg_program *ureg,
@@ -576,6 +637,8 @@ ureg_emit_insn(struct ureg_program *ureg,
{
union tgsi_any_token *out;
+ validate( opcode, num_dst, num_src );
+
out = get_tokens( ureg, DOMAIN_INSN, 1 );
out[0].value = 0;
out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
@@ -678,17 +741,6 @@ ureg_insn(struct ureg_program *ureg,
unsigned insn, i;
boolean saturate;
-#ifdef DEBUG
- {
- const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
- assert(info);
- if(info) {
- assert(nr_dst == info->num_dst);
- assert(nr_src == info->num_src);
- }
- }
-#endif
-
saturate = nr_dst ? dst[0].Saturate : FALSE;
insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src );
@@ -702,6 +754,53 @@ ureg_insn(struct ureg_program *ureg,
ureg_fixup_insn_size( ureg, insn );
}
+void
+ureg_tex_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ const struct ureg_dst *dst,
+ unsigned nr_dst,
+ unsigned target,
+ const struct ureg_src *src,
+ unsigned nr_src )
+{
+ unsigned insn, i;
+ boolean saturate;
+
+ saturate = nr_dst ? dst[0].Saturate : FALSE;
+
+ insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src );
+
+ ureg_emit_texture( ureg, insn, target ); \
+
+ for (i = 0; i < nr_dst; i++)
+ ureg_emit_dst( ureg, dst[i] );
+
+ for (i = 0; i < nr_src; i++)
+ ureg_emit_src( ureg, src[i] );
+
+ ureg_fixup_insn_size( ureg, insn );
+}
+
+
+void
+ureg_label_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ const struct ureg_src *src,
+ unsigned nr_src,
+ unsigned *label_token )
+{
+ unsigned insn, i;
+
+ insn = ureg_emit_insn( ureg, opcode, FALSE, 0, nr_src );
+
+ ureg_emit_label( ureg, insn, label_token ); \
+
+ for (i = 0; i < nr_src; i++)
+ ureg_emit_src( ureg, src[i] );
+
+ ureg_fixup_insn_size( ureg, insn );
+}
+
static void emit_decl( struct ureg_program *ureg,
@@ -777,13 +876,22 @@ static void emit_decls( struct ureg_program *ureg )
{
unsigned i;
- for (i = 0; i < ureg->nr_inputs; i++) {
- emit_decl( ureg,
- TGSI_FILE_INPUT,
- i,
- ureg->input[i].semantic_name,
- ureg->input[i].semantic_index,
- ureg->input[i].interp );
+ if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
+ for (i = 0; i < UREG_MAX_INPUT; i++) {
+ if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
+ emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
+ }
+ }
+ }
+ else {
+ for (i = 0; i < ureg->nr_fs_inputs; i++) {
+ emit_decl( ureg,
+ TGSI_FILE_INPUT,
+ i,
+ ureg->fs_input[i].semantic_name,
+ ureg->fs_input[i].semantic_index,
+ ureg->fs_input[i].interp );
+ }
}
for (i = 0; i < ureg->nr_outputs; i++) {
@@ -801,10 +909,13 @@ static void emit_decls( struct ureg_program *ureg )
ureg->sampler[i].Index, 1 );
}
- if (ureg->nr_constants) {
- emit_decl_range( ureg,
- TGSI_FILE_CONSTANT,
- 0, ureg->nr_constants );
+ if (ureg->nr_constant_ranges) {
+ for (i = 0; i < ureg->nr_constant_ranges; i++)
+ emit_decl_range( ureg,
+ TGSI_FILE_CONSTANT,
+ ureg->constant_range[i].first,
+ (ureg->constant_range[i].last + 1 -
+ ureg->constant_range[i].first) );
}
if (ureg->nr_temps) {
@@ -890,6 +1001,15 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
ureg->domain[DOMAIN_DECL].count);
tgsi_dump( tokens, 0 );
}
+
+#if DEBUG
+ if (tokens && !tgsi_sanity_check(tokens)) {
+ debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
+ tgsi_dump(tokens, 0);
+ assert(0);
+ }
+#endif
+
return tokens;
}
@@ -911,6 +1031,25 @@ void *ureg_create_shader( struct ureg_program *ureg,
}
+const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
+ unsigned *nr_tokens )
+{
+ const struct tgsi_token *tokens;
+
+ ureg_finalize(ureg);
+
+ tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
+
+ if (nr_tokens)
+ *nr_tokens = ureg->domain[DOMAIN_DECL].size;
+
+ ureg->domain[DOMAIN_DECL].tokens = 0;
+ ureg->domain[DOMAIN_DECL].size = 0;
+ ureg->domain[DOMAIN_DECL].order = 0;
+ ureg->domain[DOMAIN_DECL].count = 0;
+
+ return tokens;
+}
struct ureg_program *ureg_create( unsigned processor )
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index acbca59040..f04f443b9e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -82,10 +82,21 @@ ureg_create( unsigned processor );
const struct tgsi_token *
ureg_finalize( struct ureg_program * );
+/* Create and return a shader:
+ */
void *
ureg_create_shader( struct ureg_program *,
struct pipe_context *pipe );
+
+/* Alternately, return the built token stream and hand ownership of
+ * that memory to the caller:
+ */
+const struct tgsi_token *
+ureg_get_tokens( struct ureg_program *ureg,
+ unsigned *nr_tokens );
+
+
void
ureg_destroy( struct ureg_program * );
@@ -116,8 +127,7 @@ ureg_DECL_fs_input( struct ureg_program *,
struct ureg_src
ureg_DECL_vs_input( struct ureg_program *,
- unsigned semantic_name,
- unsigned semantic_index );
+ unsigned index );
struct ureg_dst
ureg_DECL_output( struct ureg_program *,
@@ -130,7 +140,8 @@ ureg_DECL_immediate( struct ureg_program *,
unsigned nr );
struct ureg_src
-ureg_DECL_constant( struct ureg_program * );
+ureg_DECL_constant( struct ureg_program *,
+ unsigned index );
struct ureg_dst
ureg_DECL_temporary( struct ureg_program * );
@@ -233,6 +244,24 @@ ureg_insn(struct ureg_program *ureg,
unsigned nr_src );
+void
+ureg_tex_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ const struct ureg_dst *dst,
+ unsigned nr_dst,
+ unsigned target,
+ const struct ureg_src *src,
+ unsigned nr_src );
+
+
+void
+ureg_label_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ const struct ureg_src *src,
+ unsigned nr_src,
+ unsigned *label);
+
+
/***********************************************************************
* Internal instruction helpers, don't call these directly:
*/
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 1380d98d7e..b82e7cb4d4 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -65,6 +65,11 @@ extern "C" {
#define __FUNCTION__ "???"
#endif
+#if defined(__GNUC__)
+#define _util_printf_format(fmt, list) __attribute__ ((format (printf, fmt, list)))
+#else
+#define _util_printf_format(fmt, list)
+#endif
void _debug_vprintf(const char *format, va_list ap);
@@ -82,14 +87,17 @@ _debug_printf(const char *format, ...)
/**
* Print debug messages.
*
- * The actual channel used to output debug message is platform specific. To
- * avoid misformating or truncation, follow these rules of thumb:
+ * The actual channel used to output debug message is platform specific. To
+ * avoid misformating or truncation, follow these rules of thumb:
* - output whole lines
- * - avoid outputing large strings (512 bytes is the current maximum length
+ * - avoid outputing large strings (512 bytes is the current maximum length
* that is guaranteed to be printed in all platforms)
*/
#if !defined(PIPE_OS_HAIKU)
static INLINE void
+debug_printf(const char *format, ...) _util_printf_format(1,2);
+
+static INLINE void
debug_printf(const char *format, ...)
{
#ifdef DEBUG
diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h
new file mode 100644
index 0000000000..9e007de1ad
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_fifo.h
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright © 2009 Jakob Bornecrantz
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_FIFO_H
+#define U_FIFO_H
+
+#include "util/u_memory.h"
+
+struct util_fifo
+{
+ size_t head;
+ size_t tail;
+ size_t num;
+ size_t size;
+};
+
+static INLINE struct util_fifo *
+u_fifo_create(size_t size)
+{
+ struct util_fifo *fifo;
+ fifo = MALLOC(sizeof(*fifo) + size * sizeof(void*));
+
+ fifo->head = 0;
+ fifo->tail = 0;
+ fifo->num = 0;
+ fifo->size = size;
+
+ return fifo;
+}
+
+static INLINE boolean
+u_fifo_add(struct util_fifo *fifo, void *ptr)
+{
+ void **array = (void**)&fifo[1];
+ if (fifo->num >= fifo->size)
+ return FALSE;
+
+ if (++fifo->head >= fifo->size)
+ fifo->head = 0;
+
+ array[fifo->head] = ptr;
+
+ ++fifo->num;
+
+ return TRUE;
+}
+
+static INLINE boolean
+u_fifo_pop(struct util_fifo *fifo, void **ptr)
+{
+ void **array = (void**)&fifo[1];
+
+ if (!fifo->num)
+ return FALSE;
+
+ if (++fifo->tail >= fifo->size)
+ fifo->tail = 0;
+
+ *ptr = array[fifo->tail];
+
+ ++fifo->num;
+
+ return TRUE;
+}
+
+static INLINE void
+u_fifo_destroy(struct util_fifo *fifo)
+{
+ FREE(fifo);
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 00a46d0cc4..f1bf94f17d 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -2,7 +2,7 @@ PIPE_FORMAT_A8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyxw,
PIPE_FORMAT_X8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb
PIPE_FORMAT_B8G8R8A8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb
PIPE_FORMAT_B8G8R8X8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb
-PIPE_FORMAT_A1R5G5B5_UNORM , arith , 1, 1, un1 , un5 , un5 , un5 , zyxw, rgb
+PIPE_FORMAT_A1R5G5B5_UNORM , arith , 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb
PIPE_FORMAT_A4R4G4B4_UNORM , arith , 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb
PIPE_FORMAT_R5G6B5_UNORM , arith , 1, 1, un5 , un6 , un5 , , zyx1, rgb
PIPE_FORMAT_A2B10G10R10_UNORM , arith , 1, 1, un10, un10, un10, un2 , xyzw, rgb
@@ -14,10 +14,10 @@ PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1,
PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs
PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs
PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs
-PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs
-PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs
-PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un8 , un24, , , y___, zs
-PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un24, un8 , , , x___, zs
+PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs
+PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs
+PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs
+PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs
PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs
PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb
PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 4c6c2bc00e..75b075f160 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -283,6 +283,14 @@ util_fast_pow(float x, float y)
return util_fast_exp2(util_fast_log2(x) * y);
}
+/* Note that this counts zero as a power of two.
+ */
+static INLINE boolean
+util_is_power_of_two( unsigned v )
+{
+ return (v & (v-1)) == 0;
+}
+
/**
* Floor(x), returned as int.
@@ -341,10 +349,22 @@ util_is_inf_or_nan(float x)
/**
+ * Test whether x is a power of two.
+ */
+static INLINE boolean
+util_is_pot(unsigned x)
+{
+ return (x & (x - 1)) == 0;
+}
+
+
+/**
* Find first bit set in word. Least significant bit is 1.
* Return 0 if no bits set.
*/
-#if defined(_MSC_VER) && _MSC_VER >= 1300
+#if defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64)
+unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
+#pragma intrinsic(_BitScanForward)
static INLINE
unsigned long ffs( unsigned long u )
{
@@ -451,6 +471,26 @@ util_logbase2(unsigned n)
/**
+ * Returns the smallest power of two >= x
+ */
+static INLINE unsigned
+util_next_power_of_two(unsigned x)
+{
+ unsigned i;
+
+ if (x == 0)
+ return 1;
+
+ --x;
+
+ for (i = 1; i < sizeof(unsigned) * 8; i <<= 1)
+ x |= x >> i;
+
+ return x + 1;
+}
+
+
+/**
* Clamp X to [MIN, MAX].
* This is a macro to allow float, int, uint, etc. types.
*/
diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c
index f01296b40f..5238299015 100644
--- a/src/gallium/auxiliary/util/u_simple_screen.c
+++ b/src/gallium/auxiliary/util/u_simple_screen.c
@@ -52,8 +52,7 @@ pass_user_buffer_create(struct pipe_screen *screen,
unsigned bytes)
{
struct pipe_buffer *buffer =
- screen->winsys->user_buffer_create(screen->winsys,
- ptr, bytes);
+ screen->winsys->user_buffer_create(screen->winsys, ptr, bytes);
buffer->screen = screen;
@@ -69,9 +68,8 @@ pass_surface_buffer_create(struct pipe_screen *screen,
unsigned *stride)
{
struct pipe_buffer *buffer =
- screen->winsys->surface_buffer_create(screen->winsys,
- width, height,
- format, usage, tex_usage, stride);
+ screen->winsys->surface_buffer_create(screen->winsys, width, height,
+ format, usage, tex_usage, stride);
buffer->screen = screen;
@@ -83,8 +81,7 @@ pass_buffer_map(struct pipe_screen *screen,
struct pipe_buffer *buf,
unsigned usage)
{
- return screen->winsys->buffer_map(screen->winsys,
- buf, usage);
+ return screen->winsys->buffer_map(screen->winsys, buf, usage);
}
static void
@@ -106,8 +103,7 @@ pass_flush_frontbuffer(struct pipe_screen *screen,
struct pipe_surface *surf,
void *context_private)
{
- screen->winsys->flush_frontbuffer(screen->winsys,
- surf, context_private);
+ screen->winsys->flush_frontbuffer(screen->winsys, surf, context_private);
}
static void
@@ -115,8 +111,7 @@ pass_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
- screen->winsys->fence_reference(screen->winsys,
- ptr, fence);
+ screen->winsys->fence_reference(screen->winsys, ptr, fence);
}
static int
@@ -124,8 +119,7 @@ pass_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
unsigned flag)
{
- return screen->winsys->fence_signalled(screen->winsys,
- fence, flag);
+ return screen->winsys->fence_signalled(screen->winsys, fence, flag);
}
static int
@@ -133,11 +127,11 @@ pass_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
unsigned flag)
{
- return screen->winsys->fence_finish(screen->winsys,
- fence, flag);
+ return screen->winsys->fence_finish(screen->winsys, fence, flag);
}
-void u_simple_screen_init(struct pipe_screen *screen)
+void
+u_simple_screen_init(struct pipe_screen *screen)
{
screen->buffer_create = pass_buffer_create;
screen->user_buffer_create = pass_user_buffer_create;
@@ -152,7 +146,8 @@ void u_simple_screen_init(struct pipe_screen *screen)
screen->fence_finish = pass_fence_finish;
}
-const char* u_simple_screen_winsys_name(struct pipe_screen *screen)
+const char *
+u_simple_screen_winsys_name(struct pipe_screen *screen)
{
return screen->winsys->get_name(screen->winsys);
}
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index ab754296fa..0d706f9449 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -34,14 +34,8 @@
#include "pipe/p_context.h"
-#include "util/u_debug.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_screen.h"
#include "pipe/p_shader_tokens.h"
-
-#include "util/u_memory.h"
#include "util/u_simple_shaders.h"
-
#include "tgsi/tgsi_ureg.h"
@@ -67,9 +61,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
struct ureg_src src;
struct ureg_dst dst;
- src = ureg_DECL_vs_input( ureg,
- semantic_names[i],
- semantic_indexes[i]);
+ src = ureg_DECL_vs_input( ureg, i );
dst = ureg_DECL_output( ureg,
semantic_names[i],
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 1235a67d26..0d6489c26e 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -170,7 +170,7 @@ x8r8g8b8_get_tile_rgba(const unsigned *src,
pRow[0] = ubyte_to_float((pixel >> 16) & 0xff);
pRow[1] = ubyte_to_float((pixel >> 8) & 0xff);
pRow[2] = ubyte_to_float((pixel >> 0) & 0xff);
- pRow[3] = ubyte_to_float(0xff);
+ pRow[3] = 1.0F;
}
p += dst_stride;
}
@@ -394,6 +394,52 @@ r5g6b5_put_tile_rgba(ushort *dst,
+/*** PIPE_FORMAT_R8G8B8_UNORM ***/
+
+static void
+r8g8b8_get_tile_rgba(const ubyte *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] = ubyte_to_float(src[0]);
+ pRow[1] = ubyte_to_float(src[1]);
+ pRow[2] = ubyte_to_float(src[2]);
+ pRow[3] = 1.0f;
+ src += 3;
+ }
+ p += dst_stride;
+ }
+}
+
+
+static void
+r8g8b8_put_tile_rgba(ubyte *dst,
+ unsigned w, unsigned h,
+ const float *p,
+ unsigned src_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ const float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ dst[0] = float_to_ubyte(pRow[0]);
+ dst[1] = float_to_ubyte(pRow[1]);
+ dst[2] = float_to_ubyte(pRow[2]);
+ dst += 3;
+ }
+ p += src_stride;
+ }
+}
+
+
+
/*** PIPE_FORMAT_Z16_UNORM ***/
/**
@@ -1106,6 +1152,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
case PIPE_FORMAT_R5G6B5_UNORM:
r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_L8_UNORM:
l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
break;
@@ -1222,6 +1271,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
case PIPE_FORMAT_R5G6B5_UNORM:
r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
+ break;
case PIPE_FORMAT_R8G8B8A8_UNORM:
assert(0);
break;
diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile
new file mode 100644
index 0000000000..71bfb937ad
--- /dev/null
+++ b/src/gallium/auxiliary/vl/Makefile
@@ -0,0 +1,12 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = vl
+
+C_SOURCES = \
+ vl_bitstream_parser.c \
+ vl_mpeg12_mc_renderer.c \
+ vl_compositor.c \
+ vl_shader_build.c
+
+include ../../Makefile.template
diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript
new file mode 100644
index 0000000000..eb50940c35
--- /dev/null
+++ b/src/gallium/auxiliary/vl/SConscript
@@ -0,0 +1,12 @@
+Import('*')
+
+vl = env.ConvenienceLibrary(
+ target = 'vl',
+ source = [
+ 'vl_bitstream_parser.c',
+ 'vl_mpeg12_mc_renderer.c',
+ 'vl_compositor.c',
+ 'vl_shader_build.c',
+ ])
+
+auxiliaries.insert(0, vl)
diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c
new file mode 100644
index 0000000000..45826bad45
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.c
@@ -0,0 +1,140 @@
+#include "vl_bitstream_parser.h"
+#include <assert.h>
+#include <limits.h>
+#include <util/u_memory.h>
+
+static unsigned
+grab_bits(unsigned cursor, unsigned how_many_bits, unsigned bitstream_elt)
+{
+ unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits - cursor;
+
+ assert(cursor < sizeof(unsigned) * CHAR_BIT);
+ assert(how_many_bits > 0 && how_many_bits <= sizeof(unsigned) * CHAR_BIT);
+ assert(cursor + how_many_bits <= sizeof(unsigned) * CHAR_BIT);
+
+ return (bitstream_elt << excess_bits) >> (excess_bits + cursor);
+}
+
+static unsigned
+show_bits(unsigned cursor, unsigned how_many_bits, const unsigned *bitstream)
+{
+ unsigned cur_int = cursor / (sizeof(unsigned) * CHAR_BIT);
+ unsigned cur_bit = cursor % (sizeof(unsigned) * CHAR_BIT);
+
+ assert(bitstream);
+
+ if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) {
+ unsigned lower = grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit,
+ bitstream[cur_int]);
+ unsigned upper = grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT,
+ bitstream[cur_int + 1]);
+ return lower | upper << (sizeof(unsigned) * CHAR_BIT - cur_bit);
+ }
+ else
+ return grab_bits(cur_bit, how_many_bits, bitstream[cur_int]);
+}
+
+bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser,
+ unsigned num_bitstreams,
+ const void **bitstreams,
+ const unsigned *sizes)
+{
+ assert(parser);
+ assert(num_bitstreams);
+ assert(bitstreams);
+ assert(sizes);
+
+ parser->num_bitstreams = num_bitstreams;
+ parser->bitstreams = (const unsigned**)bitstreams;
+ parser->sizes = sizes;
+ parser->cur_bitstream = 0;
+ parser->cursor = 0;
+
+ return true;
+}
+
+void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser)
+{
+ assert(parser);
+}
+
+unsigned
+vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits)
+{
+ unsigned bits;
+
+ assert(parser);
+
+ bits = vl_bitstream_parser_show_bits(parser, how_many_bits);
+
+ vl_bitstream_parser_forward(parser, how_many_bits);
+
+ return bits;
+}
+
+unsigned
+vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits)
+{
+ unsigned bits = 0;
+ unsigned shift = 0;
+ unsigned cursor;
+ unsigned cur_bitstream;
+
+ assert(parser);
+
+ cursor = parser->cursor;
+ cur_bitstream = parser->cur_bitstream;
+
+ while (1) {
+ unsigned bits_left = parser->sizes[cur_bitstream] * CHAR_BIT - cursor;
+ unsigned bits_to_show = how_many_bits > bits_left ? bits_left : how_many_bits;
+
+ bits |= show_bits(cursor, bits_to_show,
+ parser->bitstreams[cur_bitstream]) << shift;
+
+ if (how_many_bits > bits_to_show) {
+ how_many_bits -= bits_to_show;
+ cursor = 0;
+ ++cur_bitstream;
+ shift += bits_to_show;
+ }
+ else
+ break;
+ }
+
+ return bits;
+}
+
+void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits)
+{
+ assert(parser);
+ assert(how_many_bits);
+
+ parser->cursor += how_many_bits;
+
+ while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) {
+ parser->cursor -= parser->sizes[parser->cur_bitstream++] * CHAR_BIT;
+ assert(parser->cur_bitstream < parser->num_bitstreams);
+ }
+}
+
+void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits)
+{
+ signed c;
+
+ assert(parser);
+ assert(how_many_bits);
+
+ c = parser->cursor - how_many_bits;
+
+ while (c < 0) {
+ c += parser->sizes[parser->cur_bitstream--] * CHAR_BIT;
+ assert(parser->cur_bitstream < parser->num_bitstreams);
+ }
+
+ parser->cursor = (unsigned)c;
+}
diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h
new file mode 100644
index 0000000000..91ebaab45b
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.h
@@ -0,0 +1,36 @@
+#ifndef vl_bitstream_parser_h
+#define vl_bitstream_parser_h
+
+#include "pipe/p_compiler.h"
+
+struct vl_bitstream_parser
+{
+ unsigned num_bitstreams;
+ const unsigned **bitstreams;
+ const unsigned *sizes;
+ unsigned cur_bitstream;
+ unsigned cursor;
+};
+
+bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser,
+ unsigned num_bitstreams,
+ const void **bitstreams,
+ const unsigned *sizes);
+
+void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser);
+
+unsigned
+vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits);
+
+unsigned
+vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits);
+
+void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits);
+
+void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser,
+ unsigned how_many_bits);
+
+#endif /* vl_bitstream_parser_h */
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
new file mode 100644
index 0000000000..6431da6611
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -0,0 +1,590 @@
+#include "vl_compositor.h"
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_inlines.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+#include <util/u_memory.h>
+#include "vl_shader_build.h"
+
+struct vertex2f
+{
+ float x, y;
+};
+
+struct vertex4f
+{
+ float x, y, z, w;
+};
+
+struct vertex_shader_consts
+{
+ struct vertex4f dst_scale;
+ struct vertex4f dst_trans;
+ struct vertex4f src_scale;
+ struct vertex4f src_trans;
+};
+
+struct fragment_shader_consts
+{
+ struct vertex4f bias;
+ float matrix[16];
+};
+
+/*
+ * Represents 2 triangles in a strip in normalized coords.
+ * Used to render the surface onto the frame buffer.
+ */
+static const struct vertex2f surface_verts[4] =
+{
+ {0.0f, 0.0f},
+ {0.0f, 1.0f},
+ {1.0f, 0.0f},
+ {1.0f, 1.0f}
+};
+
+/*
+ * Represents texcoords for the above. We can use the position values directly.
+ * TODO: Duplicate these in the shader, no need to create a buffer.
+ */
+static const struct vertex2f *surface_texcoords = surface_verts;
+
+/*
+ * Identity color conversion constants, for debugging
+ */
+static const struct fragment_shader_consts identity =
+{
+ {
+ 0.0f, 0.0f, 0.0f, 0.0f
+ },
+ {
+ 1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 1.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ */
+static const struct fragment_shader_consts bt_601 =
+{
+ {
+ 0.0f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.0f, 0.0f, 1.371f, 0.0f,
+ 1.0f, -0.336f, -0.698f, 0.0f,
+ 1.0f, 1.732f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ */
+static const struct fragment_shader_consts bt_601_full =
+{
+ {
+ 0.062745098f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.164f, 0.0f, 1.596f, 0.0f,
+ 1.164f, -0.391f, -0.813f, 0.0f,
+ 1.164f, 2.018f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ */
+static const struct fragment_shader_consts bt_709 =
+{
+ {
+ 0.0f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.0f, 0.0f, 1.540f, 0.0f,
+ 1.0f, -0.183f, -0.459f, 0.0f,
+ 1.0f, 1.816f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ */
+const struct fragment_shader_consts bt_709_full =
+{
+ {
+ 0.062745098f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.164f, 0.0f, 1.793f, 0.0f,
+ 1.164f, -0.213f, -0.534f, 0.0f,
+ 1.164f, 2.115f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+static void
+create_vert_shader(struct vl_compositor *c)
+{
+ const unsigned max_tokens = 50;
+
+ struct pipe_shader_state vs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(c);
+
+ tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header*)&tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Vertex pos
+ * decl i1 ; Vertex texcoords
+ */
+ for (i = 0; i < 2; i++) {
+ decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * decl c0 ; Scaling vector to scale vertex pos rect to destination size
+ * decl c1 ; Translation vector to move vertex pos rect into position
+ * decl c2 ; Scaling vector to scale texcoord rect to source size
+ * decl c3 ; Translation vector to move texcoord rect into position
+ */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl o0 ; Vertex pos
+ * decl o1 ; Vertex texcoords
+ */
+ for (i = 0; i < 2; i++) {
+ decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* decl t0, t1 */
+ decl = vl_decl_temps(0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos
+ * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos
+ */
+ for (i = 0; i < 2; ++i) {
+ inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ vs.tokens = tokens;
+ c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs);
+ FREE(tokens);
+}
+
+static void
+create_frag_shader(struct vl_compositor *c)
+{
+ const unsigned max_tokens = 50;
+
+ struct pipe_shader_state fs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(c);
+
+ tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header*)&tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+ ti = 3;
+
+ /* decl i0 ; Texcoords for s0 */
+ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl c0 ; Bias vector for CSC
+ * decl c1-c4 ; CSC matrix c1-c4
+ */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl o0 ; Fragment color */
+ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl t0 */
+ decl = vl_decl_temps(0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl s0 ; Sampler for tex containing picture to display */
+ decl = vl_decl_samplers(0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* tex2d t0, i0, s0 ; Read src pixel */
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* sub t0, t0, c0 ; Subtract bias vector from pixel */
+ inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix
+ * dp4 o0.y, t0, c2
+ * dp4 o0.z, t0, c3
+ */
+ for (i = 0; i < 3; ++i) {
+ inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
+ inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ fs.tokens = tokens;
+ c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs);
+ FREE(tokens);
+}
+
+static bool
+init_pipe_state(struct vl_compositor *c)
+{
+ struct pipe_sampler_state sampler;
+
+ assert(c);
+
+ c->fb_state.nr_cbufs = 1;
+ c->fb_state.zsbuf = NULL;
+
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+ sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+ sampler.compare_func = PIPE_FUNC_ALWAYS;
+ sampler.normalized_coords = 1;
+ /*sampler.prefilter = ;*/
+ /*sampler.lod_bias = ;*/
+ /*sampler.min_lod = ;*/
+ /*sampler.max_lod = ;*/
+ /*sampler.border_color[i] = ;*/
+ /*sampler.max_anisotropy = ;*/
+ c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
+
+ return true;
+}
+
+static void cleanup_pipe_state(struct vl_compositor *c)
+{
+ assert(c);
+
+ c->pipe->delete_sampler_state(c->pipe, c->sampler);
+}
+
+static bool
+init_shaders(struct vl_compositor *c)
+{
+ assert(c);
+
+ create_vert_shader(c);
+ create_frag_shader(c);
+
+ return true;
+}
+
+static void cleanup_shaders(struct vl_compositor *c)
+{
+ assert(c);
+
+ c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
+ c->pipe->delete_fs_state(c->pipe, c->fragment_shader);
+}
+
+static bool
+init_buffers(struct vl_compositor *c)
+{
+ assert(c);
+
+ /*
+ * Create our vertex buffer and vertex buffer element
+ * VB contains 4 vertices that render a quad covering the entire window
+ * to display a rendered surface
+ * Quad is rendered as a tri strip
+ */
+ c->vertex_bufs[0].stride = sizeof(struct vertex2f);
+ c->vertex_bufs[0].max_index = 3;
+ c->vertex_bufs[0].buffer_offset = 0;
+ c->vertex_bufs[0].buffer = pipe_buffer_create
+ (
+ c->pipe->screen,
+ 1,
+ PIPE_BUFFER_USAGE_VERTEX,
+ sizeof(struct vertex2f) * 4
+ );
+
+ memcpy
+ (
+ pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+ surface_verts,
+ sizeof(struct vertex2f) * 4
+ );
+
+ pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
+
+ c->vertex_elems[0].src_offset = 0;
+ c->vertex_elems[0].vertex_buffer_index = 0;
+ c->vertex_elems[0].nr_components = 2;
+ c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /*
+ * Create our texcoord buffer and texcoord buffer element
+ * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
+ */
+ c->vertex_bufs[1].stride = sizeof(struct vertex2f);
+ c->vertex_bufs[1].max_index = 3;
+ c->vertex_bufs[1].buffer_offset = 0;
+ c->vertex_bufs[1].buffer = pipe_buffer_create
+ (
+ c->pipe->screen,
+ 1,
+ PIPE_BUFFER_USAGE_VERTEX,
+ sizeof(struct vertex2f) * 4
+ );
+
+ memcpy
+ (
+ pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+ surface_texcoords,
+ sizeof(struct vertex2f) * 4
+ );
+
+ pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
+
+ c->vertex_elems[1].src_offset = 0;
+ c->vertex_elems[1].vertex_buffer_index = 1;
+ c->vertex_elems[1].nr_components = 2;
+ c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /*
+ * Create our vertex shader's constant buffer
+ * Const buffer contains scaling and translation vectors
+ */
+ c->vs_const_buf.buffer = pipe_buffer_create
+ (
+ c->pipe->screen,
+ 1,
+ PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
+ sizeof(struct vertex_shader_consts)
+ );
+
+ /*
+ * Create our fragment shader's constant buffer
+ * Const buffer contains the color conversion matrix and bias vectors
+ */
+ c->fs_const_buf.buffer = pipe_buffer_create
+ (
+ c->pipe->screen,
+ 1,
+ PIPE_BUFFER_USAGE_CONSTANT,
+ sizeof(struct fragment_shader_consts)
+ );
+
+ /*
+ * TODO: Refactor this into a seperate function,
+ * allow changing the CSC matrix at runtime to switch between regular & full versions
+ */
+ memcpy
+ (
+ pipe_buffer_map(c->pipe->screen, c->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+ &bt_601_full,
+ sizeof(struct fragment_shader_consts)
+ );
+
+ pipe_buffer_unmap(c->pipe->screen, c->fs_const_buf.buffer);
+
+ return true;
+}
+
+static void
+cleanup_buffers(struct vl_compositor *c)
+{
+ unsigned i;
+
+ assert(c);
+
+ for (i = 0; i < 2; ++i)
+ pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL);
+
+ pipe_buffer_reference(&c->vs_const_buf.buffer, NULL);
+ pipe_buffer_reference(&c->fs_const_buf.buffer, NULL);
+}
+
+bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
+{
+ assert(compositor);
+
+ memset(compositor, 0, sizeof(struct vl_compositor));
+
+ compositor->pipe = pipe;
+
+ if (!init_pipe_state(compositor))
+ return false;
+ if (!init_shaders(compositor)) {
+ cleanup_pipe_state(compositor);
+ return false;
+ }
+ if (!init_buffers(compositor)) {
+ cleanup_shaders(compositor);
+ cleanup_pipe_state(compositor);
+ return false;
+ }
+
+ return true;
+}
+
+void vl_compositor_cleanup(struct vl_compositor *compositor)
+{
+ assert(compositor);
+
+ cleanup_buffers(compositor);
+ cleanup_shaders(compositor);
+ cleanup_pipe_state(compositor);
+}
+
+void vl_compositor_render(struct vl_compositor *compositor,
+ /*struct pipe_texture *backround,
+ struct pipe_video_rect *backround_area,*/
+ struct pipe_texture *src_surface,
+ enum pipe_mpeg12_picture_type picture_type,
+ /*unsigned num_past_surfaces,
+ struct pipe_texture *past_surfaces,
+ unsigned num_future_surfaces,
+ struct pipe_texture *future_surfaces,*/
+ struct pipe_video_rect *src_area,
+ struct pipe_texture *dst_surface,
+ struct pipe_video_rect *dst_area,
+ /*unsigned num_layers,
+ struct pipe_texture *layers,
+ struct pipe_video_rect *layer_src_areas,
+ struct pipe_video_rect *layer_dst_areas*/
+ struct pipe_fence_handle **fence)
+{
+ struct vertex_shader_consts *vs_consts;
+
+ assert(compositor);
+ assert(src_surface);
+ assert(src_area);
+ assert(dst_surface);
+ assert(dst_area);
+ assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
+
+ compositor->fb_state.width = dst_surface->width[0];
+ compositor->fb_state.height = dst_surface->height[0];
+ compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
+ (
+ compositor->pipe->screen,
+ dst_surface,
+ 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+ );
+
+ compositor->viewport.scale[0] = compositor->fb_state.width;
+ compositor->viewport.scale[1] = compositor->fb_state.height;
+ compositor->viewport.scale[2] = 1;
+ compositor->viewport.scale[3] = 1;
+ compositor->viewport.translate[0] = 0;
+ compositor->viewport.translate[1] = 0;
+ compositor->viewport.translate[2] = 0;
+ compositor->viewport.translate[3] = 0;
+
+ compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
+ compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
+ compositor->pipe->bind_sampler_states(compositor->pipe, 1, &compositor->sampler);
+ compositor->pipe->set_sampler_textures(compositor->pipe, 1, &src_surface);
+ compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
+ compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
+ compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
+ compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
+ compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf);
+ compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf);
+
+ vs_consts = pipe_buffer_map
+ (
+ compositor->pipe->screen,
+ compositor->vs_const_buf.buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ );
+
+ vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width;
+ vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height;
+ vs_consts->dst_scale.z = 1;
+ vs_consts->dst_scale.w = 1;
+ vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width;
+ vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height;
+ vs_consts->dst_trans.z = 0;
+ vs_consts->dst_trans.w = 0;
+
+ vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0];
+ vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0];
+ vs_consts->src_scale.z = 1;
+ vs_consts->src_scale.w = 1;
+ vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0];
+ vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0];
+ vs_consts->src_trans.z = 0;
+ vs_consts->src_trans.w = 0;
+
+ pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer);
+
+ compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+ compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
+
+ pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL);
+}
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
new file mode 100644
index 0000000000..19ad66d9c6
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -0,0 +1,47 @@
+#ifndef vl_compositor_h
+#define vl_compositor_h
+
+#include <pipe/p_compiler.h>
+#include <pipe/p_state.h>
+#include <pipe/p_video_state.h>
+
+struct pipe_context;
+struct pipe_texture;
+
+struct vl_compositor
+{
+ struct pipe_context *pipe;
+
+ struct pipe_framebuffer_state fb_state;
+ void *sampler;
+ void *vertex_shader;
+ void *fragment_shader;
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_bufs[2];
+ struct pipe_vertex_element vertex_elems[2];
+ struct pipe_constant_buffer vs_const_buf, fs_const_buf;
+};
+
+bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
+
+void vl_compositor_cleanup(struct vl_compositor *compositor);
+
+void vl_compositor_render(struct vl_compositor *compositor,
+ /*struct pipe_texture *backround,
+ struct pipe_video_rect *backround_area,*/
+ struct pipe_texture *src_surface,
+ enum pipe_mpeg12_picture_type picture_type,
+ /*unsigned num_past_surfaces,
+ struct pipe_texture *past_surfaces,
+ unsigned num_future_surfaces,
+ struct pipe_texture *future_surfaces,*/
+ struct pipe_video_rect *src_area,
+ struct pipe_texture *dst_surface,
+ struct pipe_video_rect *dst_area,
+ /*unsigned num_layers,
+ struct pipe_texture *layers,
+ struct pipe_video_rect *layer_src_areas,
+ struct pipe_video_rect *layer_dst_areas,*/
+ struct pipe_fence_handle **fence);
+
+#endif /* vl_compositor_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
new file mode 100644
index 0000000000..9b69f2956c
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -0,0 +1,1627 @@
+#include "vl_mpeg12_mc_renderer.h"
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_inlines.h>
+#include <util/u_math.h>
+#include <util/u_memory.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+#include "vl_shader_build.h"
+
+#define DEFAULT_BUF_ALIGNMENT 1
+#define MACROBLOCK_WIDTH 16
+#define MACROBLOCK_HEIGHT 16
+#define BLOCK_WIDTH 8
+#define BLOCK_HEIGHT 8
+#define ZERO_BLOCK_NIL -1.0f
+#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
+
+struct vertex2f
+{
+ float x, y;
+};
+
+struct vertex4f
+{
+ float x, y, z, w;
+};
+
+struct vertex_shader_consts
+{
+ struct vertex4f denorm;
+};
+
+struct fragment_shader_consts
+{
+ struct vertex4f multiplier;
+ struct vertex4f div;
+};
+
+/*
+ * Muliplier renormalizes block samples from 16 bits to 12 bits.
+ * Divider is used when calculating Y % 2 for choosing top or bottom
+ * field for P or B macroblocks.
+ * TODO: Use immediates.
+ */
+static const struct fragment_shader_consts fs_consts = {
+ {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
+ {0.5f, 2.0f, 0.0f, 0.0f}
+};
+
+struct vert_stream_0
+{
+ struct vertex2f pos;
+ struct vertex2f luma_tc;
+ struct vertex2f cb_tc;
+ struct vertex2f cr_tc;
+};
+
+enum MACROBLOCK_TYPE
+{
+ MACROBLOCK_TYPE_INTRA,
+ MACROBLOCK_TYPE_FWD_FRAME_PRED,
+ MACROBLOCK_TYPE_FWD_FIELD_PRED,
+ MACROBLOCK_TYPE_BKWD_FRAME_PRED,
+ MACROBLOCK_TYPE_BKWD_FIELD_PRED,
+ MACROBLOCK_TYPE_BI_FRAME_PRED,
+ MACROBLOCK_TYPE_BI_FIELD_PRED,
+
+ NUM_MACROBLOCK_TYPES
+};
+
+static void
+create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ const unsigned max_tokens = 50;
+
+ struct pipe_shader_state vs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(r);
+
+ tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header *) &tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Vertex pos
+ * decl i1 ; Luma texcoords
+ * decl i2 ; Chroma Cb texcoords
+ * decl i3 ; Chroma Cr texcoords
+ */
+ for (i = 0; i < 4; i++) {
+ decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * decl o0 ; Vertex pos
+ * decl o1 ; Luma texcoords
+ * decl o2 ; Chroma Cb texcoords
+ * decl o3 ; Chroma Cr texcoords
+ */
+ for (i = 0; i < 4; i++) {
+ decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * mov o0, i0 ; Move input vertex pos to output
+ * mov o1, i1 ; Move input luma texcoords to output
+ * mov o2, i2 ; Move input chroma Cb texcoords to output
+ * mov o3, i3 ; Move input chroma Cr texcoords to output
+ */
+ for (i = 0; i < 4; ++i) {
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ vs.tokens = tokens;
+ r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
+ free(tokens);
+}
+
+static void
+create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ const unsigned max_tokens = 100;
+
+ struct pipe_shader_state fs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(r);
+
+ tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header *) &tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Luma texcoords
+ * decl i1 ; Chroma Cb texcoords
+ * decl i2 ; Chroma Cr texcoords
+ */
+ for (i = 0; i < 3; ++i) {
+ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl o0 ; Fragment color */
+ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl t0, t1 */
+ decl = vl_decl_temps(0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl s0 ; Sampler for luma texture
+ * decl s1 ; Sampler for chroma Cb texture
+ * decl s2 ; Sampler for chroma Cr texture
+ */
+ for (i = 0; i < 3; ++i) {
+ decl = vl_decl_samplers(i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
+ */
+ for (i = 0; i < 3; ++i) {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* mul o0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ fs.tokens = tokens;
+ r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
+ free(tokens);
+}
+
+static void
+create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ const unsigned max_tokens = 100;
+
+ struct pipe_shader_state vs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(r);
+
+ tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header *) &tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Vertex pos
+ * decl i1 ; Luma texcoords
+ * decl i2 ; Chroma Cb texcoords
+ * decl i3 ; Chroma Cr texcoords
+ * decl i4 ; Ref surface top field texcoords
+ * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
+ */
+ for (i = 0; i < 6; i++) {
+ decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * decl o0 ; Vertex pos
+ * decl o1 ; Luma texcoords
+ * decl o2 ; Chroma Cb texcoords
+ * decl o3 ; Chroma Cr texcoords
+ * decl o4 ; Ref macroblock texcoords
+ */
+ for (i = 0; i < 5; i++) {
+ decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * mov o0, i0 ; Move input vertex pos to output
+ * mov o1, i1 ; Move input luma texcoords to output
+ * mov o2, i2 ; Move input chroma Cb texcoords to output
+ * mov o3, i3 ; Move input chroma Cr texcoords to output
+ */
+ for (i = 0; i < 4; ++i) {
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ vs.tokens = tokens;
+ r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
+ free(tokens);
+}
+
+static void
+create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ assert(false);
+}
+
+static void
+create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ const unsigned max_tokens = 100;
+
+ struct pipe_shader_state fs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(r);
+
+ tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header *) &tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Luma texcoords
+ * decl i1 ; Chroma Cb texcoords
+ * decl i2 ; Chroma Cr texcoords
+ * decl i3 ; Ref macroblock texcoords
+ */
+ for (i = 0; i < 4; ++i) {
+ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl o0 ; Fragment color */
+ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl t0, t1 */
+ decl = vl_decl_temps(0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl s0 ; Sampler for luma texture
+ * decl s1 ; Sampler for chroma Cb texture
+ * decl s2 ; Sampler for chroma Cr texture
+ * decl s3 ; Sampler for ref surface texture
+ */
+ for (i = 0; i < 4; ++i) {
+ decl = vl_decl_samplers(i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
+ */
+ for (i = 0; i < 3; ++i) {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* add o0, t0, t1 ; Add ref and differential to form final output */
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ fs.tokens = tokens;
+ r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
+ free(tokens);
+}
+
+static void
+create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ assert(false);
+}
+
+static void
+create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ const unsigned max_tokens = 100;
+
+ struct pipe_shader_state vs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(r);
+
+ tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header *) &tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Vertex pos
+ * decl i1 ; Luma texcoords
+ * decl i2 ; Chroma Cb texcoords
+ * decl i3 ; Chroma Cr texcoords
+ * decl i4 ; First ref macroblock top field texcoords
+ * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
+ * decl i6 ; Second ref macroblock top field texcoords
+ * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
+ */
+ for (i = 0; i < 8; i++) {
+ decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * decl o0 ; Vertex pos
+ * decl o1 ; Luma texcoords
+ * decl o2 ; Chroma Cb texcoords
+ * decl o3 ; Chroma Cr texcoords
+ * decl o4 ; First ref macroblock texcoords
+ * decl o5 ; Second ref macroblock texcoords
+ */
+ for (i = 0; i < 6; i++) {
+ decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * mov o0, i0 ; Move input vertex pos to output
+ * mov o1, i1 ; Move input luma texcoords to output
+ * mov o2, i2 ; Move input chroma Cb texcoords to output
+ * mov o3, i3 ; Move input chroma Cr texcoords to output
+ */
+ for (i = 0; i < 4; ++i) {
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
+ * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
+ */
+ for (i = 0; i < 2; ++i) {
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ vs.tokens = tokens;
+ r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
+ free(tokens);
+}
+
+static void
+create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ assert(false);
+}
+
+static void
+create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ const unsigned max_tokens = 100;
+
+ struct pipe_shader_state fs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned ti;
+
+ unsigned i;
+
+ assert(r);
+
+ tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+ *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+ header = (struct tgsi_header *) &tokens[1];
+ *header = tgsi_build_header();
+ *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Luma texcoords
+ * decl i1 ; Chroma Cb texcoords
+ * decl i2 ; Chroma Cr texcoords
+ * decl i3 ; First ref macroblock texcoords
+ * decl i4 ; Second ref macroblock texcoords
+ */
+ for (i = 0; i < 5; ++i) {
+ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+ * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
+ */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl o0 ; Fragment color */
+ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl t0-t2 */
+ decl = vl_decl_temps(0, 2);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl s0 ; Sampler for luma texture
+ * decl s1 ; Sampler for chroma Cb texture
+ * decl s2 ; Sampler for chroma Cr texture
+ * decl s3 ; Sampler for first ref surface texture
+ * decl s4 ; Sampler for second ref surface texture
+ */
+ for (i = 0; i < 5; ++i) {
+ decl = vl_decl_samplers(i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
+ */
+ for (i = 0; i < 3; ++i) {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * tex2d t1, i3, s3 ; Read texel from first ref macroblock
+ * tex2d t2, i4, s4 ; Read texel from second ref macroblock
+ */
+ for (i = 0; i < 2; ++i) {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ assert(ti <= max_tokens);
+
+ fs.tokens = tokens;
+ r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
+ free(tokens);
+}
+
+static void
+create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+ assert(false);
+}
+
+static void
+xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
+{
+ unsigned i;
+
+ assert(r);
+
+ for (i = 0; i < 3; ++i) {
+ r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
+ (
+ r->pipe->screen, r->textures.all[i],
+ 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
+ r->textures.all[i]->width[0], r->textures.all[i]->height[0]
+ );
+
+ r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
+ }
+}
+
+static void
+xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
+{
+ unsigned i;
+
+ assert(r);
+
+ for (i = 0; i < 3; ++i) {
+ r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
+ r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
+ }
+}
+
+static bool
+init_pipe_state(struct vl_mpeg12_mc_renderer *r)
+{
+ struct pipe_sampler_state sampler;
+ unsigned filters[5];
+ unsigned i;
+
+ assert(r);
+
+ r->viewport.scale[0] = r->pot_buffers ?
+ util_next_power_of_two(r->picture_width) : r->picture_width;
+ r->viewport.scale[1] = r->pot_buffers ?
+ util_next_power_of_two(r->picture_height) : r->picture_height;
+ r->viewport.scale[2] = 1;
+ r->viewport.scale[3] = 1;
+ r->viewport.translate[0] = 0;
+ r->viewport.translate[1] = 0;
+ r->viewport.translate[2] = 0;
+ r->viewport.translate[3] = 0;
+
+ r->fb_state.width = r->pot_buffers ?
+ util_next_power_of_two(r->picture_width) : r->picture_width;
+ r->fb_state.height = r->pot_buffers ?
+ util_next_power_of_two(r->picture_height) : r->picture_height;
+ r->fb_state.nr_cbufs = 1;
+ r->fb_state.zsbuf = NULL;
+
+ /* Luma filter */
+ filters[0] = PIPE_TEX_FILTER_NEAREST;
+ /* Chroma filters */
+ if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
+ r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
+ filters[1] = PIPE_TEX_FILTER_NEAREST;
+ filters[2] = PIPE_TEX_FILTER_NEAREST;
+ }
+ else {
+ filters[1] = PIPE_TEX_FILTER_LINEAR;
+ filters[2] = PIPE_TEX_FILTER_LINEAR;
+ }
+ /* Fwd, bkwd ref filters */
+ filters[3] = PIPE_TEX_FILTER_LINEAR;
+ filters[4] = PIPE_TEX_FILTER_LINEAR;
+
+ for (i = 0; i < 5; ++i) {
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.min_img_filter = filters[i];
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler.mag_img_filter = filters[i];
+ sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+ sampler.compare_func = PIPE_FUNC_ALWAYS;
+ sampler.normalized_coords = 1;
+ /*sampler.prefilter = ; */
+ /*sampler.shadow_ambient = ; */
+ /*sampler.lod_bias = ; */
+ sampler.min_lod = 0;
+ /*sampler.max_lod = ; */
+ /*sampler.border_color[i] = ; */
+ /*sampler.max_anisotropy = ; */
+ r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
+ }
+
+ return true;
+}
+
+static void
+cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
+{
+ unsigned i;
+
+ assert(r);
+
+ for (i = 0; i < 5; ++i)
+ r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+}
+
+static bool
+init_shaders(struct vl_mpeg12_mc_renderer *r)
+{
+ assert(r);
+
+ create_intra_vert_shader(r);
+ create_intra_frag_shader(r);
+ create_frame_pred_vert_shader(r);
+ create_frame_pred_frag_shader(r);
+ create_frame_bi_pred_vert_shader(r);
+ create_frame_bi_pred_frag_shader(r);
+
+ return true;
+}
+
+static void
+cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
+{
+ assert(r);
+
+ r->pipe->delete_vs_state(r->pipe, r->i_vs);
+ r->pipe->delete_fs_state(r->pipe, r->i_fs);
+ r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
+ r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
+ r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
+ r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
+}
+
+static bool
+init_buffers(struct vl_mpeg12_mc_renderer *r)
+{
+ struct pipe_texture template;
+
+ const unsigned mbw =
+ align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
+ const unsigned mbh =
+ align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
+
+ unsigned i;
+
+ assert(r);
+
+ r->macroblocks_per_batch =
+ mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
+ r->num_macroblocks = 0;
+ r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
+
+ memset(&template, 0, sizeof(struct pipe_texture));
+ template.target = PIPE_TEXTURE_2D;
+ /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
+ template.format = PIPE_FORMAT_R16_SNORM;
+ template.last_level = 0;
+ template.width[0] = r->pot_buffers ?
+ util_next_power_of_two(r->picture_width) : r->picture_width;
+ template.height[0] = r->pot_buffers ?
+ util_next_power_of_two(r->picture_height) : r->picture_height;
+ template.depth[0] = 1;
+ pf_get_block(template.format, &template.block);
+ template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
+
+ r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
+
+ if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+ template.width[0] = r->pot_buffers ?
+ util_next_power_of_two(r->picture_width / 2) :
+ r->picture_width / 2;
+ template.height[0] = r->pot_buffers ?
+ util_next_power_of_two(r->picture_height / 2) :
+ r->picture_height / 2;
+ }
+ else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
+ template.height[0] = r->pot_buffers ?
+ util_next_power_of_two(r->picture_height / 2) :
+ r->picture_height / 2;
+
+ r->textures.individual.cb =
+ r->pipe->screen->texture_create(r->pipe->screen, &template);
+ r->textures.individual.cr =
+ r->pipe->screen->texture_create(r->pipe->screen, &template);
+
+ r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
+ r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
+ r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
+ r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
+ (
+ r->pipe->screen,
+ DEFAULT_BUF_ALIGNMENT,
+ PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+ sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
+ );
+
+ for (i = 1; i < 3; ++i) {
+ r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
+ r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
+ r->vertex_bufs.all[i].buffer_offset = 0;
+ r->vertex_bufs.all[i].buffer = pipe_buffer_create
+ (
+ r->pipe->screen,
+ DEFAULT_BUF_ALIGNMENT,
+ PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+ sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
+ );
+ }
+
+ /* Position element */
+ r->vertex_elems[0].src_offset = 0;
+ r->vertex_elems[0].vertex_buffer_index = 0;
+ r->vertex_elems[0].nr_components = 2;
+ r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Luma, texcoord element */
+ r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
+ r->vertex_elems[1].vertex_buffer_index = 0;
+ r->vertex_elems[1].nr_components = 2;
+ r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Chroma Cr texcoord element */
+ r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+ r->vertex_elems[2].vertex_buffer_index = 0;
+ r->vertex_elems[2].nr_components = 2;
+ r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Chroma Cb texcoord element */
+ r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+ r->vertex_elems[3].vertex_buffer_index = 0;
+ r->vertex_elems[3].nr_components = 2;
+ r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* First ref surface top field texcoord element */
+ r->vertex_elems[4].src_offset = 0;
+ r->vertex_elems[4].vertex_buffer_index = 1;
+ r->vertex_elems[4].nr_components = 2;
+ r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* First ref surface bottom field texcoord element */
+ r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
+ r->vertex_elems[5].vertex_buffer_index = 1;
+ r->vertex_elems[5].nr_components = 2;
+ r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Second ref surface top field texcoord element */
+ r->vertex_elems[6].src_offset = 0;
+ r->vertex_elems[6].vertex_buffer_index = 2;
+ r->vertex_elems[6].nr_components = 2;
+ r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Second ref surface bottom field texcoord element */
+ r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
+ r->vertex_elems[7].vertex_buffer_index = 2;
+ r->vertex_elems[7].nr_components = 2;
+ r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ r->vs_const_buf.buffer = pipe_buffer_create
+ (
+ r->pipe->screen,
+ DEFAULT_BUF_ALIGNMENT,
+ PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
+ sizeof(struct vertex_shader_consts)
+ );
+
+ r->fs_const_buf.buffer = pipe_buffer_create
+ (
+ r->pipe->screen,
+ DEFAULT_BUF_ALIGNMENT,
+ PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
+ );
+
+ memcpy
+ (
+ pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+ &fs_consts, sizeof(struct fragment_shader_consts)
+ );
+
+ pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
+
+ return true;
+}
+
+static void
+cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
+{
+ unsigned i;
+
+ assert(r);
+
+ pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
+ pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
+
+ for (i = 0; i < 3; ++i)
+ pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
+
+ for (i = 0; i < 3; ++i)
+ pipe_texture_reference(&r->textures.all[i], NULL);
+
+ FREE(r->macroblock_buf);
+}
+
+static enum MACROBLOCK_TYPE
+get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
+{
+ assert(mb);
+
+ switch (mb->mb_type) {
+ case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
+ return MACROBLOCK_TYPE_INTRA;
+ case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+ return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
+ MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
+ case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+ return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
+ MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
+ case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+ return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
+ MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
+ default:
+ assert(0);
+ }
+
+ /* Unreachable */
+ return -1;
+}
+
+/* XXX: One of these days this will have to be killed with fire */
+#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
+ do { \
+ (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+ \
+ if (!use_zb || (cbp) & (lm)) \
+ { \
+ (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ } \
+ else \
+ { \
+ (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
+ (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
+ (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
+ (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
+ (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
+ (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
+ } \
+ \
+ if (!use_zb || (cbp) & (cbm)) \
+ { \
+ (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ } \
+ else \
+ { \
+ (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
+ (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
+ (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
+ (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
+ (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
+ (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
+ } \
+ \
+ if (!use_zb || (cbp) & (crm)) \
+ { \
+ (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ } \
+ else \
+ { \
+ (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
+ (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
+ (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
+ (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
+ (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
+ (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
+ } \
+ } while (0)
+
+static void
+gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
+ struct pipe_mpeg12_macroblock *mb, unsigned pos,
+ struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
+{
+ struct vertex2f mo_vec[2];
+
+ unsigned i;
+
+ assert(r);
+ assert(mb);
+ assert(ycbcr_vb);
+ assert(pos < r->macroblocks_per_batch);
+
+ switch (mb->mb_type) {
+ case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+ {
+ struct vertex2f *vb;
+
+ assert(ref_vb && ref_vb[1]);
+
+ vb = ref_vb[1] + pos * 2 * 24;
+
+ mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
+ mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
+
+ if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+ for (i = 0; i < 24 * 2; i += 2) {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ }
+ }
+ else {
+ mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
+ mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
+
+ for (i = 0; i < 24 * 2; i += 2) {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ vb[i + 1].x = mo_vec[1].x;
+ vb[i + 1].y = mo_vec[1].y;
+ }
+ }
+
+ /* fall-through */
+ }
+ case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+ case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+ {
+ struct vertex2f *vb;
+
+ assert(ref_vb && ref_vb[0]);
+
+ vb = ref_vb[0] + pos * 2 * 24;
+
+ if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
+ mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
+ mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
+
+ if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
+ mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
+ mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
+ }
+ }
+ else {
+ mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
+ mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
+
+ if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
+ mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
+ mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
+ }
+ }
+
+ if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+ for (i = 0; i < 24 * 2; i += 2) {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ }
+ }
+ else {
+ for (i = 0; i < 24 * 2; i += 2) {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ vb[i + 1].x = mo_vec[1].x;
+ vb[i + 1].y = mo_vec[1].y;
+ }
+ }
+
+ /* fall-through */
+ }
+ case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
+ {
+ const struct vertex2f unit =
+ {
+ r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
+ r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
+ };
+ const struct vertex2f half =
+ {
+ r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
+ r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
+ };
+ const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
+
+ struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
+
+ SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
+ unit.x, unit.y, 0, 0, half.x, half.y,
+ 32, 2, 1, use_zb, r->zero_block);
+
+ SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
+ unit.x, unit.y, half.x, 0, half.x, half.y,
+ 16, 2, 1, use_zb, r->zero_block);
+
+ SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
+ unit.x, unit.y, 0, half.y, half.x, half.y,
+ 8, 2, 1, use_zb, r->zero_block);
+
+ SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
+ unit.x, unit.y, half.x, half.y, half.x, half.y,
+ 4, 2, 1, use_zb, r->zero_block);
+
+ break;
+ }
+ default:
+ assert(0);
+ }
+}
+
+static void
+gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
+ unsigned *num_macroblocks)
+{
+ unsigned offset[NUM_MACROBLOCK_TYPES];
+ struct vert_stream_0 *ycbcr_vb;
+ struct vertex2f *ref_vb[2];
+ unsigned i;
+
+ assert(r);
+ assert(num_macroblocks);
+
+ for (i = 0; i < r->num_macroblocks; ++i) {
+ enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
+ ++num_macroblocks[mb_type];
+ }
+
+ offset[0] = 0;
+
+ for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
+ offset[i] = offset[i - 1] + num_macroblocks[i - 1];
+
+ ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
+ (
+ r->pipe->screen,
+ r->vertex_bufs.individual.ycbcr.buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ );
+
+ for (i = 0; i < 2; ++i)
+ ref_vb[i] = (struct vertex2f *)pipe_buffer_map
+ (
+ r->pipe->screen,
+ r->vertex_bufs.individual.ref[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ );
+
+ for (i = 0; i < r->num_macroblocks; ++i) {
+ enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
+
+ gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
+ ycbcr_vb, ref_vb);
+
+ ++offset[mb_type];
+ }
+
+ pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
+ for (i = 0; i < 2; ++i)
+ pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
+}
+
+static void
+flush(struct vl_mpeg12_mc_renderer *r)
+{
+ unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
+ unsigned vb_start = 0;
+ struct vertex_shader_consts *vs_consts;
+ unsigned i;
+
+ assert(r);
+ assert(r->num_macroblocks == r->macroblocks_per_batch);
+
+ gen_macroblock_stream(r, num_macroblocks);
+
+ r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
+ (
+ r->pipe->screen, r->surface,
+ 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
+ );
+
+ r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
+ r->pipe->set_viewport_state(r->pipe, &r->viewport);
+
+ vs_consts = pipe_buffer_map
+ (
+ r->pipe->screen, r->vs_const_buf.buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ );
+
+ vs_consts->denorm.x = r->surface->width[0];
+ vs_consts->denorm.y = r->surface->height[0];
+
+ pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
+
+ r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
+ &r->vs_const_buf);
+ r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
+ &r->fs_const_buf);
+
+ if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
+ r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
+ r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
+ r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all);
+ r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all);
+ r->pipe->bind_vs_state(r->pipe, r->i_vs);
+ r->pipe->bind_fs_state(r->pipe, r->i_fs);
+
+ r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+ num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
+ vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
+ }
+
+ if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
+ r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+ r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->textures.individual.ref[0] = r->past;
+ r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
+ r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
+
+ r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+ num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
+ vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
+ }
+
+ if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
+ r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+ r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->textures.individual.ref[0] = r->past;
+ r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
+ r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
+
+ r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+ num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
+ vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
+ }
+
+ if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
+ r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+ r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->textures.individual.ref[0] = r->future;
+ r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
+ r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
+
+ r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+ num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
+ vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
+ }
+
+ if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
+ r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+ r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->textures.individual.ref[0] = r->future;
+ r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
+ r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
+
+ r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+ num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
+ vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
+ }
+
+ if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
+ r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
+ r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+ r->textures.individual.ref[0] = r->past;
+ r->textures.individual.ref[1] = r->future;
+ r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
+ r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
+ r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
+ r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
+
+ r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+ num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
+ vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
+ }
+
+ if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
+ r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
+ r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+ r->textures.individual.ref[0] = r->past;
+ r->textures.individual.ref[1] = r->future;
+ r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
+ r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
+ r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
+ r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
+
+ r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+ num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
+ vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
+ }
+
+ r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
+ pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
+
+ if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
+ for (i = 0; i < 3; ++i)
+ r->zero_block[i].x = ZERO_BLOCK_NIL;
+
+ r->num_macroblocks = 0;
+}
+
+static void
+grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
+{
+ unsigned y;
+
+ assert(src);
+ assert(dst);
+
+ for (y = 0; y < BLOCK_HEIGHT; ++y)
+ memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+}
+
+static void
+grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
+{
+ unsigned y;
+
+ assert(src);
+ assert(dst);
+
+ for (y = 0; y < BLOCK_HEIGHT; ++y)
+ memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+}
+
+static void
+fill_zero_block(short *dst, unsigned dst_pitch)
+{
+ unsigned y;
+
+ assert(dst);
+
+ for (y = 0; y < BLOCK_HEIGHT; ++y)
+ memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
+}
+
+static void
+grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
+ enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
+{
+ unsigned tex_pitch;
+ short *texels;
+ unsigned tb = 0, sb = 0;
+ unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
+ unsigned x, y;
+
+ assert(r);
+ assert(blocks);
+
+ tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size;
+ texels = r->texels[0] + mbpy * tex_pitch + mbpx;
+
+ for (y = 0; y < 2; ++y) {
+ for (x = 0; x < 2; ++x, ++tb) {
+ if ((cbp >> (5 - tb)) & 1) {
+ if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
+ grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
+ texels + y * tex_pitch * BLOCK_WIDTH +
+ x * BLOCK_WIDTH, tex_pitch);
+ }
+ else {
+ grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
+ texels + y * tex_pitch + x * BLOCK_WIDTH,
+ tex_pitch);
+ }
+
+ ++sb;
+ }
+ else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
+ if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
+ ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
+ fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
+ if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
+ r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
+ r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
+ }
+ }
+ }
+ }
+ }
+
+ /* TODO: Implement 422, 444 */
+ assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+
+ mbpx /= 2;
+ mbpy /= 2;
+
+ for (tb = 0; tb < 2; ++tb) {
+ tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size;
+ texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
+
+ if ((cbp >> (1 - tb)) & 1) {
+ grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
+ ++sb;
+ }
+ else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
+ if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
+ ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
+ fill_zero_block(texels, tex_pitch);
+ if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
+ r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
+ r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
+ }
+ }
+ }
+ }
+}
+
+static void
+grab_macroblock(struct vl_mpeg12_mc_renderer *r,
+ struct pipe_mpeg12_macroblock *mb)
+{
+ assert(r);
+ assert(mb);
+ assert(r->num_macroblocks < r->macroblocks_per_batch);
+
+ memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
+ sizeof(struct pipe_mpeg12_macroblock));
+
+ grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
+
+ ++r->num_macroblocks;
+}
+
+bool
+vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
+ struct pipe_context *pipe,
+ unsigned picture_width,
+ unsigned picture_height,
+ enum pipe_video_chroma_format chroma_format,
+ enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
+ enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
+ bool pot_buffers)
+{
+ unsigned i;
+
+ assert(renderer);
+ assert(pipe);
+ /* TODO: Implement other policies */
+ assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
+ /* TODO: Implement this */
+ /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
+ assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
+ /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
+ assert(pot_buffers);
+
+ memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
+
+ renderer->pipe = pipe;
+ renderer->picture_width = picture_width;
+ renderer->picture_height = picture_height;
+ renderer->chroma_format = chroma_format;
+ renderer->bufmode = bufmode;
+ renderer->eb_handling = eb_handling;
+ renderer->pot_buffers = pot_buffers;
+
+ if (!init_pipe_state(renderer))
+ return false;
+ if (!init_shaders(renderer)) {
+ cleanup_pipe_state(renderer);
+ return false;
+ }
+ if (!init_buffers(renderer)) {
+ cleanup_shaders(renderer);
+ cleanup_pipe_state(renderer);
+ return false;
+ }
+
+ renderer->surface = NULL;
+ renderer->past = NULL;
+ renderer->future = NULL;
+ for (i = 0; i < 3; ++i)
+ renderer->zero_block[i].x = ZERO_BLOCK_NIL;
+ renderer->num_macroblocks = 0;
+
+ xfer_buffers_map(renderer);
+
+ return true;
+}
+
+void
+vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
+{
+ assert(renderer);
+
+ xfer_buffers_unmap(renderer);
+
+ cleanup_pipe_state(renderer);
+ cleanup_shaders(renderer);
+ cleanup_buffers(renderer);
+}
+
+void
+vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
+ *renderer,
+ struct pipe_texture *surface,
+ struct pipe_texture *past,
+ struct pipe_texture *future,
+ unsigned num_macroblocks,
+ struct pipe_mpeg12_macroblock
+ *mpeg12_macroblocks,
+ struct pipe_fence_handle **fence)
+{
+ bool new_surface = false;
+
+ assert(renderer);
+ assert(surface);
+ assert(num_macroblocks);
+ assert(mpeg12_macroblocks);
+
+ if (renderer->surface) {
+ if (surface != renderer->surface) {
+ if (renderer->num_macroblocks > 0) {
+ xfer_buffers_unmap(renderer);
+ flush(renderer);
+ }
+
+ new_surface = true;
+ }
+
+ /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
+ assert(surface != renderer->surface || renderer->past == past);
+ assert(surface != renderer->surface || renderer->future == future);
+ }
+ else
+ new_surface = true;
+
+ if (new_surface) {
+ renderer->surface = surface;
+ renderer->past = past;
+ renderer->future = future;
+ renderer->fence = fence;
+ renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
+ renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
+ }
+
+ while (num_macroblocks) {
+ unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
+ unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
+ unsigned i;
+
+ for (i = 0; i < num_to_submit; ++i) {
+ assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
+ grab_macroblock(renderer, &mpeg12_macroblocks[i]);
+ }
+
+ num_macroblocks -= num_to_submit;
+
+ if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
+ xfer_buffers_unmap(renderer);
+ flush(renderer);
+ xfer_buffers_map(renderer);
+ /* Next time we get this surface it may have new ref frames */
+ renderer->surface = NULL;
+ }
+ }
+}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
new file mode 100644
index 0000000000..0c2f679664
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -0,0 +1,93 @@
+#ifndef vl_mpeg12_mc_renderer_h
+#define vl_mpeg12_mc_renderer_h
+
+#include <pipe/p_compiler.h>
+#include <pipe/p_state.h>
+#include <pipe/p_video_state.h>
+
+struct pipe_context;
+struct pipe_video_surface;
+struct pipe_macroblock;
+
+/* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */
+enum VL_MPEG12_MC_RENDERER_BUFFER_MODE
+{
+ VL_MPEG12_MC_RENDERER_BUFFER_SLICE, /* Saves memory at the cost of smaller batches */
+ VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */
+};
+
+enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK
+{
+ VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL, /* Waste of memory bandwidth */
+ VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, /* Can only do point-filtering when interpolating subsampled chroma channels */
+ VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */
+};
+
+struct vl_mpeg12_mc_renderer
+{
+ struct pipe_context *pipe;
+ unsigned picture_width;
+ unsigned picture_height;
+ enum pipe_video_chroma_format chroma_format;
+ enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
+ enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling;
+ bool pot_buffers;
+ unsigned macroblocks_per_batch;
+
+ struct pipe_viewport_state viewport;
+ struct pipe_constant_buffer vs_const_buf;
+ struct pipe_constant_buffer fs_const_buf;
+ struct pipe_framebuffer_state fb_state;
+ struct pipe_vertex_element vertex_elems[8];
+
+ union
+ {
+ void *all[5];
+ struct { void *y, *cb, *cr, *ref[2]; } individual;
+ } samplers;
+
+ void *i_vs, *p_vs[2], *b_vs[2];
+ void *i_fs, *p_fs[2], *b_fs[2];
+
+ union
+ {
+ struct pipe_texture *all[5];
+ struct { struct pipe_texture *y, *cb, *cr, *ref[2]; } individual;
+ } textures;
+
+ union
+ {
+ struct pipe_vertex_buffer all[3];
+ struct { struct pipe_vertex_buffer ycbcr, ref[2]; } individual;
+ } vertex_bufs;
+
+ struct pipe_texture *surface, *past, *future;
+ struct pipe_fence_handle **fence;
+ unsigned num_macroblocks;
+ struct pipe_mpeg12_macroblock *macroblock_buf;
+ struct pipe_transfer *tex_transfer[3];
+ short *texels[3];
+ struct { float x, y; } surface_tex_inv_size;
+ struct { float x, y; } zero_block[3];
+};
+
+bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
+ struct pipe_context *pipe,
+ unsigned picture_width,
+ unsigned picture_height,
+ enum pipe_video_chroma_format chroma_format,
+ enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
+ enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
+ bool pot_buffers);
+
+void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
+
+void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
+ struct pipe_texture *surface,
+ struct pipe_texture *past,
+ struct pipe_texture *future,
+ unsigned num_macroblocks,
+ struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
+ struct pipe_fence_handle **fence);
+
+#endif /* vl_mpeg12_mc_renderer_h */
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
new file mode 100644
index 0000000000..9ad1e052c6
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_shader_build.c
@@ -0,0 +1,215 @@
+#include "vl_shader_build.h"
+#include <assert.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+
+struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = name;
+ decl.Semantic.SemanticIndex = index;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_declaration vl_decl_interpolated_input
+(
+ unsigned int name,
+ unsigned int index,
+ unsigned int first,
+ unsigned int last,
+ int interpolation
+)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ assert
+ (
+ interpolation == TGSI_INTERPOLATE_CONSTANT ||
+ interpolation == TGSI_INTERPOLATE_LINEAR ||
+ interpolation == TGSI_INTERPOLATE_PERSPECTIVE
+ );
+
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = name;
+ decl.Semantic.SemanticIndex = index;
+ decl.Declaration.Interpolate = interpolation;;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ decl.Declaration.File = TGSI_FILE_CONSTANT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = name;
+ decl.Semantic.SemanticIndex = index;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ decl.Declaration.File = TGSI_FILE_OUTPUT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = name;
+ decl.Semantic.SemanticIndex = index;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_instruction vl_inst2
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src_file,
+ unsigned int src_index
+)
+{
+ struct tgsi_full_instruction inst = tgsi_default_full_instruction();
+
+ inst.Instruction.Opcode = opcode;
+ inst.Instruction.NumDstRegs = 1;
+ inst.FullDstRegisters[0].DstRegister.File = dst_file;
+ inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+ inst.Instruction.NumSrcRegs = 1;
+ inst.FullSrcRegisters[0].SrcRegister.File = src_file;
+ inst.FullSrcRegisters[0].SrcRegister.Index = src_index;
+
+ return inst;
+}
+
+struct tgsi_full_instruction vl_inst3
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+)
+{
+ struct tgsi_full_instruction inst = tgsi_default_full_instruction();
+
+ inst.Instruction.Opcode = opcode;
+ inst.Instruction.NumDstRegs = 1;
+ inst.FullDstRegisters[0].DstRegister.File = dst_file;
+ inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+ inst.Instruction.NumSrcRegs = 2;
+ inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
+ inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
+ inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
+ inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+
+ return inst;
+}
+
+struct tgsi_full_instruction vl_tex
+(
+ int tex,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+)
+{
+ struct tgsi_full_instruction inst = tgsi_default_full_instruction();
+
+ inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ inst.Instruction.NumDstRegs = 1;
+ inst.FullDstRegisters[0].DstRegister.File = dst_file;
+ inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+ inst.Instruction.NumSrcRegs = 2;
+ inst.InstructionExtTexture.Texture = tex;
+ inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
+ inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
+ inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
+ inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+
+ return inst;
+}
+
+struct tgsi_full_instruction vl_inst4
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index,
+ enum tgsi_file_type src3_file,
+ unsigned int src3_index
+)
+{
+ struct tgsi_full_instruction inst = tgsi_default_full_instruction();
+
+ inst.Instruction.Opcode = opcode;
+ inst.Instruction.NumDstRegs = 1;
+ inst.FullDstRegisters[0].DstRegister.File = dst_file;
+ inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+ inst.Instruction.NumSrcRegs = 3;
+ inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
+ inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
+ inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
+ inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+ inst.FullSrcRegisters[2].SrcRegister.File = src3_file;
+ inst.FullSrcRegisters[2].SrcRegister.Index = src3_index;
+
+ return inst;
+}
+
+struct tgsi_full_instruction vl_end(void)
+{
+ struct tgsi_full_instruction inst = tgsi_default_full_instruction();
+
+ inst.Instruction.Opcode = TGSI_OPCODE_END;
+ inst.Instruction.NumDstRegs = 0;
+ inst.Instruction.NumSrcRegs = 0;
+
+ return inst;
+}
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.h b/src/gallium/auxiliary/vl/vl_shader_build.h
new file mode 100644
index 0000000000..c6c60b5552
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_shader_build.h
@@ -0,0 +1,61 @@
+#ifndef vl_shader_build_h
+#define vl_shader_build_h
+
+#include <pipe/p_shader_tokens.h>
+
+struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_interpolated_input
+(
+ unsigned int name,
+ unsigned int index,
+ unsigned int first,
+ unsigned int last,
+ int interpolation
+);
+struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last);
+struct tgsi_full_instruction vl_inst2
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src_file,
+ unsigned int src_index
+);
+struct tgsi_full_instruction vl_inst3
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+);
+struct tgsi_full_instruction vl_tex
+(
+ int tex,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+);
+struct tgsi_full_instruction vl_inst4
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index,
+ enum tgsi_file_type src3_file,
+ unsigned int src3_index
+);
+struct tgsi_full_instruction vl_end(void);
+
+#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
index bd48ce7005..d185c6b849 100644
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -41,7 +41,7 @@
static const char *
cell_get_vendor(struct pipe_screen *screen)
{
- return "Tungsten Graphics, Inc.";
+ return "VMware, Inc.";
}
@@ -64,8 +64,6 @@ cell_get_param(struct pipe_screen *screen, int param)
return 1;
case PIPE_CAP_GLSL:
return 1;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index b43f735245..e745f3342d 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -175,12 +175,19 @@ i915_is_buffer_referenced(struct pipe_context *pipe,
static void i915_destroy(struct pipe_context *pipe)
{
struct i915_context *i915 = i915_context(pipe);
+ int i;
draw_destroy(i915->draw);
if(i915->batch)
i915->iws->batchbuffer_destroy(i915->batch);
+ /* unbind framebuffer */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL);
+ }
+ pipe_surface_reference(&i915->framebuffer.zsbuf, NULL);
+
FREE(i915);
}
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index 508f4560e4..d50201642b 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -44,6 +44,7 @@
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_fifo.h"
#include "i915_context.h"
#include "i915_reg.h"
@@ -76,8 +77,13 @@ struct i915_vbuf_render {
size_t vbo_size;
size_t vbo_offset;
void *vbo_ptr;
- size_t vbo_alloc_size;
size_t vbo_max_used;
+
+ /* stuff for the pool */
+ struct util_fifo *pool_fifo;
+ unsigned pool_used;
+ unsigned pool_buffer_size;
+ boolean pool_not_used;
};
@@ -106,33 +112,72 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render)
}
static boolean
+i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
+{
+ struct i915_context *i915 = i915_render->i915;
+
+ if (i915_render->vbo_size < size + i915_render->vbo_offset)
+ return FALSE;
+
+ if (i915->vbo_flushed)
+ return FALSE;
+
+ return TRUE;
+}
+
+static void
+i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
+{
+ struct i915_context *i915 = i915_render->i915;
+ struct intel_winsys *iws = i915->iws;
+
+ if (i915_render->vbo) {
+ if (i915_render->pool_not_used)
+ iws->buffer_destroy(iws, i915_render->vbo);
+ else
+ u_fifo_add(i915_render->pool_fifo, i915_render->vbo);
+ i915_render->vbo = NULL;
+ }
+
+ i915->vbo_flushed = 0;
+
+ i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size);
+ i915_render->vbo_offset = 0;
+
+ if (i915_render->vbo_size != i915_render->pool_buffer_size) {
+ i915_render->pool_not_used = TRUE;
+ i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
+ INTEL_NEW_VERTEX);
+ } else {
+ i915_render->pool_not_used = FALSE;
+
+ if (i915_render->pool_used >= 2) {
+ FLUSH_BATCH(NULL);
+ i915->vbo_flushed = 0;
+ i915_render->pool_used = 0;
+ }
+ u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo);
+ }
+}
+
+static boolean
i915_vbuf_render_allocate_vertices(struct vbuf_render *render,
ushort vertex_size,
ushort nr_vertices)
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
- struct intel_winsys *iws = i915->iws;
size_t size = (size_t)vertex_size * (size_t)nr_vertices;
/* FIXME: handle failure */
assert(!i915->vbo);
- if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) {
- } else {
- i915->vbo_flushed = 0;
- if (i915_render->vbo) {
- iws->buffer_destroy(iws, i915_render->vbo);
- i915_render->vbo = NULL;
- }
- }
+ if (!i915_vbuf_render_reserve(i915_render, size)) {
- if (!i915_render->vbo) {
- i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
- i915_render->vbo_offset = 0;
- i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
- INTEL_NEW_VERTEX);
+ if (i915->vbo_flushed)
+ i915_render->pool_used = 0;
+ i915_vbuf_render_new_buf(i915_render, size);
}
i915_render->vertex_size = vertex_size;
@@ -153,7 +198,7 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render)
struct intel_winsys *iws = i915->iws;
if (i915->vbo_flushed)
- debug_printf("%s bad vbo flush occured stalling on hw\n");
+ debug_printf("%s bad vbo flush occured stalling on hw\n", __func__);
i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
@@ -344,14 +389,43 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
uint nr)
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
if (i915_render->fallback) {
draw_arrays_fallback(render, start, nr);
return;
}
- /* JB: TODO submit direct cmds */
- draw_arrays_fallback(render, start, nr);
+ if (i915->dirty)
+ i915_update_derived(i915);
+
+ if (i915->hardware_dirty)
+ i915_emit_hardware_state(i915);
+
+ if (!BEGIN_BATCH(2, 0)) {
+ FLUSH_BATCH(NULL);
+
+ /* Make sure state is re-emitted after a flush:
+ */
+ i915_update_derived(i915);
+ i915_emit_hardware_state(i915);
+ i915->vbo_flushed = 1;
+
+ if (!BEGIN_BATCH(2, 0)) {
+ assert(0);
+ goto out;
+ }
+ }
+
+ OUT_BATCH(_3DPRIMITIVE |
+ PRIM_INDIRECT |
+ PRIM_INDIRECT_SEQUENTIAL |
+ i915_render->hwprim |
+ nr);
+ OUT_BATCH(start); /* Beginning vertex index */
+
+out:
+ return;
}
/**
@@ -504,6 +578,7 @@ i915_vbuf_render_create(struct i915_context *i915)
{
struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render);
struct intel_winsys *iws = i915->iws;
+ int i;
i915_render->i915 = i915;
@@ -524,14 +599,24 @@ i915_vbuf_render_create(struct i915_context *i915)
i915_render->base.release_vertices = i915_vbuf_render_release_vertices;
i915_render->base.destroy = i915_vbuf_render_destroy;
- i915_render->vbo_alloc_size = 128 * 4096;
- i915_render->vbo_size = i915_render->vbo_alloc_size;
+
+ i915_render->vbo = NULL;
+ i915_render->vbo_size = 0;
i915_render->vbo_offset = 0;
- i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
- INTEL_NEW_VERTEX);
+
+ i915_render->pool_used = FALSE;
+ i915_render->pool_buffer_size = 128 * 4096;
+ i915_render->pool_fifo = u_fifo_create(6);
+ for (i = 0; i < 6; i++)
+ u_fifo_add(i915_render->pool_fifo,
+ iws->buffer_create(iws, i915_render->pool_buffer_size, 64,
+ INTEL_NEW_VERTEX));
+
+#if 0
/* TODO JB: is this realy needed? */
i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
iws->buffer_unmap(iws, i915_render->vbo);
+#endif
return &i915_render->base;
}
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index 9f017a14cc..c66558c320 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -46,7 +46,7 @@
static const char *
i915_get_vendor(struct pipe_screen *screen)
{
- return "Tungsten Graphics, Inc.";
+ return "VMware, Inc.";
}
static const char *
@@ -101,8 +101,6 @@ i915_get_param(struct pipe_screen *screen, int param)
return 1;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index 0087dfa410..7d48e6e84d 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -588,9 +588,17 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe,
const struct pipe_framebuffer_state *fb)
{
struct i915_context *i915 = i915_context(pipe);
+ int i;
+
draw_flush(i915->draw);
- i915->framebuffer = *fb; /* struct copy */
+ i915->framebuffer.width = fb->width;
+ i915->framebuffer.height = fb->height;
+ i915->framebuffer.nr_cbufs = fb->nr_cbufs;
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]);
+ }
+ pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf);
i915->dirty |= I915_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
index 6a6c654271..15ccc1fc73 100644
--- a/src/gallium/drivers/i915simple/i915_texture.c
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -165,7 +165,7 @@ i915_scanout_layout(struct i915_texture *tex)
struct pipe_texture *pt = &tex->base;
if (pt->last_level > 0 || pt->block.size != 4)
- return 0;
+ return FALSE;
i915_miptree_set_level_info(tex, 0, 1,
tex->base.width[0],
@@ -191,6 +191,38 @@ i915_scanout_layout(struct i915_texture *tex)
return TRUE;
}
+/**
+ * Special case to deal with shared textures.
+ */
+static boolean
+i915_display_target_layout(struct i915_texture *tex)
+{
+ struct pipe_texture *pt = &tex->base;
+
+ if (pt->last_level > 0 || pt->block.size != 4)
+ return FALSE;
+
+ /* fallback to normal textures for small textures */
+ if (tex->base.width[0] < 240)
+ return FALSE;
+
+ i915_miptree_set_level_info(tex, 0, 1,
+ tex->base.width[0],
+ tex->base.height[0],
+ 1);
+ i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
+
+ tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
+ tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+ tex->hw_tiled = INTEL_TILE_X;
+
+ debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+ tex->base.width[0], tex->base.height[0], pt->block.size,
+ tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
+
+ return TRUE;
+}
+
static void
i915_miptree_layout_2d(struct i915_texture *tex)
{
@@ -201,6 +233,16 @@ i915_miptree_layout_2d(struct i915_texture *tex)
unsigned nblocksx = pt->nblocksx[0];
unsigned nblocksy = pt->nblocksy[0];
+ /* used for scanouts that need special layouts */
+ if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+ if (i915_scanout_layout(tex))
+ return;
+
+ /* for shared buffers we use some very like scanout */
+ if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
+ if (i915_display_target_layout(tex))
+ return;
+
tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
tex->total_nblocksy = 0;
@@ -351,6 +393,11 @@ i945_miptree_layout_2d(struct i915_texture *tex)
if (i915_scanout_layout(tex))
return;
+ /* for shared buffers we use some very like scanout */
+ if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
+ if (i915_display_target_layout(tex))
+ return;
+
tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
/* May need to adjust pitch to accomodate the placement of
diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h
index f949f52a9c..42c5e7470e 100644
--- a/src/gallium/drivers/i915simple/intel_winsys.h
+++ b/src/gallium/drivers/i915simple/intel_winsys.h
@@ -150,6 +150,17 @@ struct intel_winsys {
void (*buffer_unmap)(struct intel_winsys *iws,
struct intel_buffer *buffer);
+ /**
+ * Write to a buffer.
+ *
+ * Arguments follows pwrite(2)
+ */
+ int (*buffer_write)(struct intel_winsys *iws,
+ struct intel_buffer *dst,
+ const void *src,
+ size_t size,
+ size_t offset);
+
void (*buffer_destroy)(struct intel_winsys *iws,
struct intel_buffer *buffer);
/*@}*/
diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c
index b22e105f10..4a84c4db23 100644
--- a/src/gallium/drivers/i965simple/brw_screen.c
+++ b/src/gallium/drivers/i965simple/brw_screen.c
@@ -39,7 +39,7 @@
static const char *
brw_get_vendor( struct pipe_screen *screen )
{
- return "Tungsten Graphics, Inc.";
+ return "VMware, Inc.";
}
@@ -85,8 +85,6 @@ brw_get_param(struct pipe_screen *screen, int param)
return 1;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 5ac09de79e..cd7b6356d2 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -17,9 +17,11 @@ C_SOURCES = \
lp_bld_depth.c \
lp_bld_flow.c \
lp_bld_format_aos.c \
+ lp_bld_format_soa.c \
lp_bld_interp.c \
lp_bld_intr.c \
lp_bld_logic.c \
+ lp_bld_sample_soa.c \
lp_bld_swizzle.c \
lp_bld_struct.c \
lp_bld_tgsi_soa.c \
@@ -46,7 +48,8 @@ C_SOURCES = \
lp_state_vs.c \
lp_surface.c \
lp_tex_cache.c \
- lp_tex_sample.c \
+ lp_tex_sample_c.c \
+ lp_tex_sample_llvm.c \
lp_texture.c \
lp_tile_cache.c \
lp_tile_soa.c
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 498d21dea6..89d08834a3 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -8,13 +8,16 @@ Done so far is:
- the whole fragment pipeline is code generated in a single function
+ - input interpolation
+
- depth testing
+ - texture sampling (not all state/formats are supported)
+
- fragment shader TGSI translation
- same level of support as the TGSI SSE2 exec machine, with the exception
we don't fallback to TGSI interpretation when an unsupported opcode is
found, but just ignore it
- - texture sampling via an intrinsic call
- done in SoA layout
- input interpolation also code generated
@@ -28,16 +31,17 @@ Done so far is:
any width and length
- not all operations are implemented for these types yet though
-Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch,
-which includes hand written fast implementations for common cases.
+Most mesa/progs/demos/* work.
To do (probably by this order):
- code generate stipple and stencil testing
- - code generate texture sampling
+ - translate the remaining bits of texture sampling state
- translate TGSI control flow instructions, and all other remaining opcodes
+
+ - integrate with the draw module for VS code generation
- code generate the triangle setup and rasterization
@@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as
make linux-llvm
-but the rest of these instructions assume scons is used.
+but the rest of these instructions assume that scons is used.
Using
@@ -108,6 +112,9 @@ or
export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
+For performance evaluation pass debug=no to scons, and use the corresponding
+lib directory without the "-debug" suffix.
+
Unit testing
============
@@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
- lp_test_conv: SIMD vector conversion
- lp_test_format: pixel unpacking/packing
-Some of this tests can output results and benchmarks to a tab-seperated-file
+Some of this tests can output results and benchmarks to a tab-separated-file
for posterior analysis, e.g.:
build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -133,10 +140,10 @@ Development Notes
at the top of the lp_bld_*.c functions.
- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be
- put in a standalone Gallium state -> LLVM IR translation module.
+ put in a stand-alone Gallium state -> LLVM IR translation module.
- We use LLVM-C bindings for now. They are not documented, but follow the C++
interfaces very closely, and appear to be complete enough for code
generation. See
http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- for a standalone example.
+ for a stand-alone example.
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 5c29bdac56..f4a9a3b22e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -3,7 +3,7 @@ Import('*')
env = env.Clone()
env.Tool('llvm')
-if 'LLVM_VERSION' not in env:
+if not env.has_key('LLVM_VERSION'):
print 'warning: LLVM not found: not building llvmpipe'
Return()
@@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary(
'lp_bld_depth.c',
'lp_bld_flow.c',
'lp_bld_format_aos.c',
+ 'lp_bld_format_soa.c',
'lp_bld_interp.c',
'lp_bld_intr.c',
+ 'lp_bld_sample_soa.c',
'lp_bld_struct.c',
'lp_bld_logic.c',
'lp_bld_swizzle.c',
@@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary(
'lp_state_vs.c',
'lp_surface.c',
'lp_tex_cache.c',
- 'lp_tex_sample.c',
+ 'lp_tex_sample_c.c',
+ 'lp_tex_sample_llvm.c',
'lp_texture.c',
'lp_tile_cache.c',
'lp_tile_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index 49c2f911af..2b4bc5c819 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -45,7 +45,7 @@
void
lp_build_alpha_test(LLVMBuilderRef builder,
const struct pipe_alpha_state *state,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
LLVMValueRef ref)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 9dbcdb4daa..634575670d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -38,14 +38,14 @@
#include <llvm-c/Core.h>
struct pipe_alpha_state;
-union lp_type;
+struct lp_type;
struct lp_build_mask_context;
void
lp_build_alpha_test(LLVMBuilderRef builder,
const struct pipe_alpha_state *state,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
LLVMValueRef ref);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 09a57ff33d..31433318a7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const char *intrinsic = NULL;
LLVMValueRef cond;
@@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const char *intrinsic = NULL;
LLVMValueRef cond;
@@ -159,7 +159,7 @@ LLVMValueRef
lp_build_comp(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->one)
return bld->zero;
@@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res;
if(a == bld->zero)
@@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res;
if(b == bld->zero)
@@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->zero)
return bld->zero;
@@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->zero)
return bld->zero;
@@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld,
}
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
+{
+ return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+}
+
+
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef v00,
+ LLVMValueRef v01,
+ LLVMValueRef v10,
+ LLVMValueRef v11)
+{
+ LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
+ LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
+ return lp_build_lerp(bld, y, v0, v1);
+}
+
+
/**
* Generate min(a, b)
* Do checks for special cases.
@@ -565,21 +590,32 @@ LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
if(!type.sign)
return a;
- /* XXX: is this really necessary? */
+ if(type.floating) {
+ /* Mask out the sign bit */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+ return a;
+ }
+
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(!type.floating && type.width*type.length == 128) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- if(type.width == 8)
+ if(type.width*type.length == 128) {
+ switch(type.width) {
+ case 8:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
- if(type.width == 16)
+ case 16:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
- if(type.width == 32)
+ case 32:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
+ }
}
#endif
@@ -588,10 +624,188 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef cond;
+ LLVMValueRef res;
+
+ /* Handle non-zero case */
+ if(!type.sign) {
+ /* if not zero then sign must be positive */
+ res = bld->one;
+ }
+ else if(type.floating) {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef one;
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ one = LLVMConstBitCast(bld->one, int_vec_type);
+ res = LLVMBuildOr(bld->builder, sign, one, "");
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
+ else
+ {
+ LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
+ res = lp_build_select(bld, cond, bld->one, minus_one);
+ }
+
+ /* Handle zero */
+ cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
+ res = lp_build_select(bld, cond, bld->zero, bld->one);
+
+ return res;
+}
+
+
+enum lp_build_round_sse41_mode
+{
+ LP_BUILD_ROUND_SSE41_NEAREST = 0,
+ LP_BUILD_ROUND_SSE41_FLOOR = 1,
+ LP_BUILD_ROUND_SSE41_CEIL = 2,
+ LP_BUILD_ROUND_SSE41_TRUNCATE = 3
+};
+
+
+static INLINE LLVMValueRef
+lp_build_round_sse41(struct lp_build_context *bld,
+ LLVMValueRef a,
+ enum lp_build_round_sse41_mode mode)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ const char *intrinsic;
+
+ assert(type.floating);
+ assert(type.width*type.length == 128);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ps";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.pd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
+ LLVMConstInt(LLVMInt32Type(), mode, 0));
+}
+
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+/**
+ * Convert to integer, through whichever rounding method that's fastest,
+ * typically truncating to zero.
+ */
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+
+ assert(type.floating);
+
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+}
+
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ a = lp_build_floor(bld, a);
+ a = lp_build_int(bld, a);
+ return a;
+}
+
+
+LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -609,7 +823,7 @@ LLVMValueRef
lp_build_rcp(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->zero)
return bld->undef;
@@ -640,7 +854,7 @@ LLVMValueRef
lp_build_rsqrt(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
assert(type.floating);
@@ -661,7 +875,7 @@ LLVMValueRef
lp_build_cos(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -681,7 +895,7 @@ LLVMValueRef
lp_build_sin(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -704,7 +918,8 @@ lp_build_pow(struct lp_build_context *bld,
{
/* TODO: optimize the constant case */
if(LLVMIsConstant(x) && LLVMIsConstant(y))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y));
}
@@ -752,13 +967,14 @@ lp_build_polynomial(struct lp_build_context *bld,
const double *coeffs,
unsigned num_coeffs)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res = NULL;
unsigned i;
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
for (i = num_coeffs; i--; ) {
LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
@@ -800,7 +1016,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef *p_frac_part,
LLVMValueRef *p_exp2)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMValueRef ipart = NULL;
@@ -812,7 +1028,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
assert(type.floating && type.width == 32);
@@ -893,7 +1110,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMValueRef *p_floor_log2,
LLVMValueRef *p_log2)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
@@ -911,7 +1128,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
assert(type.floating && type.width == 32);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index fc8cb25966..d68a97c4b8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-union lp_type type;
+struct lp_type type;
struct lp_build_context;
@@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1);
+
+/**
+ * Bilinear interpolation.
+ *
+ * Values indices are in v_{yx}.
+ */
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef v00,
+ LLVMValueRef v01,
+ LLVMValueRef v10,
+ LLVMValueRef v11);
+
+LLVMValueRef
lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
@@ -86,6 +106,34 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index d19e18846c..da272e549f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -46,7 +46,7 @@
struct pipe_blend_state;
-union lp_type;
+struct lp_type;
struct lp_build_context;
@@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld,
LLVMValueRef
lp_build_blend_aos(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src,
LLVMValueRef dst,
LLVMValueRef const_,
@@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder,
void
lp_build_blend_soa(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src[4],
LLVMValueRef dst[4],
LLVMValueRef const_[4],
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index c11a9398f8..d14f468ba9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld,
LLVMValueRef
lp_build_blend_aos(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src,
LLVMValueRef dst,
LLVMValueRef const_,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
index b92254a7d6..9511299d55 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
void
lp_build_blend_soa(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src[4],
LLVMValueRef dst[4],
LLVMValueRef con[4],
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c
index 21487365ea..c8eaa8c394 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c
@@ -42,7 +42,7 @@
unsigned
-lp_mantissa(union lp_type type)
+lp_mantissa(struct lp_type type)
{
assert(type.floating);
@@ -72,7 +72,7 @@ lp_mantissa(union lp_type type)
* Same as lp_const_scale(), but in terms of shifts.
*/
unsigned
-lp_const_shift(union lp_type type)
+lp_const_shift(struct lp_type type)
{
if(type.floating)
return 0;
@@ -86,7 +86,7 @@ lp_const_shift(union lp_type type)
unsigned
-lp_const_offset(union lp_type type)
+lp_const_offset(struct lp_type type)
{
if(type.floating || type.fixed)
return 0;
@@ -104,7 +104,7 @@ lp_const_offset(union lp_type type)
* else for the fixed points types and normalized integers.
*/
double
-lp_const_scale(union lp_type type)
+lp_const_scale(struct lp_type type)
{
unsigned long long llscale;
double dscale;
@@ -122,7 +122,7 @@ lp_const_scale(union lp_type type)
* Minimum value representable by the type.
*/
double
-lp_const_min(union lp_type type)
+lp_const_min(struct lp_type type)
{
unsigned bits;
@@ -158,7 +158,7 @@ lp_const_min(union lp_type type)
* Maximum value representable by the type.
*/
double
-lp_const_max(union lp_type type)
+lp_const_max(struct lp_type type)
{
unsigned bits;
@@ -190,7 +190,7 @@ lp_const_max(union lp_type type)
double
-lp_const_eps(union lp_type type)
+lp_const_eps(struct lp_type type)
{
if (type.floating) {
switch(type.width) {
@@ -211,7 +211,7 @@ lp_const_eps(union lp_type type)
LLVMValueRef
-lp_build_undef(union lp_type type)
+lp_build_undef(struct lp_type type)
{
LLVMTypeRef vec_type = lp_build_vec_type(type);
return LLVMGetUndef(vec_type);
@@ -219,7 +219,7 @@ lp_build_undef(union lp_type type)
LLVMValueRef
-lp_build_zero(union lp_type type)
+lp_build_zero(struct lp_type type)
{
LLVMTypeRef vec_type = lp_build_vec_type(type);
return LLVMConstNull(vec_type);
@@ -227,7 +227,7 @@ lp_build_zero(union lp_type type)
LLVMValueRef
-lp_build_one(union lp_type type)
+lp_build_one(struct lp_type type)
{
LLVMTypeRef elem_type;
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
@@ -269,7 +269,7 @@ lp_build_one(union lp_type type)
LLVMValueRef
-lp_build_const_scalar(union lp_type type,
+lp_build_const_scalar(struct lp_type type,
double val)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
@@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type,
LLVMValueRef
-lp_build_int_const_scalar(union lp_type type,
+lp_build_int_const_scalar(struct lp_type type,
long long val)
{
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
@@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type,
LLVMValueRef
-lp_build_const_aos(union lp_type type,
+lp_build_const_aos(struct lp_type type,
double r, double g, double b, double a,
const unsigned char *swizzle)
{
@@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type,
LLVMValueRef
-lp_build_const_mask_aos(union lp_type type,
- boolean cond[4])
+lp_build_const_mask_aos(struct lp_type type,
+ const boolean cond[4])
{
LLVMTypeRef elem_type = LLVMIntType(type.width);
LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index 1934530ea3..ffb302f736 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -42,67 +42,67 @@
#include <pipe/p_compiler.h>
-union lp_type type;
+struct lp_type type;
unsigned
-lp_mantissa(union lp_type type);
+lp_mantissa(struct lp_type type);
unsigned
-lp_const_shift(union lp_type type);
+lp_const_shift(struct lp_type type);
unsigned
-lp_const_offset(union lp_type type);
+lp_const_offset(struct lp_type type);
double
-lp_const_scale(union lp_type type);
+lp_const_scale(struct lp_type type);
double
-lp_const_min(union lp_type type);
+lp_const_min(struct lp_type type);
double
-lp_const_max(union lp_type type);
+lp_const_max(struct lp_type type);
double
-lp_const_eps(union lp_type type);
+lp_const_eps(struct lp_type type);
LLVMValueRef
-lp_build_undef(union lp_type type);
+lp_build_undef(struct lp_type type);
LLVMValueRef
-lp_build_zero(union lp_type type);
+lp_build_zero(struct lp_type type);
LLVMValueRef
-lp_build_one(union lp_type type);
+lp_build_one(struct lp_type type);
LLVMValueRef
-lp_build_const_scalar(union lp_type type,
+lp_build_const_scalar(struct lp_type type,
double val);
LLVMValueRef
-lp_build_int_const_scalar(union lp_type type,
+lp_build_int_const_scalar(struct lp_type type,
long long val);
LLVMValueRef
-lp_build_const_aos(union lp_type type,
+lp_build_const_aos(struct lp_type type,
double r, double g, double b, double a,
const unsigned char *swizzle);
LLVMValueRef
-lp_build_const_mask_aos(union lp_type type,
- boolean cond[4]);
+lp_build_const_mask_aos(struct lp_type type,
+ const boolean cond[4]);
#endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index c8954c8a34..186cac70f6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -86,7 +86,7 @@
*/
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
- union lp_type src_type,
+ struct lp_type src_type,
unsigned dst_width,
LLVMValueRef src)
{
@@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
int shift = dst_width - n;
res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
- /* Fill in the empty lower bits for added precision? */
+ /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
{
LLVMValueRef msb;
@@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
unsigned src_width,
- union lp_type dst_type,
+ struct lp_type dst_type,
LLVMValueRef src)
{
LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
@@ -244,12 +244,12 @@ lp_build_const_pack_shuffle(unsigned n)
* Expand the bit width.
*
* This will only change the number of bits the values are represented, not the
- * values themselved.
+ * values themselves.
*/
static void
lp_build_expand(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
LLVMValueRef src,
LLVMValueRef *dst, unsigned num_dsts)
{
@@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder,
dst[0] = src;
while(src_type.width < dst_type.width) {
- union lp_type new_type = src_type;
+ struct lp_type new_type = src_type;
LLVMTypeRef new_vec_type;
new_type.width *= 2;
@@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder,
*/
static LLVMValueRef
lp_build_pack2(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
boolean clamped,
LLVMValueRef lo,
LLVMValueRef hi)
@@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder,
* TODO: Handle saturation consistently.
*/
static LLVMValueRef
-lp_build_trunc(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
- boolean clamped,
- const LLVMValueRef *src, unsigned num_srcs)
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs)
{
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
unsigned i;
@@ -410,7 +410,7 @@ lp_build_trunc(LLVMBuilderRef builder,
tmp[i] = src[i];
while(src_type.width > dst_type.width) {
- union lp_type new_type = src_type;
+ struct lp_type new_type = src_type;
new_type.width /= 2;
new_type.length *= 2;
@@ -442,12 +442,12 @@ lp_build_trunc(LLVMBuilderRef builder,
*/
void
lp_build_conv(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *src, unsigned num_srcs,
LLVMValueRef *dst, unsigned num_dsts)
{
- union lp_type tmp_type;
+ struct lp_type tmp_type;
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
unsigned num_tmps;
unsigned i;
@@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder,
* Clamp if necessary
*/
- if(src_type.value != dst_type.value) {
+ if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
struct lp_build_context bld;
double src_min = lp_const_min(src_type);
double dst_min = lp_const_min(dst_type);
@@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(tmp_type.width > dst_type.width) {
assert(num_dsts == 1);
- tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
+ tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
tmp_type.width = dst_type.width;
tmp_type.length = dst_type.length;
num_tmps = 1;
@@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder,
*/
void
lp_build_conv_mask(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *src, unsigned num_srcs,
LLVMValueRef *dst, unsigned num_dsts)
{
@@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
if(src_type.width > dst_type.width) {
assert(num_dsts == 1);
- dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs);
+ dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
}
else if(src_type.width < dst_type.width) {
assert(num_srcs == 1);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
index 05c1ef2a10..ca378804d2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -40,33 +40,33 @@
#include <llvm-c/Core.h>
-union lp_type type;
+struct lp_type type;
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
- union lp_type src_type,
+ struct lp_type src_type,
unsigned dst_width,
LLVMValueRef src);
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
unsigned src_width,
- union lp_type dst_type,
+ struct lp_type dst_type,
LLVMValueRef src);
void
lp_build_conv(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *srcs, unsigned num_srcs,
LLVMValueRef *dsts, unsigned num_dsts);
void
lp_build_conv_mask(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *src, unsigned num_srcs,
LLVMValueRef *dst, unsigned num_dsts);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 30925b5f41..59d8f492e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -30,10 +30,27 @@
#include <udis86.h>
#endif
+#include "util/u_math.h"
#include "util/u_debug.h"
#include "lp_bld_debug.h"
+/**
+ * Check alignment.
+ *
+ * It is important that this check is not implemented as a macro or inlined
+ * function, as the compiler assumptions in respect to alignment of global
+ * and stack variables would often make the check a no op, defeating the
+ * whole purpose of the exercise.
+ */
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment)
+{
+ assert(util_is_pot(alignment));
+ return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+
void
lp_disassemble(const void* func)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
index ecdafef76d..583e6132b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
@@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...)
}
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment);
+
+
void
lp_disassemble(const void* func);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 2cd6e6b921..21c665c4d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -71,11 +71,11 @@
/**
* Return a type appropriate for depth/stencil testing.
*/
-union lp_type
+struct lp_type
lp_depth_type(const struct util_format_description *format_desc,
unsigned length)
{
- union lp_type type;
+ struct lp_type type;
unsigned swizzle;
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
@@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc,
swizzle = format_desc->swizzle[0];
assert(swizzle < 4);
- type.value = 0;
+ memset(&type, 0, sizeof type);
type.width = format_desc->block.bits;
if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
@@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc,
void
lp_build_depth_test(LLVMBuilderRef builder,
const struct pipe_depth_state *state,
- union lp_type type,
+ struct lp_type type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
LLVMValueRef src,
@@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder,
padding_right = 0;
for(chan = 0; chan < z_swizzle; ++chan)
padding_right += format_desc->channel[chan].size;
- padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size;
+ padding_left = format_desc->block.bits -
+ (padding_right + format_desc->channel[z_swizzle].size);
if(padding_left || padding_right) {
- const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1;
- const long long mask_right = ((long long)1 << (padding_right)) - 1;
- z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right);
+ const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1;
+ const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1;
+ z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right);
}
if(padding_left)
@@ -210,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder,
LLVMBuildStore(builder, dst, dst_ptr);
}
+ /* FIXME */
assert(!state->occlusion_count);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 5d2e042fcc..79d6981bb5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -41,11 +41,11 @@
struct pipe_depth_state;
struct util_format_description;
-union lp_type;
+struct lp_type;
struct lp_build_mask_context;
-union lp_type
+struct lp_type
lp_depth_type(const struct util_format_description *format_desc,
unsigned length);
@@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc,
void
lp_build_depth_test(LLVMBuilderRef builder,
const struct pipe_depth_state *state,
- union lp_type type,
+ struct lp_type type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
LLVMValueRef src,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index 9d99e1a9d9..dcc25fbff8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -32,59 +32,261 @@
*/
#include "util/u_debug.h"
+#include "util/u_memory.h"
#include "lp_bld_type.h"
#include "lp_bld_flow.h"
+#define LP_BUILD_FLOW_MAX_VARIABLES 32
+#define LP_BUILD_FLOW_MAX_DEPTH 32
+
+
+/**
+ * Enumeration of all possible flow constructs.
+ */
+enum lp_build_flow_construct_kind {
+ lP_BUILD_FLOW_SCOPE,
+ LP_BUILD_FLOW_SKIP,
+};
+
+
+/**
+ * Variable declaration scope.
+ */
+struct lp_build_flow_scope
+{
+ /** Number of variables declared in this scope */
+ unsigned num_variables;
+};
+
+
+/**
+ * Early exit. Useful to skip to the end of a function or block when
+ * the execution mask becomes zero or when there is an error condition.
+ */
+struct lp_build_flow_skip
+{
+ /** Block to skip to */
+ LLVMBasicBlockRef block;
+
+ /** Number of variables declared at the beginning */
+ unsigned num_variables;
+
+ LLVMValueRef *phi;
+};
+
+
+/**
+ * Union of all possible flow constructs' data
+ */
+union lp_build_flow_construct_data
+{
+ struct lp_build_flow_scope scope;
+ struct lp_build_flow_skip skip;
+};
+
+
+/**
+ * Element of the flow construct stack.
+ */
+struct lp_build_flow_construct
+{
+ enum lp_build_flow_construct_kind kind;
+ union lp_build_flow_construct_data data;
+};
+
+
+/**
+ * All necessary data to generate LLVM control flow constructs.
+ *
+ * Besides keeping track of the control flow construct themselves we also
+ * need to keep track of variables in order to generate SSA Phi values.
+ */
+struct lp_build_flow_context
+{
+ LLVMBuilderRef builder;
+
+ /**
+ * Control flow stack.
+ */
+ struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
+ unsigned num_constructs;
+
+ /**
+ * Variable stack
+ */
+ LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
+ unsigned num_variables;
+};
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder)
+{
+ struct lp_build_flow_context *flow;
+
+ flow = CALLOC_STRUCT(lp_build_flow_context);
+ if(!flow)
+ return NULL;
+
+ flow->builder = builder;
+
+ return flow;
+}
+
+
void
-lp_build_mask_begin(struct lp_build_mask_context *mask,
- LLVMBuilderRef builder,
- union lp_type type,
- LLVMValueRef value)
+lp_build_flow_destroy(struct lp_build_flow_context *flow)
{
- memset(mask, 0, sizeof *mask);
+ assert(flow->num_constructs == 0);
+ assert(flow->num_variables == 0);
+ FREE(flow);
+}
- mask->builder = builder;
- mask->reg_type = LLVMIntType(type.width * type.length);
- mask->value = value;
+
+static union lp_build_flow_construct_data *
+lp_build_flow_push(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
+ if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
+ return NULL;
+
+ flow->constructs[flow->num_constructs].kind = kind;
+ return &flow->constructs[flow->num_constructs++].data;
+}
+
+
+static union lp_build_flow_construct_data *
+lp_build_flow_peek(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs);
+ if(!flow->num_constructs)
+ return NULL;
+
+ assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+ if(flow->constructs[flow->num_constructs - 1].kind != kind)
+ return NULL;
+
+ return &flow->constructs[flow->num_constructs - 1].data;
}
+static union lp_build_flow_construct_data *
+lp_build_flow_pop(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs);
+ if(!flow->num_constructs)
+ return NULL;
+
+ assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+ if(flow->constructs[flow->num_constructs - 1].kind != kind)
+ return NULL;
+
+ return &flow->constructs[--flow->num_constructs].data;
+}
+
+
+/**
+ * Begin a variable scope.
+ *
+ *
+ */
void
-lp_build_mask_update(struct lp_build_mask_context *mask,
- LLVMValueRef value)
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
{
+ struct lp_build_flow_scope *scope;
- LLVMValueRef cond;
- LLVMBasicBlockRef current_block;
- LLVMBasicBlockRef next_block;
- LLVMBasicBlockRef new_block;
+ scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- if(mask->value)
- mask->value = LLVMBuildAnd(mask->builder, mask->value, value, "");
- else
- mask->value = value;
+ scope->num_variables = 0;
+}
- /* FIXME: disabled until we have proper control flow helpers */
-#if 0
- cond = LLVMBuildICmp(mask->builder,
- LLVMIntEQ,
- LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""),
- LLVMConstNull(mask->reg_type),
- "");
- current_block = LLVMGetInsertBlock(mask->builder);
+/**
+ * Declare a variable.
+ *
+ * A variable is a named entity which can have different LLVMValueRef's at
+ * different points of the program. This is relevant for control flow because
+ * when there are mutiple branches to a same location we need to replace
+ * the variable's value with a Phi function as explained in
+ * http://en.wikipedia.org/wiki/Static_single_assignment_form .
+ *
+ * We keep track of variables by keeping around a pointer to where their
+ * current.
+ *
+ * There are a few cautions to observe:
+ *
+ * - Variable's value must not be NULL. If there is no initial value then
+ * LLVMGetUndef() should be used.
+ *
+ * - Variable's value must be kept up-to-date. If the variable is going to be
+ * modified by a function then a pointer should be passed so that its value
+ * is accurate. Failure to do this will cause some of the variables'
+ * transient values to be lost, leading to wrong results.
+ *
+ * - A program should be written from top to bottom, by always appending
+ * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
+ * modifying existing statements will most likely lead to wrong results.
+ *
+ */
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+ LLVMValueRef *variable)
+{
+ struct lp_build_flow_scope *scope;
+
+ scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- if(!mask->skip_block) {
- LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
- mask->skip_block = LLVMAppendBasicBlock(function, "skip");
+ assert(*variable);
+ if(!*variable)
+ return;
+
+ assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
+ if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
+ return;
+
+ flow->variables[flow->num_variables++] = variable;
+ ++scope->num_variables;
+}
+
+
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow)
+{
+ struct lp_build_flow_scope *scope;
+
+ scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), "");
+ assert(flow->num_variables >= scope->num_variables);
+ if(flow->num_variables < scope->num_variables) {
+ flow->num_variables = 0;
+ return;
}
+ flow->num_variables -= scope->num_variables;
+}
+
+
+static LLVMBasicBlockRef
+lp_build_flow_insert_block(struct lp_build_flow_context *flow)
+{
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef new_block;
+
+ current_block = LLVMGetInsertBlock(flow->builder);
+
next_block = LLVMGetNextBasicBlock(current_block);
- assert(next_block);
if(next_block) {
new_block = LLVMInsertBasicBlock(next_block, "");
}
@@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
new_block = LLVMAppendBasicBlock(function, "");
}
- LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
- LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block);
+ return new_block;
+}
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
+{
+ struct lp_build_flow_skip *skip;
+ LLVMBuilderRef builder;
+ unsigned i;
+
+ skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
+
+ skip->block = lp_build_flow_insert_block(flow);
+ skip->num_variables = flow->num_variables;
+ if(!skip->num_variables) {
+ skip->phi = NULL;
+ return;
+ }
- LLVMPositionBuilderAtEnd(mask->builder, new_block);
-#endif
+ skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
+ if(!skip->phi) {
+ skip->num_variables = 0;
+ return;
+ }
+
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, skip->block);
+
+ for(i = 0; i < skip->num_variables; ++i)
+ skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
+
+ LLVMDisposeBuilder(builder);
}
-LLVMValueRef
-lp_build_mask_end(struct lp_build_mask_context *mask)
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+ LLVMValueRef cond)
+{
+ struct lp_build_flow_skip *skip;
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef new_block;
+ unsigned i;
+
+ skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
+
+ current_block = LLVMGetInsertBlock(flow->builder);
+
+ new_block = lp_build_flow_insert_block(flow);
+
+ for(i = 0; i < skip->num_variables; ++i) {
+ assert(*flow->variables[i]);
+ LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
+ }
+
+ LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
+
+ LLVMPositionBuilderAtEnd(flow->builder, new_block);
+ }
+
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow)
{
- if(mask->skip_block) {
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder);
+ struct lp_build_flow_skip *skip;
+ LLVMBasicBlockRef current_block;
+ unsigned i;
- LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
- LLVMBuildBr(mask->builder, mask->skip_block);
+ skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
- LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block);
+ current_block = LLVMGetInsertBlock(flow->builder);
- mask->value = mask->phi;
- mask->phi = NULL;
- mask->skip_block = NULL;
+ for(i = 0; i < skip->num_variables; ++i) {
+ assert(*flow->variables[i]);
+ LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
+ *flow->variables[i] = skip->phi[i];
}
+ LLVMBuildBr(flow->builder, skip->block);
+ LLVMPositionBuilderAtEnd(flow->builder, skip->block);
+
+ FREE(skip->phi);
+}
+
+
+static void
+lp_build_mask_check(struct lp_build_mask_context *mask)
+{
+ LLVMBuilderRef builder = mask->flow->builder;
+ LLVMValueRef cond;
+
+ cond = LLVMBuildICmp(builder,
+ LLVMIntEQ,
+ LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
+ LLVMConstNull(mask->reg_type),
+ "");
+
+ lp_build_flow_skip_cond_break(mask->flow, cond);
+}
+
+
+void
+lp_build_mask_begin(struct lp_build_mask_context *mask,
+ struct lp_build_flow_context *flow,
+ struct lp_type type,
+ LLVMValueRef value)
+{
+ memset(mask, 0, sizeof *mask);
+
+ mask->flow = flow;
+ mask->reg_type = LLVMIntType(type.width * type.length);
+ mask->value = value;
+
+ lp_build_flow_scope_begin(flow);
+ lp_build_flow_scope_declare(flow, &mask->value);
+ lp_build_flow_skip_begin(flow);
+
+ lp_build_mask_check(mask);
+}
+
+
+void
+lp_build_mask_update(struct lp_build_mask_context *mask,
+ LLVMValueRef value)
+{
+ mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
+
+ lp_build_mask_check(mask);
+}
+
+
+LLVMValueRef
+lp_build_mask_end(struct lp_build_mask_context *mask)
+{
+ lp_build_flow_skip_end(mask->flow);
+ lp_build_flow_scope_end(mask->flow);
return mask->value;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
index 1b634ff038..e61999ff06 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
@@ -38,27 +38,53 @@
#include <llvm-c/Core.h>
-union lp_type;
+struct lp_type;
+
+
+struct lp_build_flow_context;
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder);
+
+void
+lp_build_flow_destroy(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+ LLVMValueRef *variable);
+
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+ LLVMValueRef cond);
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow);
struct lp_build_mask_context
{
- LLVMBuilderRef builder;
+ struct lp_build_flow_context *flow;
LLVMTypeRef reg_type;
LLVMValueRef value;
-
- LLVMValueRef phi;
-
- LLVMBasicBlockRef skip_block;
};
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
- LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_build_flow_context *flow,
+ struct lp_type type,
LLVMValueRef value);
/**
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 01c8a752d1..6d3f692619 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -31,21 +31,15 @@
/**
* @file
- * LLVM IR building helpers interfaces.
- *
- * We use LLVM-C bindings for now. They are not documented, but follow the C++
- * interfaces very closely, and appear to be complete enough for code
- * genration. See
- * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- * for a standalone example.
+ * Pixel format helpers.
*/
#include <llvm-c/Core.h>
-
-#include "pipe/p_format.h"
+#include "pipe/p_format.h"
-union lp_type;
+struct util_format_description;
+struct lp_type;
/**
@@ -56,9 +50,9 @@ union lp_type;
* @return RGBA in a 4 floats vector.
*/
LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef packed);
/**
@@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
* @param rgba 4 float vector with the unpacked components.
*/
LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef rgba);
/**
@@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
* @return RGBA in a 4 floats vector.
*/
LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr);
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr);
/**
@@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder,
* @param rgba 4 float vector with the unpacked components.
*/
void
-lp_build_store_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba);
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr,
+ LLVMValueRef rgba);
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba);
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ LLVMValueRef *rgba);
#endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index dcbc0076c7..b9b5d84bed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -32,9 +32,9 @@
LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed)
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef packed)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba)
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef rgba)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr)
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder,
packed = LLVMBuildLoad(builder, ptr, "");
- return lp_build_unpack_rgba(builder, format, packed);
+ return lp_build_unpack_rgba_aos(builder, format, packed);
}
void
-lp_build_store_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba)
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr,
+ LLVMValueRef rgba)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder,
type = LLVMIntType(desc->block.bits);
- packed = lp_build_pack_rgba(builder, format, rgba);
+ packed = lp_build_pack_rgba_aos(builder, format, rgba);
ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
new file mode 100644
index 0000000000..b5ff434e1a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -0,0 +1,208 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_format.h"
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ LLVMValueRef res;
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for(i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem_offset;
+ LLVMValueRef elem_ptr;
+ LLVMValueRef elem;
+
+ elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+ elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+ elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+ assert(src_width <= dst_width);
+ if(src_width > dst_width)
+ elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+ if(src_width < dst_width)
+ elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+
+ return res;
+}
+
+
+static LLVMValueRef
+lp_build_format_swizzle(struct lp_type type,
+ const LLVMValueRef *inputs,
+ enum util_format_swizzle swizzle)
+{
+ switch (swizzle) {
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ return inputs[swizzle];
+ case UTIL_FORMAT_SWIZZLE_0:
+ return lp_build_zero(type);
+ case UTIL_FORMAT_SWIZZLE_1:
+ return lp_build_one(type);
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ return lp_build_undef(type);
+ default:
+ assert(0);
+ return lp_build_undef(type);
+ }
+}
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef inputs[4];
+ unsigned start;
+ unsigned chan;
+
+ /* FIXME: Support more formats */
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+ assert(format_desc->block.bits <= 32);
+
+ /* Decode the input vector components */
+ start = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned width = format_desc->channel[chan].size;
+ unsigned stop = start + width;
+ LLVMValueRef input;
+
+ input = packed;
+
+ switch(format_desc->channel[chan].type) {
+ case UTIL_FORMAT_TYPE_VOID:
+ input = NULL;
+ break;
+
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(type.floating) {
+ if(start)
+ input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
+ if(stop < format_desc->block.bits) {
+ unsigned mask = ((unsigned long long)1 << width) - 1;
+ input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
+ }
+
+ if(format_desc->channel[chan].normalized)
+ input = lp_build_unsigned_norm_to_float(builder, width, type, input);
+ else
+ input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), "");
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ input = lp_build_undef(type);
+ }
+ break;
+
+ default:
+ /* fall through */
+ input = lp_build_undef(type);
+ break;
+ }
+
+ inputs[chan] = input;
+
+ start = stop;
+ }
+
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[0];
+ LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
+ rgba[2] = rgba[1] = rgba[0] = depth;
+ rgba[3] = lp_build_one(type);
+ }
+ else {
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
+ }
+ }
+}
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef packed;
+
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+ assert(format_desc->block.bits <= 32);
+
+ packed = lp_build_gather(builder,
+ type.length, format_desc->block.bits, type.width,
+ base_ptr, offsets);
+
+ lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index cfe20a0d75..338dbca6d1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -292,7 +292,7 @@ void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
const struct tgsi_token *tokens,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef a0_ptr,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 9194f6233a..9c57a10879 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -83,7 +83,7 @@ void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
const struct tgsi_token *tokens,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef a0_ptr,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 8631efd6c3..6b6f820769 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
@@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- union lp_type type = bld->type;
+ struct lp_type type = bld->type;
LLVMValueRef res;
if(a == b)
@@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld,
b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
}
- /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
-
a = LLVMBuildAnd(bld->builder, a, mask, "");
/* This often gets translated to PANDN, but sometimes the NOT is
@@ -339,9 +337,9 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- boolean cond[4])
+ const boolean cond[4])
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
@@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld,
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
}
+ else {
#if 0
- else if(0) {
- /* FIXME: Unfortunately select of vectors do not work */
+ /* XXX: Unfortunately select of vectors do not work */
/* Use a select */
LLVMTypeRef elem_type = LLVMInt1Type();
LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
@@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld,
cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
- }
-#endif
- else {
+#else
LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
return lp_build_select(bld, mask, a, b);
+#endif
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index 29b9e1c45b..a4ee7723b5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -42,7 +42,7 @@
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
-union lp_type type;
+struct lp_type type;
struct lp_build_context;
@@ -66,7 +66,7 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- boolean cond[4]);
+ const boolean cond[4]);
#endif /* !LP_BLD_LOGIC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
new file mode 100644
index 0000000000..403d0e4836
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -0,0 +1,135 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_SAMPLE_H
+#define LP_BLD_SAMPLE_H
+
+
+#include <llvm-c/Core.h>
+
+struct pipe_texture;
+struct pipe_sampler_state;
+struct lp_type;
+
+
+/**
+ * Sampler static state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are embedded in the generated code.
+ */
+struct lp_sampler_static_state
+{
+ /* pipe_texture's state */
+ enum pipe_format format;
+ unsigned target:2;
+ unsigned pot_width:1;
+ unsigned pot_height:1;
+ unsigned pot_depth:1;
+
+ /* pipe_sampler_state's state */
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ unsigned wrap_r:3;
+ unsigned min_img_filter:2;
+ unsigned min_mip_filter:2;
+ unsigned mag_img_filter:2;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned normalized_coords:1;
+ unsigned prefilter:4;
+};
+
+
+/**
+ * Sampler dynamic state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are computed in runtime.
+ *
+ * There are obtained through callbacks, as we don't want to tie the texture
+ * sampling code generation logic to any particular texture layout or pipe
+ * driver.
+ */
+struct lp_sampler_dynamic_state
+{
+
+ /** Obtain the base texture width. */
+ LLVMValueRef
+ (*width)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ /** Obtain the base texture height. */
+ LLVMValueRef
+ (*height)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*stride)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*data_ptr)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+};
+
+
+/**
+ * Derive the sampler static state.
+ */
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler);
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+ const struct lp_sampler_static_state *static_state,
+ struct lp_sampler_dynamic_state *dynamic_state,
+ struct lp_type fp_type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel);
+
+
+
+#endif /* LP_BLD_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
new file mode 100644
index 0000000000..8ca1be6f1b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -0,0 +1,416 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_format.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ memset(state, 0, sizeof *state);
+
+ if(!texture)
+ return;
+
+ if(!sampler)
+ return;
+
+ state->format = texture->format;
+ state->target = texture->target;
+ state->pot_width = util_is_pot(texture->width[0]);
+ state->pot_height = util_is_pot(texture->height[0]);
+ state->pot_depth = util_is_pot(texture->depth[0]);
+
+ state->wrap_s = sampler->wrap_s;
+ state->wrap_t = sampler->wrap_t;
+ state->wrap_r = sampler->wrap_r;
+ state->min_img_filter = sampler->min_img_filter;
+ state->min_mip_filter = sampler->min_mip_filter;
+ state->mag_img_filter = sampler->mag_img_filter;
+ if(sampler->compare_mode) {
+ state->compare_mode = sampler->compare_mode;
+ state->compare_func = sampler->compare_func;
+ }
+ state->normalized_coords = sampler->normalized_coords;
+ state->prefilter = sampler->prefilter;
+}
+
+
+
+/**
+ * Keep all information for sampling code generation in a single place.
+ */
+struct lp_build_sample_context
+{
+ LLVMBuilderRef builder;
+
+ const struct lp_sampler_static_state *static_state;
+
+ struct lp_sampler_dynamic_state *dynamic_state;
+
+ const struct util_format_description *format_desc;
+
+ /** Incoming coordinates type and build context */
+ struct lp_type coord_type;
+ struct lp_build_context coord_bld;
+
+ /** Integer coordinates */
+ struct lp_type int_coord_type;
+ struct lp_build_context int_coord_bld;
+
+ /** Output texels type and build context */
+ struct lp_type texel_type;
+ struct lp_build_context texel_bld;
+};
+
+
+static void
+lp_build_sample_texel(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef x_stride;
+ LLVMValueRef offset;
+
+ x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
+
+ if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ LLVMValueRef x_lo, x_hi;
+ LLVMValueRef y_lo, y_hi;
+ LLVMValueRef x_stride_lo, x_stride_hi;
+ LLVMValueRef y_stride_lo, y_stride_hi;
+ LLVMValueRef x_offset_lo, x_offset_hi;
+ LLVMValueRef y_offset_lo, y_offset_hi;
+ LLVMValueRef offset_lo, offset_hi;
+
+ x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
+ y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
+
+ x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
+ y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
+
+ x_stride_lo = x_stride;
+ y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
+
+ x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
+ y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
+
+ x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
+ y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
+ offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
+
+ x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
+ y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
+ offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
+
+ offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
+ }
+ else {
+ LLVMValueRef x_offset;
+ LLVMValueRef y_offset;
+
+ x_offset = lp_build_mul(int_coord_bld, x, x_stride);
+ y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+
+ offset = lp_build_add(int_coord_bld, x_offset, y_offset);
+ }
+
+ lp_build_load_rgba_soa(bld->builder,
+ bld->format_desc,
+ bld->texel_type,
+ data_ptr,
+ offset,
+ texel);
+}
+
+
+static LLVMValueRef
+lp_build_sample_wrap(struct lp_build_sample_context *bld,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ boolean is_pot,
+ unsigned wrap_mode)
+{
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if(is_pot)
+ coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
+ else
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord = LLVMBuildURem(bld->builder, coord, length, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+ coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* FIXME */
+ _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+ coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+ coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ return coord;
+}
+
+
+static void
+lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef x;
+ LLVMValueRef y;
+
+ x = lp_build_ifloor(&bld->coord_bld, s);
+ y = lp_build_ifloor(&bld->coord_bld, t);
+
+ x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+}
+
+
+static void
+lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef half;
+ LLVMValueRef s_ipart;
+ LLVMValueRef t_ipart;
+ LLVMValueRef s_fpart;
+ LLVMValueRef t_fpart;
+ LLVMValueRef x0, x1;
+ LLVMValueRef y0, y1;
+ LLVMValueRef neighbors[2][2][4];
+ unsigned chan;
+
+ half = lp_build_const_scalar(bld->coord_type, 0.5);
+ s = lp_build_sub(&bld->coord_bld, s, half);
+ t = lp_build_sub(&bld->coord_bld, t, half);
+
+ s_ipart = lp_build_floor(&bld->coord_bld, s);
+ t_ipart = lp_build_floor(&bld->coord_bld, t);
+
+ s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
+ t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
+
+ x0 = lp_build_int(&bld->coord_bld, s_ipart);
+ y0 = lp_build_int(&bld->coord_bld, t_ipart);
+
+ x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+ y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+ x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+ lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+ lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+ lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+
+ /* TODO: Don't interpolate missing channels */
+ for(chan = 0; chan < 4; ++chan) {
+ texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ neighbors[1][0][chan],
+ neighbors[1][1][chan]);
+ }
+}
+
+
+static void
+lp_build_sample_compare(struct lp_build_sample_context *bld,
+ LLVMValueRef p,
+ LLVMValueRef *texel)
+{
+ struct lp_build_context *texel_bld = &bld->texel_bld;
+ LLVMValueRef res;
+ unsigned chan;
+
+ if(!bld->static_state->compare_mode)
+ return;
+
+ /* TODO: Compare before swizzling, to avoid redundant computations */
+ res = NULL;
+ for(chan = 0; chan < 4; ++chan) {
+ LLVMValueRef cmp;
+ cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
+ cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
+
+ if(res)
+ res = lp_build_add(texel_bld, res, cmp);
+ else
+ res = cmp;
+ }
+
+ assert(res);
+ res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
+
+ /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+ for(chan = 0; chan < 3; ++chan)
+ texel[chan] = res;
+ texel[3] = texel_bld->one;
+}
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+ const struct lp_sampler_static_state *static_state,
+ struct lp_sampler_dynamic_state *dynamic_state,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_build_sample_context bld;
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef stride;
+ LLVMValueRef data_ptr;
+ LLVMValueRef s;
+ LLVMValueRef t;
+ LLVMValueRef p;
+
+ /* Setup our build context */
+ memset(&bld, 0, sizeof bld);
+ bld.builder = builder;
+ bld.static_state = static_state;
+ bld.dynamic_state = dynamic_state;
+ bld.format_desc = util_format_description(static_state->format);
+ bld.coord_type = type;
+ bld.int_coord_type = lp_int_type(type);
+ bld.texel_type = type;
+ lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
+ lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+ lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
+
+ /* Get the dynamic state */
+ width = dynamic_state->width(dynamic_state, builder, unit);
+ height = dynamic_state->height(dynamic_state, builder, unit);
+ stride = dynamic_state->stride(dynamic_state, builder, unit);
+ data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+
+ s = coords[0];
+ t = coords[1];
+ p = coords[2];
+
+ width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
+ height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
+ stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
+
+ if(static_state->target == PIPE_TEXTURE_1D)
+ t = bld.coord_bld.zero;
+
+ if(static_state->normalized_coords) {
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
+ LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
+ s = lp_build_mul(&bld.coord_bld, s, fp_width);
+ t = lp_build_mul(&bld.coord_bld, t, fp_height);
+ }
+
+ switch (static_state->min_img_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ case PIPE_TEX_FILTER_ANISO:
+ lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ break;
+ default:
+ assert(0);
+ }
+
+ /* FIXME: respect static_state->min_mip_filter */;
+ /* FIXME: respect static_state->mag_img_filter */;
+ /* FIXME: respect static_state->prefilter */;
+
+ lp_build_sample_compare(&bld, p, texel);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
index 14d2b10df9..3998ac374f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
@@ -42,17 +42,30 @@
LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ unsigned member,
+ const char *name)
+{
+ LLVMValueRef indices[2];
+ LLVMValueRef member_ptr;
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
+ member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+ lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name);
+ return member_ptr;
+}
+
+
+LLVMValueRef
lp_build_struct_get(LLVMBuilderRef builder,
LLVMValueRef ptr,
unsigned member,
const char *name)
{
- LLVMValueRef indices[2];
LLVMValueRef member_ptr;
LLVMValueRef res;
- indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
- member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+ member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name);
res = LLVMBuildLoad(builder, member_ptr, "");
lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name);
return res;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
index cbefdc9f81..740392f561 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
@@ -53,6 +53,18 @@
offsetof(_ctype, _cmember))
+/**
+ * Get value pointer to a structure member.
+ */
+LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ unsigned member,
+ const char *name);
+
+/**
+ * Get the value of a structure member.
+ */
LLVMValueRef
lp_build_struct_get(LLVMBuilderRef builder,
LLVMValueRef ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
index ac7eed9379..64e81f7b1f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -64,7 +64,7 @@ LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res;
unsigned i;
@@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef a,
unsigned channel)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
@@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
* YY00 YY00 .... YY00
* YYYY YYYY .... YYYY <= output
*/
- union lp_type type4 = type;
+ struct lp_type type4 = type;
const char shifts[4][2] = {
{ 1, 2},
{-1, 2},
@@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
@@ -192,7 +192,7 @@ LLVMValueRef
lp_build_swizzle2_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
@@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld,
return lp_build_swizzle1_aos(bld, a, swizzle);
if(a == b) {
- swizzle[0] %= 4;
- swizzle[1] %= 4;
- swizzle[2] %= 4;
- swizzle[3] %= 4;
- return lp_build_swizzle1_aos(bld, a, swizzle);
+ unsigned char swizzle1[4];
+ swizzle1[0] = swizzle[0] % 4;
+ swizzle1[1] = swizzle[1] % 4;
+ swizzle1[2] = swizzle[2] % 4;
+ swizzle1[3] = swizzle[3] % 4;
+ return lp_build_swizzle1_aos(bld, a, swizzle1);
}
if(swizzle[0] % 4 == 0 &&
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index d7dd6a8a60..1f6da80448 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-union lp_type type;
+struct lp_type type;
struct lp_build_context;
@@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned char swizzle[4]);
+ const unsigned char swizzle[4]);
/**
@@ -85,7 +85,7 @@ LLVMValueRef
lp_build_swizzle2_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- unsigned char swizzle[4]);
+ const unsigned char swizzle[4]);
#endif /* !LP_BLD_SWIZZLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 912db24aec..eddb7a83fa 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -39,31 +39,46 @@
struct tgsi_token;
-union lp_type;
+struct lp_type;
struct lp_build_context;
struct lp_build_mask_context;
-typedef void
-(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder,
- void *context,
- unsigned unit,
- unsigned num_coords,
- const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel);
+/**
+ * Sampler code generation interface.
+ *
+ * Although texture sampling is a requirement for TGSI translation, it is
+ * a very different problem with several different approaches to it. This
+ * structure establishes an interface for texture sampling code generation, so
+ * that we can easily use different texture sampling strategies.
+ */
+struct lp_build_sampler_soa
+{
+ void
+ (*destroy)( struct lp_build_sampler_soa *sampler );
+
+ void
+ (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel);
+};
+
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
- lp_emit_fetch_texel_soa_callback emit_fetch_texel,
- void *emit_fetch_texel_context);
+ struct lp_build_sampler_soa *sampler);
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index d4d18febec..adc81569ed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -78,6 +78,11 @@
#define CHAN_Z 2
#define CHAN_W 3
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+
struct lp_build_tgsi_soa_context
{
@@ -88,8 +93,7 @@ struct lp_build_tgsi_soa_context
const LLVMValueRef (*inputs)[NUM_CHANNELS];
LLVMValueRef (*outputs)[NUM_CHANNELS];
- lp_emit_fetch_texel_soa_callback emit_fetch_texel;
- void *emit_fetch_texel_context;
+ struct lp_build_sampler_soa *sampler;
LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
@@ -98,6 +102,51 @@ struct lp_build_tgsi_soa_context
};
+static const unsigned char
+swizzle_left[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_LEFT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
+};
+
+static const unsigned char
+swizzle_right[4] = {
+ QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
+ QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = {
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
+};
+
+
+static LLVMValueRef
+emit_ddx(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
+ LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
+ return lp_build_sub(&bld->base, src_right, src_left);
+}
+
+
+static LLVMValueRef
+emit_ddy(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
+ LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
+ return lp_build_sub(&bld->base, src_top, src_bottom);
+}
+
+
/**
* Register fetch.
*/
@@ -168,6 +217,7 @@ emit_fetch(
break;
case TGSI_UTIL_SIGN_SET:
+ /* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
res = LLVMBuildNeg( bld->base.builder, res, "" );
break;
@@ -185,6 +235,36 @@ emit_fetch(
/**
+ * Register fetch with derivatives.
+ */
+static void
+emit_fetch_deriv(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ const unsigned chan_index,
+ LLVMValueRef *res,
+ LLVMValueRef *ddx,
+ LLVMValueRef *ddy)
+{
+ LLVMValueRef src;
+
+ src = emit_fetch(bld, inst, index, chan_index);
+
+ if(res)
+ *res = src;
+
+ /* TODO: use interpolation coeffs for inputs */
+
+ if(ddx)
+ *ddx = emit_ddx(bld, src);
+
+ if(ddy)
+ *ddy = emit_ddy(bld, src);
+}
+
+
+/**
* Register store.
*/
static void
@@ -239,17 +319,18 @@ emit_store(
* High-level instruction translators.
*/
+
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
boolean apply_lodbias,
- boolean projected)
+ boolean projected,
+ LLVMValueRef *texel)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
LLVMValueRef lodbias;
LLVMValueRef oow;
LLVMValueRef coords[3];
- LLVMValueRef texel[4];
unsigned num_coords;
unsigned i;
@@ -289,12 +370,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
- bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context,
- unit, num_coords, coords, lodbias, texel);
-
- FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
- emit_store( bld, inst, 0, i, texel[i] );
- }
+ bld->sampler->emit_fetch_texel(bld->sampler,
+ bld->base.builder,
+ bld->base.type,
+ unit, num_coords, coords, lodbias,
+ texel);
}
@@ -347,14 +427,6 @@ emit_kil(
}
-static void
-emit_kilp(
- struct lp_build_tgsi_soa_context *bld )
-{
- /* XXX todo / fix me */
-}
-
-
/**
* Check if inst src/dest regs use indirect addressing into temporary
* register file.
@@ -382,25 +454,35 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
static int
emit_instruction(
struct lp_build_tgsi_soa_context *bld,
- struct tgsi_full_instruction *inst )
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info)
{
unsigned chan_index;
LLVMValueRef src0, src1, src2;
LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- LLVMValueRef dst0;
+ LLVMValueRef res;
+ LLVMValueRef dst0[NUM_CHANNELS];
/* we can't handle indirect addressing into temp register file yet */
if (indirect_temp_reference(inst))
return FALSE;
+ assert(info->num_dst <= 1);
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.undef;
+ }
+ }
+
switch (inst->Instruction.Opcode) {
#if 0
case TGSI_OPCODE_ARL:
+ /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_flr(bld, 0, 0);
emit_f2it( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
#endif
@@ -408,19 +490,17 @@ emit_instruction(
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
}
break;
case TGSI_OPCODE_LIT:
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
- emit_store( bld, inst, 0, CHAN_X, bld->base.one);
+ dst0[CHAN_X] = bld->base.one;
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
src0 = emit_fetch( bld, inst, 0, CHAN_X );
- dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
- emit_store( bld, inst, 0, CHAN_Y, dst0);
+ dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
/* XMM[1] = SrcReg[0].yyyy */
@@ -432,20 +512,19 @@ emit_instruction(
tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
- dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
- emit_store( bld, inst, 0, CHAN_Z, dst0);
+ dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
- emit_store( bld, inst, 0, CHAN_W, bld->base.one);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_RCP:
/* TGSI_OPCODE_RECIP */
src0 = emit_fetch( bld, inst, 0, CHAN_X );
- dst0 = lp_build_rcp(&bld->base, src0);
+ res = lp_build_rcp(&bld->base, src0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -453,9 +532,9 @@ emit_instruction(
/* TGSI_OPCODE_RECIPSQRT */
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src0 = lp_build_abs(&bld->base, src0);
- dst0 = lp_build_rsqrt(&bld->base, src0);
+ res = lp_build_rsqrt(&bld->base, src0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -479,16 +558,15 @@ emit_instruction(
lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = tmp0;
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
- emit_store( bld, inst, 0, CHAN_Y, tmp1);
+ dst0[CHAN_Y] = tmp1;
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
- emit_store( bld, inst, 0, CHAN_Z, tmp2);
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
@@ -514,20 +592,18 @@ emit_instruction(
/* dst.x = floor(lg2(abs(src.x))) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = tmp0;
/* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
- tmp1 = lp_build_div( &bld->base, src0, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp1);
+ dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
}
/* dst.z = lg2(abs(src.x)) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
- emit_store( bld, inst, 0, CHAN_Z, tmp2);
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
@@ -535,8 +611,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_mul(&bld->base, src0, src1);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
}
break;
@@ -544,8 +619,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_add(&bld->base, src0, src1);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
}
break;
@@ -563,7 +637,7 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -585,28 +659,24 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DST:
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = bld->base.one;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp0);
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
- tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
- emit_store( bld, inst, 0, CHAN_Z, tmp0);
+ dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
}
break;
@@ -614,8 +684,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_min( &bld->base, src0, src1 );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
}
break;
@@ -623,8 +692,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_max( &bld->base, src0, src1 );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
}
break;
@@ -634,8 +702,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -645,8 +712,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -658,7 +724,7 @@ emit_instruction(
tmp2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -666,8 +732,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
tmp1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
}
break;
@@ -678,13 +743,19 @@ emit_instruction(
src2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_sub( &bld->base, src1, src2 );
tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
- dst0 = lp_build_add( &bld->base, tmp0, src2 );
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
}
break;
case TGSI_OPCODE_CND:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
+ }
break;
case TGSI_OPCODE_DP2A:
@@ -698,45 +769,49 @@ emit_instruction(
tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
-#if 0
case TGSI_OPCODE_FRC:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_frc( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp0 = lp_build_floor(&bld->base, src0);
+ tmp0 = lp_build_sub(&bld->base, tmp0, src0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_CLAMP:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_max(&bld->base, tmp0, src1);
+ tmp0 = lp_build_min(&bld->base, tmp0, src2);
+ dst0[chan_index] = tmp0;
+ }
break;
case TGSI_OPCODE_FLR:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_flr( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
}
break;
case TGSI_OPCODE_ROUND:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_rnd( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_round(&bld->base, tmp0);
}
break;
-#endif
case TGSI_OPCODE_EX2: {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_exp2( &bld->base, tmp0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
}
@@ -745,16 +820,16 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_log2( &bld->base, tmp0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_POW:
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src1 = emit_fetch( bld, inst, 1, CHAN_X );
- dst0 = lp_build_pow( &bld->base, src0, src1 );
+ res = lp_build_pow( &bld->base, src0, src1 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -775,7 +850,7 @@ emit_instruction(
tmp5 = tmp3;
tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
- emit_store( bld, inst, 0, CHAN_X, tmp2);
+ dst0[CHAN_X] = tmp2;
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
@@ -786,31 +861,30 @@ emit_instruction(
tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp3);
+ dst0[CHAN_Y] = tmp3;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
- emit_store( bld, inst, 0, CHAN_Z, tmp5);
+ dst0[CHAN_Z] = tmp5;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_ABS:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
}
break;
case TGSI_OPCODE_RCC:
+ /* deprecated? */
+ assert(0);
return 0;
- break;
case TGSI_OPCODE_DPH:
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
@@ -827,7 +901,7 @@ emit_instruction(
tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -835,25 +909,27 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_cos( &bld->base, tmp0 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DDX:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
+ }
break;
case TGSI_OPCODE_DDY:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
+ }
break;
-#if 0
case TGSI_OPCODE_KILP:
/* predicated kill */
- emit_kilp( bld );
- return 0; /* XXX fix me */
+ /* FIXME */
+ return 0;
break;
-#endif
case TGSI_OPCODE_KIL:
/* conditional kill */
@@ -885,13 +961,14 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
case TGSI_OPCODE_SFL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
break;
case TGSI_OPCODE_SGT:
@@ -899,8 +976,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -908,7 +984,7 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_sin( &bld->base, tmp0 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -917,8 +993,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -927,85 +1002,99 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
case TGSI_OPCODE_STR:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.one;
+ }
break;
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, FALSE, FALSE );
+ emit_tex( bld, inst, FALSE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXD:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_UP2H:
+ /* deprecated */
+ assert (0);
return 0;
break;
case TGSI_OPCODE_UP2US:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4B:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4UB:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_X2D:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ARA:
+ /* deprecated */
+ assert(0);
return 0;
break;
#if 0
case TGSI_OPCODE_ARR:
+ /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_rnd( bld, 0, 0 );
emit_f2it( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
#endif
case TGSI_OPCODE_BRA:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CAL:
+ /* FIXME */
return 0;
break;
-#if 0
case TGSI_OPCODE_RET:
- emit_ret( bld );
+ /* FIXME */
+ return 0;
break;
-#endif
case TGSI_OPCODE_END:
break;
-#if 0
case TGSI_OPCODE_SSG:
/* TGSI_OPCODE_SGN */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_sgn( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
}
break;
-#endif
case TGSI_OPCODE_CMP:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -1013,34 +1102,29 @@ emit_instruction(
src1 = emit_fetch( bld, inst, 1, chan_index );
src2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
- dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
}
break;
case TGSI_OPCODE_SCS:
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
- tmp0 = lp_build_cos( &bld->base, tmp0 );
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
- tmp0 = lp_build_sin( &bld->base, tmp0 );
- emit_store( bld, inst, 0, CHAN_Y, tmp0);
+ dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
- tmp0 = bld->base.zero;
- emit_store( bld, inst, 0, CHAN_Z, tmp0);
+ dst0[CHAN_Z] = bld->base.zero;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
case TGSI_OPCODE_NRM:
@@ -1099,38 +1183,35 @@ emit_instruction(
/* dst.x = xmm1 * src.x */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
- tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
- emit_store(bld, inst, 0, CHAN_X, tmp4);
+ dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
}
/* dst.y = xmm1 * src.y */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
- tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
- emit_store(bld, inst, 0, CHAN_Y, tmp5);
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
}
/* dst.z = xmm1 * src.z */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
- tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
- emit_store(bld, inst, 0, CHAN_Z, tmp6);
+ dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
}
/* dst.w = xmm1 * src.w */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
- tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
- emit_store(bld, inst, 0, CHAN_W, tmp7);
+ dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
}
}
- /* dst0.w = 1.0 */
+ /* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
- tmp0 = bld->base.one;
- emit_store(bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
}
break;
case TGSI_OPCODE_DIV:
+ /* deprecated */
+ assert( 0 );
return 0;
break;
@@ -1143,118 +1224,157 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, FALSE, TRUE );
+ emit_tex( bld, inst, FALSE, TRUE, dst0 );
break;
case TGSI_OPCODE_BRK:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_IF:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_BGNFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_REP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ELSE:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_ENDIF:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_ENDFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ENDREP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_PUSHA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_POPA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CEIL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
+ }
break;
case TGSI_OPCODE_I2F:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_NOT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
-#if 0
case TGSI_OPCODE_TRUNC:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_f2it( bld, 0 );
- emit_i2f( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
}
break;
-#endif
case TGSI_OPCODE_SHL:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SHR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_AND:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_OR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_MOD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_XOR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SAD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXF:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXQ:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CONT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
@@ -1266,10 +1386,28 @@ emit_instruction(
return 0;
break;
+ case TGSI_OPCODE_NOISE1:
+ case TGSI_OPCODE_NOISE2:
+ case TGSI_OPCODE_NOISE3:
+ case TGSI_OPCODE_NOISE4:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
+ break;
+
+ case TGSI_OPCODE_NOP:
+ break;
+
default:
return 0;
}
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+ }
+ }
+
return 1;
}
@@ -1277,14 +1415,13 @@ emit_instruction(
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
- lp_emit_fetch_texel_soa_callback emit_fetch_texel,
- void *emit_fetch_texel_context)
+ struct lp_build_sampler_soa *sampler)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
@@ -1299,8 +1436,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.inputs = inputs;
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
- bld.emit_fetch_texel = emit_fetch_texel;
- bld.emit_fetch_texel_context = emit_fetch_texel_context;
+ bld.sampler = sampler;
tgsi_parse_init( &parse, tokens );
@@ -1309,16 +1445,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
- /* Input already interpolated */
+ /* Inputs already interpolated */
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
+ {
unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
- _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- info ? info->mnemonic : "<invalid>");
- }
+ if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ info ? info->mnemonic : "<invalid>");
+ }
+
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 8e0026fd97..606243d6c5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -33,7 +33,7 @@
LLVMTypeRef
-lp_build_elem_type(union lp_type type)
+lp_build_elem_type(struct lp_type type)
{
if (type.floating) {
switch(type.width) {
@@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type)
LLVMTypeRef
-lp_build_vec_type(union lp_type type)
+lp_build_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
return LLVMVectorType(elem_type, type.length);
@@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type)
* type and check for identity.
*/
boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
+lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type)
{
LLVMTypeKind elem_kind;
@@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
+lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type)
{
LLVMTypeRef elem_type;
@@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
boolean
-lp_check_value(union lp_type type, LLVMValueRef val)
+lp_check_value(struct lp_type type, LLVMValueRef val)
{
LLVMTypeRef vec_type;
@@ -143,24 +143,36 @@ lp_check_value(union lp_type type, LLVMValueRef val)
LLVMTypeRef
-lp_build_int_elem_type(union lp_type type)
+lp_build_int_elem_type(struct lp_type type)
{
return LLVMIntType(type.width);
}
LLVMTypeRef
-lp_build_int_vec_type(union lp_type type)
+lp_build_int_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
return LLVMVectorType(elem_type, type.length);
}
+struct lp_type
+lp_int_type(struct lp_type type)
+{
+ struct lp_type int_type;
+
+ memset(&int_type, 0, sizeof int_type);
+ int_type.width = type.width;
+ int_type.length = type.length;
+ return int_type;
+}
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
- union lp_type type)
+ struct lp_type type)
{
bld->builder = builder;
bld->type = type;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 3ce566be64..ee5ca3483c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -56,58 +56,55 @@
* on the types used for intermediate computations, such as signed vs unsigned,
* normalized values, or fixed point.
*/
-union lp_type {
- struct {
- /**
- * Floating-point. Cannot be used with fixed. Integer numbers are
- * represented by this zero.
- */
- unsigned floating:1;
-
- /**
- * Fixed-point. Cannot be used with floating. Integer numbers are
- * represented by this zero.
- */
- unsigned fixed:1;
-
- /**
- * Whether it can represent negative values or not.
- *
- * If this is not set for floating point, it means that all values are
- * assumed to be positive.
- */
- unsigned sign:1;
-
- /**
- * Whether values are normalized to fit [0, 1] interval, or [-1, 1]
- * interval for signed types.
- *
- * For integer types it means the representable integer range should be
- * interpreted as the interval above.
- *
- * For floating and fixed point formats it means the values should be
- * clamped to the interval above.
- */
- unsigned norm:1;
-
- /**
- * Element width.
- *
- * For fixed point values, the fixed point is assumed to be at half the
- * width.
- */
- unsigned width:14;
-
- /**
- * Vector length.
- *
- * width*length should be a power of two greater or equal to eight.
- *
- * @sa LP_MAX_VECTOR_LENGTH
- */
- unsigned length:14;
- };
- uint32_t value;
+struct lp_type {
+ /**
+ * Floating-point. Cannot be used with fixed. Integer numbers are
+ * represented by this zero.
+ */
+ unsigned floating:1;
+
+ /**
+ * Fixed-point. Cannot be used with floating. Integer numbers are
+ * represented by this zero.
+ */
+ unsigned fixed:1;
+
+ /**
+ * Whether it can represent negative values or not.
+ *
+ * If this is not set for floating point, it means that all values are
+ * assumed to be positive.
+ */
+ unsigned sign:1;
+
+ /**
+ * Whether values are normalized to fit [0, 1] interval, or [-1, 1]
+ * interval for signed types.
+ *
+ * For integer types it means the representable integer range should be
+ * interpreted as the interval above.
+ *
+ * For floating and fixed point formats it means the values should be
+ * clamped to the interval above.
+ */
+ unsigned norm:1;
+
+ /**
+ * Element width.
+ *
+ * For fixed point values, the fixed point is assumed to be at half the
+ * width.
+ */
+ unsigned width:14;
+
+ /**
+ * Vector length.
+ *
+ * width*length should be a power of two greater or equal to eight.
+ *
+ * @sa LP_MAX_VECTOR_LENGTH
+ */
+ unsigned length:14;
};
@@ -124,7 +121,7 @@ struct lp_build_context
* This not only describes the input/output LLVM types, but also whether
* to normalize/clamp the results.
*/
- union lp_type type;
+ struct lp_type type;
/** Same as lp_build_undef(type) */
LLVMValueRef undef;
@@ -138,37 +135,41 @@ struct lp_build_context
LLVMTypeRef
-lp_build_elem_type(union lp_type type);
+lp_build_elem_type(struct lp_type type);
LLVMTypeRef
-lp_build_vec_type(union lp_type type);
+lp_build_vec_type(struct lp_type type);
boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type);
+lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type);
boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
+lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type);
boolean
-lp_check_value(union lp_type type, LLVMValueRef val);
+lp_check_value(struct lp_type type, LLVMValueRef val);
LLVMTypeRef
-lp_build_int_elem_type(union lp_type type);
+lp_build_int_elem_type(struct lp_type type);
LLVMTypeRef
-lp_build_int_vec_type(union lp_type type);
+lp_build_int_vec_type(struct lp_type type);
+
+
+struct lp_type
+lp_int_type(struct lp_type type);
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
- union lp_type type);
+ struct lp_type type);
#endif /* !LP_BLD_TYPE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
index 580cca5b46..bdcff94b9b 100644
--- a/src/gallium/drivers/llvmpipe/lp_clear.c
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
util_pack_color(rgba, ps->format, &cv);
lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv);
}
+ llvmpipe->dirty_render_cache = TRUE;
}
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 233d1df0e1..202cb8ef43 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -107,11 +107,16 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
if (llvmpipe->draw)
draw_destroy( llvmpipe->draw );
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
+ pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL);
+ }
+ pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]);
+ pipe_texture_reference(&llvmpipe->texture[i], NULL);
+ }
for (i = 0; i < Elements(llvmpipe->constants); i++) {
if (llvmpipe->constants[i].buffer) {
@@ -141,8 +146,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
return PIPE_REFERENCED_FOR_WRITE;
}
- /* FIXME: we also need to do the same for the texture cache */
-
return PIPE_UNREFERENCED;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index d288460a1b..f7111c1e5c 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -44,15 +44,47 @@
static void
lp_jit_init_globals(struct llvmpipe_screen *screen)
{
- /* struct lp_jit_context */
+ LLVMTypeRef texture_type;
+
+ /* struct lp_jit_texture */
{
LLVMTypeRef elem_types[4];
+
+ elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_WIDTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_HEIGHT);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_DATA);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
+ screen->target, texture_type);
+
+ LLVMAddTypeName(screen->module, "texture", texture_type);
+ }
+
+ /* struct lp_jit_context */
+ {
+ LLVMTypeRef elem_types[5];
LLVMTypeRef context_type;
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
elem_types[2] = LLVMFloatType(); /* alpha_ref_value */
elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
+ elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
screen->target, context_type, 2);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
screen->target, context_type, 3);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
+ screen->target, context_type,
+ LP_JIT_CONTEXT_TEXTURES_INDEX);
LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
screen->target, context_type);
@@ -112,12 +147,17 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
{
char *error = NULL;
+#ifdef LLVM_NATIVE_ARCH
+ LLVMLinkInJIT();
+ LLVMInitializeNativeTarget();
+#endif
+
screen->module = LLVMModuleCreateWithName("llvmpipe");
screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);
if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
- fprintf(stderr, "%s\n", error);
+ _debug_printf("%s\n", error);
LLVMDisposeMessage(error);
abort();
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index a7fb60f9f5..58f716ede2 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -38,11 +38,31 @@
#include "lp_bld_struct.h"
+#include "pipe/p_state.h"
+
struct tgsi_sampler;
struct llvmpipe_screen;
+struct lp_jit_texture
+{
+ uint32_t width;
+ uint32_t height;
+ uint32_t stride;
+ const void *data;
+};
+
+
+enum {
+ LP_JIT_TEXTURE_WIDTH = 0,
+ LP_JIT_TEXTURE_HEIGHT,
+ LP_JIT_TEXTURE_STRIDE,
+ LP_JIT_TEXTURE_DATA
+};
+
+
+
/**
* This structure is passed directly to the generated fragment shader.
*
@@ -60,11 +80,12 @@ struct lp_jit_context
struct tgsi_sampler **samplers;
- /* TODO: alpha reference value */
float alpha_ref_value;
- /* TODO: blend constant color */
+ /* FIXME: store (also?) in floats */
uint8_t *blend_color;
+
+ struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
};
@@ -80,6 +101,11 @@ struct lp_jit_context
#define lp_jit_context_blend_color(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 3, "blend_color")
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+
+#define lp_jit_context_textures(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
typedef void
(*lp_jit_frag_func)(struct lp_jit_context *context,
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 125035771e..0518927458 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,8 +27,6 @@
#include "util/u_memory.h"
-#include "util/u_simple_screen.h"
-#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -67,8 +65,6 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
return 1;
case PIPE_CAP_GLSL:
return 1;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
@@ -86,7 +82,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
return 13; /* max 4Kx4K */
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 8; /* max 128x128x128 */
+ return 9; /* max 256x256x256 */
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 13; /* max 4Kx4K */
case PIPE_CAP_TGSI_CONT_SUPPORTED:
@@ -196,8 +192,7 @@ static void
llvmpipe_destroy_screen( struct pipe_screen *_screen )
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
-
- struct pipe_winsys *winsys = _screen->winsys;
+ struct llvmpipe_winsys *winsys = screen->winsys;
lp_jit_screen_cleanup(screen);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index d145f6d6bb..2d2fc19a65 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -44,6 +44,7 @@
#include "pipe/p_thread.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "lp_bld_debug.h"
#include "lp_tile_cache.h"
#include "lp_tile_soa.h"
@@ -162,12 +163,12 @@ shade_quads(struct llvmpipe_context *llvmpipe,
else
depth = NULL;
- /* TODO: blend color */
+ /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
+ assert(lp_check_alignment(mask, 16));
- assert((((uintptr_t)mask) & 0xf) == 0);
- assert((((uintptr_t)depth) & 0xf) == 0);
- assert((((uintptr_t)color) & 0xf) == 0);
- assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0);
+ assert(lp_check_alignment(depth, 16));
+ assert(lp_check_alignment(color, 16));
+ assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));
/* run shader */
fs->current->jit_function( &llvmpipe->jit_context,
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index fb10329887..7b26ce61a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -36,6 +36,7 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
#include "lp_jit.h"
+#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */
#define LP_NEW_VIEWPORT 0x1
@@ -57,16 +58,20 @@
struct tgsi_sampler;
struct vertex_info;
-
+struct pipe_context;
+struct llvmpipe_context;
struct lp_fragment_shader;
struct lp_fragment_shader_variant_key
{
+ enum pipe_format zsbuf_format;
struct pipe_depth_state depth;
struct pipe_alpha_state alpha;
struct pipe_blend_state blend;
+
+ struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
};
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 6fbb057937..30fb41ea65 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -93,6 +93,23 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
vinfo->num_attribs = 0;
for (i = 0; i < lpfs->info.num_inputs; i++) {
int src;
+ enum interp_mode interp;
+
+ switch (lpfs->info.input_interpolate[i]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ interp = INTERP_CONSTANT;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ interp = INTERP_LINEAR;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ interp = INTERP_PERSPECTIVE;
+ break;
+ default:
+ assert(0);
+ interp = INTERP_LINEAR;
+ }
+
switch (lpfs->info.input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
src = draw_find_vs_output(llvmpipe->draw,
@@ -108,7 +125,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
case TGSI_SEMANTIC_FOG:
src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
break;
case TGSI_SEMANTIC_GENERIC:
@@ -116,7 +133,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
/* this includes texcoords and varying vars */
src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC,
lpfs->info.input_semantic_index[i]);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
break;
default:
@@ -250,7 +267,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_BLEND |
- LP_NEW_DEPTH_STENCIL_ALPHA))
+ LP_NEW_DEPTH_STENCIL_ALPHA |
+ LP_NEW_SAMPLER |
+ LP_NEW_TEXTURE))
llvmpipe_update_fs( llvmpipe );
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 94170bd716..9faed5a0b1 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -85,6 +85,7 @@
#include "lp_context.h"
#include "lp_state.h"
#include "lp_quad.h"
+#include "lp_tex_sample.h"
static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
@@ -130,21 +131,20 @@ generate_pos0(LLVMBuilderRef builder,
* Generate the depth test.
*/
static void
-generate_depth(struct llvmpipe_context *lp,
- LLVMBuilderRef builder,
- const struct pipe_depth_state *state,
- union lp_type src_type,
+generate_depth(LLVMBuilderRef builder,
+ const struct lp_fragment_shader_variant_key *key,
+ struct lp_type src_type,
struct lp_build_mask_context *mask,
LLVMValueRef src,
LLVMValueRef dst_ptr)
{
const struct util_format_description *format_desc;
- union lp_type dst_type;
+ struct lp_type dst_type;
- if(!lp->framebuffer.zsbuf)
+ if(!key->depth.enabled)
return;
- format_desc = util_format_description(lp->framebuffer.zsbuf->format);
+ format_desc = util_format_description(key->zsbuf_format);
assert(format_desc);
/* Pick the depth type. */
@@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp,
#endif
lp_build_depth_test(builder,
- state,
+ &key->depth,
dst_type,
format_desc,
mask,
@@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp,
}
-struct build_fetch_texel_context
-{
- LLVMValueRef context_ptr;
-
- LLVMValueRef samplers_ptr;
-
- /** Coords/texels store */
- LLVMValueRef store_ptr;
-};
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
- uint32_t unit,
- float *store )
-{
- struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
- uint j;
-
- debug_printf("%s sampler: %p (%p) store: %p\n",
- __FUNCTION__,
- sampler, *sampler,
- store );
-
- debug_printf("lodbias %f\n", store[12]);
-
- for (j = 0; j < 4; j++)
- debug_printf("sample %d texcoord %f %f\n",
- j,
- store[0+j],
- store[4+j]);
-#endif
-
- {
- float rgba[NUM_CHANNELS][QUAD_SIZE];
- sampler->get_samples(sampler,
- &store[0],
- &store[4],
- &store[8],
- 0.0f, /*store[12], lodbias */
- rgba);
- memcpy(store, rgba, sizeof rgba);
- }
-
-#if 0
- for (j = 0; j < 4; j++)
- debug_printf("sample %d result %f %f %f %f\n",
- j,
- store[0+j],
- store[4+j],
- store[8+j],
- store[12+j]);
-#endif
-}
-
-
-static void
-emit_fetch_texel( LLVMBuilderRef builder,
- void *context,
- unsigned unit,
- unsigned num_coords,
- const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel)
-{
- struct build_fetch_texel_context *bld = context;
- LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
- LLVMValueRef args[3];
- unsigned i;
-
- if(!bld->samplers_ptr)
- bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr);
-
- if(!bld->store_ptr)
- bld->store_ptr = LLVMBuildArrayAlloca(builder,
- vec_type,
- LLVMConstInt(LLVMInt32Type(), 4, 0),
- "texel_store");
-
- for (i = 0; i < num_coords; i++) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
- LLVMBuildStore(builder, coords[i], coord_ptr);
- }
-
- args[0] = bld->samplers_ptr;
- args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
- args[2] = bld->store_ptr;
-
- lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
- for (i = 0; i < NUM_CHANNELS; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
- texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
- }
-}
-
-
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
*/
@@ -282,11 +181,11 @@ generate_fs(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef context_ptr,
unsigned i,
const struct lp_build_interp_soa_context *interp,
- struct build_fetch_texel_context *sampler,
+ struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef *color,
LLVMValueRef depth_ptr)
@@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef z = interp->pos[2];
+ struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
boolean early_depth_test;
unsigned attrib;
@@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp,
consts_ptr = lp_jit_context_constants(builder, context_ptr);
- lp_build_mask_begin(&mask, builder, type, *pmask);
+ flow = lp_build_flow_create(builder);
+
+ memset(outputs, 0, sizeof outputs);
+
+ lp_build_flow_scope_begin(flow);
+
+ /* Declare the color and z variables */
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ color[chan] = LLVMGetUndef(vec_type);
+ lp_build_flow_scope_declare(flow, &color[chan]);
+ }
+ lp_build_flow_scope_declare(flow, &z);
+
+ lp_build_mask_begin(&mask, flow, type, *pmask);
early_depth_test =
- lp->depth_stencil->depth.enabled &&
- lp->framebuffer.zsbuf &&
- !lp->depth_stencil->alpha.enabled &&
- !lp->fs->info.uses_kill &&
- !lp->fs->info.writes_z;
+ key->depth.enabled &&
+ !key->alpha.enabled &&
+ !shader->info.uses_kill &&
+ !shader->info.writes_z;
if(early_depth_test)
- generate_depth(lp, builder, &key->depth,
+ generate_depth(builder, key,
type, &mask,
z, depth_ptr);
- memset(outputs, 0, sizeof outputs);
-
lp_build_tgsi_soa(builder, tokens, type, &mask,
consts_ptr, interp->pos, interp->inputs,
- outputs, emit_fetch_texel, sampler);
+ outputs, sampler);
for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp,
}
if(!early_depth_test)
- generate_depth(lp, builder, &key->depth,
+ generate_depth(builder, key,
type, &mask,
z, depth_ptr);
lp_build_mask_end(&mask);
+ lp_build_flow_scope_end(flow);
+
+ lp_build_flow_destroy(flow);
+
*pmask = mask.value;
}
@@ -385,13 +299,15 @@ generate_fs(struct llvmpipe_context *lp,
static void
generate_blend(const struct pipe_blend_state *blend,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef context_ptr,
LLVMValueRef mask,
LLVMValueRef *src,
LLVMValueRef dst_ptr)
{
struct lp_build_context bld;
+ struct lp_build_flow_context *flow;
+ struct lp_build_mask_context mask_ctx;
LLVMTypeRef vec_type;
LLVMTypeRef int_vec_type;
LLVMValueRef const_ptr;
@@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend,
LLVMValueRef res[4];
unsigned chan;
+ lp_build_context_init(&bld, builder, type);
+
+ flow = lp_build_flow_create(builder);
+ lp_build_mask_begin(&mask_ctx, flow, type, mask);
+
vec_type = lp_build_vec_type(type);
int_vec_type = lp_build_int_vec_type(type);
- lp_build_context_init(&bld, builder, type);
-
const_ptr = lp_jit_context_blend_color(builder, context_ptr);
const_ptr = LLVMBuildBitCast(builder, const_ptr,
LLVMPointerType(vec_type, 0), "");
@@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend,
lp_build_blend_soa(builder, blend, type, src, dst, con, res);
for(chan = 0; chan < 4; ++chan) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
- lp_build_name(res[chan], "res.%c", "rgba"[chan]);
- res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
- LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+ if(blend->colormask & (1 << chan)) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ lp_build_name(res[chan], "res.%c", "rgba"[chan]);
+ res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
+ LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+ }
}
+
+ lp_build_mask_end(&mask_ctx);
+ lp_build_flow_destroy(flow);
}
@@ -440,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp,
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
struct lp_fragment_shader_variant *variant;
- union lp_type fs_type;
- union lp_type blend_type;
+ struct lp_type fs_type;
+ struct lp_type blend_type;
LLVMTypeRef fs_elem_type;
LLVMTypeRef fs_vec_type;
LLVMTypeRef fs_int_vec_type;
@@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMBuilderRef builder;
LLVMValueRef x0;
LLVMValueRef y0;
- struct build_fetch_texel_context sampler;
+ struct lp_build_sampler_soa *sampler;
struct lp_build_interp_soa_context interp;
LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
@@ -507,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp,
/* TODO: actually pick these based on the fs and color buffer
* characteristics. */
- fs_type.value = 0;
+ memset(&fs_type, 0, sizeof fs_type);
fs_type.floating = TRUE; /* floating point values */
fs_type.sign = TRUE; /* values are signed */
fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
@@ -515,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp,
fs_type.length = 4; /* 4 element per vector */
num_fs = 4;
- blend_type.value = 0;
+ memset(&blend_type, 0, sizeof blend_type);
blend_type.floating = FALSE; /* values are integers */
blend_type.sign = FALSE; /* values are unsigned */
blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */
@@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp,
a0_ptr, dadx_ptr, dady_ptr,
x0, y0, 2, 0);
- memset(&sampler, 0, sizeof sampler);
- sampler.context_ptr = context_ptr;
+#if 0
+ /* C texture sampling */
+ sampler = lp_c_sampler_soa_create(context_ptr);
+#else
+ /* code generated texture sampling */
+ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+#endif
for(i = 0; i < num_fs; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp,
context_ptr,
i,
&interp,
- &sampler,
+ sampler,
&fs_mask[i],
out_color,
depth_ptr_i);
@@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp,
fs_out_color[chan][i] = out_color[chan];
}
+ sampler->destroy(sampler);
+
/*
* Convert the fs's output color and mask to fit to the blending type.
*/
@@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
*/
static void
make_variant_key(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant_key *key)
{
+ unsigned i;
+
memset(key, 0, sizeof *key);
- memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ if(lp->framebuffer.zsbuf &&
+ lp->depth_stencil->depth.enabled) {
+ key->zsbuf_format = lp->framebuffer.zsbuf->format;
+ memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ }
key->alpha.enabled = lp->depth_stencil->alpha.enabled;
if(key->alpha.enabled)
key->alpha.func = lp->depth_stencil->alpha.func;
/* alpha.ref_value is passed in jit_context */
- memcpy(&key->blend, lp->blend, sizeof key->blend);
+ if(lp->framebuffer.cbufs[0]) {
+ const struct util_format_description *format_desc;
+ unsigned chan;
+
+ memcpy(&key->blend, lp->blend, sizeof key->blend);
+
+ format_desc = util_format_description(lp->framebuffer.cbufs[0]->format);
+ assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB);
+
+ /* mask out color channels not present in the color buffer */
+ for(chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ if(swizzle > 4)
+ key->blend.colormask &= ~(1 << chan);
+ }
+ }
+
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+ if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
+ lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]);
}
@@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
struct lp_fragment_shader_variant_key key;
struct lp_fragment_shader_variant *variant;
- make_variant_key(lp, &key);
+ make_variant_key(lp, shader, &key);
variant = shader->variants;
while(variant) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 4fef541b1e..c69d90c723 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
pipe_texture_reference(&llvmpipe->texture[i], tex);
lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex);
+
+ if(tex) {
+ struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
+ struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
+ jit_tex->width = tex->width[0];
+ jit_tex->height = tex->height[0];
+ jit_tex->stride = lp_tex->stride[0];
+ if(!lp_tex->dt)
+ jit_tex->data = lp_tex->data;
+ }
}
llvmpipe->num_textures = num;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index 177a26b7b1..2c29144c03 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -56,7 +56,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
lp_flush_tile_cache(lp->cbuf_cache[i]);
/* assign new */
- lp->framebuffer.cbufs[i] = fb->cbufs[i];
+ pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]);
/* update cache */
lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]);
@@ -81,7 +81,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
}
/* assign new */
- lp->framebuffer.zsbuf = fb->zsbuf;
+ pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf);
/* Tell draw module how deep the Z/depth buffer is */
if (lp->framebuffer.zsbuf) {
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index 69aaae26e0..a88e110c66 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -86,43 +86,43 @@ random_float(void);
void
-dump_type(FILE *fp, union lp_type type);
+dump_type(FILE *fp, struct lp_type type);
double
-read_elem(union lp_type type, const void *src, unsigned index);
+read_elem(struct lp_type type, const void *src, unsigned index);
void
-write_elem(union lp_type type, void *dst, unsigned index, double src);
+write_elem(struct lp_type type, void *dst, unsigned index, double src);
void
-random_elem(union lp_type type, void *dst, unsigned index);
+random_elem(struct lp_type type, void *dst, unsigned index);
void
-read_vec(union lp_type type, const void *src, double *dst);
+read_vec(struct lp_type type, const void *src, double *dst);
void
-write_vec(union lp_type type, void *dst, const double *src);
+write_vec(struct lp_type type, void *dst, const double *src);
void
-random_vec(union lp_type type, void *dst);
+random_vec(struct lp_type type, void *dst);
boolean
-compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps);
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps);
boolean
-compare_vec(union lp_type type, const void *res, const void *ref);
+compare_vec(struct lp_type type, const void *res, const void *ref);
void
-dump_vec(FILE *fp, union lp_type type, const void *src);
+dump_vec(FILE *fp, struct lp_type type, const void *src);
#endif /* !LP_TEST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 8dfad468e3..94b661dcba 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -80,7 +80,7 @@ static void
write_tsv_row(FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type,
+ struct lp_type type,
double cycles,
boolean success)
{
@@ -125,7 +125,7 @@ static void
dump_blend_type(FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type)
+ struct lp_type type)
{
fprintf(fp, "%s", mode ? "soa" : "aos");
@@ -153,7 +153,7 @@ static LLVMValueRef
add_blend_test(LLVMModuleRef module,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type)
+ struct lp_type type)
{
LLVMTypeRef ret_type;
LLVMTypeRef vec_type;
@@ -467,7 +467,7 @@ test_one(unsigned verbose,
FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type)
+ struct lp_type type)
{
LLVMModuleRef module = NULL;
LLVMValueRef func = NULL;
@@ -765,10 +765,10 @@ blend_funcs[] = {
};
-const union lp_type blend_types[] = {
+const struct lp_type blend_types[] = {
/* float, fixed, sign, norm, width, len */
- {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* f32 x 4 */
- {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */
+ { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
+ { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
};
@@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp)
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
enum vector_mode mode;
- const union lp_type *type;
+ const struct lp_type *type;
bool success = TRUE;
for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
@@ -841,27 +841,27 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
enum vector_mode mode;
- const union lp_type *type;
+ const struct lp_type *type;
unsigned long i;
bool success = TRUE;
for(i = 0; i < n; ++i) {
- rgb_func = &blend_funcs[random() % num_funcs];
- alpha_func = &blend_funcs[random() % num_funcs];
- rgb_src_factor = &blend_factors[random() % num_factors];
- alpha_src_factor = &blend_factors[random() % num_factors];
+ rgb_func = &blend_funcs[rand() % num_funcs];
+ alpha_func = &blend_funcs[rand() % num_funcs];
+ rgb_src_factor = &blend_factors[rand() % num_factors];
+ alpha_src_factor = &blend_factors[rand() % num_factors];
do {
- rgb_dst_factor = &blend_factors[random() % num_factors];
+ rgb_dst_factor = &blend_factors[rand() % num_factors];
} while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
do {
- alpha_dst_factor = &blend_factors[random() % num_factors];
+ alpha_dst_factor = &blend_factors[rand() % num_factors];
} while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
- mode = random() & 1;
+ mode = rand() & 1;
- type = &blend_types[random() % num_types];
+ type = &blend_types[rand() % num_types];
memset(&blend, 0, sizeof blend);
blend.blend_enable = 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index e6489834af..9dcf58e5dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -59,8 +59,8 @@ write_tsv_header(FILE *fp)
static void
write_tsv_row(FILE *fp,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
double cycles,
boolean success)
{
@@ -80,8 +80,8 @@ write_tsv_row(FILE *fp,
static void
dump_conv_types(FILE *fp,
- union lp_type src_type,
- union lp_type dst_type)
+ struct lp_type src_type,
+ struct lp_type dst_type)
{
fprintf(fp, "src_type=");
dump_type(fp, src_type);
@@ -96,8 +96,8 @@ dump_conv_types(FILE *fp,
static LLVMValueRef
add_conv_test(LLVMModuleRef module,
- union lp_type src_type, unsigned num_srcs,
- union lp_type dst_type, unsigned num_dsts)
+ struct lp_type src_type, unsigned num_srcs,
+ struct lp_type dst_type, unsigned num_dsts)
{
LLVMTypeRef args[2];
LLVMValueRef func;
@@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module,
static boolean
test_one(unsigned verbose,
FILE *fp,
- union lp_type src_type,
- union lp_type dst_type)
+ struct lp_type src_type,
+ struct lp_type dst_type)
{
LLVMModuleRef module = NULL;
LLVMValueRef func = NULL;
@@ -343,35 +343,35 @@ test_one(unsigned verbose,
}
-const union lp_type conv_types[] = {
+const struct lp_type conv_types[] = {
/* float, fixed, sign, norm, width, len */
- {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }},
- {{ TRUE, FALSE, TRUE, FALSE, 32, 4 }},
- {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }},
- {{ TRUE, FALSE, FALSE, FALSE, 32, 4 }},
+ { TRUE, FALSE, TRUE, TRUE, 32, 4 },
+ { TRUE, FALSE, TRUE, FALSE, 32, 4 },
+ { TRUE, FALSE, FALSE, TRUE, 32, 4 },
+ { TRUE, FALSE, FALSE, FALSE, 32, 4 },
/* TODO: test fixed formats too */
- {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }},
-
- {{ FALSE, FALSE, TRUE, TRUE, 32, 4 }},
- {{ FALSE, FALSE, TRUE, FALSE, 32, 4 }},
- {{ FALSE, FALSE, FALSE, TRUE, 32, 4 }},
- {{ FALSE, FALSE, FALSE, FALSE, 32, 4 }},
-
- {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }},
-
- {{ FALSE, FALSE, TRUE, TRUE, 8, 16 }},
- {{ FALSE, FALSE, TRUE, FALSE, 8, 16 }},
- {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }},
- {{ FALSE, FALSE, FALSE, FALSE, 8, 16 }},
+ { FALSE, FALSE, TRUE, TRUE, 16, 8 },
+ { FALSE, FALSE, TRUE, FALSE, 16, 8 },
+ { FALSE, FALSE, FALSE, TRUE, 16, 8 },
+ { FALSE, FALSE, FALSE, FALSE, 16, 8 },
+
+ { FALSE, FALSE, TRUE, TRUE, 32, 4 },
+ { FALSE, FALSE, TRUE, FALSE, 32, 4 },
+ { FALSE, FALSE, FALSE, TRUE, 32, 4 },
+ { FALSE, FALSE, FALSE, FALSE, 32, 4 },
+
+ { FALSE, FALSE, TRUE, TRUE, 16, 8 },
+ { FALSE, FALSE, TRUE, FALSE, 16, 8 },
+ { FALSE, FALSE, FALSE, TRUE, 16, 8 },
+ { FALSE, FALSE, FALSE, FALSE, 16, 8 },
+
+ { FALSE, FALSE, TRUE, TRUE, 8, 16 },
+ { FALSE, FALSE, TRUE, FALSE, 8, 16 },
+ { FALSE, FALSE, FALSE, TRUE, 8, 16 },
+ { FALSE, FALSE, FALSE, FALSE, 8, 16 },
};
@@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
boolean
test_all(unsigned verbose, FILE *fp)
{
- const union lp_type *src_type;
- const union lp_type *dst_type;
+ const struct lp_type *src_type;
+ const struct lp_type *dst_type;
bool success = TRUE;
for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) {
@@ -407,16 +407,16 @@ test_all(unsigned verbose, FILE *fp)
boolean
test_some(unsigned verbose, FILE *fp, unsigned long n)
{
- const union lp_type *src_type;
- const union lp_type *dst_type;
+ const struct lp_type *src_type;
+ const struct lp_type *dst_type;
unsigned long i;
bool success = TRUE;
for(i = 0; i < n; ++i) {
- src_type = &conv_types[random() % num_types];
+ src_type = &conv_types[rand() % num_types];
do {
- dst_type = &conv_types[random() % num_types];
+ dst_type = &conv_types[rand() % num_types];
} while (src_type == dst_type || src_type->norm != dst_type->norm);
if(!test_one(verbose, fp, *src_type, *dst_type))
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 1d192355ee..7d83f899e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module,
lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
- rgba = lp_build_load_rgba(builder, format, ptr);
+ rgba = lp_build_load_rgba_aos(builder, format, ptr);
LLVMBuildStore(builder, rgba, rgba_ptr);
lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
@@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module,
rgba = LLVMBuildLoad(builder, rgba_ptr, "");
- lp_build_store_rgba(builder, format, ptr, rgba);
+ lp_build_store_rgba_aos(builder, format, ptr, rgba);
LLVMBuildRetVoid(builder);
@@ -264,6 +264,11 @@ int main(int argc, char **argv)
unsigned i;
int ret;
+#ifdef LLVM_NATIVE_ARCH
+ LLVMLinkInJIT();
+ LLVMInitializeNativeTarget();
+#endif
+
for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
if(!test_format(&test_cases[i]))
ret = 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 49213fb4f0..f07fa256f1 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -40,7 +40,7 @@
void
dump_type(FILE *fp,
- union lp_type type)
+ struct lp_type type)
{
fprintf(fp, "%s%s%u%sx%u",
type.sign ? (type.floating || type.fixed ? "" : "s") : "u",
@@ -52,7 +52,7 @@ dump_type(FILE *fp,
double
-read_elem(union lp_type type, const void *src, unsigned index)
+read_elem(struct lp_type type, const void *src, unsigned index)
{
double scale = lp_const_scale(type);
double value;
@@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index)
void
-write_elem(union lp_type type, void *dst, unsigned index, double value)
+write_elem(struct lp_type type, void *dst, unsigned index, double value)
{
assert(index < type.length);
if(!type.sign && value < 0.0)
@@ -184,11 +184,11 @@ write_elem(union lp_type type, void *dst, unsigned index, double value)
void
-random_elem(union lp_type type, void *dst, unsigned index)
+random_elem(struct lp_type type, void *dst, unsigned index)
{
double value;
assert(index < type.length);
- value = (double)random()/(double)RAND_MAX;
+ value = (double)rand()/(double)RAND_MAX;
if(!type.norm) {
unsigned long long mask;
if (type.floating)
@@ -199,17 +199,17 @@ random_elem(union lp_type type, void *dst, unsigned index)
mask = ((unsigned long long)1 << (type.width - 1)) - 1;
else
mask = ((unsigned long long)1 << type.width) - 1;
- value += (double)(mask & random());
+ value += (double)(mask & rand());
}
if(!type.sign)
- if(random() & 1)
+ if(rand() & 1)
value = -value;
write_elem(type, dst, index, value);
}
void
-read_vec(union lp_type type, const void *src, double *dst)
+read_vec(struct lp_type type, const void *src, double *dst)
{
unsigned i;
for (i = 0; i < type.length; ++i)
@@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst)
void
-write_vec(union lp_type type, void *dst, const double *src)
+write_vec(struct lp_type type, void *dst, const double *src)
{
unsigned i;
for (i = 0; i < type.length; ++i)
@@ -229,12 +229,12 @@ write_vec(union lp_type type, void *dst, const double *src)
float
random_float(void)
{
- return (float)((double)random()/(double)RAND_MAX);
+ return (float)((double)rand()/(double)RAND_MAX);
}
void
-random_vec(union lp_type type, void *dst)
+random_vec(struct lp_type type, void *dst)
{
unsigned i;
for (i = 0; i < type.length; ++i)
@@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst)
boolean
-compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps)
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)
{
unsigned i;
for (i = 0; i < type.length; ++i) {
@@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl
boolean
-compare_vec(union lp_type type, const void *res, const void *ref)
+compare_vec(struct lp_type type, const void *res, const void *ref)
{
double eps = lp_const_eps(type);
return compare_vec_with_eps(type, res, ref, eps);
@@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref)
void
-dump_vec(FILE *fp, union lp_type type, const void *src)
+dump_vec(FILE *fp, struct lp_type type, const void *src)
{
unsigned i;
for (i = 0; i < type.length; ++i) {
@@ -365,6 +365,11 @@ int main(int argc, char **argv)
n = atoi(argv[i]);
}
+#ifdef LLVM_NATIVE_ARCH
+ LLVMLinkInJIT();
+ LLVMInitializeNativeTarget();
+#endif
+
if(fp) {
/* Warm up the caches */
test_some(0, NULL, 100);
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 23a94b5b0d..773e848242 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc)
if (lpt->timestamp != tc->timestamp) {
/* texture was modified, invalidate all cached tiles */
uint i;
- _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
+ debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
for (i = 0; i < NUM_ENTRIES; i++) {
tc->entries[i].addr.bits.invalid = 1;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 628ec3f1ef..9ad1bde956 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -29,10 +29,13 @@
#define LP_TEX_SAMPLE_H
+#include <llvm-c/Core.h>
+
#include "tgsi/tgsi_exec.h"
struct llvmpipe_tex_tile_cache;
+struct lp_sampler_static_state;
/**
@@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
+/**
+ * Texture sampling code generator that just calls lp_get_samples C function
+ * for the actual sampling computation.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr);
+
+
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key,
+ LLVMValueRef context_ptr);
+
+
#endif /* LP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index 94eb6dad5a..a1365a045f 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1578,3 +1578,136 @@ out:
tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
}
+
+void PIPE_CDECL
+lp_fetch_texel_soa( struct tgsi_sampler **samplers,
+ uint32_t unit,
+ float *store )
+{
+ struct tgsi_sampler *sampler = samplers[unit];
+
+#if 0
+ uint j;
+
+ debug_printf("%s sampler: %p (%p) store: %p\n",
+ __FUNCTION__,
+ sampler, *sampler,
+ store );
+
+ debug_printf("lodbias %f\n", store[12]);
+
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d texcoord %f %f\n",
+ j,
+ store[0+j],
+ store[4+j]);
+#endif
+
+ {
+ float rgba[NUM_CHANNELS][QUAD_SIZE];
+ sampler->get_samples(sampler,
+ &store[0],
+ &store[4],
+ &store[8],
+ 0.0f, /*store[12], lodbias */
+ rgba);
+ memcpy(store, rgba, sizeof rgba);
+ }
+
+#if 0
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d result %f %f %f %f\n",
+ j,
+ store[0+j],
+ store[4+j],
+ store[8+j],
+ store[12+j]);
+#endif
+}
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_tgsi.h"
+
+
+struct lp_c_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ LLVMValueRef context_ptr;
+
+ LLVMValueRef samplers_ptr;
+
+ /** Coords/texels store */
+ LLVMValueRef store_ptr;
+};
+
+
+static void
+lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+static void
+lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
+ LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
+ LLVMValueRef args[3];
+ unsigned i;
+
+ if(!sampler->samplers_ptr)
+ sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
+
+ if(!sampler->store_ptr)
+ sampler->store_ptr = LLVMBuildArrayAlloca(builder,
+ vec_type,
+ LLVMConstInt(LLVMInt32Type(), 4, 0),
+ "texel_store");
+
+ for (i = 0; i < num_coords; i++) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+ LLVMBuildStore(builder, coords[i], coord_ptr);
+ }
+
+ args[0] = sampler->samplers_ptr;
+ args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ args[2] = sampler->store_ptr;
+
+ lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
+
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+ texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
+ }
+}
+
+
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr)
+{
+ struct lp_c_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_c_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_c_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
+ sampler->context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
new file mode 100644
index 0000000000..d2a6ae21f5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - LLVM pipe driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_sample.h"
+#include "lp_bld_tgsi.h"
+#include "lp_state.h"
+#include "lp_tex_sample.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in lp_jit_context
+ * and lp_jit_texture and the sampler code generator. It provides the
+ * texture layout information required by the texture sampler code generator
+ * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime.
+ */
+struct llvmpipe_sampler_dynamic_state
+{
+ struct lp_sampler_dynamic_state base;
+
+ const struct lp_sampler_static_state *static_state;
+
+ LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct lp_llvm_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ struct llvmpipe_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_texture_member(struct lp_sampler_dynamic_state *base,
+ LLVMBuilderRef builder,
+ unsigned unit,
+ unsigned member_index,
+ const char *member_name)
+{
+ struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ /* context[0].textures */
+ indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0);
+ /* context[0].textures[unit] */
+ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ /* context[0].textures[unit].member */
+ indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+ ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+ res = LLVMBuildLoad(builder, ptr, "");
+
+ lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+ return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to fetch
+ * the members of lp_jit_texture to fulfill the sampler code generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture sampler code
+ * generator a reusable module without dependencies to llvmpipe internals.
+ */
+#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \
+ static LLVMValueRef \
+ lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \
+ LLVMBuilderRef builder, \
+ unsigned unit) \
+ { \
+ return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \
+ }
+
+
+LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH)
+LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT)
+LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE)
+LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA)
+
+
+static void
+lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+static void
+lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ lp_build_sample_soa(builder,
+ &sampler->dynamic_state.static_state[unit],
+ &sampler->dynamic_state.base,
+ type,
+ unit,
+ num_coords,
+ coords,
+ lodbias,
+ texel);
+}
+
+
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr)
+{
+ struct lp_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_llvm_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel;
+ sampler->dynamic_state.base.width = lp_llvm_texture_width;
+ sampler->dynamic_state.base.height = lp_llvm_texture_height;
+ sampler->dynamic_state.base.stride = lp_llvm_texture_stride;
+ sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr;
+ sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 143afec3d3..0c06b659a1 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -44,10 +44,53 @@
#include "lp_tile_cache.h"
+#define MAX_WIDTH 4096
+#define MAX_HEIGHT 4096
+
+
+enum llvmpipe_tile_status
+{
+ LP_TILE_STATUS_UNDEFINED = 0,
+ LP_TILE_STATUS_CLEAR = 1,
+ LP_TILE_STATUS_DEFINED = 2
+};
+
+
+struct llvmpipe_cached_tile
+{
+ enum llvmpipe_tile_status status;
+
+ /** color in SOA format */
+ uint8_t *color;
+};
+
+
+struct llvmpipe_tile_cache
+{
+ struct pipe_screen *screen;
+ struct pipe_surface *surface; /**< the surface we're caching */
+ struct pipe_transfer *transfer;
+ void *transfer_map;
+
+ struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE];
+
+ uint8_t clear_color[4]; /**< for color bufs */
+ uint clear_val; /**< for z+stencil, or packed color clear value */
+
+ struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */
+};
+
+
struct llvmpipe_tile_cache *
lp_create_tile_cache( struct pipe_screen *screen )
{
struct llvmpipe_tile_cache *tc;
+ int maxLevels, maxTexSize;
+
+ /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */
+ maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+ maxTexSize = 1 << (maxLevels - 1);
+ assert(MAX_WIDTH >= maxTexSize);
tc = CALLOC_STRUCT( llvmpipe_tile_cache );
if(!tc)
@@ -225,11 +268,14 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
tc->clear_val);
screen->transfer_unmap(screen, pt);
+
+ tile->status = LP_TILE_STATUS_UNDEFINED;
break;
}
case LP_TILE_STATUS_DEFINED:
lp_put_tile_rgba_soa(pt, x, y, tile->color);
+ tile->status = LP_TILE_STATUS_UNDEFINED;
break;
}
}
@@ -257,7 +303,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
case LP_TILE_STATUS_UNDEFINED:
/* get new tile data from transfer */
- lp_get_tile_rgba_soa(pt, x, y, tile->color);
+ lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color);
tile->status = LP_TILE_STATUS_DEFINED;
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
index 6d8ba5ece7..161bab3799 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
@@ -33,42 +33,7 @@
#include "lp_tile_soa.h"
-enum llvmpipe_tile_status
-{
- LP_TILE_STATUS_UNDEFINED = 0,
- LP_TILE_STATUS_CLEAR = 1,
- LP_TILE_STATUS_DEFINED = 2
-};
-
-
-struct llvmpipe_cached_tile
-{
- enum llvmpipe_tile_status status;
-
- /** color in SOA format */
- uint8_t *color;
-};
-
-
-/** XXX move these */
-#define MAX_WIDTH 2048
-#define MAX_HEIGHT 2048
-
-
-struct llvmpipe_tile_cache
-{
- struct pipe_screen *screen;
- struct pipe_surface *surface; /**< the surface we're caching */
- struct pipe_transfer *transfer;
- void *transfer_map;
-
- struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE];
-
- uint8_t clear_color[4]; /**< for color bufs */
- uint clear_val; /**< for z+stencil, or packed color clear value */
-
- struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */
-};
+struct llvmpipe_tile_cache; /* opaque */
extern struct llvmpipe_tile_cache *
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
index ff2febb668..170ce3eb7e 100644
--- a/src/gallium/drivers/nv04/nv04_screen.c
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -16,8 +16,6 @@ nv04_screen_get_param(struct pipe_screen *screen, int param)
return 0;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
index 4469b22d91..ee5901e743 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -15,8 +15,6 @@ nv10_screen_get_param(struct pipe_screen *screen, int param)
return 0;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
index e6924ad71e..4eeacd1afd 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -15,8 +15,6 @@ nv20_screen_get_param(struct pipe_screen *screen, int param)
return 0;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index f8285e4455..41af38450b 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -22,8 +22,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param)
return 1;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index 5d2a4216c5..bd13dfddd1 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -21,8 +21,6 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param)
return 1;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 6e8f4f9750..fca078b174 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -37,11 +37,12 @@ nv50_flush(struct pipe_context *pipe, unsigned flags,
/* We need this in the ddx for reliable composite, not sure what we're
* actually flushing. We generate all our own flushes with flags = 0. */
- WAIT_RING(chan, 3);
+ WAIT_RING(chan, 2);
BEGIN_RING(chan, eng2d, 0x0110, 1);
OUT_RING (chan, 0);
- FIRE_RING(chan);
+ if (flags & PIPE_FLUSH_FRAME)
+ FIRE_RING(chan);
}
static void
@@ -110,6 +111,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv50->pipe.is_texture_referenced = nv50_is_texture_referenced;
nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced;
+ screen->base.channel->user_private = nv50;
+ screen->base.channel->flush_notify = nv50_state_flush_notify;
+
nv50_init_surface_functions(nv50);
nv50_init_state_functions(nv50);
nv50_init_query_functions(nv50);
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 1e9e8e49bf..4608854d71 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -116,6 +116,7 @@ struct nv50_state {
unsigned miptree_nr;
struct nouveau_stateobj *vertprog;
struct nouveau_stateobj *fragprog;
+ struct nouveau_stateobj *programs;
struct nouveau_stateobj *vtxfmt;
struct nouveau_stateobj *vtxbuf;
struct nouveau_stateobj *vtxattr;
@@ -190,10 +191,12 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
/* nv50_program.c */
extern void nv50_vertprog_validate(struct nv50_context *nv50);
extern void nv50_fragprog_validate(struct nv50_context *nv50);
+extern void nv50_linkage_validate(struct nv50_context *nv50);
extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
/* nv50_state_validate.c */
extern boolean nv50_state_validate(struct nv50_context *nv50);
+extern void nv50_state_flush_notify(struct nouveau_channel *chan);
/* nv50_tex.c */
extern void nv50_tex_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 03b9243b82..93479a0314 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -148,6 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->image_nr = 1;
mt->level[0].pitch = *stride;
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+ mt->level[0].tile_mode = bo->tile_mode;
nouveau_bo_ref(bo, &mt->base.bo);
return &mt->base.base;
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 4a838529de..576d075318 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -90,6 +90,10 @@ struct nv50_reg {
int acc; /* instruction where this reg is last read (first insn == 1) */
};
+/* arbitrary limits */
+#define MAX_IF_DEPTH 4
+#define MAX_LOOP_DEPTH 4
+
struct nv50_pc {
struct nv50_program *p;
@@ -112,11 +116,22 @@ struct nv50_pc {
struct nv50_reg *temp_temp[16];
unsigned temp_temp_nr;
+ /* broadcast and destination replacement regs */
+ struct nv50_reg *r_brdc;
+ struct nv50_reg *r_dst[4];
+
unsigned interp_mode[32];
/* perspective interpolation registers */
struct nv50_reg *iv_p;
struct nv50_reg *iv_c;
+ struct nv50_program_exec *if_cond;
+ struct nv50_program_exec *if_insn[MAX_IF_DEPTH];
+ struct nv50_program_exec *br_join[MAX_IF_DEPTH];
+ struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */
+ int if_lvl, loop_lvl;
+ unsigned loop_pos[MAX_LOOP_DEPTH];
+
/* current instruction and total number of insns */
unsigned insn_cur;
unsigned insn_nr;
@@ -124,6 +139,25 @@ struct nv50_pc {
boolean allow32;
};
+static INLINE void
+ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
+{
+ reg->type = type;
+ reg->index = index;
+ reg->hw = hw;
+ reg->neg = 0;
+ reg->rhw = -1;
+ reg->acc = 0;
+}
+
+static INLINE unsigned
+popcnt4(uint32_t val)
+{
+ static const unsigned cnt[16]
+ = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+ return cnt[val & 0xf];
+}
+
static void
alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
{
@@ -173,6 +207,10 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
assert(0);
}
+/* XXX: For shaders that aren't executed linearly (e.g. shaders that
+ * contain loops), we need to assign all hw regs to TGSI TEMPs early,
+ * lest we risk temp_temps overwriting regs alloc'd "later".
+ */
static struct nv50_reg *
alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
{
@@ -184,11 +222,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
if (!pc->r_temp[i]) {
- r = CALLOC_STRUCT(nv50_reg);
- r->type = P_TEMP;
- r->index = -1;
- r->hw = i;
- r->rhw = -1;
+ r = MALLOC_STRUCT(nv50_reg);
+ ctor_reg(r, P_TEMP, -1, i);
pc->r_temp[i] = r;
return r;
}
@@ -254,10 +289,8 @@ alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
return alloc_temp4(pc, dst, idx + 4);
for (i = 0; i < 4; i++) {
- dst[i] = CALLOC_STRUCT(nv50_reg);
- dst[i]->type = P_TEMP;
- dst[i]->index = -1;
- dst[i]->hw = idx + i;
+ dst[i] = MALLOC_STRUCT(nv50_reg);
+ ctor_reg(dst[i], P_TEMP, -1, idx + i);
pc->r_temp[idx + i] = dst[i];
}
@@ -309,7 +342,7 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
static struct nv50_reg *
alloc_immd(struct nv50_pc *pc, float f)
{
- struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
+ struct nv50_reg *r = MALLOC_STRUCT(nv50_reg);
unsigned hw;
for (hw = 0; hw < pc->immd_nr * 4; hw++)
@@ -319,9 +352,7 @@ alloc_immd(struct nv50_pc *pc, float f)
if (hw == pc->immd_nr * 4)
hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
- r->type = P_IMMD;
- r->hw = hw;
- r->index = -1;
+ ctor_reg(r, P_IMMD, -1, hw);
return r;
}
@@ -543,6 +574,22 @@ check_swap_src_0_1(struct nv50_pc *pc,
}
static void
+set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src,
+ struct nv50_program_exec *e)
+{
+ struct nv50_reg *temp;
+
+ if (src->type != P_TEMP) {
+ temp = temp_temp(pc);
+ emit_mov(pc, temp, src);
+ src = temp;
+ }
+
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 9);
+}
+
+static void
set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
{
if (src->type == P_ATTR) {
@@ -744,7 +791,11 @@ emit_flop(struct nv50_pc *pc, unsigned sub,
}
set_dst(pc, dst, e);
- set_src_0(pc, src, e);
+
+ if (sub == 0 || sub == 2)
+ set_src_0_restricted(pc, src, e);
+ else
+ set_src_0(pc, src, e);
emit(pc, e);
}
@@ -786,16 +837,20 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
#define CVTOP_SAT 0x08
#define CVTOP_ABS 0x10
+/* 0x04 == 32 bit */
+/* 0x40 == dst is float */
+/* 0x80 == src is float */
#define CVT_F32_F32 0xc4
#define CVT_F32_S32 0x44
#define CVT_F32_U32 0x64
#define CVT_S32_F32 0x8c
#define CVT_S32_S32 0x0c
-#define CVT_F32_F32_ROP 0xcc
+#define CVT_NEG 0x20
+#define CVT_RI 0x08
static void
emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
- int wp, unsigned cop, unsigned fmt)
+ int wp, unsigned cvn, unsigned fmt)
{
struct nv50_program_exec *e;
@@ -804,7 +859,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
e->inst[0] |= 0xa0000000;
e->inst[1] |= 0x00004000;
- e->inst[1] |= (cop << 16);
+ e->inst[1] |= (cvn << 16);
e->inst[1] |= (fmt << 24);
set_src_0(pc, src, e);
@@ -821,53 +876,85 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
emit(pc, e);
}
+/* nv50 Condition codes:
+ * 0x1 = LT
+ * 0x2 = EQ
+ * 0x3 = LE
+ * 0x4 = GT
+ * 0x5 = NE
+ * 0x6 = GE
+ * 0x7 = set condition code ? (used before bra.lt/le/gt/ge)
+ * 0x8 = unordered bit (allows NaN)
+ */
static void
-emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
+emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
struct nv50_reg *src0, struct nv50_reg *src1)
{
+ static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
struct nv50_program_exec *e = exec(pc);
- unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
struct nv50_reg *rdst;
- assert(c_op <= 7);
+ assert(ccode < 16);
if (check_swap_src_0_1(pc, &src0, &src1))
- c_op = inv_cop[c_op];
+ ccode = cc_swapped[ccode & 7] | (ccode & 8);
rdst = dst;
- if (dst->type != P_TEMP)
+ if (dst && dst->type != P_TEMP)
dst = alloc_temp(pc, NULL);
/* set.u32 */
set_long(pc, e);
e->inst[0] |= 0xb0000000;
- e->inst[1] |= (3 << 29);
- e->inst[1] |= (c_op << 14);
- /*XXX: breaks things, .u32 by default?
- * decuda will disasm as .u16 and use .lo/.hi regs, but this
- * doesn't seem to match what the hw actually does.
- inst[1] |= 0x04000000; << breaks things.. .u32 by default?
+ e->inst[1] |= 0x60000000 | (ccode << 14);
+
+ /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but
+ * that doesn't seem to match what the hw actually does
+ e->inst[1] |= 0x04000000; << breaks things, u32 by default ?
*/
- set_dst(pc, dst, e);
+
+ if (wp >= 0)
+ set_pred_wr(pc, 1, wp, e);
+ if (dst)
+ set_dst(pc, dst, e);
+ else {
+ e->inst[0] |= 0x000001fc;
+ e->inst[1] |= 0x00000008;
+ }
+
set_src_0(pc, src0, e);
set_src_1(pc, src1, e);
- emit(pc, e);
- /* cvt.f32.u32 */
- e = exec(pc);
- e->inst[0] = 0xa0000001;
- e->inst[1] = 0x64014780;
- set_dst(pc, rdst, e);
- set_src_0(pc, dst, e);
emit(pc, e);
+ pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */
- if (dst != rdst)
+ /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */
+ if (rdst)
+ emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32);
+ if (rdst && rdst != dst)
free_temp(pc, dst);
}
+static INLINE unsigned
+map_tgsi_setop_cc(unsigned op)
+{
+ switch (op) {
+ case TGSI_OPCODE_SLT: return 0x1;
+ case TGSI_OPCODE_SGE: return 0x6;
+ case TGSI_OPCODE_SEQ: return 0x2;
+ case TGSI_OPCODE_SGT: return 0x4;
+ case TGSI_OPCODE_SLE: return 0x3;
+ case TGSI_OPCODE_SNE: return 0xd;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
static INLINE void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP);
+ emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI);
}
static void
@@ -890,6 +977,12 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
}
+static INLINE void
+emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32);
+}
+
static void
emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
struct nv50_reg **src)
@@ -1073,10 +1166,11 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
emit(pc, e);
#if 1
- if (mask & 1) emit_mov(pc, dst[0], t[0]);
- if (mask & 2) emit_mov(pc, dst[1], t[1]);
- if (mask & 4) emit_mov(pc, dst[2], t[2]);
- if (mask & 8) emit_mov(pc, dst[3], t[3]);
+ c = 0;
+ if (mask & 1) emit_mov(pc, dst[0], t[c++]);
+ if (mask & 2) emit_mov(pc, dst[1], t[c++]);
+ if (mask & 4) emit_mov(pc, dst[2], t[c++]);
+ if (mask & 8) emit_mov(pc, dst[3], t[c]);
free_temp4(pc, t);
#else
@@ -1093,6 +1187,38 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
}
static void
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
+ struct nv50_program_exec **join)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ if (join) {
+ set_long(pc, e);
+ e->inst[0] |= 0xa0000002;
+ emit(pc, e);
+ *join = e;
+ e = exec(pc);
+ }
+
+ set_long(pc, e);
+ e->inst[0] |= 0x10000002;
+ if (pred >= 0)
+ set_pred(pc, cc, pred, e);
+ emit(pc, e);
+}
+
+static void
+emit_nop(struct nv50_pc *pc)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xf0000000;
+ set_long(pc, e);
+ e->inst[1] = 0xe0000000;
+ emit(pc, e);
+}
+
+static void
convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
{
unsigned q = 0, m = ~0;
@@ -1159,6 +1285,70 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
}
}
+/* Return a read mask for source registers deduced from opcode & write mask. */
+static unsigned
+nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
+{
+ unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask;
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
+ case TGSI_OPCODE_DP3:
+ return 0x7;
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_KIL: /* WriteMask ignored */
+ return 0xf;
+ case TGSI_OPCODE_DST:
+ return mask & (c ? 0xa : 0x6);
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SCS:
+ return 0x1;
+ case TGSI_OPCODE_LIT:
+ return 0xb;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ {
+ const struct tgsi_instruction_ext_texture *tex;
+
+ assert(insn->Instruction.Extended);
+ tex = &insn->InstructionExtTexture;
+
+ mask = 0x7;
+ if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
+ mask |= 0x8;
+
+ switch (tex->Texture) {
+ case TGSI_TEXTURE_1D:
+ mask &= 0x9;
+ break;
+ case TGSI_TEXTURE_2D:
+ mask &= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+ return mask;
+ case TGSI_OPCODE_XPD:
+ x = 0;
+ if (mask & 1) x |= 0x6;
+ if (mask & 2) x |= 0x5;
+ if (mask & 4) x |= 0x3;
+ return x;
+ default:
+ break;
+ }
+
+ return mask;
+}
+
static struct nv50_reg *
tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
@@ -1258,93 +1448,175 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
return r;
}
-/* returns TRUE if instruction can overwrite sources before they're read */
+/* return TRUE for ops that produce only a single result */
static boolean
-direct2dest_op(const struct tgsi_full_instruction *insn)
+is_scalar_op(unsigned op)
{
- if (insn->Instruction.Saturate)
- return FALSE;
-
- switch (insn->Instruction.Opcode) {
+ switch (op) {
case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_DP2:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
case TGSI_OPCODE_DPH:
- case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SCS:
case TGSI_OPCODE_SIN:
+ /*
+ case TGSI_OPCODE_KIL:
+ case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_SCS:
+ */
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/* Returns a bitmask indicating which dst components depend
+ * on source s, component c (reverse of nv50_tgsi_src_mask).
+ */
+static unsigned
+nv50_tgsi_dst_revdep(unsigned op, int s, int c)
+{
+ if (is_scalar_op(op))
+ return 0x1;
+
+ switch (op) {
+ case TGSI_OPCODE_DST:
+ return (1 << c) & (s ? 0xa : 0x6);
+ case TGSI_OPCODE_XPD:
+ switch (c) {
+ case 0: return 0x6;
+ case 1: return 0x5;
+ case 2: return 0x3;
+ case 3: return 0x0;
+ default:
+ assert(0);
+ return 0x0;
+ }
+ case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_SCS:
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXP:
- return FALSE;
+ /* these take care of dangerous swizzles themselves */
+ return 0x0;
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_KIL:
+ /* don't call this function for these ops */
+ assert(0);
+ return 0;
default:
- return TRUE;
+ /* linear vector instruction */
+ return (1 << c);
}
}
+static INLINE boolean
+has_pred(struct nv50_program_exec *e, unsigned cc)
+{
+ if (!is_long(e) || is_immd(e))
+ return FALSE;
+ return ((e->inst[1] & 0x780) == (cc << 7));
+}
+
+/* on ENDIF see if we can do "@p0.neu single_op" instead of:
+ * join_at ENDIF
+ * @p0.eq bra ENDIF
+ * single_op
+ * ENDIF: nop.join
+ */
static boolean
-nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
+nv50_kill_branch(struct nv50_pc *pc)
{
- const struct tgsi_full_instruction *inst = &tok->FullInstruction;
- struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
+ int lvl = pc->if_lvl;
+
+ if (pc->if_insn[lvl]->next != pc->p->exec_tail)
+ return FALSE;
+
+ /* if ccode == 'true', the BRA is from an ELSE and the predicate
+ * reg may no longer be valid, since we currently always use $p0
+ */
+ if (has_pred(pc->if_insn[lvl], 0xf))
+ return FALSE;
+ assert(pc->if_insn[lvl] && pc->br_join[lvl]);
+
+ /* We'll use the exec allocated for JOIN_AT (as we can't easily
+ * update prev's next); if exec_tail is BRK, update the pointer.
+ */
+ if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail)
+ pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl];
+
+ pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */
+
+ *pc->br_join[lvl] = *pc->p->exec_tail;
+
+ FREE(pc->if_insn[lvl]);
+ FREE(pc->p->exec_tail);
+
+ pc->p->exec_tail = pc->br_join[lvl];
+ pc->p->exec_tail->next = NULL;
+ set_pred(pc, 0xd, 0, pc->p->exec_tail);
+
+ return TRUE;
+}
+
+static boolean
+nv50_program_tx_insn(struct nv50_pc *pc,
+ const struct tgsi_full_instruction *inst)
+{
+ struct nv50_reg *rdst[4], *dst[4], *brdc, *src[3][4], *temp;
unsigned mask, sat, unit;
- boolean assimilate = FALSE;
int i, c;
mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
+ memset(src, 0, sizeof(src));
+
for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
+ if ((mask & (1 << c)) && !pc->r_dst[c])
dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
else
- dst[c] = NULL;
- rdst[c] = NULL;
- src[0][c] = NULL;
- src[1][c] = NULL;
- src[2][c] = NULL;
+ dst[c] = pc->r_dst[c];
+ rdst[c] = dst[c];
}
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
+ unsigned src_mask;
+ boolean neg_supp;
+
+ src_mask = nv50_tgsi_src_mask(inst, i);
+ neg_supp = negate_supported(inst, i);
if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
unit = fs->SrcRegister.Index;
for (c = 0; c < 4; c++)
- src[i][c] = tgsi_src(pc, c, fs,
- negate_supported(inst, i));
+ if (src_mask & (1 << c))
+ src[i][c] = tgsi_src(pc, c, fs, neg_supp);
}
- if (sat) {
- for (c = 0; c < 4; c++) {
- rdst[c] = dst[c];
- dst[c] = temp_temp(pc);
- }
+ brdc = temp = pc->r_brdc;
+ if (brdc && brdc->type != P_TEMP) {
+ temp = temp_temp(pc);
+ if (sat)
+ brdc = temp;
} else
- if (direct2dest_op(inst)) {
+ if (sat) {
for (c = 0; c < 4; c++) {
- if (!dst[c] || dst[c]->type != P_TEMP)
- continue;
-
- for (i = c + 1; i < 4; i++) {
- if (dst[c] == src[0][i] ||
- dst[c] == src[1][i] ||
- dst[c] == src[2][i])
- break;
- }
- if (i == 4)
+ if (!(mask & (1 << c)) || dst[c]->type == P_TEMP)
continue;
-
- assimilate = TRUE;
rdst[c] = dst[c];
- dst[c] = alloc_temp(pc, NULL);
+ dst[c] = temp_temp(pc);
}
}
+ assert(brdc || !is_scalar_op(inst->Instruction.Opcode));
+
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
for (c = 0; c < 4; c++) {
@@ -1360,74 +1632,91 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
emit_add(pc, dst[c], src[0][c], src[1][c]);
}
break;
- case TGSI_OPCODE_COS:
- temp = temp_temp(pc);
- emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 5, temp, temp);
+ case TGSI_OPCODE_BGNLOOP:
+ pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
+ break;
+ case TGSI_OPCODE_BRK:
+ emit_branch(pc, -1, 0, NULL);
+ assert(pc->loop_lvl > 0);
+ pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail;
+ break;
+ case TGSI_OPCODE_CEIL:
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_mov(pc, dst[c], temp);
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVTOP_CEIL, CVT_F32_F32 | CVT_RI);
+ }
+ break;
+ case TGSI_OPCODE_COS:
+ if (mask & 8) {
+ emit_precossin(pc, temp, src[0][3]);
+ emit_flop(pc, 5, dst[3], temp);
+ if (!(mask &= 7))
+ break;
+ if (temp == dst[3])
+ temp = brdc = temp_temp(pc);
}
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 5, brdc, temp);
break;
case TGSI_OPCODE_DP3:
- temp = temp_temp(pc);
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
- emit_mad(pc, temp, src[0][2], src[1][2], temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_mad(pc, brdc, src[0][2], src[1][2], temp);
break;
case TGSI_OPCODE_DP4:
- temp = temp_temp(pc);
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
emit_mad(pc, temp, src[0][2], src[1][2], temp);
- emit_mad(pc, temp, src[0][3], src[1][3], temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_mad(pc, brdc, src[0][3], src[1][3], temp);
break;
case TGSI_OPCODE_DPH:
- temp = temp_temp(pc);
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
emit_mad(pc, temp, src[0][2], src[1][2], temp);
- emit_add(pc, temp, src[1][3], temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_add(pc, brdc, src[1][3], temp);
break;
case TGSI_OPCODE_DST:
- {
- struct nv50_reg *one = alloc_immd(pc, 1.0);
- if (mask & (1 << 0))
- emit_mov(pc, dst[0], one);
if (mask & (1 << 1))
emit_mul(pc, dst[1], src[0][1], src[1][1]);
if (mask & (1 << 2))
emit_mov(pc, dst[2], src[0][2]);
if (mask & (1 << 3))
emit_mov(pc, dst[3], src[1][3]);
- FREE(one);
- }
+ if (mask & (1 << 0))
+ emit_mov_immdval(pc, dst[0], 1.0f);
+ break;
+ case TGSI_OPCODE_ELSE:
+ emit_branch(pc, -1, 0, NULL);
+ pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
+ pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ break;
+ case TGSI_OPCODE_ENDIF:
+ pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
+
+ /* try to replace branch over 1 insn with a predicated insn */
+ if (nv50_kill_branch(pc) == TRUE)
+ break;
+
+ if (pc->br_join[pc->if_lvl]) {
+ pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
+ pc->br_join[pc->if_lvl] = NULL;
+ }
+ /* emit a NOP as join point, we could set it on the next
+ * one, but would have to make sure it is long and !immd
+ */
+ emit_nop(pc);
+ pc->p->exec_tail->inst[1] |= 2;
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ emit_branch(pc, -1, 0, NULL);
+ pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
+ pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
break;
case TGSI_OPCODE_EX2:
- temp = temp_temp(pc);
emit_preex2(pc, temp, src[0][0]);
- emit_flop(pc, 6, temp, temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_flop(pc, 6, brdc, temp);
break;
case TGSI_OPCODE_FLR:
for (c = 0; c < 4; c++) {
@@ -1445,24 +1734,24 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
emit_sub(pc, dst[c], src[0][c], temp);
}
break;
+ case TGSI_OPCODE_IF:
+ /* emitting a join_at may not be necessary */
+ assert(pc->if_lvl < MAX_IF_DEPTH);
+ set_pred_wr(pc, 1, 0, pc->if_cond);
+ emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
+ pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ break;
case TGSI_OPCODE_KIL:
emit_kil(pc, src[0][0]);
emit_kil(pc, src[0][1]);
emit_kil(pc, src[0][2]);
emit_kil(pc, src[0][3]);
- pc->p->cfg.fp.regs[2] |= 0x00100000;
break;
case TGSI_OPCODE_LIT:
emit_lit(pc, &dst[0], mask, &src[0][0]);
break;
case TGSI_OPCODE_LG2:
- temp = temp_temp(pc);
- emit_flop(pc, 3, temp, src[0][0]);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_flop(pc, 3, brdc, src[0][0]);
break;
case TGSI_OPCODE_LRP:
temp = temp_temp(pc);
@@ -1510,31 +1799,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_POW:
- temp = temp_temp(pc);
- emit_pow(pc, temp, src[0][0], src[1][0]);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_pow(pc, brdc, src[0][0], src[1][0]);
break;
case TGSI_OPCODE_RCP:
- for (c = 3; c >= 0; c--) {
- if (!(mask & (1 << c)))
- continue;
- emit_flop(pc, 0, dst[c], src[0][0]);
- }
+ emit_flop(pc, 0, brdc, src[0][0]);
break;
case TGSI_OPCODE_RSQ:
- for (c = 3; c >= 0; c--) {
- if (!(mask & (1 << c)))
- continue;
- emit_flop(pc, 2, dst[c], src[0][0]);
- }
+ emit_flop(pc, 2, brdc, src[0][0]);
break;
case TGSI_OPCODE_SCS:
temp = temp_temp(pc);
- emit_precossin(pc, temp, src[0][0]);
+ if (mask & 3)
+ emit_precossin(pc, temp, src[0][0]);
if (mask & (1 << 0))
emit_flop(pc, 5, dst[0], temp);
if (mask & (1 << 1))
@@ -1544,28 +1820,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
if (mask & (1 << 3))
emit_mov_immdval(pc, dst[3], 1.0);
break;
- case TGSI_OPCODE_SGE:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
- }
- break;
case TGSI_OPCODE_SIN:
- temp = temp_temp(pc);
- emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 4, temp, temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
+ if (mask & 8) {
+ emit_precossin(pc, temp, src[0][3]);
+ emit_flop(pc, 4, dst[3], temp);
+ if (!(mask &= 7))
+ break;
+ if (temp == dst[3])
+ temp = brdc = temp_temp(pc);
}
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 4, brdc, temp);
break;
case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ i = map_tgsi_setop_cc(inst->Instruction.Opcode);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
+ emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]);
}
break;
case TGSI_OPCODE_SUB:
@@ -1583,6 +1860,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
emit_tex(pc, dst, mask, src[0], unit,
inst->InstructionExtTexture.Texture, TRUE);
break;
+ case TGSI_OPCODE_TRUNC:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVTOP_TRUNC, CVT_F32_F32 | CVT_RI);
+ }
+ break;
case TGSI_OPCODE_XPD:
temp = temp_temp(pc);
if (mask & (1 << 0)) {
@@ -1607,17 +1892,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
return FALSE;
}
+ if (brdc) {
+ if (sat)
+ emit_sat(pc, brdc, brdc);
+ for (c = 0; c < 4; c++)
+ if ((mask & (1 << c)) && dst[c] != brdc)
+ emit_mov(pc, dst[c], brdc);
+ } else
if (sat) {
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT,
- CVT_F32_F32);
+ /* in this case we saturate later */
+ if (dst[c]->type == P_TEMP && dst[c]->index < 0)
+ continue;
+ emit_sat(pc, rdst[c], dst[c]);
}
- } else if (assimilate) {
- for (c = 0; c < 4; c++)
- if (rdst[c])
- assimilate_temp(pc, rdst[c], dst[c]);
}
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
@@ -1626,9 +1916,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
continue;
if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
FREE(src[i][c]);
- else
- if (src[i][c]->acc == pc->insn_cur)
- release_hw(pc, src[i][c]);
}
}
@@ -1636,180 +1923,271 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
return TRUE;
}
-/* Adjust a bitmask that indicates what components of a source are used,
- * we use this in tx_prep so we only load interpolants that are needed.
- */
-static void
-insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask)
-{
- const struct tgsi_instruction_ext_texture *tex;
-
- switch (insn->Instruction.Opcode) {
- case TGSI_OPCODE_DP3:
- *mask = 0x7;
- break;
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- *mask = 0xF;
- break;
- case TGSI_OPCODE_LIT:
- *mask = 0xB;
- break;
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- *mask = 0x1;
- break;
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXP:
- assert(insn->Instruction.Extended);
- tex = &insn->InstructionExtTexture;
-
- *mask = 0x7;
- if (tex->Texture == TGSI_TEXTURE_1D)
- *mask = 0x1;
- else
- if (tex->Texture == TGSI_TEXTURE_2D)
- *mask = 0x3;
-
- if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
- *mask |= 0x8;
- break;
- default:
- break;
- }
-}
-
static void
-prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
- unsigned *r_usage[2])
+prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
{
- const struct tgsi_full_instruction *insn;
+ struct nv50_reg *reg = NULL;
const struct tgsi_full_src_register *src;
const struct tgsi_dst_register *dst;
+ unsigned i, c, k, mask;
- unsigned i, c, k, n, mask, *acc_p;
-
- insn = &tok->FullInstruction;
dst = &insn->FullDstRegisters[0].DstRegister;
mask = dst->WriteMask;
- if (!r_usage[0])
- r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
- if (!r_usage[1])
- r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
+ if (dst->File == TGSI_FILE_TEMPORARY)
+ reg = pc->temp;
+ else
+ if (dst->File == TGSI_FILE_OUTPUT)
+ reg = pc->result;
- if (dst->File == TGSI_FILE_TEMPORARY) {
+ if (reg) {
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- r_usage[0][dst->Index * 4 + c] = pc->insn_nr;
+ reg[dst->Index * 4 + c].acc = pc->insn_nr;
}
}
for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
src = &insn->FullSrcRegisters[i];
- switch (src->SrcRegister.File) {
- case TGSI_FILE_TEMPORARY:
- acc_p = r_usage[0];
- break;
- case TGSI_FILE_INPUT:
- acc_p = r_usage[1];
- break;
- default:
+ if (src->SrcRegister.File == TGSI_FILE_TEMPORARY)
+ reg = pc->temp;
+ else
+ if (src->SrcRegister.File == TGSI_FILE_INPUT)
+ reg = pc->attr;
+ else
continue;
- }
- insn_adjust_mask(insn, &mask);
+ mask = nv50_tgsi_src_mask(insn, i);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
-
k = tgsi_util_get_full_src_register_extswizzle(src, c);
- switch (k) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
- n = src->SrcRegister.Index * 4 + k;
- acc_p[n] = pc->insn_nr;
- break;
- default:
- break;
- }
+
+ if (k > TGSI_EXTSWIZZLE_W)
+ continue;
+
+ reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr;
}
}
}
+/* Returns a bitmask indicating which dst components need to be
+ * written to temporaries first to avoid 'corrupting' sources.
+ *
+ * m[i] (out) indicate component to write in the i-th position
+ * rdep[c] (in) bitmasks of dst[i] that require dst[c] as source
+ */
+static unsigned
+nv50_revdep_reorder(unsigned m[4], unsigned rdep[4])
+{
+ unsigned i, c, x, unsafe;
+
+ for (c = 0; c < 4; c++)
+ m[c] = c;
+
+ /* Swap as long as a dst component written earlier is depended on
+ * by one written later, but the next one isn't depended on by it.
+ */
+ for (c = 0; c < 3; c++) {
+ if (rdep[m[c + 1]] & (1 << m[c]))
+ continue; /* if next one is depended on by us */
+ for (i = c + 1; i < 4; i++)
+ /* if we are depended on by a later one */
+ if (rdep[m[c]] & (1 << m[i]))
+ break;
+ if (i == 4)
+ continue;
+ /* now, swap */
+ x = m[c];
+ m[c] = m[c + 1];
+ m[c + 1] = x;
+
+ /* restart */
+ c = 0;
+ }
+
+ /* mark dependencies that could not be resolved by reordering */
+ for (i = 0; i < 3; ++i)
+ for (c = i + 1; c < 4; ++c)
+ if (rdep[m[i]] & (1 << m[c]))
+ unsafe |= (1 << i);
+
+ /* NOTE: $unsafe is with respect to order, not component */
+ return unsafe;
+}
+
+/* Select a suitable dst register for broadcasting scalar results,
+ * or return NULL if we have to allocate an extra TEMP.
+ *
+ * If e.g. only 1 component is written, we may also emit the final
+ * result to a write-only register.
+ */
+static struct nv50_reg *
+tgsi_broadcast_dst(struct nv50_pc *pc,
+ const struct tgsi_full_dst_register *fd, unsigned mask)
+{
+ if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) {
+ int c = ffs(~mask & fd->DstRegister.WriteMask);
+ if (c)
+ return tgsi_dst(pc, c - 1, fd);
+ } else {
+ int c = ffs(fd->DstRegister.WriteMask) - 1;
+ if ((1 << c) == fd->DstRegister.WriteMask)
+ return tgsi_dst(pc, c, fd);
+ }
+
+ return NULL;
+}
+
+/* Scan source swizzles and return a bitmask indicating dst regs that
+ * also occur among the src regs, and fill rdep for nv50_revdep_reoder.
+ */
static unsigned
-load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
- int *aid, int *p_oid)
+nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
+ unsigned rdep[4])
{
- struct nv50_reg *iv;
- int oid, c, n;
- unsigned mask = 0;
+ const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0];
+ const struct tgsi_full_src_register *fs;
+ unsigned i, deqs = 0;
- iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p;
+ for (i = 0; i < 4; ++i)
+ rdep[i] = 0;
- for (c = 0, n = i * 4; c < 4; c++, n++) {
- oid = (*p_oid)++;
- pc->attr[n].type = P_TEMP;
- pc->attr[n].index = i;
+ for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
+ unsigned chn, mask = nv50_tgsi_src_mask(insn, i);
+ boolean neg_supp = negate_supported(insn, i);
- if (pc->attr[n].acc == acc[n])
+ fs = &insn->FullSrcRegisters[i];
+ if (fs->SrcRegister.File != fd->DstRegister.File ||
+ fs->SrcRegister.Index != fd->DstRegister.Index)
continue;
- mask |= (1 << c);
- pc->attr[n].acc = acc[n];
- pc->attr[n].rhw = pc->attr[n].hw = -1;
- alloc_reg(pc, &pc->attr[n]);
+ for (chn = 0; chn < 4; ++chn) {
+ unsigned s, c;
+
+ if (!(mask & (1 << chn))) /* src is not read */
+ continue;
+ c = tgsi_util_get_full_src_register_extswizzle(fs, chn);
+ s = tgsi_util_get_full_src_register_sign_mode(fs, chn);
- pc->attr[n].rhw = (*aid)++;
- emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]);
+ if (c > TGSI_EXTSWIZZLE_W ||
+ !(fd->DstRegister.WriteMask & (1 << c)))
+ continue;
- pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4));
- (*mid)++;
- pc->p->cfg.fp.regs[1] += 0x00010001;
+ /* no danger if src is copied to TEMP first */
+ if ((s != TGSI_UTIL_SIGN_KEEP) &&
+ (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp))
+ continue;
+
+ rdep[c] |= nv50_tgsi_dst_revdep(
+ insn->Instruction.Opcode, i, chn);
+ deqs |= (1 << c);
+ }
}
- return mask;
+ return deqs;
}
static boolean
-nv50_program_tx_prep(struct nv50_pc *pc)
+nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
{
- struct tgsi_parse_context p;
- boolean ret = FALSE;
- unsigned i, c;
- unsigned fcol, bcol, fcrd, depr;
+ struct tgsi_full_instruction insn = tok->FullInstruction;
+ const struct tgsi_full_dst_register *fd;
+ unsigned i, deqs, rdep[4], m[4];
+
+ fd = &tok->FullInstruction.FullDstRegisters[0];
+ deqs = nv50_tgsi_scan_swizzle(&insn, rdep);
+
+ if (is_scalar_op(insn.Instruction.Opcode)) {
+ pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs);
+ if (!pc->r_brdc)
+ pc->r_brdc = temp_temp(pc);
+ return nv50_program_tx_insn(pc, &insn);
+ }
+ pc->r_brdc = NULL;
+
+ if (!deqs)
+ return nv50_program_tx_insn(pc, &insn);
+
+ deqs = nv50_revdep_reorder(m, rdep);
+
+ for (i = 0; i < 4; ++i) {
+ assert(pc->r_dst[m[i]] == NULL);
+
+ insn.FullDstRegisters[0].DstRegister.WriteMask =
+ fd->DstRegister.WriteMask & (1 << m[i]);
+
+ if (!insn.FullDstRegisters[0].DstRegister.WriteMask)
+ continue;
+
+ if (deqs & (1 << i))
+ pc->r_dst[m[i]] = alloc_temp(pc, NULL);
+
+ if (!nv50_program_tx_insn(pc, &insn))
+ return FALSE;
+ }
- /* count (centroid) perspective interpolations */
- unsigned centroid_loads = 0;
- unsigned perspect_loads = 0;
+ for (i = 0; i < 4; i++) {
+ struct nv50_reg *reg = pc->r_dst[i];
+ if (!reg)
+ continue;
+ pc->r_dst[i] = NULL;
+
+ if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+ emit_sat(pc, tgsi_dst(pc, i, fd), reg);
+ else
+ emit_mov(pc, tgsi_dst(pc, i, fd), reg);
+ free_temp(pc, reg);
+ }
- /* track register access for temps and attrs */
- unsigned *r_usage[2];
- r_usage[0] = NULL;
- r_usage[1] = NULL;
+ return TRUE;
+}
- depr = fcol = bcol = fcrd = 0xffff;
+static void
+load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ struct nv50_reg *iv, **ppiv;
+ unsigned mode = pc->interp_mode[reg->index];
- if (pc->p->type == PIPE_SHADER_FRAGMENT) {
- pc->p->cfg.fp.regs[0] = 0x01000404;
- pc->p->cfg.fp.regs[1] = 0x00000400;
+ ppiv = (mode & INTERP_CENTROID) ? &pc->iv_c : &pc->iv_p;
+ iv = *ppiv;
+
+ if ((mode & INTERP_PERSPECTIVE) && !iv) {
+ iv = *ppiv = alloc_temp(pc, NULL);
+ iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1;
+
+ emit_interp(pc, iv, NULL, mode & INTERP_CENTROID);
+ emit_flop(pc, 0, iv, iv);
+
+ /* XXX: when loading interpolants dynamically, move these
+ * to the program head, or make sure it can't be skipped.
+ */
}
- tgsi_parse_init(&p, pc->p->pipe.tokens);
- while (!tgsi_parse_end_of_tokens(&p)) {
- const union tgsi_full_token *tok = &p.FullToken;
+ emit_interp(pc, reg, iv, mode);
+}
+
+static boolean
+nv50_program_tx_prep(struct nv50_pc *pc)
+{
+ struct tgsi_parse_context tp;
+ struct nv50_program *p = pc->p;
+ boolean ret = FALSE;
+ unsigned i, c, flat_nr = 0;
+
+ tgsi_parse_init(&tp, pc->p->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&tp)) {
+ const union tgsi_full_token *tok = &tp.FullToken;
- tgsi_parse_token(&p);
+ tgsi_parse_token(&tp);
switch (tok->Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
const struct tgsi_full_immediate *imm =
- &p.FullToken.FullImmediate;
+ &tp.FullToken.FullImmediate;
ctor_immd(pc, imm->u[0].Float,
imm->u[1].Float,
@@ -1820,78 +2198,61 @@ nv50_program_tx_prep(struct nv50_pc *pc)
case TGSI_TOKEN_TYPE_DECLARATION:
{
const struct tgsi_full_declaration *d;
- unsigned last, first, mode;
+ unsigned si, last, first, mode;
- d = &p.FullToken.FullDeclaration;
+ d = &tp.FullToken.FullDeclaration;
first = d->DeclarationRange.First;
last = d->DeclarationRange.Last;
switch (d->Declaration.File) {
case TGSI_FILE_TEMPORARY:
- if (pc->temp_nr < (last + 1))
- pc->temp_nr = last + 1;
break;
case TGSI_FILE_OUTPUT:
- if (pc->result_nr < (last + 1))
- pc->result_nr = last + 1;
-
- if (!d->Declaration.Semantic)
+ if (!d->Declaration.Semantic ||
+ p->type == PIPE_SHADER_FRAGMENT)
break;
+ si = d->Semantic.SemanticIndex;
switch (d->Semantic.SemanticName) {
- case TGSI_SEMANTIC_POSITION:
- depr = first;
- pc->p->cfg.fp.regs[2] |= 0x00000100;
- pc->p->cfg.fp.regs[3] |= 0x00000011;
+ case TGSI_SEMANTIC_BCOLOR:
+ p->cfg.two_side[si].hw = first;
+ if (p->cfg.io_nr > first)
+ p->cfg.io_nr = first;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ p->cfg.psiz = first;
+ if (p->cfg.io_nr > first)
+ p->cfg.io_nr = first;
break;
+ /*
+ case TGSI_SEMANTIC_CLIP_DISTANCE:
+ p->cfg.clpd = MIN2(p->cfg.clpd, first);
+ break;
+ */
default:
break;
}
-
break;
case TGSI_FILE_INPUT:
{
- if (pc->attr_nr < (last + 1))
- pc->attr_nr = last + 1;
-
- if (pc->p->type != PIPE_SHADER_FRAGMENT)
+ if (p->type != PIPE_SHADER_FRAGMENT)
break;
switch (d->Declaration.Interpolate) {
case TGSI_INTERPOLATE_CONSTANT:
mode = INTERP_FLAT;
+ flat_nr++;
break;
case TGSI_INTERPOLATE_PERSPECTIVE:
mode = INTERP_PERSPECTIVE;
+ p->cfg.regs[1] |= 0x08 << 24;
break;
default:
mode = INTERP_LINEAR;
break;
}
-
- if (d->Declaration.Semantic) {
- switch (d->Semantic.SemanticName) {
- case TGSI_SEMANTIC_POSITION:
- fcrd = first;
- break;
- case TGSI_SEMANTIC_COLOR:
- fcol = first;
- mode = INTERP_PERSPECTIVE;
- break;
- case TGSI_SEMANTIC_BCOLOR:
- bcol = first;
- mode = INTERP_PERSPECTIVE;
- break;
- }
- }
-
- if (d->Declaration.Centroid) {
+ if (d->Declaration.Centroid)
mode |= INTERP_CENTROID;
- if (mode & INTERP_PERSPECTIVE)
- centroid_loads++;
- } else
- if (mode & INTERP_PERSPECTIVE)
- perspect_loads++;
assert(last < 32);
for (i = first; i <= last; i++)
@@ -1899,8 +2260,6 @@ nv50_program_tx_prep(struct nv50_pc *pc)
}
break;
case TGSI_FILE_CONSTANT:
- if (pc->param_nr < (last + 1))
- pc->param_nr = last + 1;
break;
case TGSI_FILE_SAMPLER:
break;
@@ -1913,182 +2272,157 @@ nv50_program_tx_prep(struct nv50_pc *pc)
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
pc->insn_nr++;
- prep_inspect_insn(pc, tok, r_usage);
+ prep_inspect_insn(pc, &tok->FullInstruction);
break;
default:
break;
}
}
- if (pc->temp_nr) {
- pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
- if (!pc->temp)
- goto out_err;
+ if (p->type == PIPE_SHADER_VERTEX) {
+ int rid = 0;
- for (i = 0; i < pc->temp_nr; i++) {
- for (c = 0; c < 4; c++) {
- pc->temp[i*4+c].type = P_TEMP;
- pc->temp[i*4+c].hw = -1;
- pc->temp[i*4+c].rhw = -1;
- pc->temp[i*4+c].index = i;
- pc->temp[i*4+c].acc = r_usage[0][i*4+c];
+ for (i = 0; i < pc->attr_nr * 4; ++i) {
+ if (pc->attr[i].acc) {
+ pc->attr[i].hw = rid++;
+ p->cfg.attr[i / 32] |= 1 << (i % 32);
}
}
- }
-
- if (pc->attr_nr) {
- int oid = 4, mid = 4, aid = 0;
- /* oid = VP output id
- * aid = FP attribute/interpolant id
- * mid = VP output mapping field ID
- */
- pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
- if (!pc->attr)
- goto out_err;
-
- if (pc->p->type == PIPE_SHADER_FRAGMENT) {
- /* position should be loaded first */
- if (fcrd != 0xffff) {
- unsigned mask;
- mid = 0;
- mask = load_fp_attrib(pc, fcrd, r_usage[1],
- &mid, &aid, &oid);
- oid = 0;
- pc->p->cfg.fp.regs[1] |= (mask << 24);
- pc->p->cfg.fp.map[0] = 0x04040404 * fcrd;
- }
- pc->p->cfg.fp.map[0] += 0x03020100;
-
- /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */
-
- if (perspect_loads) {
- pc->iv_p = alloc_temp(pc, NULL);
-
- if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) {
- pc->p->cfg.fp.regs[1] |= 0x08000000;
- pc->iv_p->rhw = aid++;
- emit_interp(pc, pc->iv_p, NULL,
- INTERP_LINEAR);
- emit_flop(pc, 0, pc->iv_p, pc->iv_p);
- } else {
- pc->iv_p->rhw = aid - 1;
- emit_flop(pc, 0, pc->iv_p,
- &pc->attr[fcrd * 4 + 3]);
- }
- }
+ for (i = 0, rid = 0; i < pc->result_nr; ++i) {
+ p->cfg.io[i].hw = rid;
+ p->cfg.io[i].id_vp = i;
- if (centroid_loads) {
- pc->iv_c = alloc_temp(pc, NULL);
- pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++;
- emit_interp(pc, pc->iv_c, NULL,
- INTERP_CENTROID);
- emit_flop(pc, 0, pc->iv_c, pc->iv_c);
- pc->p->cfg.fp.regs[1] |= 0x08000000;
+ for (c = 0; c < 4; ++c) {
+ int n = i * 4 + c;
+ if (!pc->result[n].acc)
+ continue;
+ pc->result[n].hw = rid++;
+ p->cfg.io[i].mask |= 1 << c;
}
+ }
- for (c = 0; c < 4; c++) {
- /* I don't know what these values do, but
- * let's set them like the blob does:
- */
- if (fcol != 0xffff && r_usage[1][fcol * 4 + c])
- pc->p->cfg.fp.regs[0] += 0x00010000;
- if (bcol != 0xffff && r_usage[1][bcol * 4 + c])
- pc->p->cfg.fp.regs[0] += 0x00010000;
- }
+ for (c = 0; c < 2; ++c)
+ if (p->cfg.two_side[c].hw < 0x40)
+ p->cfg.two_side[c] = p->cfg.io[
+ p->cfg.two_side[c].hw];
- for (i = 0; i < pc->attr_nr; i++)
- load_fp_attrib(pc, i, r_usage[1],
- &mid, &aid, &oid);
+ if (p->cfg.psiz < 0x40)
+ p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw;
+ } else
+ if (p->type == PIPE_SHADER_FRAGMENT) {
+ int rid, aid;
+ unsigned n = 0, m = pc->attr_nr - flat_nr;
- if (pc->iv_p)
- free_temp(pc, pc->iv_p);
- if (pc->iv_c)
- free_temp(pc, pc->iv_c);
+ int base = (TGSI_SEMANTIC_POSITION ==
+ p->info.input_semantic_name[0]) ? 0 : 1;
- pc->p->cfg.fp.high_map = (mid / 4);
- pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0);
- } else {
- /* vertex program */
- for (i = 0; i < pc->attr_nr * 4; i++) {
- pc->p->cfg.vp.attr[aid / 32] |=
- (1 << (aid % 32));
- pc->attr[i].type = P_ATTR;
- pc->attr[i].hw = aid++;
- pc->attr[i].index = i / 4;
+ /* non-flat interpolants have to be mapped to
+ * the lower hardware IDs, so sort them:
+ */
+ for (i = 0; i < pc->attr_nr; i++) {
+ if (pc->interp_mode[i] == INTERP_FLAT) {
+ p->cfg.io[m].id_vp = i + base;
+ p->cfg.io[m++].id_fp = i;
+ } else {
+ if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE))
+ p->cfg.io[n].linear = TRUE;
+ p->cfg.io[n].id_vp = i + base;
+ p->cfg.io[n++].id_fp = i;
}
}
- }
- if (pc->result_nr) {
- int rid = 0;
+ if (!base) /* set w-coordinate mask from perspective interp */
+ p->cfg.io[0].mask |= p->cfg.regs[1] >> 24;
- pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
- if (!pc->result)
- goto out_err;
+ aid = popcnt4( /* if fcrd isn't contained in cfg.io */
+ base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask);
- for (i = 0; i < pc->result_nr; i++) {
- for (c = 0; c < 4; c++) {
- if (pc->p->type == PIPE_SHADER_FRAGMENT) {
- pc->result[i*4+c].type = P_TEMP;
- pc->result[i*4+c].hw = -1;
- pc->result[i*4+c].rhw = (i == depr) ?
- -1 : rid++;
- } else {
- pc->result[i*4+c].type = P_RESULT;
- pc->result[i*4+c].hw = rid++;
- }
- pc->result[i*4+c].index = i;
- }
+ for (n = 0; n < pc->attr_nr; ++n) {
+ p->cfg.io[n].hw = rid = aid;
+ i = p->cfg.io[n].id_fp;
- if (pc->p->type == PIPE_SHADER_FRAGMENT &&
- depr != 0xffff) {
- pc->result[depr * 4 + 2].rhw =
- (pc->result_nr - 1) * 4;
+ for (c = 0; c < 4; ++c) {
+ if (!pc->attr[i * 4 + c].acc)
+ continue;
+ pc->attr[i * 4 + c].rhw = rid++;
+ p->cfg.io[n].mask |= 1 << c;
+
+ load_interpolant(pc, &pc->attr[i * 4 + c]);
}
+ aid += popcnt4(p->cfg.io[n].mask);
}
- }
- if (pc->param_nr) {
- int rid = 0;
+ if (!base)
+ p->cfg.regs[1] |= p->cfg.io[0].mask << 24;
- pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
- if (!pc->param)
- goto out_err;
+ m = popcnt4(p->cfg.regs[1] >> 24);
+
+ /* set count of non-position inputs and of non-flat
+ * non-position inputs for FP_INTERPOLANT_CTRL
+ */
+ p->cfg.regs[1] |= aid - m;
+
+ if (flat_nr) {
+ i = p->cfg.io[pc->attr_nr - flat_nr].hw;
+ p->cfg.regs[1] |= (i - m) << 16;
+ } else
+ p->cfg.regs[1] |= p->cfg.regs[1] << 16;
+
+ /* mark color semantic for light-twoside */
+ n = 0x40;
+ for (i = 0; i < pc->attr_nr; i++) {
+ ubyte si, sn;
- for (i = 0; i < pc->param_nr; i++) {
- for (c = 0; c < 4; c++) {
- pc->param[i*4+c].type = P_CONST;
- pc->param[i*4+c].hw = rid++;
- pc->param[i*4+c].index = i;
+ sn = p->info.input_semantic_name[p->cfg.io[i].id_fp];
+ si = p->info.input_semantic_index[p->cfg.io[i].id_fp];
+
+ if (sn == TGSI_SEMANTIC_COLOR) {
+ p->cfg.two_side[si] = p->cfg.io[i];
+
+ /* increase colour count */
+ p->cfg.regs[0] += popcnt4(
+ p->cfg.two_side[si].mask) << 16;
+
+ n = MIN2(n, p->cfg.io[i].hw - m);
}
}
+ if (n < 0x40)
+ p->cfg.regs[0] += n;
+
+ /* Initialize FP results:
+ * FragDepth is always first TGSI and last hw output
+ */
+ i = p->info.writes_z ? 4 : 0;
+ for (rid = 0; i < pc->result_nr * 4; i++)
+ pc->result[i].rhw = rid++;
+ if (p->info.writes_z)
+ pc->result[2].rhw = rid;
+
+ p->cfg.high_result = rid;
}
if (pc->immd_nr) {
int rid = 0;
- pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
+ pc->immd = MALLOC(pc->immd_nr * 4 * sizeof(struct nv50_reg));
if (!pc->immd)
goto out_err;
for (i = 0; i < pc->immd_nr; i++) {
- for (c = 0; c < 4; c++) {
- pc->immd[i*4+c].type = P_IMMD;
- pc->immd[i*4+c].hw = rid++;
- pc->immd[i*4+c].index = i;
- }
+ for (c = 0; c < 4; c++, rid++)
+ ctor_reg(&pc->immd[rid], P_IMMD, i, rid);
}
}
ret = TRUE;
out_err:
- if (r_usage[0])
- FREE(r_usage[0]);
- if (r_usage[1])
- FREE(r_usage[1]);
+ if (pc->iv_p)
+ free_temp(pc, pc->iv_p);
+ if (pc->iv_c)
+ free_temp(pc, pc->iv_c);
- tgsi_parse_free(&p);
+ tgsi_parse_free(&tp);
return ret;
}
@@ -2110,18 +2444,165 @@ free_nv50_pc(struct nv50_pc *pc)
}
static boolean
+ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
+{
+ int i, c;
+ unsigned rtype[2] = { P_ATTR, P_RESULT };
+
+ pc->p = p;
+ pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1;
+ pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
+ pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
+ pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+ p->cfg.high_temp = 4;
+
+ p->cfg.two_side[0].hw = 0x40;
+ p->cfg.two_side[1].hw = 0x40;
+
+ switch (p->type) {
+ case PIPE_SHADER_VERTEX:
+ p->cfg.psiz = 0x40;
+ p->cfg.clpd = 0x40;
+ p->cfg.io_nr = pc->result_nr;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ rtype[0] = rtype[1] = P_TEMP;
+
+ p->cfg.regs[0] = 0x01000004;
+ p->cfg.io_nr = pc->attr_nr;
+
+ if (p->info.writes_z) {
+ p->cfg.regs[2] |= 0x00000100;
+ p->cfg.regs[3] |= 0x00000011;
+ }
+ if (p->info.uses_kill)
+ p->cfg.regs[2] |= 0x00100000;
+ break;
+ }
+
+ if (pc->temp_nr) {
+ pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg));
+ if (!pc->temp)
+ return FALSE;
+
+ for (i = 0; i < pc->temp_nr * 4; ++i)
+ ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1);
+ }
+
+ if (pc->attr_nr) {
+ pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg));
+ if (!pc->attr)
+ return FALSE;
+
+ for (i = 0; i < pc->attr_nr * 4; ++i)
+ ctor_reg(&pc->attr[i], rtype[0], i / 4, -1);
+ }
+
+ if (pc->result_nr) {
+ unsigned nr = pc->result_nr * 4;
+
+ pc->result = MALLOC(nr * sizeof(struct nv50_reg));
+ if (!pc->result)
+ return FALSE;
+
+ for (i = 0; i < nr; ++i)
+ ctor_reg(&pc->result[i], rtype[1], i / 4, -1);
+ }
+
+ if (pc->param_nr) {
+ int rid = 0;
+
+ pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg));
+ if (!pc->param)
+ return FALSE;
+
+ for (i = 0; i < pc->param_nr; ++i)
+ for (c = 0; c < 4; ++c, ++rid)
+ ctor_reg(&pc->param[rid], P_CONST, i, rid);
+ }
+
+ return TRUE;
+}
+
+static void
+nv50_fp_move_results(struct nv50_pc *pc)
+{
+ struct nv50_reg reg;
+ unsigned i;
+
+ ctor_reg(&reg, P_TEMP, -1, -1);
+
+ for (i = 0; i < pc->result_nr * 4; ++i) {
+ if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
+ continue;
+ if (pc->result[i].rhw != pc->result[i].hw) {
+ reg.hw = pc->result[i].rhw;
+ emit_mov(pc, &reg, &pc->result[i]);
+ }
+ }
+}
+
+static void
+nv50_program_fixup_insns(struct nv50_pc *pc)
+{
+ struct nv50_program_exec *e, *prev = NULL, **bra_list;
+ unsigned i, n, pos;
+
+ bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *));
+
+ /* Collect branch instructions, we need to adjust their offsets
+ * when converting 32 bit instructions to 64 bit ones
+ */
+ for (n = 0, e = pc->p->exec_head; e; e = e->next)
+ if (e->param.index >= 0 && !e->param.mask)
+ bra_list[n++] = e;
+
+ /* Make sure we don't have any single 32 bit instructions. */
+ for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
+ pos += is_long(e) ? 2 : 1;
+
+ if ((pos & 1) && (!e->next || is_long(e->next))) {
+ for (i = 0; i < n; ++i)
+ if (bra_list[i]->param.index >= pos)
+ bra_list[i]->param.index += 1;
+ convert_to_long(pc, e);
+ ++pos;
+ }
+ if (e->next)
+ prev = e;
+ }
+
+ assert(!is_immd(pc->p->exec_head));
+ assert(!is_immd(pc->p->exec_tail));
+
+ /* last instruction must be long so it can have the end bit set */
+ if (!is_long(pc->p->exec_tail)) {
+ convert_to_long(pc, pc->p->exec_tail);
+ if (prev)
+ convert_to_long(pc, prev);
+ }
+ assert(!(pc->p->exec_tail->inst[1] & 2));
+ /* set the end-bit */
+ pc->p->exec_tail->inst[1] |= 1;
+
+ FREE(bra_list);
+}
+
+static boolean
nv50_program_tx(struct nv50_program *p)
{
struct tgsi_parse_context parse;
struct nv50_pc *pc;
- unsigned k;
boolean ret;
pc = CALLOC_STRUCT(nv50_pc);
if (!pc)
return FALSE;
- pc->p = p;
- pc->p->cfg.high_temp = 4;
+
+ ret = ctor_nv50_pc(pc, p);
+ if (ret == FALSE)
+ goto out_cleanup;
ret = nv50_program_tx_prep(pc);
if (ret == FALSE)
@@ -2141,7 +2622,7 @@ nv50_program_tx(struct nv50_program *p)
switch (tok->Token.Type) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
++pc->insn_cur;
- ret = nv50_program_tx_insn(pc, tok);
+ ret = nv50_tgsi_insn(pc, tok);
if (ret == FALSE)
goto out_err;
break;
@@ -2150,48 +2631,10 @@ nv50_program_tx(struct nv50_program *p)
}
}
- if (p->type == PIPE_SHADER_FRAGMENT) {
- struct nv50_reg out;
-
- out.type = P_TEMP;
- for (k = 0; k < pc->result_nr * 4; k++) {
- if (pc->result[k].rhw == -1)
- continue;
- if (pc->result[k].hw != pc->result[k].rhw) {
- out.hw = pc->result[k].rhw;
- emit_mov(pc, &out, &pc->result[k]);
- }
- if (pc->p->cfg.high_result < (pc->result[k].rhw + 1))
- pc->p->cfg.high_result = pc->result[k].rhw + 1;
- }
- }
-
- /* look for single half instructions and make them long */
- struct nv50_program_exec *e, *e_prev;
+ if (pc->p->type == PIPE_SHADER_FRAGMENT)
+ nv50_fp_move_results(pc);
- for (k = 0, e = pc->p->exec_head, e_prev = NULL; e; e = e->next) {
- if (!is_long(e))
- k++;
-
- if (!e->next || is_long(e->next)) {
- if (k & 1)
- convert_to_long(pc, e);
- k = 0;
- }
-
- if (e->next)
- e_prev = e;
- }
-
- if (!is_long(pc->p->exec_tail)) {
- /* this may occur if moving FP results */
- assert(e_prev && !is_long(e_prev));
- convert_to_long(pc, e_prev);
- convert_to_long(pc, pc->p->exec_tail);
- }
-
- assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
- pc->p->exec_tail->inst[1] |= 0x00000001;
+ nv50_program_fixup_insns(pc);
p->param_nr = pc->param_nr * 4;
p->immd_nr = pc->immd_nr * 4;
@@ -2258,30 +2701,19 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
p->immd_nr, NV50_CB_PMISC);
}
- if (!p->data[1] && p->param_nr) {
- struct nouveau_resource *heap =
- nv50->screen->parm_heap[p->type];
-
- if (nouveau_resource_alloc(heap, p->param_nr, p, &p->data[1])) {
- while (heap->next && heap->size < p->param_nr) {
- struct nv50_program *evict = heap->next->priv;
- nouveau_resource_free(&evict->data[1]);
- }
-
- if (nouveau_resource_alloc(heap, p->param_nr, p,
- &p->data[1]))
- assert(0);
- }
- }
+ assert(p->param_nr <= 128);
if (p->param_nr) {
- unsigned cbuf = NV50_CB_PVP;
+ unsigned cb;
float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
PIPE_BUFFER_USAGE_CPU_READ);
- if (p->type == PIPE_SHADER_FRAGMENT)
- cbuf = NV50_CB_PFP;
- nv50_program_upload_data(nv50, map, p->data[1]->start,
- p->param_nr, cbuf);
+
+ if (p->type == PIPE_SHADER_VERTEX)
+ cb = NV50_CB_PVP;
+ else
+ cb = NV50_CB_PFP;
+
+ nv50_program_upload_data(nv50, map, 0, p->param_nr, cb);
pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]);
}
}
@@ -2303,32 +2735,41 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
upload = TRUE;
}
- if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
- (p->data[1] && p->data[1]->start != p->data_start[1])) {
- for (e = p->exec_head; e; e = e->next) {
- unsigned ei, ci, bs;
+ if (p->data[0] && p->data[0]->start != p->data_start[0])
+ upload = TRUE;
- if (e->param.index < 0)
- continue;
- bs = (e->inst[1] >> 22) & 0x07;
- assert(bs < 2);
- ei = e->param.shift >> 5;
- ci = e->param.index + p->data[bs]->start;
+ if (!upload)
+ return;
+
+ for (e = p->exec_head; e; e = e->next) {
+ unsigned ei, ci, bs;
+
+ if (e->param.index < 0)
+ continue;
+
+ if (e->param.mask == 0) {
+ assert(!(e->param.index & 1));
+ /* seem to be 8 byte steps */
+ ei = (e->param.index >> 1) + 0 /* START_ID */;
- e->inst[ei] &= ~e->param.mask;
- e->inst[ei] |= (ci << e->param.shift);
+ e->inst[0] &= 0xf0000fff;
+ e->inst[0] |= ei << 12;
+ continue;
}
- if (p->data[0])
- p->data_start[0] = p->data[0]->start;
- if (p->data[1])
- p->data_start[1] = p->data[1]->start;
+ bs = (e->inst[1] >> 22) & 0x07;
+ assert(bs < 2);
+ ei = e->param.shift >> 5;
+ ci = e->param.index;
+ if (bs == 0)
+ ci += p->data[bs]->start;
- upload = TRUE;
+ e->inst[ei] &= ~e->param.mask;
+ e->inst[ei] |= (ci << e->param.shift);
}
- if (!upload)
- return;
+ if (p->data[0])
+ p->data_start[0] = p->data[0]->start;
#ifdef NV50_PROGRAM_DUMP
NOUVEAU_ERR("-------\n");
@@ -2402,8 +2843,8 @@ nv50_vertprog_validate(struct nv50_context *nv50)
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_LOW, 0, 0);
so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
- so_data (so, p->cfg.vp.attr[0]);
- so_data (so, p->cfg.vp.attr[1]);
+ so_data (so, p->cfg.attr[0]);
+ so_data (so, p->cfg.attr[1]);
so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, p->cfg.high_result);
so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2);
@@ -2421,7 +2862,6 @@ nv50_fragprog_validate(struct nv50_context *nv50)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program *p = nv50->fragprog;
struct nouveau_stateobj *so;
- unsigned i;
if (!p->translated) {
nv50_program_validate(nv50, p);
@@ -2438,29 +2878,186 @@ nv50_fragprog_validate(struct nv50_context *nv50)
NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
- so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
- so_data (so, 0x00000004);
- so_data (so, 0x00000000);
- so_data (so, 0x00000000);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map);
- for (i = 0; i < p->cfg.fp.high_map; i++)
- so_data(so, p->cfg.fp.map[i]);
- so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2);
- so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */
+ so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
so_data (so, p->cfg.high_result);
so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1);
- so_data (so, p->cfg.fp.regs[2]);
+ so_data (so, p->cfg.regs[2]);
so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
- so_data (so, p->cfg.fp.regs[3]);
+ so_data (so, p->cfg.regs[3]);
so_method(so, tesla, NV50TCL_FP_START_ID, 1);
so_data (so, 0); /* program start offset */
so_ref(so, &nv50->state.fragprog);
so_ref(NULL, &so);
}
+static void
+nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
+{
+ struct nv50_program *fp = nv50->fragprog;
+ struct nv50_program *vp = nv50->vertprog;
+ unsigned i, c, m = base;
+
+ /* XXX: This can't work correctly in all cases yet, we either
+ * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has
+ * to be per FP input instead of per VP output
+ */
+ memset(pntc, 0, 8 * sizeof(uint32_t));
+
+ for (i = 0; i < fp->cfg.io_nr; i++) {
+ uint8_t sn, si;
+ uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp;
+ unsigned n = popcnt4(fp->cfg.io[i].mask);
+
+ if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) {
+ m += n;
+ continue;
+ }
+
+ sn = vp->info.input_semantic_name[j];
+ si = vp->info.input_semantic_index[j];
+
+ if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) {
+ ubyte mode =
+ nv50->rasterizer->pipe.sprite_coord_mode[si];
+
+ if (mode == PIPE_SPRITE_COORD_NONE) {
+ m += n;
+ continue;
+ }
+ }
+
+ /* this is either PointCoord or replaced by sprite coords */
+ for (c = 0; c < 4; c++) {
+ if (!(fp->cfg.io[i].mask & (1 << c)))
+ continue;
+ pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
+ ++m;
+ }
+ }
+}
+
+static int
+nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4],
+ struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo)
+{
+ int c;
+ uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw;
+ uint8_t *map = (uint8_t *)p_map;
+
+ for (c = 0; c < 4; ++c) {
+ if (mf & 1) {
+ if (fpi->linear == TRUE)
+ lin[mid / 32] |= 1 << (mid % 32);
+ map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40);
+ }
+
+ oid += mv & 1;
+ mf >>= 1;
+ mv >>= 1;
+ }
+
+ return mid;
+}
+
+void
+nv50_linkage_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *vp = nv50->vertprog;
+ struct nv50_program *fp = nv50->fragprog;
+ struct nouveau_stateobj *so;
+ struct nv50_sreg4 dummy, *vpo;
+ int i, n, c, m = 0;
+ uint32_t map[16], lin[4], reg[5], pcrd[8];
+
+ memset(map, 0, sizeof(map));
+ memset(lin, 0, sizeof(lin));
+
+ reg[1] = 0x00000004; /* low and high clip distance map ids */
+ reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */
+ reg[3] = 0x00000000; /* point size map id & enable */
+ reg[0] = fp->cfg.regs[0]; /* colour semantic reg */
+ reg[4] = fp->cfg.regs[1]; /* interpolant info */
+
+ dummy.linear = FALSE;
+ dummy.mask = 0xf; /* map all components of HPOS */
+ m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]);
+
+ dummy.mask = 0x0;
+
+ if (vp->cfg.clpd < 0x40) {
+ for (c = 0; c < vp->cfg.clpd_nr; ++c)
+ map[m++] = vp->cfg.clpd + c;
+ reg[1] = (m << 8);
+ }
+
+ reg[0] |= m << 8; /* adjust BFC0 id */
+
+ /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
+ if (nv50->rasterizer->pipe.light_twoside) {
+ vpo = &vp->cfg.two_side[0];
+
+ m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]);
+ m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]);
+ }
+
+ reg[0] += m - 4; /* adjust FFC0 id */
+ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */
+
+ i = 0;
+ if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION)
+ i = 1;
+ for (; i < fp->cfg.io_nr; i++) {
+ ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp];
+ ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp];
+
+ n = fp->cfg.io[i].id_vp;
+ if (n >= vp->cfg.io_nr ||
+ vp->info.output_semantic_name[n] != sn ||
+ vp->info.output_semantic_index[n] != si)
+ vpo = &dummy;
+ else
+ vpo = &vp->cfg.io[n];
+
+ m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo);
+ }
+
+ if (nv50->rasterizer->pipe.point_size_per_vertex) {
+ map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8);
+ reg[3] = (m++ << 4) | 1;
+ }
+
+ /* now fill the stateobj */
+ so = so_new(64, 0);
+
+ n = (m + 3) / 4;
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
+ so_datap (so, map, n);
+
+ so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
+ so_datap (so, reg, 4);
+
+ so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
+ so_data (so, reg[4]);
+
+ so_method(so, tesla, 0x1540, 4);
+ so_datap (so, lin, 4);
+
+ if (nv50->rasterizer->pipe.point_sprite) {
+ nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff);
+
+ so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
+ so_datap (so, pcrd, 8);
+ }
+
+ so_ref(so, &nv50->state.programs);
+ so_ref(NULL, &so);
+}
+
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
@@ -2476,7 +3073,6 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
nouveau_bo_ref(NULL, &p->bo);
nouveau_resource_free(&p->data[0]);
- nouveau_resource_free(&p->data[1]);
p->translated = 0;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 096e0476aa..d78dee083f 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -15,6 +15,15 @@ struct nv50_program_exec {
} param;
};
+struct nv50_sreg4 {
+ uint8_t hw;
+ uint8_t id_vp;
+ uint8_t id_fp;
+
+ uint8_t mask;
+ boolean linear;
+};
+
struct nv50_program {
struct pipe_shader_state pipe;
struct tgsi_shader_info info;
@@ -24,8 +33,8 @@ struct nv50_program {
struct nv50_program_exec *exec_head;
struct nv50_program_exec *exec_tail;
unsigned exec_size;
- struct nouveau_resource *data[2];
- unsigned data_start[2];
+ struct nouveau_resource *data[1];
+ unsigned data_start[1];
struct nouveau_bo *bo;
@@ -36,14 +45,20 @@ struct nv50_program {
struct {
unsigned high_temp;
unsigned high_result;
- struct {
- unsigned attr[2];
- } vp;
- struct {
- unsigned regs[4];
- unsigned map[5];
- unsigned high_map;
- } fp;
+
+ uint32_t attr[2];
+ uint32_t regs[4];
+
+ /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */
+ unsigned io_nr;
+ struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS];
+
+ /* FP colour inputs, VP/GP back colour outputs */
+ struct nv50_sreg4 two_side[2];
+
+ /* VP only */
+ uint8_t clpd, clpd_nr;
+ uint8_t psiz;
} cfg;
};
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index c7f80a2203..3b08e1b89f 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -87,12 +87,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
return 1;
case PIPE_CAP_GLSL:
return 0;
- case PIPE_CAP_S3TC:
- return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
- return 0;
+ return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 8;
case PIPE_CAP_OCCLUSION_QUERY:
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 4283808ed9..81fa3e34c5 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -276,6 +276,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, 0x1684, 1);
so_data (so, cso->flatshade_first ? 0 : 1);
+ so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1);
+ so_data (so, cso->light_twoside);
+
so_method(so, tesla, NV50TCL_LINE_WIDTH, 1);
so_data (so, fui(cso->line_width));
so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1);
@@ -294,6 +297,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, NV50TCL_POINT_SIZE, 1);
so_data (so, fui(cso->point_size));
+ so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1);
+ so_data (so, cso->point_sprite);
+
so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3);
if (cso->front_winding == PIPE_WINDING_CCW) {
so_data(so, nvgl_polygon_mode(cso->fill_ccw));
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 344c2cf6dd..4ed76973c4 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -66,7 +66,8 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
break;
}
- so_data(so, bo->tile_mode << 4);
+ so_data(so, nv50_miptree(pt)->
+ level[fb->cbufs[i]->level].tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, 0x1224, 1);
@@ -110,7 +111,8 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
}
- so_data(so, bo->tile_mode << 4);
+ so_data(so, nv50_miptree(pt)->
+ level[fb->zsbuf->level].tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, 0x1538, 1);
@@ -187,6 +189,8 @@ nv50_state_emit(struct nv50_context *nv50)
so_emit(chan, nv50->state.vertprog);
if (nv50->state.dirty & NV50_NEW_FRAGPROG)
so_emit(chan, nv50->state.fragprog);
+ if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+ so_emit(chan, nv50->state.programs);
if (nv50->state.dirty & NV50_NEW_RASTERIZER)
so_emit(chan, nv50->state.rast);
if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
@@ -208,6 +212,12 @@ nv50_state_emit(struct nv50_context *nv50)
so_emit(chan, nv50->state.vtxattr);
}
nv50->state.dirty = 0;
+}
+
+void
+nv50_state_flush_notify(struct nouveau_channel *chan)
+{
+ struct nv50_context *nv50 = chan->user_private;
so_emit_reloc_markers(chan, nv50->state.fb);
so_emit_reloc_markers(chan, nv50->state.vertprog);
@@ -238,6 +248,9 @@ nv50_state_validate(struct nv50_context *nv50)
if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
nv50_fragprog_validate(nv50);
+ if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+ nv50_linkage_validate(nv50);
+
if (nv50->dirty & NV50_NEW_RASTERIZER)
so_ref(nv50->rasterizer->so, &nv50->state.rast);
@@ -299,7 +312,7 @@ scissor_uptodate:
goto viewport_uptodate;
nv50->state.viewport_bypass = bypass;
- so = so_new(12, 0);
+ so = so_new(14, 0);
if (!bypass) {
so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
so_data (so, fui(nv50->viewport.translate[0]));
@@ -312,12 +325,21 @@ scissor_uptodate:
so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
so_data (so, 1);
+ /* 0x0000 = remove whole primitive only (xyz)
+ * 0x1018 = remove whole primitive only (xy), clamp z
+ * 0x1080 = clip primitive (xyz)
+ * 0x1098 = clip primitive (xy), clamp z
+ */
+ so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+ so_data (so, 0x1080);
/* no idea what 0f90 does */
so_method(so, tesla, 0x0f90, 1);
so_data (so, 0);
} else {
so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
so_data (so, 0);
+ so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+ so_data (so, 0x0000);
so_method(so, tesla, 0x0f90, 1);
so_data (so, 1);
}
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index b266324f58..6bf6f773b0 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -60,13 +60,13 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
format = nv50_format(ps->format);
if (format < 0)
return 1;
-
+
if (!bo->tile_flags) {
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
BEGIN_RING(chan, eng2d, mthd + 0x14, 5);
- OUT_RING (chan, mt->level[0].pitch);
+ OUT_RING (chan, mt->level[ps->level].pitch);
OUT_RING (chan, ps->width);
OUT_RING (chan, ps->height);
OUT_RELOCh(chan, bo, ps->offset, flags);
@@ -75,7 +75,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, format);
OUT_RING (chan, 0);
- OUT_RING (chan, bo->tile_mode << 4);
+ OUT_RING (chan, mt->level[ps->level].tile_mode << 4);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, eng2d, mthd + 0x18, 4);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index e9c3562194..bb7731855c 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -89,14 +89,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
if (src_bo->tile_flags) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1);
- OUT_RING (chan, (sy << 16) | sx);
+ OUT_RING (chan, (sy << 16) | (sx * cpp));
} else {
src_offset += (line_count * src_pitch);
}
if (dst_bo->tile_flags) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1);
- OUT_RING (chan, (dy << 16) | dx);
+ OUT_RING (chan, (dy << 16) | (dx * cpp));
} else {
dst_offset += (line_count * dst_pitch);
}
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index d7a2c8c462..93c2152edc 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -9,6 +9,7 @@ C_SOURCES = \
r300_chipset.c \
r300_clear.c \
r300_context.c \
+ r300_debug.c \
r300_emit.c \
r300_flush.c \
r300_fs.c \
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index da67bc29b8..9cc455135d 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -22,6 +22,9 @@
#include "r300_context.h"
+#include "r300_flush.h"
+#include "r300_state_invariant.h"
+
static boolean r300_draw_range_elements(struct pipe_context* pipe,
struct pipe_buffer* indexBuffer,
unsigned indexSize,
@@ -146,6 +149,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.winsys = (struct pipe_winsys*)r300_winsys;
r300->context.screen = r300_screen(screen);
+ r300_init_debug(r300);
+
r300->context.destroy = r300_destroy_context;
r300->context.clear = r300_clear;
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index f78492d4aa..52b1c9a6b2 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -184,8 +184,15 @@ struct r300_texture {
/* Offsets into the buffer. */
unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
- /* Stride (pitch?) of this texture in bytes */
- unsigned stride;
+ /**
+ * If non-zero, override the natural texture layout with
+ * a custom stride (in bytes).
+ *
+ * \note Mipmapping fails for textures with a non-natural layout!
+ *
+ * \sa r300_texture_get_stride
+ */
+ unsigned stride_override;
/* Total size of this texture, in bytes. */
unsigned size;
@@ -211,10 +218,7 @@ struct r300_vertex_format {
int fs_tab[16];
};
-static struct pipe_viewport_state r300_viewport_identity = {
- .scale = {1.0, 1.0, 1.0, 1.0},
- .translate = {0.0, 0.0, 0.0, 0.0},
-};
+extern struct pipe_viewport_state r300_viewport_identity;
struct r300_context {
/* Parent class */
@@ -275,6 +279,9 @@ struct r300_context {
uint32_t dirty_state;
/* Flag indicating whether or not the HW is dirty. */
uint32_t dirty_hw;
+
+ /** Combination of DBG_xxx flags */
+ unsigned debug;
};
/* Convenience cast wrapper. */
@@ -288,4 +295,40 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300);
void r300_init_state_functions(struct r300_context* r300);
void r300_init_surface_functions(struct r300_context* r300);
+/* Debug functionality. */
+
+/**
+ * Debug flags to disable/enable certain groups of debugging outputs.
+ *
+ * \note These may be rather coarse, and the grouping may be impractical.
+ * If you find, while debugging the driver, that a different grouping
+ * of these flags would be beneficial, just feel free to change them
+ * but make sure to update the documentation in r300_debug.c to reflect
+ * those changes.
+ */
+/*@{*/
+#define DBG_HELP 0x0000001
+#define DBG_FP 0x0000002
+#define DBG_VP 0x0000004
+#define DBG_CS 0x0000008
+#define DBG_DRAW 0x0000010
+/*@}*/
+
+static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags)
+{
+ return (ctx->debug & flags) ? true : false;
+}
+
+static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...)
+{
+ if (DBG_ON(ctx, flags)) {
+ va_list va;
+ va_start(va, fmt);
+ debug_vprintf(fmt, va);
+ va_end(va);
+ }
+}
+
+void r300_init_debug(struct r300_context * ctx);
+
#endif /* R300_CONTEXT_H */
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 71b142c0db..883f0a02dc 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -49,7 +49,8 @@
(RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))
#define CS_LOCALS(context) \
- struct r300_winsys* cs_winsys = context->winsys; \
+ struct r300_context* const cs_context_copy = (context); \
+ struct r300_winsys* cs_winsys = cs_context_copy->winsys; \
int cs_count = 0;
#define CHECK_CS(size) \
@@ -58,7 +59,7 @@
#define BEGIN_CS(size) do { \
CHECK_CS(size); \
if (VERY_VERBOSE_CS) { \
- debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
+ DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
size, __FUNCTION__, __FILE__, __LINE__); \
} \
cs_winsys->begin_cs(cs_winsys, (size), \
@@ -67,47 +68,55 @@
} while (0)
#define OUT_CS(value) do { \
+ if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \
+ DBG(cs_context_copy, DBG_CS, "r300: writing %08x\n", value); \
+ } \
cs_winsys->write_cs_dword(cs_winsys, (value)); \
cs_count--; \
} while (0)
#define OUT_CS_32F(value) do { \
+ if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \
+ DBG(cs_context_copy, DBG_CS, "r300: writing %f\n", value); \
+ } \
cs_winsys->write_cs_dword(cs_winsys, fui(value)); \
cs_count--; \
} while (0)
#define OUT_CS_REG(register, value) do { \
if (VERY_VERBOSE_REGISTERS) \
- debug_printf("r300: writing 0x%08X to register 0x%04X\n", \
+ DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \
value, register); \
assert(register); \
- OUT_CS(CP_PACKET0(register, 0)); \
- OUT_CS(value); \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \
+ cs_winsys->write_cs_dword(cs_winsys, value); \
+ cs_count -= 2; \
} while (0)
/* Note: This expects count to be the number of registers,
* not the actual packet0 count! */
#define OUT_CS_REG_SEQ(register, count) do { \
if (VERY_VERBOSE_REGISTERS) \
- debug_printf("r300: writing register sequence of %d to 0x%04X\n", \
+ DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \
count, register); \
assert(register); \
- OUT_CS(CP_PACKET0(register, ((count) - 1))); \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \
+ cs_count--; \
} while (0)
#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
- debug_printf("r300: writing relocation for buffer %p, offset %d, " \
+ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \
"domains (%d, %d, %d)\n", \
bo, offset, rd, wd, flags); \
assert(bo); \
- OUT_CS(offset); \
+ cs_winsys->write_cs_dword(cs_winsys, offset); \
cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
- cs_count -= 2; \
+ cs_count -= 3; \
} while (0)
#define END_CS do { \
if (VERY_VERBOSE_CS) { \
- debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \
+ DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \
__FILE__, __LINE__); \
} \
if (cs_count != 0) \
@@ -117,7 +126,7 @@
#define FLUSH_CS do { \
if (VERY_VERBOSE_CS) { \
- debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \
+ DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \
__FILE__, __LINE__); \
} \
cs_winsys->flush_cs(cs_winsys); \
@@ -127,27 +136,29 @@
#define OUT_CS_ONE_REG(register, count) do { \
if (VERY_VERBOSE_REGISTERS) \
- debug_printf("r300: writing data sequence of %d to 0x%04X\n", \
+ DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \
count, register); \
assert(register); \
- OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \
+ cs_count--; \
} while (0)
#define CP_PACKET3(op, count) \
(RADEON_CP_PACKET3 | (op) | ((count) << 16))
#define OUT_CS_PKT3(op, count) do { \
- OUT_CS(CP_PACKET3(op, count)); \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \
+ cs_count--; \
} while (0)
#define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \
- debug_printf("r300: writing relocation for index buffer %p," \
+ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \
"offset %d\n", bo, offset); \
assert(bo); \
- OUT_CS(offset); \
- OUT_CS(count); \
+ cs_winsys->write_cs_dword(cs_winsys, offset); \
+ cs_winsys->write_cs_dword(cs_winsys, count); \
cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
- cs_count -= 2; \
+ cs_count -= 4; \
} while (0)
#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
new file mode 100644
index 0000000000..15308dda1d
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009 Nicolai Haehnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_context.h"
+
+#include <ctype.h>
+
+
+struct debug_option {
+ const char * name;
+ unsigned flag;
+ const char * description;
+};
+
+static struct debug_option debug_options[] = {
+ { "help", DBG_HELP, "Helpful meta-information about the driver" },
+ { "fp", DBG_FP, "Fragment program handling" },
+ { "vp", DBG_VP, "Vertex program handling" },
+ { "cs", DBG_CS, "Command submissions" },
+ { "draw", DBG_DRAW, "Draw and emit" },
+
+ { "all", ~0, "Convenience option that enables all debug flags" },
+
+ /* must be last */
+ { 0, 0, 0 }
+};
+
+void r300_init_debug(struct r300_context * ctx)
+{
+ const char * options = debug_get_option("RADEON_DEBUG", 0);
+ boolean printhint = false;
+
+ if (options) {
+ while(*options) {
+ if (*options == ' ' || *options == ',') {
+ options++;
+ continue;
+ }
+
+ size_t length = strcspn(options, " ,");
+ struct debug_option * opt;
+
+ for(opt = debug_options; opt->name; ++opt) {
+ if (!strncmp(options, opt->name, length)) {
+ ctx->debug |= opt->flag;
+ break;
+ }
+ }
+
+ if (!opt->name) {
+ debug_printf("Unknown debug option: %s\n", options);
+ printhint = true;
+ }
+
+ options += length;
+ }
+
+ if (!ctx->debug)
+ printhint = true;
+ }
+
+ if (printhint || ctx->debug & DBG_HELP) {
+ debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n"
+ "to a comma-separated list of debug options. Available options are:\n");
+ for(struct debug_option * opt = debug_options; opt->name; ++opt) {
+ debug_printf(" %s: %s\n", opt->name, opt->description);
+ }
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index bd4d59e6f1..77ce431cdc 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -25,6 +25,7 @@
#include "r300_emit.h"
#include "r300_fs.h"
+#include "r300_state_derived.h"
#include "r300_vs.h"
void r300_emit_blend_state(struct r300_context* r300,
@@ -282,7 +283,7 @@ void r300_emit_fb_state(struct r300_context* r300,
for (i = 0; i < fb->nr_cbufs; i++) {
tex = (struct r300_texture*)fb->cbufs[i]->texture;
assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- pixpitch = tex->stride / tex->tex.block.size;
+ pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
@@ -299,7 +300,7 @@ void r300_emit_fb_state(struct r300_context* r300,
if (fb->zsbuf) {
tex = (struct r300_texture*)fb->zsbuf->texture;
assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- pixpitch = tex->stride / tex->tex.block.size;
+ pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
@@ -380,6 +381,7 @@ void r300_emit_query_end(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0),
0, RADEON_GEM_DOMAIN_GTT, 0);
+ break;
default:
debug_printf("r300: Implementation error: Chipset reports %d"
" pixel pipes!\n", caps->num_frag_pipes);
@@ -490,7 +492,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300)
{
CS_LOCALS(r300);
- debug_printf("r300: Preparing vertex buffer %p for render, "
+ DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
"vertex size %d\n", r300->vbo,
r300->vertex_info.vinfo.size);
/* Set the pointer to our vertex buffer. The emitted values are this:
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 350691d592..c4002b8e5d 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -56,6 +56,11 @@ void r500_emit_fragment_program_code(struct r300_context* r300,
void r300_emit_fb_state(struct r300_context* r300,
struct pipe_framebuffer_state* fb);
+void r300_emit_query_begin(struct r300_context* r300,
+ struct r300_query* query);
+void r300_emit_query_end(struct r300_context* r300,
+ struct r300_query* query);
+
void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs);
void r300_emit_rs_block_state(struct r300_context* r300,
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 36463b9a2e..a0e848a59a 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -96,7 +96,7 @@ void r300_translate_fragment_shader(struct r300_context* r300,
memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base);
- compiler.Base.Debug = 1;
+ compiler.Base.Debug = DBG_ON(r300, DBG_FP);
compiler.code = &fs->code;
compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 1d5185b417..2880d34877 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -22,6 +22,8 @@
#include "r300_query.h"
+#include "r300_emit.h"
+
static struct pipe_query* r300_create_query(struct pipe_context* pipe,
unsigned query_type)
{
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 03cd219cde..3abff5db62 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1478,6 +1478,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_PITCH_EN (1 << 31)
# define R300_TX_WIDTH(x) ((x) << 0)
# define R300_TX_HEIGHT(x) ((x) << 11)
+# define R300_TX_DEPTH(x) ((x) << 22)
# define R300_TX_NUM_LEVELS(x) ((x) << 26)
#define R300_TX_FORMAT1_0 0x44C0
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index cd458d019a..16f6404012 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -26,15 +26,17 @@
#include "r300_cs.h"
#include "r300_context.h"
+#include "r300_emit.h"
#include "r300_reg.h"
#include "r300_state_derived.h"
/* r300_render: Vertex and index buffer primitive emission. */
+#define R300_MAX_VBO_SIZE (1024 * 1024)
struct r300_render {
/* Parent class */
struct vbuf_render base;
-
+
/* Pipe context */
struct r300_context* r300;
@@ -45,7 +47,10 @@ struct r300_render {
/* VBO */
struct pipe_buffer* vbo;
- size_t vbo_alloc_size;
+ size_t vbo_size;
+ size_t vbo_offset;
+ size_t vbo_max_used;
+ void * vbo_ptr;
};
static INLINE struct r300_render*
@@ -74,19 +79,19 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
struct pipe_screen* screen = r300->context.screen;
size_t size = (size_t)vertex_size * (size_t)count;
- if (r300render->vbo && (size > r300render->vbo_alloc_size)) {
- pipe_buffer_reference(&r300render->vbo, NULL);
- }
-
- if (!r300render->vbo) {
+ if (size + r300render->vbo_offset > r300render->vbo_size)
+ {
+ pipe_buffer_reference(&r300->vbo, NULL);
r300render->vbo = pipe_buffer_create(screen,
64,
PIPE_BUFFER_USAGE_VERTEX,
- size);
+ R300_MAX_VBO_SIZE);
+ r300render->vbo_size = R300_MAX_VBO_SIZE;
}
- r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size);
r300render->vertex_size = vertex_size;
+ r300->vbo = r300render->vbo;
+ r300->vbo_offset = r300render->vbo_offset;
return (r300render->vbo) ? TRUE : FALSE;
}
@@ -96,8 +101,10 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
struct r300_render* r300render = r300_render(render);
struct pipe_screen* screen = r300render->r300->context.screen;
- return (unsigned char*)pipe_buffer_map(screen, r300render->vbo,
- PIPE_BUFFER_USAGE_CPU_WRITE);
+ r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+
+ return (r300render->vbo_ptr + r300render->vbo_offset);
}
static void r300_render_unmap_vertices(struct vbuf_render* render,
@@ -106,15 +113,23 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
{
struct r300_render* r300render = r300_render(render);
struct pipe_screen* screen = r300render->r300->context.screen;
+ CS_LOCALS(r300render->r300);
+ BEGIN_CS(2);
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max);
+ END_CS;
+ r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
+ r300render->vertex_size * (max + 1));
pipe_buffer_unmap(screen, r300render->vbo);
}
static void r300_render_release_vertices(struct vbuf_render* render)
{
struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
- pipe_buffer_reference(&r300render->vbo, NULL);
+ r300render->vbo_offset += r300render->vbo_max_used;
+ r300render->vbo_max_used = 0;
}
static boolean r300_render_set_primitive(struct vbuf_render* render,
@@ -162,14 +177,12 @@ static boolean r300_render_set_primitive(struct vbuf_render* render,
return TRUE;
}
-static void prepare_render(struct r300_render* render, unsigned count)
+static void r300_prepare_render(struct r300_render* render, unsigned count)
{
struct r300_context* r300 = render->r300;
CS_LOCALS(r300);
- r300->vbo = render->vbo;
-
r300_emit_dirty_state(r300);
}
@@ -182,9 +195,9 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
CS_LOCALS(r300);
- prepare_render(r300render, count);
+ r300_prepare_render(r300render, count);
- debug_printf("r300: Doing vbuf render, count %d\n", count);
+ DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
BEGIN_CS(2);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
@@ -207,7 +220,7 @@ static void r300_render_draw(struct vbuf_render* render,
CS_LOCALS(r300);
- prepare_render(r300render, count);
+ r300_prepare_render(r300render, count);
/* Send our indices into an index buffer. */
index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX,
@@ -216,23 +229,6 @@ static void r300_render_draw(struct vbuf_render* render,
return;
}
-/*
- index_map = pipe_buffer_map(screen, index_buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- memcpy(index_map, indices, count);
- pipe_buffer_unmap(screen, index_buffer);
-
- debug_printf("r300: Doing indexbuf render, count %d\n", count);
-
- BEGIN_CS(8);
- OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
- OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
- r300render->hwprim);
- OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
- OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2));
- OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_CS; */
-
BEGIN_CS(2 + (count+1)/2);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
@@ -271,6 +267,10 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
r300render->base.release_vertices = r300_render_release_vertices;
r300render->base.destroy = r300_render_destroy;
+ r300render->vbo = NULL;
+ r300render->vbo_size = 0;
+ r300render->vbo_offset = 0;
+
return &r300render->base;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 15740f6125..8296d56840 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -93,8 +93,6 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
} else {
return 0;
}
- case PIPE_CAP_S3TC:
- return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
@@ -103,11 +101,9 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
case PIPE_CAP_MAX_RENDER_TARGETS:
return 4;
case PIPE_CAP_OCCLUSION_QUERY:
- /* IN THEORY */
- return 0;
+ return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
- /* IN THEORY */
- return 0;
+ return 1;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
if (r300screen->caps->is_r500) {
/* 13 == 4096x4096 */
@@ -323,13 +319,14 @@ r300_get_tex_transfer(struct pipe_screen *screen,
if (trans) {
pipe_texture_reference(&trans->transfer.texture, texture);
trans->transfer.format = texture->format;
+ trans->transfer.x = x;
+ trans->transfer.y = y;
trans->transfer.width = w;
trans->transfer.height = h;
trans->transfer.block = texture->block;
trans->transfer.nblocksx = texture->nblocksx[level];
trans->transfer.nblocksy = texture->nblocksy[level];
- trans->transfer.stride = align(pf_get_stride(&trans->transfer.block,
- texture->width[level]), 32);
+ trans->transfer.stride = r300_texture_get_stride(tex, level);
trans->transfer.usage = usage;
trans->offset = offset;
}
@@ -356,7 +353,7 @@ static void* r300_transfer_map(struct pipe_screen* screen,
if (transfer->usage != PIPE_TRANSFER_READ) {
flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
}
-
+
map = pipe_buffer_map(screen, tex->buffer, flags);
if (!map) {
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index c16cadd040..88cb9af6fb 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -20,10 +20,11 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_pack_color.h"
-#include "util/u_debug.h"
+#include "tgsi/tgsi_parse.h"
#include "pipe/p_config.h"
#include "pipe/internal/p_winsys_screen.h"
@@ -429,6 +430,9 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
r300->rs_state = rs;
r300->dirty_state |= R300_NEW_RASTERIZER;
+ r300->dirty_state |= R300_NEW_RS_BLOCK;
+ r300->dirty_state |= R300_NEW_SCISSOR;
+ r300->dirty_state |= R300_NEW_VIEWPORT;
}
/* Free rasterizer state. */
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index c01e61a9b1..02b7ab9107 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -23,6 +23,7 @@
#include "r300_state_derived.h"
#include "r300_fs.h"
+#include "r300_state_inlines.h"
#include "r300_vs.h"
/* r300_state_derived: Various bits of state which are dependent upon
@@ -52,7 +53,7 @@ static void r300_vs_tab_routes(struct r300_context* r300,
if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte)
{
for (i = 0; i < info->num_inputs; i++) {
- switch (info->input_semantic_name[i]) {
+ switch (r300->vs->code.inputs[i]) {
case TGSI_SEMANTIC_POSITION:
pos = TRUE;
tab[i] = 0;
@@ -62,10 +63,12 @@ static void r300_vs_tab_routes(struct r300_context* r300,
cols++;
break;
case TGSI_SEMANTIC_PSIZE:
+ assert(psize == FALSE);
psize = TRUE;
tab[i] = 15;
break;
case TGSI_SEMANTIC_FOG:
+ assert(fog == FALSE);
fog = TRUE;
/* Fall through */
case TGSI_SEMANTIC_GENERIC:
@@ -124,7 +127,9 @@ static void r300_vs_tab_routes(struct r300_context* r300,
vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
- if (!pos) {
+ /* We need to add vertex position attribute only for SW TCL case,
+ * for HW TCL case it could be generated by vertex shader */
+ if (!pos && !r300screen->caps->has_tcl) {
debug_printf("r300: Forcing vertex position attribute emit...\n");
/* Make room for the position attribute
* at the beginning of the tab. */
@@ -163,7 +168,7 @@ static void r300_vs_tab_routes(struct r300_context* r300,
vinfo->hwfmt[3] |= (4 << (3 * i));
}
- for (i; i < texs; i++) {
+ for (; i < texs; i++) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
@@ -195,13 +200,13 @@ static void r300_vertex_psc(struct r300_context* r300,
* and not on attrib information. */
if (r300screen->caps->has_tcl) {
attrib_count = r300->vs->info.num_inputs;
- debug_printf("r300: routing %d attribs in psc for vs\n",
+ DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n",
attrib_count);
} else {
attrib_count = vinfo->num_attribs;
- debug_printf("r300: attrib count: %d\n", attrib_count);
+ DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
for (i = 0; i < attrib_count; i++) {
- debug_printf("r300: attrib: offset %d, interp %d, size %d,"
+ DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
" tab %d\n", vinfo->attrib[i].src_index,
vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
tab[i]);
@@ -299,18 +304,18 @@ static void r300_update_fs_tab(struct r300_context* r300)
}
/* Now that we know where everything is... */
- debug_printf("r300: fp input count: %d\n", info->num_inputs);
+ DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs);
for (i = 0; i < info->num_inputs; i++) {
switch (tab[i]) {
case INTERP_LINEAR:
- debug_printf("r300: attrib: "
+ DBG(r300, DBG_DRAW, "r300: attrib: "
"stack offset %d, color, tab %d\n",
i, cols_emitted);
tab[i] = cols_emitted;
cols_emitted++;
break;
case INTERP_PERSPECTIVE:
- debug_printf("r300: attrib: "
+ DBG(r300, DBG_DRAW, "r300: attrib: "
"stack offset %d, texcoord, tab %d\n",
i, cols + texs);
tab[i] = cols + texs;
@@ -333,48 +338,37 @@ static void r300_update_rs_block(struct r300_context* r300)
struct r300_rs_block* rs = r300->rs_block;
struct tgsi_shader_info* info = &r300->fs->info;
int* tab = r300->vertex_info.fs_tab;
- int col_count = 0, fp_offset = 0, i, memory_pos, tex_count = 0;
-
+ int col_count = 0, fp_offset = 0, i, tex_count = 0;
+ int rs_tex_comp = 0;
memset(rs, 0, sizeof(struct r300_rs_block));
if (r300_screen(r300->context.screen)->caps->is_r500) {
for (i = 0; i < info->num_inputs; i++) {
assert(tab[i] != -1);
- memory_pos = tab[i] * 4;
switch (info->input_semantic_name[i]) {
case TGSI_SEMANTIC_COLOR:
rs->ip[col_count] |=
- R500_RS_COL_PTR(memory_pos) |
+ R500_RS_COL_PTR(col_count) |
R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
col_count++;
break;
case TGSI_SEMANTIC_GENERIC:
rs->ip[tex_count] |=
- R500_RS_SEL_S(memory_pos) |
- R500_RS_SEL_T(memory_pos + 1) |
- R500_RS_SEL_R(memory_pos + 2) |
- R500_RS_SEL_Q(memory_pos + 3);
+ R500_RS_SEL_S(rs_tex_comp) |
+ R500_RS_SEL_T(rs_tex_comp + 1) |
+ R500_RS_SEL_R(rs_tex_comp + 2) |
+ R500_RS_SEL_Q(rs_tex_comp + 3);
tex_count++;
+ rs_tex_comp += 4;
break;
default:
break;
}
}
- if (col_count == 0) {
- rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
- }
-
- if (tex_count == 0) {
- rs->ip[0] |=
- R500_RS_SEL_S(R500_RS_IP_PTR_K0) |
- R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
- R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
- R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
- }
-
/* Rasterize at least one color, or bad things happen. */
if ((col_count == 0) && (tex_count == 0)) {
+ rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
col_count++;
}
@@ -392,22 +386,22 @@ static void r300_update_rs_block(struct r300_context* r300)
} else {
for (i = 0; i < info->num_inputs; i++) {
assert(tab[i] != -1);
- memory_pos = tab[i] * 4;
switch (info->input_semantic_name[i]) {
case TGSI_SEMANTIC_COLOR:
rs->ip[col_count] |=
- R300_RS_COL_PTR(memory_pos) |
+ R300_RS_COL_PTR(col_count) |
R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
col_count++;
break;
case TGSI_SEMANTIC_GENERIC:
rs->ip[tex_count] |=
- R300_RS_TEX_PTR(memory_pos) |
+ R300_RS_TEX_PTR(rs_tex_comp) |
R300_RS_SEL_S(R300_RS_SEL_C0) |
R300_RS_SEL_T(R300_RS_SEL_C1) |
R300_RS_SEL_R(R300_RS_SEL_C2) |
R300_RS_SEL_Q(R300_RS_SEL_C3);
tex_count++;
+ rs_tex_comp+=4;
break;
default:
break;
@@ -444,7 +438,7 @@ static void r300_update_rs_block(struct r300_context* r300)
}
}
- rs->count = (tex_count * 4) | (col_count << R300_IC_COUNT_SHIFT) |
+ rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) |
R300_HIRES_EN;
rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0);
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
index 63ae8eb8d0..71a4a47b00 100644
--- a/src/gallium/drivers/r300/r300_state_derived.h
+++ b/src/gallium/drivers/r300/r300_state_derived.h
@@ -23,11 +23,7 @@
#ifndef R300_STATE_DERIVED_H
#define R300_STATE_DERIVED_H
-#include "draw/draw_vertex.h"
-
-#include "r300_context.h"
-#include "r300_reg.h"
-#include "r300_state_inlines.h"
+struct r300_context;
void r300_update_derived_state(struct r300_context* r300);
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 7d822fec48..3865730d63 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -23,6 +23,12 @@
#include "r300_state_invariant.h"
+
+struct pipe_viewport_state r300_viewport_identity = {
+ .scale = {1.0, 1.0, 1.0, 1.0},
+ .translate = {0.0, 0.0, 0.0, 0.0},
+};
+
/* Calculate and emit invariant state. This is data that the 3D engine
* will probably want at the beginning of every CS, but it's not currently
* handled by any CSO setup, and in addition it doesn't really change much.
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index 96e6e4a77d..cc6288cb51 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -29,7 +29,7 @@ static void r300_surface_setup(struct r300_context* r300,
unsigned w, unsigned h)
{
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
- unsigned pixpitch = dest->stride / dest->tex.block.size;
+ unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size;
CS_LOCALS(r300);
r300_emit_blend_state(r300, &blend_clear_state);
@@ -100,7 +100,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
struct r300_texture* tex = (struct r300_texture*)dest->texture;
- unsigned pixpitch = tex->stride / tex->tex.block.size;
+ unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
boolean invalid = FALSE;
CS_LOCALS(r300);
@@ -233,7 +233,7 @@ static void r300_surface_copy(struct pipe_context* pipe,
struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
struct r300_texture* srctex = (struct r300_texture*)src->texture;
struct r300_texture* desttex = (struct r300_texture*)dest->texture;
- unsigned pixpitch = srctex->stride / srctex->tex.block.size;
+ unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size;
boolean invalid = FALSE;
float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty;
CS_LOCALS(r300);
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 590052509c..ce60ded7ca 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -22,41 +22,62 @@
#include "r300_texture.h"
-static void r300_setup_texture_state(struct r300_texture* tex,
- unsigned width,
- unsigned height,
- unsigned pitch,
- unsigned levels)
+static void r300_setup_texture_state(struct r300_texture* tex)
{
struct r300_texture_state* state = &tex->state;
+ struct pipe_texture *pt = &tex->tex;
- state->format0 = R300_TX_WIDTH((width - 1) & 0x7ff) |
- R300_TX_HEIGHT((height - 1) & 0x7ff) |
- R300_TX_NUM_LEVELS(levels) |
+ state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
+ R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) |
+ R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
+ R300_TX_NUM_LEVELS(pt->last_level) |
R300_TX_PITCH_EN;
/* XXX */
- state->format1 = r300_translate_texformat(tex->tex.format);
+ state->format1 = r300_translate_texformat(pt->format);
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ state->format1 |= R300_TX_FORMAT_CUBIC_MAP;
+ }
+ if (pt->target == PIPE_TEXTURE_3D) {
+ state->format1 |= R300_TX_FORMAT_3D;
+ }
- state->format2 = pitch - 1;
+ state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1;
/* Assume (somewhat foolishly) that oversized textures will
* not be permitted by the state tracker. */
- if (width > 2048) {
+ if (pt->width[0] > 2048) {
state->format2 |= R500_TXWIDTH_BIT11;
}
- if (height > 2048) {
+ if (pt->height[0] > 2048) {
state->format2 |= R500_TXHEIGHT_BIT11;
}
- debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n",
- width, height, pitch, levels);
+ debug_printf("r300: Set texture state (%dx%d, %d levels)\n",
+ pt->width[0], pt->height[0], pt->last_level);
+}
+
+/**
+ * Return the stride, in bytes, of the texture images of the given texture
+ * at the given level.
+ */
+unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
+{
+ if (tex->stride_override)
+ return tex->stride_override;
+
+ if (level > tex->tex.last_level) {
+ debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level);
+ return 0;
+ }
+
+ return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32);
}
static void r300_setup_miptree(struct r300_texture* tex)
{
struct pipe_texture* base = &tex->tex;
- int stride, size, offset;
+ int stride, size;
int i;
for (i = 0; i <= base->last_level; i++) {
@@ -74,7 +95,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
* XXX
* POT, uncompressed, unmippmapped textures can be aligned to 32,
* instead of 64. */
- stride = align(pf_get_stride(&base->block, base->width[i]), 32);
+ stride = r300_texture_get_stride(tex, i);
size = stride * base->nblocksy[i] * base->depth[i];
tex->offset[i] = align(tex->size, 32);
@@ -84,10 +105,6 @@ static void r300_setup_miptree(struct r300_texture* tex)
"(%dx%dx%d px, pitch %d bytes)\n",
i, base->width[i], base->height[i], base->depth[i],
stride);
- /* Save stride of first level to the texture. */
- if (i == 0) {
- tex->stride = stride;
- }
}
}
@@ -108,8 +125,7 @@ static struct pipe_texture*
r300_setup_miptree(tex);
- r300_setup_texture_state(tex, template->width[0], template->height[0],
- template->width[0], template->last_level);
+ r300_setup_texture_state(tex);
tex->buffer = screen->buffer_create(screen, 1024,
PIPE_BUFFER_USAGE_PIXEL,
@@ -189,11 +205,10 @@ static struct pipe_texture*
pipe_reference_init(&tex->tex.reference, 1);
tex->tex.screen = screen;
- tex->stride = *stride;
+ tex->stride_override = *stride;
/* XXX */
- r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0],
- tex->stride, 0);
+ r300_setup_texture_state(tex);
pipe_buffer_reference(&tex->buffer, buffer);
@@ -221,7 +236,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture,
pipe_buffer_reference(buffer, tex->buffer);
if (stride) {
- *stride = tex->stride;
+ *stride = r300_texture_get_stride(tex, 0);
}
return TRUE;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 3b56f0307c..78ee0f1611 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -30,8 +30,12 @@
#include "r300_context.h"
#include "r300_reg.h"
+struct r300_texture;
+
void r300_init_screen_texture_functions(struct pipe_screen* screen);
+unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level);
+
/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */
static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
{
@@ -68,6 +72,11 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
/* W24_FP */
case PIPE_FORMAT_Z24S8_UNORM:
return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP);
+ /* Z5_Y6_X5 */
+ case PIPE_FORMAT_R16_SNORM:
+ return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5);
+ case PIPE_FORMAT_Z16_UNORM:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X16);
default:
debug_printf("r300: Implementation error: "
"Got unsupported texture format %s in %s\n",
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index d68a104106..0913ca1bd5 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -158,7 +158,6 @@ static unsigned translate_saturate(unsigned saturate)
switch(saturate) {
case TGSI_SAT_NONE: return SATURATE_OFF;
case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE;
- case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE;
}
fprintf(stderr, "Unknown saturate mode: %i\n", saturate);
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 2cb903bba2..12a6e37be6 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -116,7 +116,7 @@ void r300_translate_vertex_shader(struct r300_context* r300,
/* Setup the compiler */
rc_init(&compiler.Base);
- compiler.Base.Debug = 1;
+ compiler.Base.Debug = DBG_ON(r300, DBG_VP);
compiler.code = &vs->code;
compiler.UserData = vs;
diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
index 516e3992fd..bcb887a0b2 100644
--- a/src/gallium/drivers/softpipe/Makefile
+++ b/src/gallium/drivers/softpipe/Makefile
@@ -6,26 +6,17 @@ LIBNAME = softpipe
C_SOURCES = \
sp_fs_exec.c \
sp_fs_sse.c \
- sp_fs_llvm.c \
sp_clear.c \
sp_flush.c \
sp_query.c \
sp_context.c \
sp_draw_arrays.c \
- sp_prim_setup.c \
sp_prim_vbuf.c \
sp_quad_pipe.c \
- sp_quad_alpha_test.c \
- sp_quad_blend.c \
- sp_quad_colormask.c \
- sp_quad_coverage.c \
+ sp_quad_stipple.c \
sp_quad_depth_test.c \
- sp_quad_earlyz.c \
sp_quad_fs.c \
- sp_quad_occlusion.c \
- sp_quad_output.c \
- sp_quad_stencil.c \
- sp_quad_stipple.c \
+ sp_quad_blend.c \
sp_screen.c \
sp_setup.c \
sp_state_blend.c \
@@ -38,7 +29,9 @@ C_SOURCES = \
sp_state_vertex.c \
sp_texture.c \
sp_tex_sample.c \
+ sp_tex_tile_cache.c \
sp_tile_cache.c \
- sp_surface.c
+ sp_surface.c \
+ sp_video_context.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
index f8720638a7..aac9edf44e 100644
--- a/src/gallium/drivers/softpipe/SConscript
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -7,25 +7,16 @@ softpipe = env.ConvenienceLibrary(
source = [
'sp_fs_exec.c',
'sp_fs_sse.c',
- 'sp_fs_llvm.c',
'sp_clear.c',
'sp_context.c',
'sp_draw_arrays.c',
'sp_flush.c',
- 'sp_prim_setup.c',
'sp_prim_vbuf.c',
'sp_setup.c',
- 'sp_quad_alpha_test.c',
'sp_quad_blend.c',
'sp_quad_pipe.c',
- 'sp_quad_colormask.c',
- 'sp_quad_coverage.c',
'sp_quad_depth_test.c',
- 'sp_quad_earlyz.c',
'sp_quad_fs.c',
- 'sp_quad_occlusion.c',
- 'sp_quad_output.c',
- 'sp_quad_stencil.c',
'sp_quad_stipple.c',
'sp_query.c',
'sp_screen.c',
@@ -39,8 +30,10 @@ softpipe = env.ConvenienceLibrary(
'sp_state_vertex.c',
'sp_surface.c',
'sp_tex_sample.c',
+ 'sp_tex_tile_cache.c',
'sp_texture.c',
'sp_tile_cache.c',
+ 'sp_video_context.c',
])
-Export('softpipe') \ No newline at end of file
+Export('softpipe')
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index d3af18e162..8fac8e6e05 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -36,8 +36,6 @@
#include "util/u_pack_color.h"
#include "sp_clear.h"
#include "sp_context.h"
-#include "sp_surface.h"
-#include "sp_state.h"
#include "sp_tile_cache.h"
diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h
index 2e450672f5..9be3b86fe9 100644
--- a/src/gallium/drivers/softpipe/sp_clear.h
+++ b/src/gallium/drivers/softpipe/sp_clear.h
@@ -32,7 +32,6 @@
#ifndef SP_CLEAR_H
#define SP_CLEAR_H
-#include "pipe/p_state.h"
struct pipe_context;
extern void
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 86df320ea8..94d000a5ac 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -31,17 +31,18 @@
*/
#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "sp_clear.h"
#include "sp_context.h"
#include "sp_flush.h"
-#include "sp_prim_setup.h"
#include "sp_prim_vbuf.h"
#include "sp_state.h"
#include "sp_surface.h"
#include "sp_tile_cache.h"
+#include "sp_tex_tile_cache.h"
#include "sp_texture.h"
#include "sp_winsys.h"
#include "sp_query.h"
@@ -72,18 +73,16 @@ softpipe_unmap_transfers(struct softpipe_context *sp)
{
uint i;
- for (i = 0; i < sp->framebuffer.nr_cbufs; i++)
- sp_flush_tile_cache(sp, sp->cbuf_cache[i]);
- sp_flush_tile_cache(sp, sp->zsbuf_cache);
-
for (i = 0; i < sp->framebuffer.nr_cbufs; i++) {
sp_tile_cache_unmap_transfers(sp->cbuf_cache[i]);
}
+
sp_tile_cache_unmap_transfers(sp->zsbuf_cache);
}
-static void softpipe_destroy( struct pipe_context *pipe )
+static void
+softpipe_destroy( struct pipe_context *pipe )
{
struct softpipe_context *softpipe = softpipe_context( pipe );
uint i;
@@ -91,26 +90,21 @@ static void softpipe_destroy( struct pipe_context *pipe )
if (softpipe->draw)
draw_destroy( softpipe->draw );
- for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
- softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple );
- softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz );
- softpipe->quad[i].shade->destroy( softpipe->quad[i].shade );
- softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test );
- softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test );
- softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test );
- softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion );
- softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage );
- softpipe->quad[i].blend->destroy( softpipe->quad[i].blend );
- softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask );
- softpipe->quad[i].output->destroy( softpipe->quad[i].output );
- }
+ softpipe->quad.shade->destroy( softpipe->quad.shade );
+ softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
+ softpipe->quad.blend->destroy( softpipe->quad.blend );
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
+ pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL);
+ }
sp_destroy_tile_cache(softpipe->zsbuf_cache);
+ pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL);
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
- sp_destroy_tile_cache(softpipe->tex_cache[i]);
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ sp_destroy_tex_tile_cache(softpipe->tex_cache[i]);
+ pipe_texture_reference(&softpipe->texture[i], NULL);
+ }
for (i = 0; i < Elements(softpipe->constants); i++) {
if (softpipe->constants[i].buffer) {
@@ -121,6 +115,15 @@ static void softpipe_destroy( struct pipe_context *pipe )
FREE( softpipe );
}
+
+/**
+ * if (the texture is being used as a framebuffer surface)
+ * return PIPE_REFERENCED_FOR_WRITE
+ * else if (the texture is a bound texture source)
+ * return PIPE_REFERENCED_FOR_READ XXX not done yet
+ * else
+ * return PIPE_UNREFERENCED
+ */
static unsigned int
softpipe_is_texture_referenced( struct pipe_context *pipe,
struct pipe_texture *texture,
@@ -129,15 +132,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
struct softpipe_context *softpipe = softpipe_context( pipe );
unsigned i;
- if(softpipe->dirty_render_cache) {
+ if (softpipe->dirty_render_cache) {
for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
- if(softpipe->framebuffer.cbufs[i] &&
- softpipe->framebuffer.cbufs[i]->texture == texture)
+ if (softpipe->framebuffer.cbufs[i] &&
+ softpipe->framebuffer.cbufs[i]->texture == texture) {
return PIPE_REFERENCED_FOR_WRITE;
+ }
}
- if(softpipe->framebuffer.zsbuf &&
- softpipe->framebuffer.zsbuf->texture == texture)
+ if (softpipe->framebuffer.zsbuf &&
+ softpipe->framebuffer.zsbuf->texture == texture) {
return PIPE_REFERENCED_FOR_WRITE;
+ }
}
/* FIXME: we also need to do the same for the texture cache */
@@ -145,6 +150,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
return PIPE_UNREFERENCED;
}
+
static unsigned int
softpipe_is_buffer_referenced( struct pipe_context *pipe,
struct pipe_buffer *buf)
@@ -152,6 +158,7 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe,
return PIPE_UNREFERENCED;
}
+
struct pipe_context *
softpipe_create( struct pipe_screen *screen )
{
@@ -222,7 +229,6 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced;
softpipe_init_query_funcs( softpipe );
- softpipe_init_texture_funcs( softpipe );
/*
* Alloc caches for accessing drawing surfaces and textures.
@@ -233,41 +239,14 @@ softpipe_create( struct pipe_screen *screen )
softpipe->zsbuf_cache = sp_create_tile_cache( screen );
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
- softpipe->tex_cache[i] = sp_create_tile_cache( screen );
+ softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen );
/* setup quad rendering stages */
- for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
- softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe);
- softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe);
- softpipe->quad[i].shade = sp_quad_shade_stage(softpipe);
- softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe);
- softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe);
- softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe);
- softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe);
- softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe);
- softpipe->quad[i].blend = sp_quad_blend_stage(softpipe);
- softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe);
- softpipe->quad[i].output = sp_quad_output_stage(softpipe);
- }
+ softpipe->quad.shade = sp_quad_shade_stage(softpipe);
+ softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
+ softpipe->quad.blend = sp_quad_blend_stage(softpipe);
- /* vertex shader samplers */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex;
- softpipe->tgsi.vert_samplers[i].unit = i;
- softpipe->tgsi.vert_samplers[i].sp = softpipe;
- softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i];
- softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i];
- }
-
- /* fragment shader samplers */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment;
- softpipe->tgsi.frag_samplers[i].unit = i;
- softpipe->tgsi.frag_samplers[i].sp = softpipe;
- softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i];
- softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i];
- }
/*
* Create drawing context and plug our rendering stage into it.
@@ -281,30 +260,28 @@ softpipe_create( struct pipe_screen *screen )
(struct tgsi_sampler **)
softpipe->tgsi.vert_samplers_list);
- softpipe->setup = sp_draw_render_stage(softpipe);
- if (!softpipe->setup)
- goto fail;
-
if (debug_get_bool_option( "SP_NO_RAST", FALSE ))
softpipe->no_rast = TRUE;
- if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) {
- /* Deprecated path -- vbuf is the intended interface to the draw module:
- */
- draw_set_rasterize_stage(softpipe->draw, softpipe->setup);
- }
- else {
- sp_init_vbuf(softpipe);
- }
+ softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe);
+ if (!softpipe->vbuf_backend)
+ goto fail;
+
+ softpipe->vbuf = draw_vbuf_stage(softpipe->draw, softpipe->vbuf_backend);
+ if (!softpipe->vbuf)
+ goto fail;
+
+ draw_set_rasterize_stage(softpipe->draw, softpipe->vbuf);
+ draw_set_render(softpipe->draw, softpipe->vbuf_backend);
+
+
/* plug in AA line/point stages */
draw_install_aaline_stage(softpipe->draw, &softpipe->pipe);
draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe);
-#if USE_DRAW_STAGE_PSTIPPLE
/* Do polygon stipple w/ texture map + frag prog? */
draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe);
-#endif
sp_init_surface_functions(softpipe);
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 7888c2f644..43a195c8ef 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -36,24 +36,13 @@
#include "draw/draw_vertex.h"
#include "sp_quad_pipe.h"
-#include "sp_tex_sample.h"
-/**
- * This is a temporary variable for testing draw-stage polygon stipple.
- * If zero, do stipple in sp_quad_stipple.c
- */
-#define USE_DRAW_STAGE_PSTIPPLE 1
-
-/* Number of threads working on individual quads.
- * Setting to 1 disables this feature.
- */
-#define SP_NUM_QUAD_THREADS 1
-
struct softpipe_vbuf_render;
struct draw_context;
struct draw_stage;
struct softpipe_tile_cache;
+struct softpipe_tex_tile_cache;
struct sp_fragment_shader;
struct sp_vertex_shader;
@@ -62,12 +51,12 @@ struct softpipe_context {
struct pipe_context pipe; /**< base class */
/** Constant state objects */
- const struct pipe_blend_state *blend;
- const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
- const struct pipe_depth_stencil_alpha_state *depth_stencil;
- const struct pipe_rasterizer_state *rasterizer;
- const struct sp_fragment_shader *fs;
- const struct sp_vertex_shader *vs;
+ struct pipe_blend_state *blend;
+ struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_depth_stencil_alpha_state *depth_stencil;
+ struct pipe_rasterizer_state *rasterizer;
+ struct sp_fragment_shader *fs;
+ struct sp_vertex_shader *vs;
/** Other rendering state */
struct pipe_blend_color blend_color;
@@ -107,7 +96,16 @@ struct softpipe_context {
/** Which vertex shader output slot contains point size */
int psize_slot;
- unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */
+ /* The reduced version of the primitive supplied by the state
+ * tracker.
+ */
+ unsigned reduced_api_prim;
+
+ /* The reduced primitive after unfilled triangles, wide-line
+ * decomposition, etc, are taken into account. This is the
+ * primitive actually rasterized.
+ */
+ unsigned reduced_prim;
/** Derived from scissor and surface bounds: */
struct pipe_scissor_state cliprect;
@@ -116,41 +114,33 @@ struct softpipe_context {
/** Software quad rendering pipeline */
struct {
- struct quad_stage *polygon_stipple;
- struct quad_stage *earlyz;
struct quad_stage *shade;
- struct quad_stage *alpha_test;
- struct quad_stage *stencil_test;
struct quad_stage *depth_test;
- struct quad_stage *occlusion;
- struct quad_stage *coverage;
struct quad_stage *blend;
- struct quad_stage *colormask;
- struct quad_stage *output;
struct quad_stage *first; /**< points to one of the above stages */
- } quad[SP_NUM_QUAD_THREADS];
+ } quad;
/** TGSI exec things */
struct {
- struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS];
- struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS];
- struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS];
- struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS];
+ struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS];
+ struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS];
} tgsi;
/** The primitive drawing context */
struct draw_context *draw;
- struct draw_stage *setup;
+
+ /** Draw module backend */
+ struct vbuf_render *vbuf_backend;
struct draw_stage *vbuf;
- struct softpipe_vbuf_render *vbuf_render;
boolean dirty_render_cache;
struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS];
struct softpipe_tile_cache *zsbuf_cache;
-
- struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+
+ unsigned tex_timestamp;
+ struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
unsigned use_sse : 1;
unsigned dump_fs : 1;
@@ -164,5 +154,9 @@ softpipe_context( struct pipe_context *pipe )
return (struct softpipe_context *)pipe;
}
+void
+softpipe_reset_sampler_varients(struct softpipe_context *softpipe);
+
+
#endif /* SP_CONTEXT_H */
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index 4a14d49686..e38b767cf2 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -37,6 +37,7 @@
#include "sp_surface.h"
#include "sp_state.h"
#include "sp_tile_cache.h"
+#include "sp_tex_tile_cache.h"
#include "sp_winsys.h"
@@ -52,17 +53,19 @@ softpipe_flush( struct pipe_context *pipe,
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
for (i = 0; i < softpipe->num_textures; i++) {
- sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]);
+ sp_flush_tex_tile_cache(softpipe->tex_cache[i]);
}
}
- if (flags & PIPE_FLUSH_RENDER_CACHE) {
+ if (flags & PIPE_FLUSH_SWAPBUFFERS) {
+ /* If this is a swapbuffers, just flush color buffers.
+ *
+ * The zbuffer changes are not discarded, but held in the cache
+ * in the hope that a later clear will wipe them out.
+ */
for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++)
if (softpipe->cbuf_cache[i])
- sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]);
-
- if (softpipe->zsbuf_cache)
- sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache);
+ sp_flush_tile_cache(softpipe->cbuf_cache[i]);
/* Need this call for hardware buffers before swapbuffers.
*
@@ -71,7 +74,15 @@ softpipe_flush( struct pipe_context *pipe,
* to unmap surfaces when flushing.
*/
softpipe_unmap_transfers(softpipe);
-
+ }
+ else if (flags & PIPE_FLUSH_RENDER_CACHE) {
+ for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++)
+ if (softpipe->cbuf_cache[i])
+ sp_flush_tile_cache(softpipe->cbuf_cache[i]);
+
+ if (softpipe->zsbuf_cache)
+ sp_flush_tile_cache(softpipe->zsbuf_cache);
+
softpipe->dirty_render_cache = FALSE;
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 9ee86fe787..4076114d39 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -59,15 +59,34 @@ sp_exec_fragment_shader(const struct sp_fragment_shader *base)
}
+static void
+exec_prepare( const struct sp_fragment_shader *base,
+ struct tgsi_exec_machine *machine,
+ struct tgsi_sampler **samplers )
+{
+ /*
+ * Bind tokens/shader to the interpreter's machine state.
+ * Avoid redundant binding.
+ */
+ if (machine->Tokens != base->shader.tokens) {
+ tgsi_exec_machine_bind_shader( machine,
+ base->shader.tokens,
+ PIPE_MAX_SAMPLERS,
+ samplers );
+ }
+}
+
+
+
/**
* Compute quad X,Y,Z,W for the four fragments in a quad.
*
* This should really be part of the compiled shader.
*/
-void
-sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
- float x, float y,
- struct tgsi_exec_vector *quadpos)
+static void
+setup_pos_vector(const struct tgsi_interp_coef *coef,
+ float x, float y,
+ struct tgsi_exec_vector *quadpos)
{
uint chan;
/* do X */
@@ -95,24 +114,6 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
}
-static void
-exec_prepare( const struct sp_fragment_shader *base,
- struct tgsi_exec_machine *machine,
- struct tgsi_sampler **samplers )
-{
- /*
- * Bind tokens/shader to the interpreter's machine state.
- * Avoid redundant binding.
- */
- if (machine->Tokens != base->shader.tokens) {
- tgsi_exec_machine_bind_shader( machine,
- base->shader.tokens,
- PIPE_MAX_SAMPLERS,
- samplers );
- }
-}
-
-
/* TODO: hide the machine struct in here somewhere, remove from this
* interface:
*/
@@ -122,11 +123,43 @@ exec_run( const struct sp_fragment_shader *base,
struct quad_header *quad )
{
/* Compute X, Y, Z, W vals for this quad */
- sp_setup_pos_vector(quad->posCoef,
- (float)quad->input.x0, (float)quad->input.y0,
- &machine->QuadPos);
+ setup_pos_vector(quad->posCoef,
+ (float)quad->input.x0, (float)quad->input.y0,
+ &machine->QuadPos);
- return tgsi_exec_machine_run( machine );
+ quad->inout.mask &= tgsi_exec_machine_run( machine );
+ if (quad->inout.mask == 0)
+ return FALSE;
+
+ /* store outputs */
+ {
+ const ubyte *sem_name = base->info.output_semantic_name;
+ const ubyte *sem_index = base->info.output_semantic_index;
+ const uint n = base->info.num_outputs;
+ uint i;
+ for (i = 0; i < n; i++) {
+ switch (sem_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ {
+ uint cbuf = sem_index[i];
+ memcpy(quad->output.color[cbuf],
+ &machine->Outputs[i].xyzw[0].f[0],
+ sizeof(quad->output.color[0]) );
+ }
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ {
+ uint j;
+ for (j = 0; j < 4; j++) {
+ quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ return TRUE;
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c
deleted file mode 100644
index 95c0d982d1..0000000000
--- a/src/gallium/drivers/softpipe/sp_fs_llvm.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Execute fragment shader using LLVM code generation.
- * Authors:
- * Zack Rusin
- */
-
-#include "sp_context.h"
-#include "sp_state.h"
-#include "sp_fs.h"
-
-#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "tgsi/tgsi_sse2.h"
-
-#if 0
-
-/**
- * Subclass of sp_fragment_shader
- */
-struct sp_llvm_fragment_shader
-{
- struct sp_fragment_shader base;
- struct gallivm_prog *llvm_prog;
-};
-
-
-static void
-shade_quad_llvm(struct quad_stage *qs,
- struct quad_header *quad)
-{
- struct quad_shade_stage *qss = quad_shade_stage(qs);
- struct softpipe_context *softpipe = qs->softpipe;
- float dests[4][16][4] ALIGN16_ATTRIB;
- float inputs[4][16][4] ALIGN16_ATTRIB;
- const float fx = (float) quad->x0;
- const float fy = (float) quad->y0;
- struct gallivm_prog *llvm = qss->llvm_prog;
-
- inputs[0][0][0] = fx;
- inputs[1][0][0] = fx + 1.0f;
- inputs[2][0][0] = fx;
- inputs[3][0][0] = fx + 1.0f;
-
- inputs[0][0][1] = fy;
- inputs[1][0][1] = fy;
- inputs[2][0][1] = fy + 1.0f;
- inputs[3][0][1] = fy + 1.0f;
-
-
- gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);
-
-#if DLLVM
- debug_printf("MASK = %d\n", quad->mask);
- for (int i = 0; i < 4; ++i) {
- for (int j = 0; j < 2; ++j) {
- debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j,
- inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
- }
- }
-#endif
-
- quad->mask &=
- gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
- softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
- qss->samplers);
-#if DLLVM
- debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
- dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],
- dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);
-#endif
-
- /* store result color */
- if (qss->colorOutSlot >= 0) {
- unsigned i;
- /* XXX need to handle multiple color outputs someday */
- allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
- == TGSI_SEMANTIC_COLOR);
- for (i = 0; i < QUAD_SIZE; ++i) {
- quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0];
- quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1];
- quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2];
- quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3];
- }
- }
-#if DLLVM
- for (int i = 0; i < QUAD_SIZE; ++i) {
- debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
- quad->outputs.color[0][0][i],
- quad->outputs.color[0][1][i],
- quad->outputs.color[0][2][i],
- quad->outputs.color[0][3][i]);
- }
-#endif
-
- /* store result Z */
- if (qss->depthOutSlot >= 0) {
- /* output[slot] is new Z */
- uint i;
- for (i = 0; i < 4; i++) {
- quad->outputs.depth[i] = dests[i][0][2];
- }
- }
- else {
- /* copy input Z (which was interpolated by the executor) to output Z */
- uint i;
- for (i = 0; i < 4; i++) {
- quad->outputs.depth[i] = inputs[i][0][2];
- }
- }
-#if DLLVM
- debug_printf("D [%f, %f, %f, %f] mask = %d\n",
- quad->outputs.depth[0],
- quad->outputs.depth[1],
- quad->outputs.depth[2],
- quad->outputs.depth[3], quad->mask);
-#endif
-
- /* shader may cull fragments */
- if( quad->mask ) {
- qs->next->run( qs->next, quad );
- }
-}
-
-
-unsigned
-run_llvm_fs( const struct sp_fragment_shader *base,
- struct foo *machine )
-{
-}
-
-
-void
-delete_llvm_fs( struct sp_fragment_shader *base )
-{
- FREE(base);
-}
-
-
-struct sp_fragment_shader *
-softpipe_create_fs_llvm(struct softpipe_context *softpipe,
- const struct pipe_shader_state *templ)
-{
- struct sp_llvm_fragment_shader *shader = NULL;
-
- /* LLVM fragment shaders currently disabled:
- */
- state = CALLOC_STRUCT(sp_llvm_shader_state);
- if (!state)
- return NULL;
-
- state->llvm_prog = 0;
-
- if (!gallivm_global_cpu_engine()) {
- gallivm_cpu_engine_create(state->llvm_prog);
- }
- else
- gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog);
-
- if (shader) {
- shader->base.run = run_llvm_fs;
- shader->base.delete = delete_llvm_fs;
- }
-
- return shader;
-}
-
-
-#else
-
-struct sp_fragment_shader *
-softpipe_create_fs_llvm(struct softpipe_context *softpipe,
- const struct pipe_shader_state *templ)
-{
- return NULL;
-}
-
-#endif
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index f4fa0905d7..9d3e4670ee 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -76,6 +76,43 @@ fs_sse_prepare( const struct sp_fragment_shader *base,
}
+
+/**
+ * Compute quad X,Y,Z,W for the four fragments in a quad.
+ *
+ * This should really be part of the compiled shader.
+ */
+static void
+setup_pos_vector(const struct tgsi_interp_coef *coef,
+ float x, float y,
+ struct tgsi_exec_vector *quadpos)
+{
+ uint chan;
+ /* do X */
+ quadpos->xyzw[0].f[0] = x;
+ quadpos->xyzw[0].f[1] = x + 1;
+ quadpos->xyzw[0].f[2] = x;
+ quadpos->xyzw[0].f[3] = x + 1;
+
+ /* do Y */
+ quadpos->xyzw[1].f[0] = y;
+ quadpos->xyzw[1].f[1] = y;
+ quadpos->xyzw[1].f[2] = y + 1;
+ quadpos->xyzw[1].f[3] = y + 1;
+
+ /* do Z and W for all fragments in the quad */
+ for (chan = 2; chan < 4; chan++) {
+ const float dadx = coef->dadx[chan];
+ const float dady = coef->dady[chan];
+ const float a0 = coef->a0[chan] + dadx * x + dady * y;
+ quadpos->xyzw[chan].f[0] = a0;
+ quadpos->xyzw[chan].f[1] = a0 + dadx;
+ quadpos->xyzw[chan].f[2] = a0 + dady;
+ quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
+ }
+}
+
+
/* TODO: codegenerate the whole run function, skip this wrapper.
* TODO: break dependency on tgsi_exec_machine struct
* TODO: push Position calculation into the generated shader
@@ -89,9 +126,9 @@ fs_sse_run( const struct sp_fragment_shader *base,
struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base);
/* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */
- sp_setup_pos_vector(quad->posCoef,
- (float)quad->input.x0, (float)quad->input.y0,
- machine->Temps);
+ setup_pos_vector(quad->posCoef,
+ (float)quad->input.x0, (float)quad->input.y0,
+ machine->Temps);
/* init kill mask */
tgsi_set_kill_mask(machine, 0x0);
@@ -104,7 +141,39 @@ fs_sse_run( const struct sp_fragment_shader *base,
// , &machine->QuadPos
);
- return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
+ quad->inout.mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
+ if (quad->inout.mask == 0)
+ return FALSE;
+
+ /* store outputs */
+ {
+ const ubyte *sem_name = base->info.output_semantic_name;
+ const ubyte *sem_index = base->info.output_semantic_index;
+ const uint n = base->info.num_outputs;
+ uint i;
+ for (i = 0; i < n; i++) {
+ switch (sem_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ {
+ uint cbuf = sem_index[i];
+ memcpy(quad->output.color[cbuf],
+ &machine->Outputs[i].xyzw[0].f[0],
+ sizeof(quad->output.color[0]) );
+ }
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ {
+ uint j;
+ for (j = 0; j < 4; j++) {
+ quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j];
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ return TRUE;
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c
deleted file mode 100644
index 038ff04d4f..0000000000
--- a/src/gallium/drivers/softpipe/sp_prim_setup.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief A draw stage that drives our triangle setup routines from
- * within the draw pipeline. One of two ways to drive setup, the
- * other being in sp_prim_vbuf.c.
- *
- * \author Keith Whitwell <keith@tungstengraphics.com>
- * \author Brian Paul
- */
-
-
-#include "sp_context.h"
-#include "sp_setup.h"
-#include "sp_state.h"
-#include "sp_prim_setup.h"
-#include "draw/draw_pipe.h"
-#include "draw/draw_vertex.h"
-#include "util/u_memory.h"
-
-/**
- * Triangle setup info (derived from draw_stage).
- * Also used for line drawing (taking some liberties).
- */
-struct setup_stage {
- struct draw_stage stage; /**< This must be first (base class) */
-
- struct setup_context *setup;
-};
-
-
-
-/**
- * Basically a cast wrapper.
- */
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
-{
- return (struct setup_stage *)stage;
-}
-
-
-typedef const float (*cptrf4)[4];
-
-static void
-do_tri(struct draw_stage *stage, struct prim_header *prim)
-{
- struct setup_stage *setup = setup_stage( stage );
-
- setup_tri( setup->setup,
- (cptrf4)prim->v[0]->data,
- (cptrf4)prim->v[1]->data,
- (cptrf4)prim->v[2]->data );
-}
-
-static void
-do_line(struct draw_stage *stage, struct prim_header *prim)
-{
- struct setup_stage *setup = setup_stage( stage );
-
- setup_line( setup->setup,
- (cptrf4)prim->v[0]->data,
- (cptrf4)prim->v[1]->data );
-}
-
-static void
-do_point(struct draw_stage *stage, struct prim_header *prim)
-{
- struct setup_stage *setup = setup_stage( stage );
-
- setup_point( setup->setup,
- (cptrf4)prim->v[0]->data );
-}
-
-
-
-
-static void setup_begin( struct draw_stage *stage )
-{
- struct setup_stage *setup = setup_stage(stage);
-
- setup_prepare( setup->setup );
-
- stage->point = do_point;
- stage->line = do_line;
- stage->tri = do_tri;
-}
-
-
-static void setup_first_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- setup_begin(stage);
- stage->point( stage, header );
-}
-
-static void setup_first_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- setup_begin(stage);
- stage->line( stage, header );
-}
-
-
-static void setup_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- setup_begin(stage);
- stage->tri( stage, header );
-}
-
-
-
-static void setup_flush( struct draw_stage *stage,
- unsigned flags )
-{
- stage->point = setup_first_point;
- stage->line = setup_first_line;
- stage->tri = setup_first_tri;
-}
-
-
-static void reset_stipple_counter( struct draw_stage *stage )
-{
-}
-
-
-static void render_destroy( struct draw_stage *stage )
-{
- struct setup_stage *ssetup = setup_stage(stage);
- setup_destroy_context(ssetup->setup);
- FREE( stage );
-}
-
-
-/**
- * Create a new primitive setup/render stage.
- */
-struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
-{
- struct setup_stage *sstage = CALLOC_STRUCT(setup_stage);
-
- sstage->setup = setup_create_context(softpipe);
- sstage->stage.draw = softpipe->draw;
- sstage->stage.point = setup_first_point;
- sstage->stage.line = setup_first_line;
- sstage->stage.tri = setup_first_tri;
- sstage->stage.flush = setup_flush;
- sstage->stage.reset_stipple_counter = reset_stipple_counter;
- sstage->stage.destroy = render_destroy;
-
- return (struct draw_stage *)sstage;
-}
-
-struct setup_context *
-sp_draw_setup_context( struct draw_stage *stage )
-{
- struct setup_stage *ssetup = setup_stage(stage);
- return ssetup->setup;
-}
-
-void
-sp_draw_flush( struct draw_stage *stage )
-{
- stage->flush( stage, 0 );
-}
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h
deleted file mode 100644
index 49bdd98ed8..0000000000
--- a/src/gallium/drivers/softpipe/sp_prim_setup.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef SP_PRIM_SETUP_H
-#define SP_PRIM_SETUP_H
-
-
-/**
- * vbuf is a special stage to gather the stream of triangles, lines, points
- * together and reconstruct vertex buffers for hardware upload.
- *
- * First attempt, work in progress.
- *
- * TODO:
- * - separate out vertex buffer building and primitive emit, ie >1 draw per vb.
- * - tell vbuf stage how to build hw vertices directly
- * - pass vbuf stage a buffer pointer for direct emit to agp/vram.
- *
- *
- *
- * Vertices are just an array of floats, with all the attributes
- * packed. We currently assume a layout like:
- *
- * attr[0][0..3] - window position
- * attr[1..n][0..3] - remaining attributes.
- *
- * Attributes are assumed to be 4 floats wide but are packed so that
- * all the enabled attributes run contiguously.
- */
-
-
-struct draw_stage;
-struct softpipe_context;
-
-
-typedef void (*vbuf_draw_func)( struct pipe_context *pipe,
- unsigned prim,
- const ushort *elements,
- unsigned nr_elements,
- const void *vertex_buffer,
- unsigned nr_vertices );
-
-
-extern struct draw_stage *
-sp_draw_render_stage( struct softpipe_context *softpipe );
-
-extern struct setup_context *
-sp_draw_setup_context( struct draw_stage * );
-
-extern void
-sp_draw_flush( struct draw_stage * );
-
-
-extern struct draw_stage *
-sp_draw_vbuf_stage( struct draw_context *draw_context,
- struct pipe_context *pipe,
- vbuf_draw_func draw );
-
-
-#endif /* SP_PRIM_SETUP_H */
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 42021789ea..e603c20fc4 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -37,13 +37,13 @@
#include "sp_context.h"
+#include "sp_setup.h"
#include "sp_state.h"
#include "sp_prim_vbuf.h"
-#include "sp_prim_setup.h"
-#include "sp_setup.h"
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#define SP_MAX_VBUF_INDEXES 1024
@@ -58,6 +58,8 @@ struct softpipe_vbuf_render
{
struct vbuf_render base;
struct softpipe_context *softpipe;
+ struct setup_context *setup;
+
uint prim;
uint vertex_size;
uint nr_vertices;
@@ -74,6 +76,11 @@ softpipe_vbuf_render(struct vbuf_render *vbr)
}
+
+
+
+
+
static const struct vertex_info *
sp_vbuf_get_vertex_info(struct vbuf_render *vbr)
{
@@ -104,36 +111,6 @@ sp_vbuf_allocate_vertices(struct vbuf_render *vbr,
static void
sp_vbuf_release_vertices(struct vbuf_render *vbr)
{
-#if 0
- {
- struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
- const struct vertex_info *info =
- softpipe_get_vbuf_vertex_info(cvbr->softpipe);
- const float *vtx = (const float *) cvbr->vertex_buffer;
- uint i, j;
- debug_printf("%s (vtx_size = %u, vtx_used = %u)\n",
- __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices);
- for (i = 0; i < cvbr->nr_vertices; i++) {
- for (j = 0; j < info->num_attribs; j++) {
- uint k;
- switch (info->attrib[j].emit) {
- case EMIT_4F: k = 4; break;
- case EMIT_3F: k = 3; break;
- case EMIT_2F: k = 2; break;
- case EMIT_1F: k = 1; break;
- default: assert(0);
- }
- debug_printf("Vert %u attr %u: ", i, j);
- while (k-- > 0) {
- debug_printf("%g ", vtx[0]);
- vtx++;
- }
- debug_printf("\n");
- }
- }
- }
-#endif
-
/* keep the old allocation for next time */
}
@@ -159,14 +136,11 @@ static boolean
sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
-
- /* XXX: break this dependency - make setup_context live under
- * softpipe, rename the old "setup" draw stage to something else.
- */
- struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup);
+ struct setup_context *setup_ctx = cvbr->setup;
setup_prepare( setup_ctx );
+ cvbr->softpipe->reduced_prim = u_reduced_prim(prim);
cvbr->prim = prim;
return TRUE;
@@ -191,14 +165,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
struct softpipe_context *softpipe = cvbr->softpipe;
const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
const void *vertex_buffer = cvbr->vertex_buffer;
+ struct setup_context *setup_ctx = cvbr->setup;
unsigned i;
- /* XXX: break this dependency - make setup_context live under
- * softpipe, rename the old "setup" draw stage to something else.
- */
- struct draw_stage *setup = softpipe->setup;
- struct setup_context *setup_ctx = sp_draw_setup_context(setup);
-
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
@@ -237,14 +206,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
break;
case PIPE_PRIM_TRIANGLES:
- for (i = 2; i < nr; i += 3) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride),
get_vert(vertex_buffer, indices[i-2], stride) );
}
- else {
+ }
+ else {
+ for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-2], stride),
get_vert(vertex_buffer, indices[i-1], stride),
@@ -254,14 +225,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
break;
case PIPE_PRIM_TRIANGLE_STRIP:
- for (i = 2; i < nr; i += 1) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i+(i&1)-1], stride),
get_vert(vertex_buffer, indices[i-(i&1)], stride),
get_vert(vertex_buffer, indices[i-2], stride) );
}
- else {
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
@@ -271,14 +244,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
break;
case PIPE_PRIM_TRIANGLE_FAN:
- for (i = 2; i < nr; i += 1) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-0], stride),
get_vert(vertex_buffer, indices[0], stride),
get_vert(vertex_buffer, indices[i-1], stride) );
}
- else {
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[0], stride),
get_vert(vertex_buffer, indices[i-1], stride),
@@ -288,8 +263,8 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
break;
case PIPE_PRIM_QUADS:
- for (i = 3; i < nr; i += 4) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-2], stride),
get_vert(vertex_buffer, indices[i-1], stride),
@@ -299,7 +274,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
get_vert(vertex_buffer, indices[i-0], stride),
get_vert(vertex_buffer, indices[i-3], stride) );
}
- else {
+ }
+ else {
+ for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-3], stride),
get_vert(vertex_buffer, indices[i-2], stride),
@@ -314,8 +291,8 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
break;
case PIPE_PRIM_QUAD_STRIP:
- for (i = 3; i < nr; i += 2) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-0], stride),
get_vert(vertex_buffer, indices[i-1], stride),
@@ -325,7 +302,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
get_vert(vertex_buffer, indices[i-0], stride),
get_vert(vertex_buffer, indices[i-3], stride) );
}
- else {
+ }
+ else {
+ for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-3], stride),
get_vert(vertex_buffer, indices[i-2], stride),
@@ -355,11 +334,6 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
default:
assert(0);
}
-
- /* XXX: why are we calling this??? If we had to call something, it
- * would be a function in sp_setup.c:
- */
- sp_draw_flush( setup );
}
@@ -372,17 +346,12 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
+ struct setup_context *setup_ctx = cvbr->setup;
const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
const void *vertex_buffer =
(void *) get_vert(cvbr->vertex_buffer, start, stride);
unsigned i;
- /* XXX: break this dependency - make setup_context live under
- * softpipe, rename the old "setup" draw stage to something else.
- */
- struct draw_stage *setup = softpipe->setup;
- struct setup_context *setup_ctx = sp_draw_setup_context(setup);
-
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
@@ -421,14 +390,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
break;
case PIPE_PRIM_TRIANGLES:
- for (i = 2; i < nr; i += 3) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride),
get_vert(vertex_buffer, i-2, stride) );
}
- else {
+ }
+ else {
+ for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-2, stride),
get_vert(vertex_buffer, i-1, stride),
@@ -438,14 +409,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
break;
case PIPE_PRIM_TRIANGLE_STRIP:
- for (i = 2; i < nr; i++) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 2; i < nr; i++) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i+(i&1)-1, stride),
get_vert(vertex_buffer, i-(i&1), stride),
get_vert(vertex_buffer, i-2, stride) );
}
- else {
+ }
+ else {
+ for (i = 2; i < nr; i++) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i+(i&1)-2, stride),
get_vert(vertex_buffer, i-(i&1)-1, stride),
@@ -455,14 +428,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
break;
case PIPE_PRIM_TRIANGLE_FAN:
- for (i = 2; i < nr; i += 1) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-0, stride),
get_vert(vertex_buffer, 0, stride),
get_vert(vertex_buffer, i-1, stride) );
}
- else {
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, 0, stride),
get_vert(vertex_buffer, i-1, stride),
@@ -472,8 +447,8 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
break;
case PIPE_PRIM_QUADS:
- for (i = 3; i < nr; i += 4) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-2, stride),
get_vert(vertex_buffer, i-1, stride),
@@ -483,7 +458,9 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
get_vert(vertex_buffer, i-0, stride),
get_vert(vertex_buffer, i-3, stride) );
}
- else {
+ }
+ else {
+ for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-3, stride),
get_vert(vertex_buffer, i-2, stride),
@@ -497,8 +474,8 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
break;
case PIPE_PRIM_QUAD_STRIP:
- for (i = 3; i < nr; i += 2) {
- if (softpipe->rasterizer->flatshade_first) {
+ if (softpipe->rasterizer->flatshade_first) {
+ for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-0, stride),
get_vert(vertex_buffer, i-1, stride),
@@ -508,7 +485,9 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
get_vert(vertex_buffer, i-0, stride),
get_vert(vertex_buffer, i-3, stride) );
}
- else {
+ }
+ else {
+ for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-3, stride),
get_vert(vertex_buffer, i-2, stride),
@@ -546,40 +525,38 @@ static void
sp_vbuf_destroy(struct vbuf_render *vbr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
- cvbr->softpipe->vbuf_render = NULL;
+ setup_destroy_context(cvbr->setup);
FREE(cvbr);
}
/**
- * Initialize the post-transform vertex buffer information for the given
- * context.
+ * Create the post-transform vertex handler for the given context.
*/
-void
-sp_init_vbuf(struct softpipe_context *sp)
+struct vbuf_render *
+sp_create_vbuf_backend(struct softpipe_context *sp)
{
- assert(sp->draw);
+ struct softpipe_vbuf_render *cvbr = CALLOC_STRUCT(softpipe_vbuf_render);
- sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render);
+ assert(sp->draw);
- sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES;
- sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE;
- sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info;
- sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices;
- sp->vbuf_render->base.map_vertices = sp_vbuf_map_vertices;
- sp->vbuf_render->base.unmap_vertices = sp_vbuf_unmap_vertices;
- sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive;
- sp->vbuf_render->base.draw = sp_vbuf_draw;
- sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays;
- sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices;
- sp->vbuf_render->base.destroy = sp_vbuf_destroy;
+ cvbr->base.max_indices = SP_MAX_VBUF_INDEXES;
+ cvbr->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE;
- sp->vbuf_render->softpipe = sp;
+ cvbr->base.get_vertex_info = sp_vbuf_get_vertex_info;
+ cvbr->base.allocate_vertices = sp_vbuf_allocate_vertices;
+ cvbr->base.map_vertices = sp_vbuf_map_vertices;
+ cvbr->base.unmap_vertices = sp_vbuf_unmap_vertices;
+ cvbr->base.set_primitive = sp_vbuf_set_primitive;
+ cvbr->base.draw = sp_vbuf_draw;
+ cvbr->base.draw_arrays = sp_vbuf_draw_arrays;
+ cvbr->base.release_vertices = sp_vbuf_release_vertices;
+ cvbr->base.destroy = sp_vbuf_destroy;
- sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base);
+ cvbr->softpipe = sp;
- draw_set_rasterize_stage(sp->draw, sp->vbuf);
+ cvbr->setup = setup_create_context(cvbr->softpipe);
- draw_set_render(sp->draw, &sp->vbuf_render->base);
+ return &cvbr->base;
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h
index 1de9cc2a89..ad01cc2f28 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.h
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h
@@ -31,8 +31,8 @@
struct softpipe_context;
-extern void
-sp_init_vbuf(struct softpipe_context *softpipe);
+extern struct vbuf_render *
+sp_create_vbuf_backend(struct softpipe_context *softpipe);
#endif /* SP_VBUF_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h
index bd6c6cb912..a3236bd116 100644
--- a/src/gallium/drivers/softpipe/sp_quad.h
+++ b/src/gallium/drivers/softpipe/sp_quad.h
@@ -97,10 +97,10 @@ struct quad_header {
struct quad_header_inout inout;
struct quad_header_output output;
- const struct tgsi_interp_coef *coef;
+ /* Redundant/duplicated:
+ */
const struct tgsi_interp_coef *posCoef;
-
- unsigned nr_attrs;
+ const struct tgsi_interp_coef *coef;
};
#endif /* SP_QUAD_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
deleted file mode 100644
index 0845bae0e6..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
+++ /dev/null
@@ -1,108 +0,0 @@
-
-/**
- * quad alpha test
- */
-
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_quad_pipe.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-
-
-static void
-alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
-{
- struct softpipe_context *softpipe = qs->softpipe;
- const float ref = softpipe->depth_stencil->alpha.ref_value;
- unsigned passMask = 0x0, j;
- const uint cbuf = 0; /* only output[0].alpha is tested */
- const float *aaaa = quad->output.color[cbuf][3];
-
- switch (softpipe->depth_stencil->alpha.func) {
- case PIPE_FUNC_NEVER:
- break;
- case PIPE_FUNC_LESS:
- /*
- * If mask were an array [4] we could do this SIMD-style:
- * passMask = (quad->outputs.color[0][3] <= vec4(ref));
- */
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] < ref) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_EQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] == ref) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_LEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] <= ref) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_GREATER:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] > ref) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_NOTEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] != ref) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_GEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] >= ref) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_ALWAYS:
- passMask = MASK_ALL;
- break;
- default:
- assert(0);
- }
-
- quad->inout.mask &= passMask;
-
- if (quad->inout.mask)
- qs->next->run(qs->next, quad);
-}
-
-
-static void alpha_test_begin(struct quad_stage *qs)
-{
- qs->next->begin(qs->next);
-}
-
-
-static void alpha_test_destroy(struct quad_stage *qs)
-{
- FREE( qs );
-}
-
-
-struct quad_stage *
-sp_quad_alpha_test_stage( struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
- stage->softpipe = softpipe;
- stage->begin = alpha_test_begin;
- stage->run = alpha_test_quad;
- stage->destroy = alpha_test_destroy;
-
- return stage;
-}
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index b1e18805c7..e243c63fa2 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -117,644 +117,865 @@ do { \
static void
-logicop_quad(struct quad_stage *qs, struct quad_header *quad)
+logicop_quad(struct quad_stage *qs,
+ float (*quadColor)[4],
+ float (*dest)[4])
{
struct softpipe_context *softpipe = qs->softpipe;
- uint cbuf;
+ ubyte src[4][4], dst[4][4], res[4][4];
+ uint *src4 = (uint *) src;
+ uint *dst4 = (uint *) dst;
+ uint *res4 = (uint *) res;
+ uint j;
+
+
+ /* convert to ubyte */
+ for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
+ dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
+ dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
+ dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
+ dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
+
+ src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
+ src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
+ src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
+ src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
+ }
- /* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
- float dest[4][QUAD_SIZE];
- ubyte src[4][4], dst[4][4], res[4][4];
- uint *src4 = (uint *) src;
- uint *dst4 = (uint *) dst;
- uint *res4 = (uint *) res;
- struct softpipe_cached_tile *
- tile = sp_get_cached_tile(softpipe,
- softpipe->cbuf_cache[cbuf],
- quad->input.x0, quad->input.y0);
- float (*quadColor)[4] = quad->output.color[cbuf];
- uint i, j;
+ switch (softpipe->blend->logicop_func) {
+ case PIPE_LOGICOP_CLEAR:
+ for (j = 0; j < 4; j++)
+ res4[j] = 0;
+ break;
+ case PIPE_LOGICOP_NOR:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] | dst4[j]);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j] & dst4[j];
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j];
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] & ~dst4[j];
+ break;
+ case PIPE_LOGICOP_INVERT:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~dst4[j];
+ break;
+ case PIPE_LOGICOP_XOR:
+ for (j = 0; j < 4; j++)
+ res4[j] = dst4[j] ^ src4[j];
+ break;
+ case PIPE_LOGICOP_NAND:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] & dst4[j]);
+ break;
+ case PIPE_LOGICOP_AND:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] & dst4[j];
+ break;
+ case PIPE_LOGICOP_EQUIV:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] ^ dst4[j]);
+ break;
+ case PIPE_LOGICOP_NOOP:
+ for (j = 0; j < 4; j++)
+ res4[j] = dst4[j];
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j] | dst4[j];
+ break;
+ case PIPE_LOGICOP_COPY:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j];
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] | ~dst4[j];
+ break;
+ case PIPE_LOGICOP_OR:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] | dst4[j];
+ break;
+ case PIPE_LOGICOP_SET:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~0;
+ break;
+ default:
+ assert(0);
+ }
- /* get/swizzle dest colors */
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
- for (i = 0; i < 4; i++) {
- dest[i][j] = tile->data.color[y][x][i];
- }
- }
+ for (j = 0; j < 4; j++) {
+ quadColor[j][0] = ubyte_to_float(res[j][0]);
+ quadColor[j][1] = ubyte_to_float(res[j][1]);
+ quadColor[j][2] = ubyte_to_float(res[j][2]);
+ quadColor[j][3] = ubyte_to_float(res[j][3]);
+ }
+}
- /* convert to ubyte */
- for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
- dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
- dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
- dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
- dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
-
- src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
- src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
- src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
- src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
- }
- switch (softpipe->blend->logicop_func) {
- case PIPE_LOGICOP_CLEAR:
- for (j = 0; j < 4; j++)
- res4[j] = 0;
- break;
- case PIPE_LOGICOP_NOR:
- for (j = 0; j < 4; j++)
- res4[j] = ~(src4[j] | dst4[j]);
- break;
- case PIPE_LOGICOP_AND_INVERTED:
- for (j = 0; j < 4; j++)
- res4[j] = ~src4[j] & dst4[j];
- break;
- case PIPE_LOGICOP_COPY_INVERTED:
- for (j = 0; j < 4; j++)
- res4[j] = ~src4[j];
- break;
- case PIPE_LOGICOP_AND_REVERSE:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] & ~dst4[j];
- break;
- case PIPE_LOGICOP_INVERT:
- for (j = 0; j < 4; j++)
- res4[j] = ~dst4[j];
- break;
- case PIPE_LOGICOP_XOR:
- for (j = 0; j < 4; j++)
- res4[j] = dst4[j] ^ src4[j];
- break;
- case PIPE_LOGICOP_NAND:
- for (j = 0; j < 4; j++)
- res4[j] = ~(src4[j] & dst4[j]);
- break;
- case PIPE_LOGICOP_AND:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] & dst4[j];
- break;
- case PIPE_LOGICOP_EQUIV:
- for (j = 0; j < 4; j++)
- res4[j] = ~(src4[j] ^ dst4[j]);
- break;
- case PIPE_LOGICOP_NOOP:
- for (j = 0; j < 4; j++)
- res4[j] = dst4[j];
- break;
- case PIPE_LOGICOP_OR_INVERTED:
- for (j = 0; j < 4; j++)
- res4[j] = ~src4[j] | dst4[j];
- break;
- case PIPE_LOGICOP_COPY:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j];
- break;
- case PIPE_LOGICOP_OR_REVERSE:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] | ~dst4[j];
- break;
- case PIPE_LOGICOP_OR:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] | dst4[j];
- break;
- case PIPE_LOGICOP_SET:
- for (j = 0; j < 4; j++)
- res4[j] = ~0;
- break;
- default:
- assert(0);
- }
- for (j = 0; j < 4; j++) {
- quadColor[j][0] = ubyte_to_float(res[j][0]);
- quadColor[j][1] = ubyte_to_float(res[j][1]);
- quadColor[j][2] = ubyte_to_float(res[j][2]);
- quadColor[j][3] = ubyte_to_float(res[j][3]);
- }
+static void
+blend_quad(struct quad_stage *qs,
+ float (*quadColor)[4],
+ float (*dest)[4])
+{
+ static const float zero[4] = { 0, 0, 0, 0 };
+ static const float one[4] = { 1, 1, 1, 1 };
+ struct softpipe_context *softpipe = qs->softpipe;
+ float source[4][QUAD_SIZE];
+
+ /*
+ * Compute src/first term RGB
+ */
+ switch (softpipe->blend->rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ VEC4_COPY(source[0], quadColor[0]); /* R */
+ VEC4_COPY(source[1], quadColor[1]); /* G */
+ VEC4_COPY(source[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
+ VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
+ VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ {
+ const float *alpha = quadColor[3];
+ VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
+ VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
+ VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ {
+ const float *alpha = dest[3];
+ VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ {
+ const float *alpha = quadColor[3];
+ float diff[4], temp[4];
+ VEC4_SUB(diff, one, dest[3]);
+ VEC4_MIN(temp, alpha, diff);
+ VEC4_MUL(source[0], quadColor[0], temp); /* R */
+ VEC4_MUL(source[1], quadColor[1], temp); /* G */
+ VEC4_MUL(source[2], quadColor[2], temp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
+ VEC4_MUL(source[0], quadColor[0], comp); /* R */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
+ VEC4_MUL(source[1], quadColor[1], comp); /* G */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
+ VEC4_MUL(source[2], quadColor[2], comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float alpha[4];
+ VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
+ VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(source[0], zero); /* R */
+ VEC4_COPY(source[1], zero); /* G */
+ VEC4_COPY(source[2], zero); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+ VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+ VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+ VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, quadColor[3]);
+ VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, dest[3]);
+ VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[0]); /* R */
+ VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+ VEC4_SUB(inv_comp, one, dest[1]); /* G */
+ VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+ VEC4_SUB(inv_comp, one, dest[2]); /* B */
+ VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ {
+ float inv_comp[4];
+ /* R */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
+ VEC4_MUL(source[0], quadColor[0], inv_comp);
+ /* G */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
+ VEC4_MUL(source[1], quadColor[1], inv_comp);
+ /* B */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
+ VEC4_MUL(source[2], quadColor[2], inv_comp);
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Compute src/first term A
+ */
+ switch (softpipe->blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ VEC4_COPY(source[3], quadColor[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ {
+ const float *alpha = quadColor[3];
+ VEC4_MUL(source[3], quadColor[3], alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* multiply alpha by 1.0 */
+ VEC4_COPY(source[3], quadColor[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+ VEC4_MUL(source[3], quadColor[3], comp); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(source[3], zero); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, quadColor[3]);
+ VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, dest[3]);
+ VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_comp[4];
+ /* A */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(source[3], quadColor[3], inv_comp);
+ }
+ break;
+ default:
+ assert(0);
}
- /* pass quad to next stage */
- qs->next->run(qs->next, quad);
-}
+ /*
+ * Compute dest/second term RGB
+ */
+ switch (softpipe->blend->rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* dest = dest * 1 NO-OP, leave dest as-is */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
+ VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
+ VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
+ VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
+ VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
+ VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
+ VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
+ VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
+ VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ assert(0); /* illegal */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
+ VEC4_MUL(dest[0], dest[0], comp); /* R */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
+ VEC4_MUL(dest[1], dest[1], comp); /* G */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
+ VEC4_MUL(dest[2], dest[2], comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+ VEC4_MUL(dest[0], dest[0], comp); /* R */
+ VEC4_MUL(dest[1], dest[1], comp); /* G */
+ VEC4_MUL(dest[2], dest[2], comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(dest[0], zero); /* R */
+ VEC4_COPY(dest[1], zero); /* G */
+ VEC4_COPY(dest[2], zero); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ /* XXX what are these? */
+ assert(0);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+ VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+ VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+ VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float one_minus_alpha[QUAD_SIZE];
+ VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+ VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
+ VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
+ VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[3]); /* A */
+ VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+ VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+ VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[0]); /* R */
+ VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
+ VEC4_SUB(inv_comp, one, dest[1]); /* G */
+ VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
+ VEC4_SUB(inv_comp, one, dest[2]); /* B */
+ VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ {
+ float inv_comp[4];
+ /* R */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
+ VEC4_MUL(dest[0], dest[0], inv_comp);
+ /* G */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
+ VEC4_MUL(dest[1], dest[1], inv_comp);
+ /* B */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
+ VEC4_MUL(dest[2], dest[2], inv_comp);
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(dest[0], dest[0], inv_comp);
+ VEC4_MUL(dest[1], dest[1], inv_comp);
+ VEC4_MUL(dest[2], dest[2], inv_comp);
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* XXX what are these? */
+ assert(0);
+ break;
+ default:
+ assert(0);
+ }
+ /*
+ * Compute dest/second term A
+ */
+ switch (softpipe->blend->alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* dest = dest * 1 NO-OP, leave dest as-is */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ assert(0); /* illegal */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+ VEC4_MUL(dest[3], dest[3], comp); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(dest[3], zero); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float one_minus_alpha[QUAD_SIZE];
+ VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+ VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[3]); /* A */
+ VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(dest[3], dest[3], inv_comp);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ /*
+ * Combine RGB terms
+ */
+ switch (softpipe->blend->rgb_func) {
+ case PIPE_BLEND_ADD:
+ VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
+ VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
+ VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
+ break;
+ case PIPE_BLEND_MIN:
+ VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_MAX:
+ VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Combine A terms
+ */
+ switch (softpipe->blend->alpha_func) {
+ case PIPE_BLEND_ADD:
+ VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
+ break;
+ case PIPE_BLEND_MIN:
+ VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_MAX:
+ VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ default:
+ assert(0);
+ }
+}
static void
-blend_quad(struct quad_stage *qs, struct quad_header *quad)
+colormask_quad(struct quad_stage *qs,
+ float (*quadColor)[4],
+ float (*dest)[4])
{
- static const float zero[4] = { 0, 0, 0, 0 };
- static const float one[4] = { 1, 1, 1, 1 };
+ struct softpipe_context *softpipe = qs->softpipe;
+ /* R */
+ if (!(softpipe->blend->colormask & PIPE_MASK_R))
+ COPY_4V(quadColor[0], dest[0]);
+
+ /* G */
+ if (!(softpipe->blend->colormask & PIPE_MASK_G))
+ COPY_4V(quadColor[1], dest[1]);
+
+ /* B */
+ if (!(softpipe->blend->colormask & PIPE_MASK_B))
+ COPY_4V(quadColor[2], dest[2]);
+
+ /* A */
+ if (!(softpipe->blend->colormask & PIPE_MASK_A))
+ COPY_4V(quadColor[3], dest[3]);
+}
+
+
+static void
+blend_fallback(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
struct softpipe_context *softpipe = qs->softpipe;
- uint cbuf;
+ const struct pipe_blend_state *blend = softpipe->blend;
+ unsigned cbuf;
+
+ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
+ {
+ float dest[4][QUAD_SIZE];
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+ uint q, i, j;
+
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ const int itx = (quad->input.x0 & (TILE_SIZE-1));
+ const int ity = (quad->input.y0 & (TILE_SIZE-1));
+
+ /* get/swizzle dest colors
+ */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) {
+ dest[i][j] = tile->data.color[y][x][i];
+ }
+ }
- if (softpipe->blend->logicop_enable) {
- logicop_quad(qs, quad);
- return;
+
+ if (blend->logicop_enable) {
+ logicop_quad( qs, quadColor, dest );
+ }
+ else if (blend->blend_enable) {
+ blend_quad( qs, quadColor, dest );
+ }
+
+ if (blend->colormask != 0xf)
+ colormask_quad( qs, quadColor, dest );
+
+ /* Output color values
+ */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (quad->inout.mask & (1 << j)) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) { /* loop over color chans */
+ tile->data.color[y][x][i] = quadColor[i][j];
+ }
+ }
+ }
+ }
}
+}
- /* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
- float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
- struct softpipe_cached_tile *tile
- = sp_get_cached_tile(softpipe,
- softpipe->cbuf_cache[cbuf],
- quad->input.x0, quad->input.y0);
- float (*quadColor)[4] = quad->output.color[cbuf];
- uint i, j;
+static void
+blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ static const float one[4] = { 1, 1, 1, 1 };
+ float one_minus_alpha[QUAD_SIZE];
+ float dest[4][QUAD_SIZE];
+ float source[4][QUAD_SIZE];
+ uint i, j, q;
+
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
+ float (*quadColor)[4] = quad->output.color[0];
+ const float *alpha = quadColor[3];
+ const int itx = (quad->input.x0 & (TILE_SIZE-1));
+ const int ity = (quad->input.y0 & (TILE_SIZE-1));
+
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
}
}
- /*
- * Compute src/first term RGB
- */
- switch (softpipe->blend->rgb_src_factor) {
- case PIPE_BLENDFACTOR_ONE:
- VEC4_COPY(source[0], quadColor[0]); /* R */
- VEC4_COPY(source[1], quadColor[1]); /* G */
- VEC4_COPY(source[2], quadColor[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
- VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
- VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- {
- const float *alpha = quadColor[3];
- VEC4_MUL(source[0], quadColor[0], alpha); /* R */
- VEC4_MUL(source[1], quadColor[1], alpha); /* G */
- VEC4_MUL(source[2], quadColor[2], alpha); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
- VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
- VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- {
- const float *alpha = dest[3];
- VEC4_MUL(source[0], quadColor[0], alpha); /* R */
- VEC4_MUL(source[1], quadColor[1], alpha); /* G */
- VEC4_MUL(source[2], quadColor[2], alpha); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- {
- const float *alpha = quadColor[3];
- float diff[4], temp[4];
- VEC4_SUB(diff, one, dest[3]);
- VEC4_MIN(temp, alpha, diff);
- VEC4_MUL(source[0], quadColor[0], temp); /* R */
- VEC4_MUL(source[1], quadColor[1], temp); /* G */
- VEC4_MUL(source[2], quadColor[2], temp); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- {
- float comp[4];
- VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
- VEC4_MUL(source[0], quadColor[0], comp); /* R */
- VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
- VEC4_MUL(source[1], quadColor[1], comp); /* G */
- VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
- VEC4_MUL(source[2], quadColor[2], comp); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- {
- float alpha[4];
- VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
- VEC4_MUL(source[0], quadColor[0], alpha); /* R */
- VEC4_MUL(source[1], quadColor[1], alpha); /* G */
- VEC4_MUL(source[2], quadColor[2], alpha); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- assert(0); /* to do */
- break;
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- assert(0); /* to do */
- break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(source[0], zero); /* R */
- VEC4_COPY(source[1], zero); /* G */
- VEC4_COPY(source[2], zero); /* B */
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- {
- float inv_comp[4];
- VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
- VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
- VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
- VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
- VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
- VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- {
- float inv_alpha[4];
- VEC4_SUB(inv_alpha, one, quadColor[3]);
- VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
- VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
- VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- {
- float inv_alpha[4];
- VEC4_SUB(inv_alpha, one, dest[3]);
- VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
- VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
- VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- {
- float inv_comp[4];
- VEC4_SUB(inv_comp, one, dest[0]); /* R */
- VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
- VEC4_SUB(inv_comp, one, dest[1]); /* G */
- VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
- VEC4_SUB(inv_comp, one, dest[2]); /* B */
- VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- {
- float inv_comp[4];
- /* R */
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
- VEC4_MUL(source[0], quadColor[0], inv_comp);
- /* G */
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
- VEC4_MUL(source[1], quadColor[1], inv_comp);
- /* B */
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
- VEC4_MUL(source[2], quadColor[2], inv_comp);
- }
- break;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- {
- float inv_alpha[4];
- VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
- VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
- VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
- VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+ VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+ VEC4_MUL(source[3], quadColor[3], alpha); /* A */
+
+ VEC4_SUB(one_minus_alpha, one, alpha);
+ VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
+ VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
+ VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
+ VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */
+
+ VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
+ VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (quad->inout.mask & (1 << j)) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) { /* loop over color chans */
+ tile->data.color[y][x][i] = quadColor[i][j];
+ }
}
- break;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- assert(0); /* to do */
- break;
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- assert(0); /* to do */
- break;
- default:
- assert(0);
}
+ }
+}
- /*
- * Compute src/first term A
- */
- switch (softpipe->blend->alpha_src_factor) {
- case PIPE_BLENDFACTOR_ONE:
- VEC4_COPY(source[3], quadColor[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- {
- const float *alpha = quadColor[3];
- VEC4_MUL(source[3], quadColor[3], alpha); /* A */
- }
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_DST_ALPHA:
- VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- /* multiply alpha by 1.0 */
- VEC4_COPY(source[3], quadColor[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- {
- float comp[4];
- VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
- VEC4_MUL(source[3], quadColor[3], comp); /* A */
- }
- break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(source[3], zero); /* A */
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- {
- float inv_alpha[4];
- VEC4_SUB(inv_alpha, one, quadColor[3]);
- VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
- }
- break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- {
- float inv_alpha[4];
- VEC4_SUB(inv_alpha, one, dest[3]);
- VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
- }
- break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- {
- float inv_comp[4];
- /* A */
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
- VEC4_MUL(source[3], quadColor[3], inv_comp);
+static void
+blend_single_add_one_one(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ float dest[4][QUAD_SIZE];
+ uint i, j, q;
+
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
+ float (*quadColor)[4] = quad->output.color[0];
+ const int itx = (quad->input.x0 & (TILE_SIZE-1));
+ const int ity = (quad->input.y0 & (TILE_SIZE-1));
+
+ /* get/swizzle dest colors */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) {
+ dest[i][j] = tile->data.color[y][x][i];
}
- break;
- default:
- assert(0);
}
+
+ VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */
+ VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */
+ VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */
+ VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */
-
- /*
- * Compute dest/second term RGB
- */
- switch (softpipe->blend->rgb_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- /* dest = dest * 1 NO-OP, leave dest as-is */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
- VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
- VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
- VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
- VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
- break;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
- VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
- VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
- VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
- VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- assert(0); /* illegal */
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- {
- float comp[4];
- VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
- VEC4_MUL(dest[0], dest[0], comp); /* R */
- VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
- VEC4_MUL(dest[1], dest[1], comp); /* G */
- VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
- VEC4_MUL(dest[2], dest[2], comp); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- {
- float comp[4];
- VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
- VEC4_MUL(dest[0], dest[0], comp); /* R */
- VEC4_MUL(dest[1], dest[1], comp); /* G */
- VEC4_MUL(dest[2], dest[2], comp); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(dest[0], zero); /* R */
- VEC4_COPY(dest[1], zero); /* G */
- VEC4_COPY(dest[2], zero); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* XXX what are these? */
- assert(0);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- {
- float inv_comp[4];
- VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
- VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
- VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
- VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
- VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
- VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- {
- float one_minus_alpha[QUAD_SIZE];
- VEC4_SUB(one_minus_alpha, one, quadColor[3]);
- VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
- VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
- VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- {
- float inv_comp[4];
- VEC4_SUB(inv_comp, one, dest[3]); /* A */
- VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
- VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
- VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- {
- float inv_comp[4];
- VEC4_SUB(inv_comp, one, dest[0]); /* R */
- VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
- VEC4_SUB(inv_comp, one, dest[1]); /* G */
- VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
- VEC4_SUB(inv_comp, one, dest[2]); /* B */
- VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
- }
- break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- {
- float inv_comp[4];
- /* R */
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
- VEC4_MUL(dest[0], dest[0], inv_comp);
- /* G */
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
- VEC4_MUL(dest[1], dest[1], inv_comp);
- /* B */
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
- VEC4_MUL(dest[2], dest[2], inv_comp);
- }
- break;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- {
- float inv_comp[4];
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
- VEC4_MUL(dest[0], dest[0], inv_comp);
- VEC4_MUL(dest[1], dest[1], inv_comp);
- VEC4_MUL(dest[2], dest[2], inv_comp);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (quad->inout.mask & (1 << j)) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) { /* loop over color chans */
+ tile->data.color[y][x][i] = quadColor[i][j];
+ }
}
- break;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* XXX what are these? */
- assert(0);
- break;
- default:
- assert(0);
}
+ }
+}
- /*
- * Compute dest/second term A
- */
- switch (softpipe->blend->alpha_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- /* dest = dest * 1 NO-OP, leave dest as-is */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_DST_ALPHA:
- VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- assert(0); /* illegal */
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- {
- float comp[4];
- VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
- VEC4_MUL(dest[3], dest[3], comp); /* A */
- }
- break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(dest[3], zero); /* A */
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- {
- float one_minus_alpha[QUAD_SIZE];
- VEC4_SUB(one_minus_alpha, one, quadColor[3]);
- VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
- }
- break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- {
- float inv_comp[4];
- VEC4_SUB(inv_comp, one, dest[3]); /* A */
- VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
- }
- break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- {
- float inv_comp[4];
- VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
- VEC4_MUL(dest[3], dest[3], inv_comp);
+
+static void
+single_output_color(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ uint i, j, q;
+
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
+ float (*quadColor)[4] = quad->output.color[0];
+ const int itx = (quad->input.x0 & (TILE_SIZE-1));
+ const int ity = (quad->input.y0 & (TILE_SIZE-1));
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (quad->inout.mask & (1 << j)) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) { /* loop over color chans */
+ tile->data.color[y][x][i] = quadColor[i][j];
+ }
}
- break;
- default:
- assert(0);
}
+ }
+}
+
+static void
+blend_noop(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+}
- /*
- * Combine RGB terms
- */
- switch (softpipe->blend->rgb_func) {
- case PIPE_BLEND_ADD:
- VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
- VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
- VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
- break;
- case PIPE_BLEND_SUBTRACT:
- VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
- VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
- VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
- VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
- VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
- break;
- case PIPE_BLEND_MIN:
- VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
- VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
- VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
- break;
- case PIPE_BLEND_MAX:
- VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
- VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
- VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
- break;
- default:
- assert(0);
- }
- /*
- * Combine A terms
- */
- switch (softpipe->blend->alpha_func) {
- case PIPE_BLEND_ADD:
- VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
- break;
- case PIPE_BLEND_SUBTRACT:
- VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
- break;
- case PIPE_BLEND_MIN:
- VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
- break;
- case PIPE_BLEND_MAX:
- VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
- break;
- default:
- assert(0);
+static void
+choose_blend_quad(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ const struct pipe_blend_state *blend = softpipe->blend;
+
+ qs->run = blend_fallback;
+
+ if (softpipe->framebuffer.nr_cbufs == 0) {
+ qs->run = blend_noop;
+ }
+ else if (!softpipe->blend->logicop_enable &&
+ softpipe->blend->colormask == 0xf)
+ {
+ if (!blend->blend_enable) {
+ qs->run = single_output_color;
}
+ else if (blend->rgb_src_factor == blend->alpha_src_factor &&
+ blend->rgb_dst_factor == blend->alpha_dst_factor &&
+ blend->rgb_func == blend->alpha_func &&
+ softpipe->framebuffer.nr_cbufs == 1)
+ {
+ if (blend->alpha_func == PIPE_BLEND_ADD) {
+ if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
+ blend->rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
+ qs->run = blend_single_add_one_one;
+ }
+ else if (blend->rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
+ blend->rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
+ qs->run = blend_single_add_src_alpha_inv_src_alpha;
- } /* cbuf loop */
+ }
+ }
+ }
- /* pass blended quad to next stage */
- qs->next->run(qs->next, quad);
+ qs->run(qs, quads, nr);
}
static void blend_begin(struct quad_stage *qs)
{
- qs->next->begin(qs->next);
+ qs->run = choose_blend_quad;
}
@@ -770,7 +991,7 @@ struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
stage->softpipe = softpipe;
stage->begin = blend_begin;
- stage->run = blend_quad;
+ stage->run = choose_blend_quad;
stage->destroy = blend_destroy;
return stage;
diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
deleted file mode 100644
index 953d8516b9..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c
+++ /dev/null
@@ -1,74 +0,0 @@
-
-#include "util/u_memory.h"
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_surface.h"
-#include "sp_quad_pipe.h"
-
-
-/**
- * Loop over colorbuffers, passing quad to next stage each time.
- */
-static void
-cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
-{
- struct softpipe_context *softpipe = qs->softpipe;
- float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE];
- unsigned i;
-
- assert(sizeof(quad->outputs.color) == sizeof(tmp));
- assert(softpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
-
- /* make copy of original colors since they can get modified
- * by blending and masking.
- * XXX we won't have to do this if the fragment program actually emits
- * N separate colors and we're drawing to N color buffers (MRT).
- * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is
- * in effect, we need to save/restore colors like this.
- */
- memcpy(tmp, quad->outputs.color, sizeof(tmp));
-
- for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
- /* set current cbuffer */
-#if 0 /* obsolete & going away */
- softpipe->current_cbuf = i;
-#endif
-
- /* pass blended quad to next stage */
- qs->next->run(qs->next, quad);
-
- /* restore quad's colors for next buffer */
- memcpy(quad->outputs.color, tmp, sizeof(tmp));
- }
-}
-
-
-static void cbuf_loop_begin(struct quad_stage *qs)
-{
- qs->next->begin(qs->next);
-}
-
-
-static void cbuf_loop_destroy(struct quad_stage *qs)
-{
- FREE( qs );
-}
-
-
-/**
- * Create the colorbuffer loop stage.
- * This is used to implement multiple render targets and GL_FRONT_AND_BACK
- * rendering.
- */
-struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
- stage->softpipe = softpipe;
- stage->begin = cbuf_loop_begin;
- stage->run = cbuf_loop_quad;
- stage->destroy = cbuf_loop_destroy;
-
- return stage;
-}
-
diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c
deleted file mode 100644
index dc90e5d5e9..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_colormask.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief quad colormask stage
- * \author Brian Paul
- */
-
-#include "pipe/p_defines.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_surface.h"
-#include "sp_quad_pipe.h"
-#include "sp_tile_cache.h"
-
-
-
-/**
- * XXX colormask could be rolled into blending...
- */
-static void
-colormask_quad(struct quad_stage *qs, struct quad_header *quad)
-{
- struct softpipe_context *softpipe = qs->softpipe;
- uint cbuf;
-
- /* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
- float dest[4][QUAD_SIZE];
- struct softpipe_cached_tile *tile
- = sp_get_cached_tile(softpipe,
- softpipe->cbuf_cache[cbuf],
- quad->input.x0, quad->input.y0);
- float (*quadColor)[4] = quad->output.color[cbuf];
- uint i, j;
-
- /* get/swizzle dest colors */
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
- for (i = 0; i < 4; i++) {
- dest[i][j] = tile->data.color[y][x][i];
- }
- }
-
- /* R */
- if (!(softpipe->blend->colormask & PIPE_MASK_R))
- COPY_4V(quadColor[0], dest[0]);
-
- /* G */
- if (!(softpipe->blend->colormask & PIPE_MASK_G))
- COPY_4V(quadColor[1], dest[1]);
-
- /* B */
- if (!(softpipe->blend->colormask & PIPE_MASK_B))
- COPY_4V(quadColor[2], dest[2]);
-
- /* A */
- if (!(softpipe->blend->colormask & PIPE_MASK_A))
- COPY_4V(quadColor[3], dest[3]);
- }
-
- /* pass quad to next stage */
- qs->next->run(qs->next, quad);
-}
-
-
-static void colormask_begin(struct quad_stage *qs)
-{
- qs->next->begin(qs->next);
-}
-
-
-static void colormask_destroy(struct quad_stage *qs)
-{
- FREE( qs );
-}
-
-
-struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
- stage->softpipe = softpipe;
- stage->begin = colormask_begin;
- stage->run = colormask_quad;
- stage->destroy = colormask_destroy;
-
- return stage;
-}
diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c
deleted file mode 100644
index 4aeee85870..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_coverage.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * \brief Apply AA coverage to quad alpha valus
- * \author Brian Paul
- */
-
-
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_quad_pipe.h"
-
-
-/**
- * Multiply quad's alpha values by the fragment coverage.
- */
-static void
-coverage_quad(struct quad_stage *qs, struct quad_header *quad)
-{
- struct softpipe_context *softpipe = qs->softpipe;
- const uint prim = quad->input.prim;
-
- if ((softpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) ||
- (softpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) ||
- (softpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) {
- uint cbuf;
-
- /* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
- float (*quadColor)[4] = quad->output.color[cbuf];
- unsigned j;
- for (j = 0; j < QUAD_SIZE; j++) {
- assert(quad->input.coverage[j] >= 0.0);
- assert(quad->input.coverage[j] <= 1.0);
- quadColor[3][j] *= quad->input.coverage[j];
- }
- }
- }
-
- qs->next->run(qs->next, quad);
-}
-
-
-static void coverage_begin(struct quad_stage *qs)
-{
- qs->next->begin(qs->next);
-}
-
-
-static void coverage_destroy(struct quad_stage *qs)
-{
- FREE( qs );
-}
-
-
-struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
- stage->softpipe = softpipe;
- stage->begin = coverage_begin;
- stage->run = coverage_quad;
- stage->destroy = coverage_destroy;
-
- return stage;
-}
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index d463930bae..0ca86c4e1c 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -31,61 +31,109 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
+#include "tgsi/tgsi_scan.h"
#include "sp_context.h"
#include "sp_quad.h"
#include "sp_surface.h"
#include "sp_quad_pipe.h"
#include "sp_tile_cache.h"
+#include "sp_state.h" /* for sp_fragment_shader */
-/**
- * Do depth testing for a quad.
- * Not static since it's used by the stencil code.
- */
+struct depth_data {
+ struct pipe_surface *ps;
+ enum pipe_format format;
+ unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */
+ unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */
+ ubyte stencilVals[QUAD_SIZE];
+ struct softpipe_cached_tile *tile;
+};
-/*
- * To increase efficiency, we should probably have multiple versions
- * of this function that are specifically for Z16, Z32 and FP Z buffers.
- * Try to effectively do that with codegen...
- */
-void
-sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
+
+static void
+get_depth_stencil_values( struct depth_data *data,
+ const struct quad_header *quad )
{
- struct softpipe_context *softpipe = qs->softpipe;
- struct pipe_surface *ps = softpipe->framebuffer.zsbuf;
- const enum pipe_format format = ps->format;
- unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */
- unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */
- unsigned zmask = 0;
unsigned j;
- struct softpipe_cached_tile *tile
- = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
+ const struct softpipe_cached_tile *tile = data->tile;
+
+ switch (data->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = tile->data.depth16[y][x];
+ }
+ break;
+ case PIPE_FORMAT_Z32_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = tile->data.depth32[y][x];
+ }
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
+ data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
+ }
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
+ data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
- assert(ps); /* shouldn't get here if there's no zbuffer */
+/* If the shader has not been run, interpolate the depth values
+ * ourselves.
+ */
+static void
+interpolate_quad_depth( struct quad_header *quad )
+{
+ const float fx = (float) quad->input.x0;
+ const float fy = (float) quad->input.y0;
+ const float dzdx = quad->posCoef->dadx[2];
+ const float dzdy = quad->posCoef->dady[2];
+ const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
- /*
- * Convert quad's float depth values to int depth values (qzzzz).
+ quad->output.depth[0] = z0;
+ quad->output.depth[1] = z0 + dzdx;
+ quad->output.depth[2] = z0 + dzdy;
+ quad->output.depth[3] = z0 + dzdx + dzdy;
+}
+
+
+static void
+convert_quad_depth( struct depth_data *data,
+ const struct quad_header *quad )
+{
+ unsigned j;
+
+ /* Convert quad's float depth values to int depth values (qzzzz).
* If the Z buffer stores integer values, we _have_ to do the depth
* compares with integers (not floats). Otherwise, the float->int->float
* conversion of Z values (which isn't an identity function) will cause
* Z-fighting errors.
- *
- * Also, get the zbuffer values (bzzzz) from the cached tile.
*/
- switch (format) {
+ switch (data->format) {
case PIPE_FORMAT_Z16_UNORM:
{
float scale = 65535.0;
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
- }
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- bzzzz[j] = tile->data.depth16[y][x];
+ data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
}
break;
@@ -94,47 +142,247 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
double scale = (double) (uint) ~0UL;
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
- }
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- bzzzz[j] = tile->data.depth32[y][x];
+ data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
}
break;
case PIPE_FORMAT_X8Z24_UNORM:
- /* fall-through */
case PIPE_FORMAT_S8Z24_UNORM:
{
float scale = (float) ((1 << 24) - 1);
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
- }
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
+ data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
}
break;
case PIPE_FORMAT_Z24X8_UNORM:
- /* fall-through */
case PIPE_FORMAT_Z24S8_UNORM:
{
float scale = (float) ((1 << 24) - 1);
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- bzzzz[j] = tile->data.depth32[y][x] >> 8;
+
+
+static void
+write_depth_stencil_values( struct depth_data *data,
+ struct quad_header *quad )
+{
+ struct softpipe_cached_tile *tile = data->tile;
+ unsigned j;
+
+ /* put updated Z values back into cached tile */
+ switch (data->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
+ }
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_Z32_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth32[y][x] = data->bzzzz[j];
+ }
+ break;
+ case PIPE_FORMAT_S8Z24_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
+ }
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
+ }
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth32[y][x] = data->bzzzz[j] << 8;
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+
+
+/** Only 8-bit stencil supported */
+#define STENCIL_MAX 0xff
+
+
+/**
+ * Do the basic stencil test (compare stencil buffer values against the
+ * reference value.
+ *
+ * \param data->stencilVals the stencil values from the stencil buffer
+ * \param func the stencil func (PIPE_FUNC_x)
+ * \param ref the stencil reference value
+ * \param valMask the stencil value mask indicating which bits of the stencil
+ * values and ref value are to be used.
+ * \return mask indicating which pixels passed the stencil test
+ */
+static unsigned
+do_stencil_test(struct depth_data *data,
+ unsigned func,
+ unsigned ref, unsigned valMask)
+{
+ unsigned passMask = 0x0;
+ unsigned j;
+
+ ref &= valMask;
+
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ /* passMask = 0x0 */
+ break;
+ case PIPE_FUNC_LESS:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref < (data->stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_EQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref == (data->stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_LEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref <= (data->stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_GREATER:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref > (data->stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref != (data->stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_GEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref >= (data->stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_ALWAYS:
+ passMask = MASK_ALL;
+ break;
+ default:
+ assert(0);
+ }
+
+ return passMask;
+}
+
+
+/**
+ * Apply the stencil operator to stencil values.
+ *
+ * \param data->stencilVals the stencil buffer values (read and written)
+ * \param mask indicates which pixels to update
+ * \param op the stencil operator (PIPE_STENCIL_OP_x)
+ * \param ref the stencil reference value
+ * \param wrtMask writemask controlling which bits are changed in the
+ * stencil values
+ */
+static void
+apply_stencil_op(struct depth_data *data,
+ unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
+{
+ unsigned j;
+ ubyte newstencil[QUAD_SIZE];
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ newstencil[j] = data->stencilVals[j];
+ }
+
+ switch (op) {
+ case PIPE_STENCIL_OP_KEEP:
+ /* no-op */
+ break;
+ case PIPE_STENCIL_OP_ZERO:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = 0;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_REPLACE:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = ref;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_INCR:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ if (data->stencilVals[j] < STENCIL_MAX) {
+ newstencil[j] = data->stencilVals[j] + 1;
+ }
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_DECR:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ if (data->stencilVals[j] > 0) {
+ newstencil[j] = data->stencilVals[j] - 1;
+ }
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = data->stencilVals[j] + 1;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = data->stencilVals[j] - 1;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_INVERT:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = ~data->stencilVals[j];
}
}
break;
@@ -142,6 +390,39 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(0);
}
+ /*
+ * update the stencil values
+ */
+ if (wrtMask != STENCIL_MAX) {
+ /* apply bit-wise stencil buffer writemask */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
+ }
+ }
+ else {
+ for (j = 0; j < QUAD_SIZE; j++) {
+ data->stencilVals[j] = newstencil[j];
+ }
+ }
+}
+
+
+
+/*
+ * To increase efficiency, we should probably have multiple versions
+ * of this function that are specifically for Z16, Z32 and FP Z buffers.
+ * Try to effectively do that with codegen...
+ */
+
+static boolean
+depth_test_quad(struct quad_stage *qs,
+ struct depth_data *data,
+ struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ unsigned zmask = 0;
+ unsigned j;
+
switch (softpipe->depth_stencil->depth.func) {
case PIPE_FUNC_NEVER:
/* zmask = 0 */
@@ -151,37 +432,37 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
* Like this: quad->mask &= (quad->outputs.depth < zzzz);
*/
for (j = 0; j < QUAD_SIZE; j++) {
- if (qzzzz[j] < bzzzz[j])
+ if (data->qzzzz[j] < data->bzzzz[j])
zmask |= 1 << j;
}
break;
case PIPE_FUNC_EQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (qzzzz[j] == bzzzz[j])
+ if (data->qzzzz[j] == data->bzzzz[j])
zmask |= 1 << j;
}
break;
case PIPE_FUNC_LEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (qzzzz[j] <= bzzzz[j])
+ if (data->qzzzz[j] <= data->bzzzz[j])
zmask |= (1 << j);
}
break;
case PIPE_FUNC_GREATER:
for (j = 0; j < QUAD_SIZE; j++) {
- if (qzzzz[j] > bzzzz[j])
+ if (data->qzzzz[j] > data->bzzzz[j])
zmask |= (1 << j);
}
break;
case PIPE_FUNC_NOTEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (qzzzz[j] != bzzzz[j])
+ if (data->qzzzz[j] != data->bzzzz[j])
zmask |= (1 << j);
}
break;
case PIPE_FUNC_GEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (qzzzz[j] >= bzzzz[j])
+ if (data->qzzzz[j] >= data->bzzzz[j])
zmask |= (1 << j);
}
break;
@@ -193,80 +474,480 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
}
quad->inout.mask &= zmask;
+ if (quad->inout.mask == 0)
+ return FALSE;
+ /* Update our internal copy only if writemask set. Even if
+ * depth.writemask is FALSE, may still need to write out buffer
+ * data due to stencil changes.
+ */
if (softpipe->depth_stencil->depth.writemask) {
-
- /* This is also efficient with sse / spe instructions:
- */
for (j = 0; j < QUAD_SIZE; j++) {
- if (quad->inout.mask & (1 << j)) {
- bzzzz[j] = qzzzz[j];
- }
+ if (quad->inout.mask & (1 << j)) {
+ data->bzzzz[j] = data->qzzzz[j];
+ }
}
+ }
- /* put updated Z values back into cached tile */
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- tile->data.depth16[y][x] = (ushort) bzzzz[j];
+ return TRUE;
+}
+
+
+
+/**
+ * Do stencil (and depth) testing. Stenciling depends on the outcome of
+ * depth testing.
+ */
+static void
+depth_stencil_test_quad(struct quad_stage *qs,
+ struct depth_data *data,
+ struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ unsigned func, zFailOp, zPassOp, failOp;
+ ubyte ref, wrtMask, valMask;
+ uint face = quad->input.facing;
+
+ if (!softpipe->depth_stencil->stencil[1].enabled) {
+ /* single-sided stencil test, use front (face=0) state */
+ face = 0;
+ }
+
+ /* choose front or back face function, operator, etc */
+ /* XXX we could do these initializations once per primitive */
+ func = softpipe->depth_stencil->stencil[face].func;
+ failOp = softpipe->depth_stencil->stencil[face].fail_op;
+ zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
+ zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
+ ref = softpipe->depth_stencil->stencil[face].ref_value;
+ wrtMask = softpipe->depth_stencil->stencil[face].writemask;
+ valMask = softpipe->depth_stencil->stencil[face].valuemask;
+
+
+ /* do the stencil test first */
+ {
+ unsigned passMask, failMask;
+ passMask = do_stencil_test(data, func, ref, valMask);
+ failMask = quad->inout.mask & ~passMask;
+ quad->inout.mask &= passMask;
+
+ if (failOp != PIPE_STENCIL_OP_KEEP) {
+ apply_stencil_op(data, failMask, failOp, ref, wrtMask);
+ }
+ }
+
+ if (quad->inout.mask) {
+ /* now the pixels that passed the stencil test are depth tested */
+ if (softpipe->depth_stencil->depth.enabled) {
+ const unsigned origMask = quad->inout.mask;
+
+ depth_test_quad(qs, data, quad); /* quad->mask is updated */
+
+ /* update stencil buffer values according to z pass/fail result */
+ if (zFailOp != PIPE_STENCIL_OP_KEEP) {
+ const unsigned zFailMask = origMask & ~quad->inout.mask;
+ apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
}
- break;
- case PIPE_FORMAT_X8Z24_UNORM:
- /* fall-through */
- /* (yes, this falls through to a different case than above) */
- case PIPE_FORMAT_Z32_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- tile->data.depth32[y][x] = bzzzz[j];
+
+ if (zPassOp != PIPE_STENCIL_OP_KEEP) {
+ const unsigned zPassMask = origMask & quad->inout.mask;
+ apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
}
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- uint s8z24 = tile->data.depth32[y][x];
- s8z24 = (s8z24 & 0xff000000) | bzzzz[j];
- tile->data.depth32[y][x] = s8z24;
+ }
+ else {
+ /* no depth test, apply Zpass operator to stencil buffer values */
+ apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
+ }
+ }
+}
+
+
+#define ALPHATEST( FUNC, COMP ) \
+ static int \
+ alpha_test_quads_##FUNC( struct quad_stage *qs, \
+ struct quad_header *quads[], \
+ unsigned nr ) \
+ { \
+ const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \
+ const uint cbuf = 0; /* only output[0].alpha is tested */ \
+ unsigned pass_nr = 0; \
+ unsigned i; \
+ \
+ for (i = 0; i < nr; i++) { \
+ const float *aaaa = quads[i]->output.color[cbuf][3]; \
+ unsigned passMask = 0; \
+ \
+ if (aaaa[0] COMP ref) passMask |= (1 << 0); \
+ if (aaaa[1] COMP ref) passMask |= (1 << 1); \
+ if (aaaa[2] COMP ref) passMask |= (1 << 2); \
+ if (aaaa[3] COMP ref) passMask |= (1 << 3); \
+ \
+ quads[i]->inout.mask &= passMask; \
+ \
+ if (quads[i]->inout.mask) \
+ quads[pass_nr++] = quads[i]; \
+ } \
+ \
+ return pass_nr; \
+ }
+
+
+ALPHATEST( LESS, < )
+ALPHATEST( EQUAL, == )
+ALPHATEST( LEQUAL, <= )
+ALPHATEST( GREATER, > )
+ALPHATEST( NOTEQUAL, != )
+ALPHATEST( GEQUAL, >= )
+
+
+/* XXX: Incorporate into shader using KILP.
+ */
+static int
+alpha_test_quads(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ switch (qs->softpipe->depth_stencil->alpha.func) {
+ case PIPE_FUNC_LESS:
+ return alpha_test_quads_LESS( qs, quads, nr );
+ case PIPE_FUNC_EQUAL:
+ return alpha_test_quads_EQUAL( qs, quads, nr );
+ break;
+ case PIPE_FUNC_LEQUAL:
+ return alpha_test_quads_LEQUAL( qs, quads, nr );
+ case PIPE_FUNC_GREATER:
+ return alpha_test_quads_GREATER( qs, quads, nr );
+ case PIPE_FUNC_NOTEQUAL:
+ return alpha_test_quads_NOTEQUAL( qs, quads, nr );
+ case PIPE_FUNC_GEQUAL:
+ return alpha_test_quads_GEQUAL( qs, quads, nr );
+ case PIPE_FUNC_ALWAYS:
+ return nr;
+ case PIPE_FUNC_NEVER:
+ default:
+ return 0;
+ }
+}
+
+static unsigned mask_count[16] =
+{
+ 0, /* 0x0 */
+ 1, /* 0x1 */
+ 1, /* 0x2 */
+ 2, /* 0x3 */
+ 1, /* 0x4 */
+ 2, /* 0x5 */
+ 2, /* 0x6 */
+ 3, /* 0x7 */
+ 1, /* 0x8 */
+ 2, /* 0x9 */
+ 2, /* 0xa */
+ 3, /* 0xb */
+ 2, /* 0xc */
+ 3, /* 0xd */
+ 3, /* 0xe */
+ 4, /* 0xf */
+};
+
+
+
+static void
+depth_test_quads_fallback(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ unsigned i, pass = 0;
+ const struct sp_fragment_shader *fs = qs->softpipe->fs;
+ boolean interp_depth = !fs->info.writes_z;
+ struct depth_data data;
+
+
+ if (qs->softpipe->depth_stencil->alpha.enabled) {
+ nr = alpha_test_quads(qs, quads, nr);
+ }
+
+ if (qs->softpipe->framebuffer.zsbuf &&
+ (qs->softpipe->depth_stencil->depth.enabled ||
+ qs->softpipe->depth_stencil->stencil[0].enabled)) {
+
+ data.ps = qs->softpipe->framebuffer.zsbuf;
+ data.format = data.ps->format;
+ data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+
+ for (i = 0; i < nr; i++) {
+ get_depth_stencil_values(&data, quads[i]);
+
+ if (qs->softpipe->depth_stencil->depth.enabled) {
+ if (interp_depth)
+ interpolate_quad_depth(quads[i]);
+
+ convert_quad_depth(&data, quads[i]);
}
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- uint z24s8 = tile->data.depth32[y][x];
- z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8);
- tile->data.depth32[y][x] = z24s8;
+
+ if (qs->softpipe->depth_stencil->stencil[0].enabled) {
+ depth_stencil_test_quad(qs, &data, quads[i]);
+ write_depth_stencil_values(&data, quads[i]);
}
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- tile->data.depth32[y][x] = bzzzz[j] << 8;
+ else {
+ if (!depth_test_quad(qs, &data, quads[i]))
+ continue;
+
+ if (qs->softpipe->depth_stencil->depth.writemask)
+ write_depth_stencil_values(&data, quads[i]);
}
- break;
- default:
- assert(0);
+
+
+ quads[pass++] = quads[i];
+ }
+
+ nr = pass;
+ }
+
+ if (qs->softpipe->active_query_count) {
+ for (i = 0; i < nr; i++)
+ qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
+ }
+
+ if (nr)
+ qs->next->run(qs->next, quads, nr);
+}
+
+/* XXX: this function assumes setup function actually emits linear
+ * spans of quads. It seems a lot more natural to do (early)
+ * depth-testing on spans rather than quads.
+ */
+static void
+depth_interp_z16_less_write(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ unsigned i, pass = 0;
+ const unsigned ix = quads[0]->input.x0;
+ const unsigned iy = quads[0]->input.y0;
+ const float fx = (float) ix;
+ const float fy = (float) iy;
+ const float dzdx = quads[0]->posCoef->dadx[2];
+ const float dzdy = quads[0]->posCoef->dady[2];
+ const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+ struct softpipe_cached_tile *tile;
+ ushort (*depth16)[TILE_SIZE];
+ ushort idepth[4], depth_step;
+ const float scale = 65535.0;
+
+ idepth[0] = (ushort)((z0) * scale);
+ idepth[1] = (ushort)((z0 + dzdx) * scale);
+ idepth[2] = (ushort)((z0 + dzdy) * scale);
+ idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
+
+ depth_step = (ushort)(dzdx * 2 * scale);
+
+ tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
+
+ depth16 = (ushort (*)[TILE_SIZE])
+ &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
+
+ for (i = 0; i < nr; i++) {
+ unsigned outmask = quads[i]->inout.mask;
+ unsigned mask = 0;
+
+ if ((outmask & 1) && idepth[0] < depth16[0][0]) {
+ depth16[0][0] = idepth[0];
+ mask |= (1 << 0);
+ }
+
+ if ((outmask & 2) && idepth[1] < depth16[0][1]) {
+ depth16[0][1] = idepth[1];
+ mask |= (1 << 1);
+ }
+
+ if ((outmask & 4) && idepth[2] < depth16[1][0]) {
+ depth16[1][0] = idepth[2];
+ mask |= (1 << 2);
+ }
+
+ if ((outmask & 8) && idepth[3] < depth16[1][1]) {
+ depth16[1][1] = idepth[3];
+ mask |= (1 << 3);
}
+
+ idepth[0] += depth_step;
+ idepth[1] += depth_step;
+ idepth[2] += depth_step;
+ idepth[3] += depth_step;
+
+ depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
+
+ quads[i]->inout.mask = mask;
+ if (quads[i]->inout.mask)
+ quads[pass++] = quads[i];
}
+
+ if (pass)
+ qs->next->run(qs->next, quads, pass);
+
}
static void
-depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
+depth_interp_z16_lequal_write(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
{
- sp_depth_test_quad(qs, quad);
+ unsigned i, pass = 0;
+ const unsigned ix = quads[0]->input.x0;
+ const unsigned iy = quads[0]->input.y0;
+ const float fx = (float) ix;
+ const float fy = (float) iy;
+ const float dzdx = quads[0]->posCoef->dadx[2];
+ const float dzdy = quads[0]->posCoef->dady[2];
+ const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+ struct softpipe_cached_tile *tile;
+ ushort (*depth16)[TILE_SIZE];
+ ushort idepth[4], depth_step;
+ const float scale = 65535.0;
+
+ idepth[0] = (ushort)((z0) * scale);
+ idepth[1] = (ushort)((z0 + dzdx) * scale);
+ idepth[2] = (ushort)((z0 + dzdy) * scale);
+ idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
+
+ depth_step = (ushort)(dzdx * 2 * scale);
+
+ tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
+
+ depth16 = (ushort (*)[TILE_SIZE])
+ &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
+
+ for (i = 0; i < nr; i++) {
+ unsigned outmask = quads[i]->inout.mask;
+ unsigned mask = 0;
+
+ if ((outmask & 1) && idepth[0] <= depth16[0][0]) {
+ depth16[0][0] = idepth[0];
+ mask |= (1 << 0);
+ }
+
+ if ((outmask & 2) && idepth[1] <= depth16[0][1]) {
+ depth16[0][1] = idepth[1];
+ mask |= (1 << 1);
+ }
+
+ if ((outmask & 4) && idepth[2] <= depth16[1][0]) {
+ depth16[1][0] = idepth[2];
+ mask |= (1 << 2);
+ }
+
+ if ((outmask & 8) && idepth[3] <= depth16[1][1]) {
+ depth16[1][1] = idepth[3];
+ mask |= (1 << 3);
+ }
+
+ idepth[0] += depth_step;
+ idepth[1] += depth_step;
+ idepth[2] += depth_step;
+ idepth[3] += depth_step;
+
+ depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
+
+ quads[i]->inout.mask = mask;
+ if (quads[i]->inout.mask)
+ quads[pass++] = quads[i];
+ }
+
+ if (pass)
+ qs->next->run(qs->next, quads, pass);
- if (quad->inout.mask)
- qs->next->run(qs->next, quad);
}
+
+
+
+static void
+depth_noop(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ qs->next->run(qs->next, quads, nr);
+}
+
+
+
+static void
+choose_depth_test(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ boolean interp_depth = !qs->softpipe->fs->info.writes_z;
+
+ boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
+
+ boolean depth = (qs->softpipe->framebuffer.zsbuf &&
+ qs->softpipe->depth_stencil->depth.enabled);
+
+ unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
+
+ boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
+
+ boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
+
+ boolean occlusion = qs->softpipe->active_query_count;
+
+
+ if (!alpha &&
+ !depth &&
+ !stencil) {
+ qs->run = depth_noop;
+ }
+ else if (!alpha &&
+ interp_depth &&
+ depth &&
+ depthwrite &&
+ !occlusion &&
+ !stencil)
+ {
+ switch (depthfunc) {
+ case PIPE_FUNC_LESS:
+ switch (qs->softpipe->framebuffer.zsbuf->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ qs->run = depth_interp_z16_less_write;
+ break;
+ default:
+ qs->run = depth_test_quads_fallback;
+ break;
+ }
+ break;
+ case PIPE_FUNC_LEQUAL:
+ switch (qs->softpipe->framebuffer.zsbuf->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ qs->run = depth_interp_z16_lequal_write;
+ break;
+ default:
+ qs->run = depth_test_quads_fallback;
+ break;
+ }
+ break;
+ default:
+ qs->run = depth_test_quads_fallback;
+ }
+ }
+ else {
+ qs->run = depth_test_quads_fallback;
+ }
+
+
+ qs->run( qs, quads, nr );
+}
+
+
+
+
+
static void depth_test_begin(struct quad_stage *qs)
{
+ qs->run = choose_depth_test;
qs->next->begin(qs->next);
}
@@ -283,7 +964,7 @@ struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe )
stage->softpipe = softpipe;
stage->begin = depth_test_begin;
- stage->run = depth_test_quad;
+ stage->run = choose_depth_test;
stage->destroy = depth_test_destroy;
return stage;
diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
deleted file mode 100644
index 496fd39ed1..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief Quad early-z testing
- */
-
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "sp_quad.h"
-#include "sp_quad_pipe.h"
-
-
-/**
- * All this stage does is compute the quad's Z values (which is normally
- * done by the shading stage).
- * The next stage will do the actual depth test.
- */
-static void
-earlyz_quad(
- struct quad_stage *qs,
- struct quad_header *quad )
-{
- const float fx = (float) quad->input.x0;
- const float fy = (float) quad->input.y0;
- const float dzdx = quad->posCoef->dadx[2];
- const float dzdy = quad->posCoef->dady[2];
- const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
-
- quad->output.depth[0] = z0;
- quad->output.depth[1] = z0 + dzdx;
- quad->output.depth[2] = z0 + dzdy;
- quad->output.depth[3] = z0 + dzdx + dzdy;
-
- qs->next->run( qs->next, quad );
-}
-
-static void
-earlyz_begin(
- struct quad_stage *qs )
-{
- qs->next->begin( qs->next );
-}
-
-static void
-earlyz_destroy(
- struct quad_stage *qs )
-{
- FREE( qs );
-}
-
-struct quad_stage *
-sp_quad_earlyz_stage(
- struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT( quad_stage );
-
- stage->softpipe = softpipe;
- stage->begin = earlyz_begin;
- stage->run = earlyz_quad;
- stage->destroy = earlyz_destroy;
-
- return stage;
-}
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 28f8d1a60e..1e7533d0f9 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -68,72 +68,69 @@ quad_shade_stage(struct quad_stage *qs)
/**
* Execute fragment shader for the four fragments in the quad.
*/
-static void
+static INLINE boolean
shade_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct quad_shade_stage *qss = quad_shade_stage( qs );
struct softpipe_context *softpipe = qs->softpipe;
struct tgsi_exec_machine *machine = qss->machine;
- boolean z_written;
-
- /* Consts do not require 16 byte alignment. */
- machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
-
- machine->InterpCoefs = quad->coef;
/* run shader */
- quad->inout.mask &= softpipe->fs->run( softpipe->fs, machine, quad );
-
- /* store outputs */
- z_written = FALSE;
- {
- const ubyte *sem_name = softpipe->fs->info.output_semantic_name;
- const ubyte *sem_index = softpipe->fs->info.output_semantic_index;
- const uint n = qss->stage.softpipe->fs->info.num_outputs;
- uint i;
- for (i = 0; i < n; i++) {
- switch (sem_name[i]) {
- case TGSI_SEMANTIC_COLOR:
- {
- uint cbuf = sem_index[i];
- memcpy(quad->output.color[cbuf],
- &machine->Outputs[i].xyzw[0].f[0],
- sizeof(quad->output.color[0]) );
- }
- break;
- case TGSI_SEMANTIC_POSITION:
- {
- uint j;
- for (j = 0; j < 4; j++) {
- quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j];
- }
- z_written = TRUE;
- }
- break;
- }
+ return softpipe->fs->run( softpipe->fs, machine, quad );
+}
+
+
+
+static void
+coverage_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ uint cbuf;
+
+ /* loop over colorbuffer outputs */
+ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ unsigned j;
+ for (j = 0; j < QUAD_SIZE; j++) {
+ assert(quad->input.coverage[j] >= 0.0);
+ assert(quad->input.coverage[j] <= 1.0);
+ quadColor[3][j] *= quad->input.coverage[j];
}
}
+}
+
- if (!z_written) {
- /* compute Z values now, as in the quad earlyz stage */
- /* XXX we should really only do this if the earlyz stage is not used */
- const float fx = (float) quad->input.x0;
- const float fy = (float) quad->input.y0;
- const float dzdx = quad->posCoef->dadx[2];
- const float dzdy = quad->posCoef->dady[2];
- const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
-
- quad->output.depth[0] = z0;
- quad->output.depth[1] = z0 + dzdx;
- quad->output.depth[2] = z0 + dzdy;
- quad->output.depth[3] = z0 + dzdx + dzdy;
- }
- /* shader may cull fragments */
- if (quad->inout.mask) {
- qs->next->run( qs->next, quad );
+static void
+shade_quads(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ struct quad_shade_stage *qss = quad_shade_stage( qs );
+ struct softpipe_context *softpipe = qs->softpipe;
+ struct tgsi_exec_machine *machine = qss->machine;
+
+ unsigned i, pass = 0;
+
+ machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+ machine->InterpCoefs = quads[0]->coef;
+
+ for (i = 0; i < nr; i++) {
+ if (!shade_quad(qs, quads[i]))
+ continue;
+
+ if (/*do_coverage*/ 0)
+ coverage_quad( qs, quads[i] );
+
+ quads[pass++] = quads[i];
}
+
+ if (pass)
+ qs->next->run(qs->next, quads, pass);
}
+
+
+
/**
@@ -174,7 +171,7 @@ sp_quad_shade_stage( struct softpipe_context *softpipe )
qss->stage.softpipe = softpipe;
qss->stage.begin = shade_begin;
- qss->stage.run = shade_quad;
+ qss->stage.run = shade_quads;
qss->stage.destroy = shade_destroy;
qss->machine = tgsi_exec_machine_create();
diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
deleted file mode 100644
index dfa7ff3b1d..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * \brief Quad occlusion counter stage
- * \author Brian Paul
- */
-
-
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_surface.h"
-#include "sp_quad_pipe.h"
-
-static unsigned count_bits( unsigned val )
-{
- unsigned i;
-
- for (i = 0; val ; val >>= 1)
- i += (val & 1);
-
- return i;
-}
-
-static void
-occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad)
-{
- struct softpipe_context *softpipe = qs->softpipe;
-
- softpipe->occlusion_count += count_bits(quad->inout.mask);
-
- qs->next->run(qs->next, quad);
-}
-
-
-static void occlusion_begin(struct quad_stage *qs)
-{
- qs->next->begin(qs->next);
-}
-
-
-static void occlusion_destroy(struct quad_stage *qs)
-{
- FREE( qs );
-}
-
-
-struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
- stage->softpipe = softpipe;
- stage->begin = occlusion_begin;
- stage->run = occlusion_count_quad;
- stage->destroy = occlusion_destroy;
-
- return stage;
-}
diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c
deleted file mode 100644
index 92d5f9f3c1..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_output.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_memory.h"
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_surface.h"
-#include "sp_quad_pipe.h"
-#include "sp_tile_cache.h"
-
-
-/**
- * Last step of quad processing: write quad colors to the framebuffer,
- * taking mask into account.
- */
-static void
-output_quad(struct quad_stage *qs, struct quad_header *quad)
-{
- /* in-tile pos: */
- const int itx = quad->input.x0 % TILE_SIZE;
- const int ity = quad->input.y0 % TILE_SIZE;
-
- struct softpipe_context *softpipe = qs->softpipe;
- uint cbuf;
-
- /* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
- struct softpipe_cached_tile *tile
- = sp_get_cached_tile(softpipe,
- softpipe->cbuf_cache[cbuf],
- quad->input.x0, quad->input.y0);
- float (*quadColor)[4] = quad->output.color[cbuf];
- int i, j;
-
- /* get/swizzle dest colors */
- for (j = 0; j < QUAD_SIZE; j++) {
- if (quad->inout.mask & (1 << j)) {
- int x = itx + (j & 1);
- int y = ity + (j >> 1);
- for (i = 0; i < 4; i++) { /* loop over color chans */
- tile->data.color[y][x][i] = quadColor[i][j];
- }
- if (0) {
- debug_printf("sp write pixel %d,%d: %g, %g, %g\n",
- quad->input.x0 + x,
- quad->input.y0 + y,
- quadColor[0][j],
- quadColor[1][j],
- quadColor[2][j]);
- }
- }
- }
- }
-}
-
-
-static void output_begin(struct quad_stage *qs)
-{
- assert(qs->next == NULL);
-}
-
-
-static void output_destroy(struct quad_stage *qs)
-{
- FREE( qs );
-}
-
-
-struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
- stage->softpipe = softpipe;
- stage->begin = output_begin;
- stage->run = output_quad;
- stage->destroy = output_destroy;
-
- return stage;
-}
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index b5f69b7426..1b5bab4eca 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
@@ -31,88 +31,33 @@
#include "pipe/p_shader_tokens.h"
static void
-sp_push_quad_first(
- struct softpipe_context *sp,
- struct quad_stage *quad,
- uint i )
+sp_push_quad_first( struct softpipe_context *sp,
+ struct quad_stage *quad )
{
- quad->next = sp->quad[i].first;
- sp->quad[i].first = quad;
+ quad->next = sp->quad.first;
+ sp->quad.first = quad;
}
-static void
-sp_build_depth_stencil(
- struct softpipe_context *sp,
- uint i )
-{
- if (sp->depth_stencil->stencil[0].enabled ||
- sp->depth_stencil->stencil[1].enabled) {
- sp_push_quad_first( sp, sp->quad[i].stencil_test, i );
- }
- else if (sp->depth_stencil->depth.enabled &&
- sp->framebuffer.zsbuf) {
- sp_push_quad_first( sp, sp->quad[i].depth_test, i );
- }
-}
void
sp_build_quad_pipeline(struct softpipe_context *sp)
{
- uint i;
-
boolean early_depth_test =
- sp->depth_stencil->depth.enabled &&
- sp->framebuffer.zsbuf &&
- !sp->depth_stencil->alpha.enabled &&
- !sp->fs->info.uses_kill &&
- !sp->fs->info.writes_z;
-
- /* build up the pipeline in reverse order... */
- for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
- sp->quad[i].first = sp->quad[i].output;
-
- if (sp->blend->colormask != 0xf) {
- sp_push_quad_first( sp, sp->quad[i].colormask, i );
- }
+ sp->depth_stencil->depth.enabled &&
+ sp->framebuffer.zsbuf &&
+ !sp->depth_stencil->alpha.enabled &&
+ !sp->fs->info.uses_kill &&
+ !sp->fs->info.writes_z;
- if (sp->blend->blend_enable ||
- sp->blend->logicop_enable) {
- sp_push_quad_first( sp, sp->quad[i].blend, i );
- }
+ sp->quad.first = sp->quad.blend;
- if (sp->active_query_count) {
- sp_push_quad_first( sp, sp->quad[i].occlusion, i );
- }
-
- if (sp->rasterizer->poly_smooth ||
- sp->rasterizer->line_smooth ||
- sp->rasterizer->point_smooth) {
- sp_push_quad_first( sp, sp->quad[i].coverage, i );
- }
-
- if (!early_depth_test) {
- sp_build_depth_stencil( sp, i );
- }
-
- if (sp->depth_stencil->alpha.enabled) {
- sp_push_quad_first( sp, sp->quad[i].alpha_test, i );
- }
-
- /* XXX always enable shader? */
- if (1) {
- sp_push_quad_first( sp, sp->quad[i].shade, i );
- }
-
- if (early_depth_test) {
- sp_build_depth_stencil( sp, i );
- sp_push_quad_first( sp, sp->quad[i].earlyz, i );
- }
-
-#if !USE_DRAW_STAGE_PSTIPPLE
- if (sp->rasterizer->poly_stipple_enable) {
- sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i );
- }
-#endif
+ if (early_depth_test) {
+ sp_push_quad_first( sp, sp->quad.shade );
+ sp_push_quad_first( sp, sp->quad.depth_test );
+ }
+ else {
+ sp_push_quad_first( sp, sp->quad.depth_test );
+ sp_push_quad_first( sp, sp->quad.shade );
}
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h
index 0e40586ffc..c0aa134831 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.h
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h
@@ -49,7 +49,7 @@ struct quad_stage {
void (*begin)(struct quad_stage *qs);
/** the stage action */
- void (*run)(struct quad_stage *qs, struct quad_header *quad);
+ void (*run)(struct quad_stage *qs, struct quad_header *quad[], unsigned nr);
void (*destroy)(struct quad_stage *qs);
};
@@ -69,6 +69,4 @@ struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe );
void sp_build_quad_pipeline(struct softpipe_context *sp);
-void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
-
#endif /* SP_QUAD_PIPE_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c
deleted file mode 100644
index 5e9d447737..0000000000
--- a/src/gallium/drivers/softpipe/sp_quad_stencil.c
+++ /dev/null
@@ -1,352 +0,0 @@
-
-/**
- * \brief Quad stencil testing
- */
-
-
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_surface.h"
-#include "sp_tile_cache.h"
-#include "sp_quad_pipe.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-
-
-/** Only 8-bit stencil supported */
-#define STENCIL_MAX 0xff
-
-
-/**
- * Do the basic stencil test (compare stencil buffer values against the
- * reference value.
- *
- * \param stencilVals the stencil values from the stencil buffer
- * \param func the stencil func (PIPE_FUNC_x)
- * \param ref the stencil reference value
- * \param valMask the stencil value mask indicating which bits of the stencil
- * values and ref value are to be used.
- * \return mask indicating which pixels passed the stencil test
- */
-static unsigned
-do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func,
- unsigned ref, unsigned valMask)
-{
- unsigned passMask = 0x0;
- unsigned j;
-
- ref &= valMask;
-
- switch (func) {
- case PIPE_FUNC_NEVER:
- /* passMask = 0x0 */
- break;
- case PIPE_FUNC_LESS:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref < (stencilVals[j] & valMask)) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_EQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref == (stencilVals[j] & valMask)) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_LEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref <= (stencilVals[j] & valMask)) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_GREATER:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref > (stencilVals[j] & valMask)) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_NOTEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref != (stencilVals[j] & valMask)) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_GEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref >= (stencilVals[j] & valMask)) {
- passMask |= (1 << j);
- }
- }
- break;
- case PIPE_FUNC_ALWAYS:
- passMask = MASK_ALL;
- break;
- default:
- assert(0);
- }
-
- return passMask;
-}
-
-
-/**
- * Apply the stencil operator to stencil values.
- *
- * \param stencilVals the stencil buffer values (read and written)
- * \param mask indicates which pixels to update
- * \param op the stencil operator (PIPE_STENCIL_OP_x)
- * \param ref the stencil reference value
- * \param wrtMask writemask controlling which bits are changed in the
- * stencil values
- */
-static void
-apply_stencil_op(ubyte stencilVals[QUAD_SIZE],
- unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
-{
- unsigned j;
- ubyte newstencil[QUAD_SIZE];
-
- for (j = 0; j < QUAD_SIZE; j++) {
- newstencil[j] = stencilVals[j];
- }
-
- switch (op) {
- case PIPE_STENCIL_OP_KEEP:
- /* no-op */
- break;
- case PIPE_STENCIL_OP_ZERO:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (mask & (1 << j)) {
- newstencil[j] = 0;
- }
- }
- break;
- case PIPE_STENCIL_OP_REPLACE:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (mask & (1 << j)) {
- newstencil[j] = ref;
- }
- }
- break;
- case PIPE_STENCIL_OP_INCR:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (mask & (1 << j)) {
- if (stencilVals[j] < STENCIL_MAX) {
- newstencil[j] = stencilVals[j] + 1;
- }
- }
- }
- break;
- case PIPE_STENCIL_OP_DECR:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (mask & (1 << j)) {
- if (stencilVals[j] > 0) {
- newstencil[j] = stencilVals[j] - 1;
- }
- }
- }
- break;
- case PIPE_STENCIL_OP_INCR_WRAP:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (mask & (1 << j)) {
- newstencil[j] = stencilVals[j] + 1;
- }
- }
- break;
- case PIPE_STENCIL_OP_DECR_WRAP:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (mask & (1 << j)) {
- newstencil[j] = stencilVals[j] - 1;
- }
- }
- break;
- case PIPE_STENCIL_OP_INVERT:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (mask & (1 << j)) {
- newstencil[j] = ~stencilVals[j];
- }
- }
- break;
- default:
- assert(0);
- }
-
- /*
- * update the stencil values
- */
- if (wrtMask != STENCIL_MAX) {
- /* apply bit-wise stencil buffer writemask */
- for (j = 0; j < QUAD_SIZE; j++) {
- stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]);
- }
- }
- else {
- for (j = 0; j < QUAD_SIZE; j++) {
- stencilVals[j] = newstencil[j];
- }
- }
-}
-
-
-/**
- * Do stencil (and depth) testing. Stenciling depends on the outcome of
- * depth testing.
- */
-static void
-stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
-{
- struct softpipe_context *softpipe = qs->softpipe;
- struct pipe_surface *ps = softpipe->framebuffer.zsbuf;
- unsigned func, zFailOp, zPassOp, failOp;
- ubyte ref, wrtMask, valMask;
- ubyte stencilVals[QUAD_SIZE];
- struct softpipe_cached_tile *tile
- = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
- uint j;
- uint face = quad->input.facing;
-
- if (!softpipe->depth_stencil->stencil[1].enabled) {
- /* single-sided stencil test, use front (face=0) state */
- face = 0;
- }
-
- /* choose front or back face function, operator, etc */
- /* XXX we could do these initializations once per primitive */
- func = softpipe->depth_stencil->stencil[face].func;
- failOp = softpipe->depth_stencil->stencil[face].fail_op;
- zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
- zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
- ref = softpipe->depth_stencil->stencil[face].ref_value;
- wrtMask = softpipe->depth_stencil->stencil[face].writemask;
- valMask = softpipe->depth_stencil->stencil[face].valuemask;
-
- assert(ps); /* shouldn't get here if there's no stencil buffer */
-
- /* get stencil values from cached tile */
- switch (ps->format) {
- case PIPE_FORMAT_S8Z24_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- stencilVals[j] = tile->data.depth32[y][x] >> 24;
- }
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- stencilVals[j] = tile->data.depth32[y][x] & 0xff;
- }
- break;
- case PIPE_FORMAT_S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- stencilVals[j] = tile->data.stencil8[y][x];
- }
- break;
- default:
- assert(0);
- }
-
- /* do the stencil test first */
- {
- unsigned passMask, failMask;
- passMask = do_stencil_test(stencilVals, func, ref, valMask);
- failMask = quad->inout.mask & ~passMask;
- quad->inout.mask &= passMask;
-
- if (failOp != PIPE_STENCIL_OP_KEEP) {
- apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask);
- }
- }
-
- if (quad->inout.mask) {
- /* now the pixels that passed the stencil test are depth tested */
- if (softpipe->depth_stencil->depth.enabled) {
- const unsigned origMask = quad->inout.mask;
-
- sp_depth_test_quad(qs, quad); /* quad->mask is updated */
-
- /* update stencil buffer values according to z pass/fail result */
- if (zFailOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned failMask = origMask & ~quad->inout.mask;
- apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask);
- }
-
- if (zPassOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned passMask = origMask & quad->inout.mask;
- apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask);
- }
- }
- else {
- /* no depth test, apply Zpass operator to stencil buffer values */
- apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask);
- }
-
- }
-
- /* put new stencil values into cached tile */
- switch (ps->format) {
- case PIPE_FORMAT_S8Z24_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- uint s8z24 = tile->data.depth32[y][x];
- s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff);
- tile->data.depth32[y][x] = s8z24;
- }
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- uint z24s8 = tile->data.depth32[y][x];
- z24s8 = (z24s8 & 0xffffff00) | stencilVals[j];
- tile->data.depth32[y][x] = z24s8;
- }
- break;
- case PIPE_FORMAT_S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- tile->data.stencil8[y][x] = stencilVals[j];
- }
- break;
- default:
- assert(0);
- }
-
- if (quad->inout.mask)
- qs->next->run(qs->next, quad);
-}
-
-
-static void stencil_begin(struct quad_stage *qs)
-{
- qs->next->begin(qs->next);
-}
-
-
-static void stencil_destroy(struct quad_stage *qs)
-{
- FREE( qs );
-}
-
-
-struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe )
-{
- struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
- stage->softpipe = softpipe;
- stage->begin = stencil_begin;
- stage->run = stencil_test_quad;
- stage->destroy = stencil_destroy;
-
- return stage;
-}
diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c
index 07162db7b6..a0527a596a 100644
--- a/src/gallium/drivers/softpipe/sp_quad_stipple.c
+++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c
@@ -14,14 +14,20 @@
* Apply polygon stipple to quads produced by triangle rasterization
*/
static void
-stipple_quad(struct quad_stage *qs, struct quad_header *quad)
+stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
{
static const uint bit31 = 1 << 31;
static const uint bit30 = 1 << 30;
+ unsigned pass = nr;
+
+ struct softpipe_context *softpipe = qs->softpipe;
+ unsigned q;
+
+ pass = 0;
+
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
- if (quad->input.prim == QUAD_PRIM_TRI) {
- struct softpipe_context *softpipe = qs->softpipe;
- /* need to invert Y to index into OpenGL's stipple pattern */
const int col0 = quad->input.x0 % 32;
const int y0 = quad->input.y0;
const int y1 = y0 + 1;
@@ -41,13 +47,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
if ((stipple1 & (bit30 >> col0)) == 0)
quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
- if (!quad->inout.mask) {
- /* all fragments failed stipple test, end of quad pipeline */
- return;
- }
+ if (quad->inout.mask)
+ quads[pass++] = quad;
}
- qs->next->run(qs->next, quad);
+ qs->next->run(qs->next, quads, pass);
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 769425bd12..81fb7aa20c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -40,7 +40,7 @@
static const char *
softpipe_get_vendor(struct pipe_screen *screen)
{
- return "Tungsten Graphics, Inc.";
+ return "VMware, Inc.";
}
@@ -65,8 +65,6 @@ softpipe_get_param(struct pipe_screen *screen, int param)
return 1;
case PIPE_CAP_GLSL:
return 1;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
@@ -137,10 +135,14 @@ softpipe_is_format_supported( struct pipe_screen *screen,
target == PIPE_TEXTURE_CUBE);
switch(format) {
+ case PIPE_FORMAT_L16_UNORM:
+ case PIPE_FORMAT_YCBCR_REV:
+ case PIPE_FORMAT_YCBCR:
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_Z32_FLOAT:
return FALSE;
default:
return TRUE;
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index de3ae3c369..ade125662a 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -33,7 +33,6 @@
*/
#include "sp_context.h"
-#include "sp_prim_setup.h"
#include "sp_quad.h"
#include "sp_quad_pipe.h"
#include "sp_setup.h"
@@ -61,87 +60,9 @@ struct edge {
int lines; /**< number of lines on this edge */
};
-#if SP_NUM_QUAD_THREADS > 1
-/* Set to 1 if you want other threads to be instantly
- * notified of pending jobs.
- */
-#define INSTANT_NOTEMPTY_NOTIFY 0
-
-struct thread_info
-{
- struct setup_context *setup;
- uint id;
- pipe_thread handle;
-};
-
-struct quad_job;
-
-typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job );
-
-struct quad_job
-{
- struct quad_header_input input;
- struct quad_header_inout inout;
- quad_job_routine routine;
-};
-
-#define NUM_QUAD_JOBS 64
-
-struct quad_job_que
-{
- struct quad_job jobs[NUM_QUAD_JOBS];
- uint first;
- uint last;
- pipe_mutex que_mutex;
- pipe_condvar que_notfull_condvar;
- pipe_condvar que_notempty_condvar;
- uint jobs_added;
- uint jobs_done;
- pipe_condvar que_done_condvar;
-};
-
-static void
-add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine )
-{
-#if INSTANT_NOTEMPTY_NOTIFY
- boolean empty;
-#endif
-
- /* Wait for empty slot, see if the que is empty.
- */
- pipe_mutex_lock( que->que_mutex );
- while ((que->last + 1) % NUM_QUAD_JOBS == que->first) {
-#if !INSTANT_NOTEMPTY_NOTIFY
- pipe_condvar_broadcast( que->que_notempty_condvar );
-#endif
- pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex );
- }
-#if INSTANT_NOTEMPTY_NOTIFY
- empty = que->last == que->first;
-#endif
- que->jobs_added++;
- pipe_mutex_unlock( que->que_mutex );
+#define MAX_QUADS 16
- /* Submit new job.
- */
- que->jobs[que->last].input = quad->input;
- que->jobs[que->last].inout = quad->inout;
- que->jobs[que->last].routine = routine;
- que->last = (que->last + 1) % NUM_QUAD_JOBS;
-
-#if INSTANT_NOTEMPTY_NOTIFY
- /* If the que was empty, notify consumers there's a job to be done.
- */
- if (empty) {
- pipe_mutex_lock( que->que_mutex );
- pipe_condvar_broadcast( que->que_notempty_condvar );
- pipe_mutex_unlock( que->que_mutex );
- }
-#endif
-}
-
-#endif
/**
* Triangle setup info (derived from draw_stage).
@@ -164,22 +85,19 @@ struct setup_context {
struct edge emaj;
float oneoverarea;
+ int facing;
+
+ struct quad_header quad[MAX_QUADS];
+ struct quad_header *quad_ptrs[MAX_QUADS];
+ unsigned count;
struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
struct tgsi_interp_coef posCoef; /* For Z, W */
- struct quad_header quad;
-
-#if SP_NUM_QUAD_THREADS > 1
- struct quad_job_que que;
- struct thread_info threads[SP_NUM_QUAD_THREADS];
-#endif
struct {
int left[2]; /**< [0] = row0, [1] = row1 */
int right[2];
int y;
- unsigned y_flags;
- unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
} span;
#if DEBUG_FRAGS
@@ -190,67 +108,6 @@ struct setup_context {
unsigned winding; /* which winding to cull */
};
-#if SP_NUM_QUAD_THREADS > 1
-
-static PIPE_THREAD_ROUTINE( quad_thread, param )
-{
- struct thread_info *info = (struct thread_info *) param;
- struct quad_job_que *que = &info->setup->que;
-
- for (;;) {
- struct quad_job job;
- boolean full;
-
- /* Wait for an available job.
- */
- pipe_mutex_lock( que->que_mutex );
- while (que->last == que->first)
- pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex );
-
- /* See if the que is full.
- */
- full = (que->last + 1) % NUM_QUAD_JOBS == que->first;
-
- /* Take a job and remove it from que.
- */
- job = que->jobs[que->first];
- que->first = (que->first + 1) % NUM_QUAD_JOBS;
-
- /* Notify the producer if the que is not full.
- */
- if (full)
- pipe_condvar_signal( que->que_notfull_condvar );
- pipe_mutex_unlock( que->que_mutex );
-
- job.routine( info->setup, info->id, &job );
-
- /* Notify the producer if that's the last finished job.
- */
- pipe_mutex_lock( que->que_mutex );
- que->jobs_done++;
- if (que->jobs_added == que->jobs_done)
- pipe_condvar_signal( que->que_done_condvar );
- pipe_mutex_unlock( que->que_mutex );
- }
-
- return NULL;
-}
-
-#define WAIT_FOR_COMPLETION(setup) \
- do {\
- pipe_mutex_lock( setup->que.que_mutex );\
- if (!INSTANT_NOTEMPTY_NOTIFY)\
- pipe_condvar_broadcast( setup->que.que_notempty_condvar );\
- while (setup->que.jobs_added != setup->que.jobs_done)\
- pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\
- pipe_mutex_unlock( setup->que.que_mutex );\
- } while (0)
-
-#else
-
-#define WAIT_FOR_COMPLETION(setup) ((void) 0)
-
-#endif
@@ -313,98 +170,18 @@ quad_clip( struct setup_context *setup, struct quad_header *quad )
* Emit a quad (pass to next stage) with clipping.
*/
static INLINE void
-clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
+clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
{
quad_clip( setup, quad );
+
if (quad->inout.mask) {
struct softpipe_context *sp = setup->softpipe;
- sp->quad[thread].first->run( sp->quad[thread].first, quad );
+ sp->quad.first->run( sp->quad.first, &quad, 1 );
}
}
-#if SP_NUM_QUAD_THREADS > 1
-
-static void
-clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
-{
- struct quad_header quad;
-
- quad.input = job->input;
- quad.inout = job->inout;
- quad.coef = setup->quad.coef;
- quad.posCoef = setup->quad.posCoef;
- quad.nr_attrs = setup->quad.nr_attrs;
- clip_emit_quad( setup, &quad, thread );
-}
-
-#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job )
-
-#else
-
-#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 )
-
-#endif
-
-/**
- * Emit a quad (pass to next stage). No clipping is done.
- */
-static INLINE void
-emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
-{
- struct softpipe_context *sp = setup->softpipe;
-#if DEBUG_FRAGS
- uint mask = quad->inout.mask;
-#endif
-
-#if DEBUG_FRAGS
- if (mask & 1) setup->numFragsEmitted++;
- if (mask & 2) setup->numFragsEmitted++;
- if (mask & 4) setup->numFragsEmitted++;
- if (mask & 8) setup->numFragsEmitted++;
-#endif
- sp->quad[thread].first->run( sp->quad[thread].first, quad );
-#if DEBUG_FRAGS
- mask = quad->inout.mask;
- if (mask & 1) setup->numFragsWritten++;
- if (mask & 2) setup->numFragsWritten++;
- if (mask & 4) setup->numFragsWritten++;
- if (mask & 8) setup->numFragsWritten++;
-#endif
-}
-
-#if SP_NUM_QUAD_THREADS > 1
-static void
-emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
-{
- struct quad_header quad;
-
- quad.input = job->input;
- quad.inout = job->inout;
- quad.coef = setup->quad.coef;
- quad.posCoef = setup->quad.posCoef;
- quad.nr_attrs = setup->quad.nr_attrs;
- emit_quad( setup, &quad, thread );
-}
-
-#define EMIT_QUAD(setup,x,y,mask) do {\
- setup->quad.input.x0 = x;\
- setup->quad.input.y0 = y;\
- setup->quad.inout.mask = mask;\
- add_quad_job( &setup->que, &setup->quad, emit_quad_job );\
- } while (0)
-
-#else
-
-#define EMIT_QUAD(setup,x,y,mask) do {\
- setup->quad.input.x0 = x;\
- setup->quad.input.y0 = y;\
- setup->quad.inout.mask = mask;\
- emit_quad( setup, &setup->quad, 0 );\
- } while (0)
-
-#endif
/**
* Given an X or Y coordinate, return the block/quad coordinate that it
@@ -412,7 +189,12 @@ emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
*/
static INLINE int block( int x )
{
- return x & ~1;
+ return x & ~(2-1);
+}
+
+static INLINE int block_x( int x )
+{
+ return x & ~(16-1);
}
@@ -421,72 +203,63 @@ static INLINE int block( int x )
*/
static void flush_spans( struct setup_context *setup )
{
+ const int step = 16;
const int xleft0 = setup->span.left[0];
const int xleft1 = setup->span.left[1];
const int xright0 = setup->span.right[0];
const int xright1 = setup->span.right[1];
- int minleft, maxright;
+ struct quad_stage *pipe = setup->softpipe->quad.first;
+
+
+ int minleft = block_x(MIN2(xleft0, xleft1));
+ int maxright = MAX2(xright0, xright1);
int x;
- switch (setup->span.y_flags) {
- case 0x3:
- /* both odd and even lines written (both quad rows) */
- minleft = block(MIN2(xleft0, xleft1));
- maxright = block(MAX2(xright0, xright1));
- for (x = minleft; x <= maxright; x += 2) {
- /* determine which of the four pixels is inside the span bounds */
- uint mask = 0x0;
- if (x >= xleft0 && x < xright0)
- mask |= MASK_TOP_LEFT;
- if (x >= xleft1 && x < xright1)
- mask |= MASK_BOTTOM_LEFT;
- if (x+1 >= xleft0 && x+1 < xright0)
- mask |= MASK_TOP_RIGHT;
- if (x+1 >= xleft1 && x+1 < xright1)
- mask |= MASK_BOTTOM_RIGHT;
- if (mask)
- EMIT_QUAD( setup, x, setup->span.y, mask );
- }
- break;
-
- case 0x1:
- /* only even line written (quad top row) */
- minleft = block(xleft0);
- maxright = block(xright0);
- for (x = minleft; x <= maxright; x += 2) {
- uint mask = 0x0;
- if (x >= xleft0 && x < xright0)
- mask |= MASK_TOP_LEFT;
- if (x+1 >= xleft0 && x+1 < xright0)
- mask |= MASK_TOP_RIGHT;
- if (mask)
- EMIT_QUAD( setup, x, setup->span.y, mask );
- }
- break;
-
- case 0x2:
- /* only odd line written (quad bottom row) */
- minleft = block(xleft1);
- maxright = block(xright1);
- for (x = minleft; x <= maxright; x += 2) {
- uint mask = 0x0;
- if (x >= xleft1 && x < xright1)
- mask |= MASK_BOTTOM_LEFT;
- if (x+1 >= xleft1 && x+1 < xright1)
- mask |= MASK_BOTTOM_RIGHT;
- if (mask)
- EMIT_QUAD( setup, x, setup->span.y, mask );
- }
- break;
+ for (x = minleft; x < maxright; x += step) {
+ unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
+ unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
+ unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
+ unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
+ unsigned lx = x;
+ unsigned q = 0;
- default:
- return;
+ unsigned skipmask_left0 = (1U << skip_left0) - 1U;
+ unsigned skipmask_left1 = (1U << skip_left1) - 1U;
+
+ /* These calculations fail when step == 32 and skip_right == 0.
+ */
+ unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
+ unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
+
+ unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
+ unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
+
+ if (mask0 | mask1) {
+ do {
+ unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
+ if (quadmask) {
+ setup->quad[q].input.x0 = lx;
+ setup->quad[q].input.y0 = setup->span.y;
+ setup->quad[q].input.facing = setup->facing;
+ setup->quad[q].inout.mask = quadmask;
+ setup->quad_ptrs[q] = &setup->quad[q];
+ q++;
+ }
+ mask0 >>= 2;
+ mask1 >>= 2;
+ lx += 2;
+ } while (mask0 | mask1);
+
+ pipe->run( pipe, setup->quad_ptrs, q );
+ }
}
+
setup->span.y = 0;
- setup->span.y_flags = 0;
setup->span.right[0] = 0;
setup->span.right[1] = 0;
+ setup->span.left[0] = 1000000; /* greater than right[0] */
+ setup->span.left[1] = 1000000; /* greater than right[1] */
}
@@ -496,7 +269,7 @@ static void print_vertex(const struct setup_context *setup,
{
int i;
debug_printf(" Vertex: (%p)\n", v);
- for (i = 0; i < setup->quad.nr_attrs; i++) {
+ for (i = 0; i < setup->quad[0].nr_attrs; i++) {
debug_printf(" %d: %f %f %f %f\n", i,
v[i][0], v[i][1], v[i][2], v[i][3]);
if (util_is_inf_or_nan(v[i][0])) {
@@ -601,7 +374,9 @@ static boolean setup_sort_vertices( struct setup_context *setup,
* - the GLSL gl_FrontFacing fragment attribute (bool)
* - two-sided stencil test
*/
- setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
+ setup->facing =
+ ((det > 0.0) ^
+ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW));
return TRUE;
}
@@ -788,7 +563,7 @@ static void setup_tri_coefficients( struct setup_context *setup )
}
if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
- setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing;
+ setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
setup->coef[fragSlot].dadx[0] = 0.0;
setup->coef[fragSlot].dady[0] = 0.0;
}
@@ -844,11 +619,10 @@ static void subtriangle( struct setup_context *setup,
/* clip top/bottom */
start_y = sy;
- finish_y = sy + lines;
-
if (start_y < miny)
start_y = miny;
+ finish_y = sy + lines;
if (finish_y > maxy)
finish_y = maxy;
@@ -885,7 +659,6 @@ static void subtriangle( struct setup_context *setup,
setup->span.left[_y&1] = left;
setup->span.right[_y&1] = right;
- setup->span.y_flags |= 1<<(_y&1);
}
}
@@ -958,10 +731,9 @@ void setup_tri( struct setup_context *setup,
setup_tri_coefficients( setup );
setup_tri_edges( setup );
- setup->quad.input.prim = QUAD_PRIM_TRI;
+ assert(setup->softpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
setup->span.y = 0;
- setup->span.y_flags = 0;
setup->span.right[0] = 0;
setup->span.right[1] = 0;
/* setup->span.z_mode = tri_z_mode( setup->ctx ); */
@@ -983,8 +755,6 @@ void setup_tri( struct setup_context *setup,
flush_spans( setup );
- WAIT_FOR_COMPLETION(setup);
-
#if DEBUG_FRAGS
printf("Tri: %u frags emitted, %u written\n",
setup->numFragsEmitted,
@@ -1101,7 +871,7 @@ setup_line_coefficients(struct setup_context *setup,
}
if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
- setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing;
+ setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
setup->coef[fragSlot].dadx[0] = 0.0;
setup->coef[fragSlot].dady[0] = 0.0;
}
@@ -1122,20 +892,20 @@ plot(struct setup_context *setup, int x, int y)
const int quadY = y - iy;
const int mask = (1 << ix) << (2 * iy);
- if (quadX != setup->quad.input.x0 ||
- quadY != setup->quad.input.y0)
+ if (quadX != setup->quad[0].input.x0 ||
+ quadY != setup->quad[0].input.y0)
{
/* flush prev quad, start new quad */
- if (setup->quad.input.x0 != -1)
- CLIP_EMIT_QUAD(setup);
+ if (setup->quad[0].input.x0 != -1)
+ clip_emit_quad( setup, &setup->quad[0] );
- setup->quad.input.x0 = quadX;
- setup->quad.input.y0 = quadY;
- setup->quad.inout.mask = 0x0;
+ setup->quad[0].input.x0 = quadX;
+ setup->quad[0].input.y0 = quadY;
+ setup->quad[0].inout.mask = 0x0;
}
- setup->quad.inout.mask |= mask;
+ setup->quad[0].inout.mask |= mask;
}
@@ -1195,17 +965,18 @@ setup_line(struct setup_context *setup,
assert(dx >= 0);
assert(dy >= 0);
+ assert(setup->softpipe->reduced_prim == PIPE_PRIM_LINES);
+
+ setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
+ setup->quad[0].inout.mask = 0x0;
- setup->quad.input.x0 = setup->quad.input.y0 = -1;
- setup->quad.inout.mask = 0x0;
- setup->quad.input.prim = QUAD_PRIM_LINE;
/* XXX temporary: set coverage to 1.0 so the line appears
* if AA mode happens to be enabled.
*/
- setup->quad.input.coverage[0] =
- setup->quad.input.coverage[1] =
- setup->quad.input.coverage[2] =
- setup->quad.input.coverage[3] = 1.0;
+ setup->quad[0].input.coverage[0] =
+ setup->quad[0].input.coverage[1] =
+ setup->quad[0].input.coverage[2] =
+ setup->quad[0].input.coverage[3] = 1.0;
if (dx > dy) {
/*** X-major line ***/
@@ -1249,11 +1020,9 @@ setup_line(struct setup_context *setup,
}
/* draw final quad */
- if (setup->quad.inout.mask) {
- CLIP_EMIT_QUAD(setup);
+ if (setup->quad[0].inout.mask) {
+ clip_emit_quad( setup, &setup->quad[0] );
}
-
- WAIT_FOR_COMPLETION(setup);
}
@@ -1300,6 +1069,8 @@ setup_point( struct setup_context *setup,
if (softpipe->no_rast)
return;
+ assert(setup->softpipe->reduced_prim == PIPE_PRIM_POINTS);
+
/* For points, all interpolants are constant-valued.
* However, for point sprites, we'll need to setup texcoords appropriately.
* XXX: which coefficients are the texcoords???
@@ -1346,22 +1117,21 @@ setup_point( struct setup_context *setup,
}
if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
- setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing;
+ setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
setup->coef[fragSlot].dadx[0] = 0.0;
setup->coef[fragSlot].dady[0] = 0.0;
}
}
- setup->quad.input.prim = QUAD_PRIM_POINT;
if (halfSize <= 0.5 && !round) {
/* special case for 1-pixel points */
const int ix = ((int) x) & 1;
const int iy = ((int) y) & 1;
- setup->quad.input.x0 = (int) x - ix;
- setup->quad.input.y0 = (int) y - iy;
- setup->quad.inout.mask = (1 << ix) << (2 * iy);
- CLIP_EMIT_QUAD(setup);
+ setup->quad[0].input.x0 = (int) x - ix;
+ setup->quad[0].input.y0 = (int) y - iy;
+ setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
+ clip_emit_quad( setup, &setup->quad[0] );
}
else {
if (round) {
@@ -1381,15 +1151,15 @@ setup_point( struct setup_context *setup,
for (ix = ixmin; ix <= ixmax; ix += 2) {
float dx, dy, dist2, cover;
- setup->quad.inout.mask = 0x0;
+ setup->quad[0].inout.mask = 0x0;
dx = (ix + 0.5f) - x;
dy = (iy + 0.5f) - y;
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
- setup->quad.inout.mask |= MASK_TOP_LEFT;
+ setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
+ setup->quad[0].inout.mask |= MASK_TOP_LEFT;
}
dx = (ix + 1.5f) - x;
@@ -1397,8 +1167,8 @@ setup_point( struct setup_context *setup,
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
- setup->quad.inout.mask |= MASK_TOP_RIGHT;
+ setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
+ setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
}
dx = (ix + 0.5f) - x;
@@ -1406,8 +1176,8 @@ setup_point( struct setup_context *setup,
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
- setup->quad.inout.mask |= MASK_BOTTOM_LEFT;
+ setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
+ setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
}
dx = (ix + 1.5f) - x;
@@ -1415,14 +1185,14 @@ setup_point( struct setup_context *setup,
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
- setup->quad.inout.mask |= MASK_BOTTOM_RIGHT;
+ setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
+ setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
}
- if (setup->quad.inout.mask) {
- setup->quad.input.x0 = ix;
- setup->quad.input.y0 = iy;
- CLIP_EMIT_QUAD(setup);
+ if (setup->quad[0].inout.mask) {
+ setup->quad[0].input.x0 = ix;
+ setup->quad[0].input.y0 = iy;
+ clip_emit_quad( setup, &setup->quad[0] );
}
}
}
@@ -1466,33 +1236,25 @@ setup_point( struct setup_context *setup,
mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
}
- setup->quad.inout.mask = mask;
- setup->quad.input.x0 = ix;
- setup->quad.input.y0 = iy;
- CLIP_EMIT_QUAD(setup);
+ setup->quad[0].inout.mask = mask;
+ setup->quad[0].input.x0 = ix;
+ setup->quad[0].input.y0 = iy;
+ clip_emit_quad( setup, &setup->quad[0] );
}
}
}
}
-
- WAIT_FOR_COMPLETION(setup);
}
void setup_prepare( struct setup_context *setup )
{
struct softpipe_context *sp = setup->softpipe;
- unsigned i;
if (sp->dirty) {
softpipe_update_derived(sp);
}
- /* Note: nr_attrs is only used for debugging (vertex printing) */
- setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw);
-
- for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
- sp->quad[i].first->begin( sp->quad[i].first );
- }
+ sp->quad.first->begin( sp->quad.first );
if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
@@ -1520,30 +1282,17 @@ void setup_destroy_context( struct setup_context *setup )
struct setup_context *setup_create_context( struct softpipe_context *softpipe )
{
struct setup_context *setup = CALLOC_STRUCT(setup_context);
-#if SP_NUM_QUAD_THREADS > 1
- uint i;
-#endif
+ unsigned i;
setup->softpipe = softpipe;
- setup->quad.coef = setup->coef;
- setup->quad.posCoef = &setup->posCoef;
-
-#if SP_NUM_QUAD_THREADS > 1
- setup->que.first = 0;
- setup->que.last = 0;
- pipe_mutex_init( setup->que.que_mutex );
- pipe_condvar_init( setup->que.que_notfull_condvar );
- pipe_condvar_init( setup->que.que_notempty_condvar );
- setup->que.jobs_added = 0;
- setup->que.jobs_done = 0;
- pipe_condvar_init( setup->que.que_done_condvar );
- for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
- setup->threads[i].setup = setup;
- setup->threads[i].id = i;
- setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] );
+ for (i = 0; i < MAX_QUADS; i++) {
+ setup->quad[i].coef = setup->coef;
+ setup->quad[i].posCoef = &setup->posCoef;
}
-#endif
+
+ setup->span.left[0] = 1000000; /* greater than right[0] */
+ setup->span.left[1] = 1000000; /* greater than right[1] */
return setup;
}
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 9776e978e3..77ee3c1136 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -87,6 +87,7 @@ struct sp_fragment_shader {
struct sp_vertex_shader {
struct pipe_shader_state shader;
struct draw_vertex_shader *draw_data;
+ int max_sampler; /* -1 if no samplers */
};
diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c
index 384fe559af..efed082f82 100644
--- a/src/gallium/drivers/softpipe/sp_state_blend.c
+++ b/src/gallium/drivers/softpipe/sp_state_blend.c
@@ -45,7 +45,7 @@ void softpipe_bind_blend_state( struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- softpipe->blend = (const struct pipe_blend_state *)blend;
+ softpipe->blend = (struct pipe_blend_state *)blend;
softpipe->dirty |= SP_NEW_BLEND;
}
@@ -86,7 +86,7 @@ softpipe_bind_depth_stencil_state(struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+ softpipe->depth_stencil = (struct pipe_depth_stencil_alpha_state *)depth_stencil;
softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA;
}
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 75551000c9..1faeca1c2a 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -32,7 +32,10 @@
#include "draw/draw_vertex.h"
#include "draw/draw_private.h"
#include "sp_context.h"
+#include "sp_screen.h"
#include "sp_state.h"
+#include "sp_texture.h"
+#include "sp_tex_tile_cache.h"
/**
@@ -65,24 +68,19 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
const struct sp_fragment_shader *spfs = softpipe->fs;
const enum interp_mode colorInterp
= softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
+ const uint num = draw_num_vs_outputs(softpipe->draw);
uint i;
- if (softpipe->vbuf) {
- /* if using the post-transform vertex buffer, tell draw_vbuf to
- * simply emit the whole post-xform vertex as-is:
- */
- struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
- const uint num = draw_num_vs_outputs(softpipe->draw);
- uint i;
-
- /* No longer any need to try and emit draw vertex_header info.
- */
- vinfo_vbuf->num_attribs = 0;
- for (i = 0; i < num; i++) {
- draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
- }
- draw_compute_vertex_size(vinfo_vbuf);
+ /* Tell draw_vbuf to simply emit the whole post-xform vertex
+ * as-is. No longer any need to try and emit draw vertex_header
+ * info.
+ */
+ vinfo_vbuf->num_attribs = 0;
+ for (i = 0; i < num; i++) {
+ draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
}
+ draw_compute_vertex_size(vinfo_vbuf);
/*
* Loop over fragment shader inputs, searching for the matching output
@@ -91,6 +89,23 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
vinfo->num_attribs = 0;
for (i = 0; i < spfs->info.num_inputs; i++) {
int src;
+ enum interp_mode interp;
+
+ switch (spfs->info.input_interpolate[i]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ interp = INTERP_CONSTANT;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ interp = INTERP_LINEAR;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ interp = INTERP_PERSPECTIVE;
+ break;
+ default:
+ assert(0);
+ interp = INTERP_LINEAR;
+ }
+
switch (spfs->info.input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
src = draw_find_vs_output(softpipe->draw,
@@ -106,7 +121,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
case TGSI_SEMANTIC_FOG:
src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
break;
case TGSI_SEMANTIC_GENERIC:
@@ -114,7 +129,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
/* this includes texcoords and varying vars */
src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC,
spfs->info.input_semantic_index[i]);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
break;
default:
@@ -164,11 +179,19 @@ softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe)
static void
compute_cliprect(struct softpipe_context *sp)
{
+ /* SP_NEW_FRAMEBUFFER
+ */
uint surfWidth = sp->framebuffer.width;
uint surfHeight = sp->framebuffer.height;
+ /* SP_NEW_RASTERIZER
+ */
if (sp->rasterizer->scissor) {
- /* clip to scissor rect */
+
+ /* SP_NEW_SCISSOR
+ *
+ * clip to scissor rect:
+ */
sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
@@ -184,27 +207,63 @@ compute_cliprect(struct softpipe_context *sp)
}
+static void
+update_tgsi_samplers( struct softpipe_context *softpipe )
+{
+ unsigned i;
+
+ softpipe_reset_sampler_varients( softpipe );
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ struct softpipe_tex_tile_cache *tc = softpipe->tex_cache[i];
+ if (tc->texture) {
+ struct softpipe_texture *spt = softpipe_texture(tc->texture);
+ if (spt->timestamp != tc->timestamp) {
+ sp_tex_tile_cache_validate_texture( tc );
+ /*
+ _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp);
+ */
+ tc->timestamp = spt->timestamp;
+ }
+ }
+ }
+}
+
+
/* Hopefully this will remain quite simple, otherwise need to pull in
* something like the state tracker mechanism.
*/
void softpipe_update_derived( struct softpipe_context *softpipe )
{
+ struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen);
+
+ /* Check for updated textures.
+ */
+ if (softpipe->tex_timestamp != sp_screen->timestamp) {
+ softpipe->tex_timestamp = sp_screen->timestamp;
+ softpipe->dirty |= SP_NEW_TEXTURE;
+ }
+
+ if (softpipe->dirty & (SP_NEW_SAMPLER |
+ SP_NEW_TEXTURE |
+ SP_NEW_FS |
+ SP_NEW_VS))
+ update_tgsi_samplers( softpipe );
+
if (softpipe->dirty & (SP_NEW_RASTERIZER |
SP_NEW_FS |
SP_NEW_VS))
invalidate_vertex_layout( softpipe );
if (softpipe->dirty & (SP_NEW_SCISSOR |
- SP_NEW_DEPTH_STENCIL_ALPHA |
+ SP_NEW_RASTERIZER |
SP_NEW_FRAMEBUFFER))
compute_cliprect(softpipe);
if (softpipe->dirty & (SP_NEW_BLEND |
SP_NEW_DEPTH_STENCIL_ALPHA |
SP_NEW_FRAMEBUFFER |
- SP_NEW_RASTERIZER |
- SP_NEW_FS |
- SP_NEW_QUERY))
+ SP_NEW_FS))
sp_build_quad_pipeline(softpipe);
softpipe->dirty = 0;
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 4330c20393..256faa94b8 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -31,9 +31,8 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
-#include "pipe/internal/p_winsys_screen.h"
-#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
+#include "draw/draw_vs.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_parse.h"
@@ -51,12 +50,9 @@ softpipe_create_fs_state(struct pipe_context *pipe,
tgsi_dump(templ->tokens, 0);
/* codegen */
- state = softpipe_create_fs_llvm( softpipe, templ );
+ state = softpipe_create_fs_sse( softpipe, templ );
if (!state) {
- state = softpipe_create_fs_sse( softpipe, templ );
- if (!state) {
- state = softpipe_create_fs_exec( softpipe, templ );
- }
+ state = softpipe_create_fs_exec( softpipe, templ );
}
assert(state);
@@ -111,6 +107,8 @@ softpipe_create_vs_state(struct pipe_context *pipe,
if (state->draw_data == NULL)
goto fail;
+ state->max_sampler = state->draw_data->info.file_max[TGSI_FILE_SAMPLER];
+
return state;
fail:
@@ -128,7 +126,7 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- softpipe->vs = (const struct sp_vertex_shader *)vs;
+ softpipe->vs = (struct sp_vertex_shader *) vs;
draw_bind_vertex_shader(softpipe->draw,
(softpipe->vs ? softpipe->vs->draw_data : NULL));
@@ -142,8 +140,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- struct sp_vertex_shader *state =
- (struct sp_vertex_shader *)vs;
+ struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs;
draw_delete_vertex_shader(softpipe->draw, state->draw_data);
FREE( state );
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index cb517b02e4..db0b8ab76b 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -32,21 +32,37 @@
#include "util/u_memory.h"
#include "draw/draw_context.h"
+#include "draw/draw_context.h"
#include "sp_context.h"
-#include "sp_context.h"
#include "sp_state.h"
#include "sp_texture.h"
-#include "sp_tile_cache.h"
-#include "draw/draw_context.h"
+#include "sp_tex_sample.h"
+#include "sp_tex_tile_cache.h"
+struct sp_sampler {
+ struct pipe_sampler_state base;
+ struct sp_sampler_varient *varients;
+ struct sp_sampler_varient *current;
+};
+
+static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler )
+{
+ return (struct sp_sampler *)sampler;
+}
+
void *
softpipe_create_sampler_state(struct pipe_context *pipe,
const struct pipe_sampler_state *sampler)
{
- return mem_dup(sampler, sizeof(*sampler));
+ struct sp_sampler *sp_sampler = CALLOC_STRUCT(sp_sampler);
+
+ sp_sampler->base = *sampler;
+ sp_sampler->varients = NULL;
+
+ return (void *)sp_sampler;
}
@@ -97,7 +113,7 @@ softpipe_set_sampler_textures(struct pipe_context *pipe,
struct pipe_texture *tex = i < num ? texture[i] : NULL;
pipe_texture_reference(&softpipe->texture[i], tex);
- sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex);
+ sp_tex_tile_cache_set_texture(softpipe->tex_cache[i], tex);
}
softpipe->num_textures = num;
@@ -106,10 +122,111 @@ softpipe_set_sampler_textures(struct pipe_context *pipe,
}
+/**
+ * Find/create an sp_sampler_varient object for sampling the given texture,
+ * sampler and tex unit.
+ *
+ * Note that the tex unit is significant. We can't re-use a sampler
+ * varient for multiple texture units because the sampler varient contains
+ * the texture object pointer. If the texture object pointer were stored
+ * somewhere outside the sampler varient, we could re-use samplers for
+ * multiple texture units.
+ */
+static struct sp_sampler_varient *
+get_sampler_varient( unsigned unit,
+ struct sp_sampler *sampler,
+ struct pipe_texture *texture,
+ unsigned processor )
+{
+ struct softpipe_texture *sp_texture = softpipe_texture(texture);
+ struct sp_sampler_varient *v = NULL;
+ union sp_sampler_key key;
+
+ /* if this fails, widen the key.unit field and update this assertion */
+ assert(PIPE_MAX_SAMPLERS <= 16);
+
+ key.bits.target = sp_texture->base.target;
+ key.bits.is_pot = sp_texture->pot;
+ key.bits.processor = processor;
+ key.bits.unit = unit;
+ key.bits.pad = 0;
+
+ if (sampler->current &&
+ key.value == sampler->current->key.value) {
+ v = sampler->current;
+ }
+
+ if (v == NULL) {
+ for (v = sampler->varients; v; v = v->next)
+ if (v->key.value == key.value)
+ break;
+
+ if (v == NULL) {
+ v = sp_create_sampler_varient( &sampler->base, key );
+ v->next = sampler->varients;
+ sampler->varients = v;
+ }
+ }
+
+ sampler->current = v;
+ return v;
+}
+
+
+
+
+void
+softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
+{
+ int i;
+
+ /* It's a bit hard to build these samplers ahead of time -- don't
+ * really know which samplers are going to be used for vertex and
+ * fragment programs.
+ */
+ for (i = 0; i <= softpipe->vs->max_sampler; i++) {
+ if (softpipe->sampler[i]) {
+ softpipe->tgsi.vert_samplers_list[i] =
+ get_sampler_varient( i,
+ sp_sampler(softpipe->sampler[i]),
+ softpipe->texture[i],
+ TGSI_PROCESSOR_VERTEX );
+
+ sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i],
+ softpipe->tex_cache[i],
+ softpipe->texture[i] );
+ }
+ }
+
+ for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) {
+ if (softpipe->sampler[i]) {
+ softpipe->tgsi.frag_samplers_list[i] =
+ get_sampler_varient( i,
+ sp_sampler(softpipe->sampler[i]),
+ softpipe->texture[i],
+ TGSI_PROCESSOR_FRAGMENT );
+
+ sp_sampler_varient_bind_texture( softpipe->tgsi.frag_samplers_list[i],
+ softpipe->tex_cache[i],
+ softpipe->texture[i] );
+ }
+ }
+}
+
+
+
void
softpipe_delete_sampler_state(struct pipe_context *pipe,
void *sampler)
{
+ struct sp_sampler *sp_sampler = (struct sp_sampler *)sampler;
+ struct sp_sampler_varient *v, *tmp;
+
+ for (v = sp_sampler->varients; v; v = tmp) {
+ tmp = v->next;
+ sp_sampler_varient_destroy(v);
+ }
+
FREE( sampler );
}
diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
index 7c06d864a7..bc0e201130 100644
--- a/src/gallium/drivers/softpipe/sp_state_surface.c
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -53,10 +53,10 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
/* check if changing cbuf */
if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) {
/* flush old */
- sp_flush_tile_cache(sp, sp->cbuf_cache[i]);
+ sp_flush_tile_cache(sp->cbuf_cache[i]);
/* assign new */
- sp->framebuffer.cbufs[i] = fb->cbufs[i];
+ pipe_surface_reference(&sp->framebuffer.cbufs[i], fb->cbufs[i]);
/* update cache */
sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]);
@@ -68,58 +68,28 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
/* zbuf changing? */
if (sp->framebuffer.zsbuf != fb->zsbuf) {
/* flush old */
- sp_flush_tile_cache(sp, sp->zsbuf_cache);
+ sp_flush_tile_cache(sp->zsbuf_cache);
/* assign new */
- sp->framebuffer.zsbuf = fb->zsbuf;
+ pipe_surface_reference(&sp->framebuffer.zsbuf, fb->zsbuf);
/* update cache */
sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf);
- }
-
-#if 0
- /* XXX combined depth/stencil here */
-
- /* sbuf changing? */
- if (sp->framebuffer.sbuf != fb->sbuf) {
- /* flush old */
- sp_flush_tile_cache(sp, sp->sbuf_cache_sep);
-
- /* assign new */
- sp->framebuffer.sbuf = fb->sbuf;
-
- /* update cache */
- if (fb->sbuf != fb->zbuf) {
- /* separate stencil buf */
- sp->sbuf_cache = sp->sbuf_cache_sep;
- sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf);
- }
- else {
- /* combined depth/stencil */
- sp->sbuf_cache = sp->zbuf_cache;
- sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf);
- }
- }
-#endif
- /* Tell draw module how deep the Z/depth buffer is */
- {
- int depth_bits;
- double mrd;
+ /* Tell draw module how deep the Z/depth buffer is */
if (sp->framebuffer.zsbuf) {
+ int depth_bits;
+ double mrd;
depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format,
PIPE_FORMAT_COMP_Z);
+ if (depth_bits > 16) {
+ mrd = 0.0000001;
+ }
+ else {
+ mrd = 0.00002;
+ }
+ draw_set_mrd(sp->draw, mrd);
}
- else {
- depth_bits = 0;
- }
- if (depth_bits > 16) {
- mrd = 0.0000001;
- }
- else {
- mrd = 0.00002;
- }
- draw_set_mrd(sp->draw, mrd);
}
sp->framebuffer.width = fb->width;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index f99a30277d..c22ee86b66 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -31,29 +31,33 @@
*
* Authors:
* Brian Paul
+ * Keith Whitwell
*/
-#include "sp_context.h"
-#include "sp_quad.h"
-#include "sp_surface.h"
-#include "sp_texture.h"
-#include "sp_tex_sample.h"
-#include "sp_tile_cache.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "sp_quad.h" /* only for #define QUAD_* tokens */
+#include "sp_tex_sample.h"
+#include "sp_tex_tile_cache.h"
/*
- * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes
- * see 1-pixel bands of improperly weighted linear-filtered textures.
+ * Return fractional part of 'f'. Used for computing interpolation weights.
+ * Need to be careful with negative values.
+ * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
+ * of improperly weighted linear-filtered textures.
* The tests/texwrap.c demo is a good test.
- * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
- * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
*/
-#define FRAC(f) ((f) - util_ifloor(f))
+static INLINE float
+frac(float f)
+{
+ return f - util_ifloor(f);
+}
+
/**
@@ -100,10 +104,16 @@ lerp_3d(float a, float b, float c,
/**
- * If A is a signed integer, A % B doesn't give the right value for A < 0
- * (in terms of texture repeat). Just casting to unsigned fixes that.
+ * Compute coord % size for repeat wrap modes.
+ * Note that if coord is a signed integer, coord % size doesn't give
+ * the right value for coord < 0 (in terms of texture repeat). Just
+ * casting to unsigned fixes that.
*/
-#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
+static INLINE int
+repeat(int coord, unsigned size)
+{
+ return (int) ((unsigned) coord % size);
+}
/**
@@ -115,133 +125,153 @@ lerp_3d(float a, float b, float c,
* \param icoord returns the integer texcoords
* \return integer texture index
*/
-static INLINE void
-nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
- int icoord[4])
+static void
+wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4])
{
uint ch;
- switch (wrapMode) {
- case PIPE_TEX_WRAP_REPEAT:
- /* s limited to [0,1) */
- /* i limited to [0,size-1] */
- for (ch = 0; ch < 4; ch++) {
- int i = util_ifloor(s[ch] * size);
- icoord[ch] = REMAINDER(i, size);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP:
+ /* s limited to [0,1) */
+ /* i limited to [0,size-1] */
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch] * size);
+ icoord[ch] = repeat(i, size);
+ }
+}
+
+
+static void
+wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4])
+{
+ uint ch;
+ /* s limited to [0,1] */
+ /* i limited to [0,size-1] */
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= 0.0F)
+ icoord[ch] = 0;
+ else if (s[ch] >= 1.0F)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+}
+
+
+static void
+wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4])
+{
+ uint ch;
+ /* s limited to [min,max] */
+ /* i limited to [0, size-1] */
+ const float min = 1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] < min)
+ icoord[ch] = 0;
+ else if (s[ch] > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+}
+
+
+static void
+wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4])
+{
+ uint ch;
+ /* s limited to [min,max] */
+ /* i limited to [-1, size] */
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= min)
+ icoord[ch] = -1;
+ else if (s[ch] >= max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+}
+
+
+static void
+wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4])
+{
+ uint ch;
+ const float min = 1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
+ if (flr & 1)
+ u = 1.0F - (s[ch] - (float) flr);
+ else
+ u = s[ch] - (float) flr;
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
+}
+
+
+static void
+wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
/* s limited to [0,1] */
/* i limited to [0,size-1] */
- for (ch = 0; ch < 4; ch++) {
- if (s[ch] <= 0.0F)
- icoord[ch] = 0;
- else if (s[ch] >= 1.0F)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(s[ch] * size);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- {
- /* s limited to [min,max] */
- /* i limited to [0, size-1] */
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- if (s[ch] < min)
- icoord[ch] = 0;
- else if (s[ch] > max)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(s[ch] * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- {
- /* s limited to [min,max] */
- /* i limited to [-1, size] */
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- if (s[ch] <= min)
- icoord[ch] = -1;
- else if (s[ch] >= max)
- icoord[ch] = size;
- else
- icoord[ch] = util_ifloor(s[ch] * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- {
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- const int flr = util_ifloor(s[ch]);
- float u;
- if (flr & 1)
- u = 1.0F - (s[ch] - (float) flr);
- else
- u = s[ch] - (float) flr;
- if (u < min)
- icoord[ch] = 0;
- else if (u > max)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- /* s limited to [0,1] */
- /* i limited to [0,size-1] */
- const float u = fabsf(s[ch]);
- if (u <= 0.0F)
- icoord[ch] = 0;
- else if (u >= 1.0F)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- {
- /* s limited to [min,max] */
- /* i limited to [0, size-1] */
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- const float u = fabsf(s[ch]);
- if (u < min)
- icoord[ch] = 0;
- else if (u > max)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- {
- /* s limited to [min,max] */
- /* i limited to [0, size-1] */
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- const float u = fabsf(s[ch]);
- if (u < min)
- icoord[ch] = -1;
- else if (u > max)
- icoord[ch] = size;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- }
- return;
- default:
- assert(0);
+ const float u = fabsf(s[ch]);
+ if (u <= 0.0F)
+ icoord[ch] = 0;
+ else if (u >= 1.0F)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
+}
+
+
+static void
+wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
+ int icoord[4])
+{
+ uint ch;
+ /* s limited to [min,max] */
+ /* i limited to [0, size-1] */
+ const float min = 1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
+}
+
+
+static void
+wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
+ int icoord[4])
+{
+ uint ch;
+ /* s limited to [min,max] */
+ /* i limited to [0, size-1] */
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = -1;
+ else if (u > max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(u * size);
}
}
@@ -256,125 +286,156 @@ nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
* \param w returns blend factor/weight between texture indexes
* \param icoord returns the computed integer texture coords
*/
-static INLINE void
-linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+static void
+wrap_linear_repeat(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = s[ch] * size - 0.5F;
+ icoord0[ch] = repeat(util_ifloor(u), size);
+ icoord1[ch] = repeat(icoord0[ch] + 1, size);
+ w[ch] = frac(u);
+ }
+}
+
+
+static void
+wrap_linear_clamp(const float s[4], unsigned size,
int icoord0[4], int icoord1[4], float w[4])
{
uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = frac(u);
+ }
+}
- switch (wrapMode) {
- case PIPE_TEX_WRAP_REPEAT:
- for (ch = 0; ch < 4; ch++) {
- float u = s[ch] * size - 0.5F;
- icoord0[ch] = REMAINDER(util_ifloor(u), size);
- icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], 0.0F, 1.0F);
- u = u * size - 0.5f;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], 0.0F, 1.0F);
- u = u * size - 0.5f;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord0[ch] < 0)
- icoord0[ch] = 0;
- if (icoord1[ch] >= (int) size)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- {
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], min, max);
- u = u * size - 0.5f;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- for (ch = 0; ch < 4; ch++) {
- const int flr = util_ifloor(s[ch]);
- float u;
- if (flr & 1)
- u = 1.0F - (s[ch] - (float) flr);
- else
- u = s[ch] - (float) flr;
- u = u * size - 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord0[ch] < 0)
- icoord0[ch] = 0;
- if (icoord1[ch] >= (int) size)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- float u = fabsf(s[ch]);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- for (ch = 0; ch < 4; ch++) {
- float u = fabsf(s[ch]);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord0[ch] < 0)
- icoord0[ch] = 0;
- if (icoord1[ch] >= (int) size)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- {
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- float u = fabsf(s[ch]);
- if (u <= min)
- u = min * size;
- else if (u >= max)
- u = max * size;
- else
- u *= size;
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- }
- break;;
- default:
- assert(0);
+
+static void
+wrap_linear_clamp_to_edge(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = frac(u);
+ }
+}
+
+
+static void
+wrap_linear_clamp_to_border(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], min, max);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = frac(u);
+ }
+}
+
+
+static void
+wrap_linear_mirror_repeat(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
+ if (flr & 1)
+ u = 1.0F - (s[ch] - (float) flr);
+ else
+ u = s[ch] - (float) flr;
+ u = u * size - 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = frac(u);
+ }
+}
+
+
+static void
+wrap_linear_mirror_clamp(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = frac(u);
+ }
+}
+
+
+static void
+wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = frac(u);
+ }
+}
+
+
+static void
+wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u <= min)
+ u = min * size;
+ else if (u >= max)
+ u = max * size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = frac(u);
}
}
@@ -383,27 +444,27 @@ linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
* For RECT textures / unnormalized texcoords
* Only a subset of wrap modes supported.
*/
-static INLINE void
-nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
- int icoord[4])
+static void
+wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4])
{
uint ch;
- switch (wrapMode) {
- case PIPE_TEX_WRAP_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- int i = util_ifloor(s[ch]);
- icoord[ch]= CLAMP(i, 0, (int) size-1);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- /* fall-through */
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- for (ch = 0; ch < 4; ch++) {
- icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
- }
- return;
- default:
- assert(0);
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch]);
+ icoord[ch]= CLAMP(i, 0, (int) size-1);
+ }
+}
+
+
+/**
+ * Handles clamp_to_edge and clamp_to_border:
+ */
+static void
+wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
+ int icoord[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
}
}
@@ -412,358 +473,971 @@ nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
* For RECT textures / unnormalized texcoords.
* Only a subset of wrap modes supported.
*/
-static INLINE void
-linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
- int icoord0[4], int icoord1[4], float w[4])
+static void
+wrap_linear_unorm_clamp(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
{
uint ch;
- switch (wrapMode) {
- case PIPE_TEX_WRAP_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- /* Not exactly what the spec says, but it matches NVIDIA output */
- float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- /* fall-through */
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord1[ch] > (int) size - 1)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;
- default:
- assert(0);
+ for (ch = 0; ch < 4; ch++) {
+ /* Not exactly what the spec says, but it matches NVIDIA output */
+ float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = frac(u);
}
}
-static unsigned
-choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+static void
+wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
{
- /*
- major axis
- direction target sc tc ma
- ---------- ------------------------------- --- --- ---
- +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
- -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
- +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
- -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
- +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
- -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
- */
- const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
- unsigned face;
- float sc, tc, ma;
-
- if (arx >= ary && arx >= arz) {
- if (rx >= 0.0F) {
- face = PIPE_TEX_FACE_POS_X;
- sc = -rz;
- tc = -ry;
- ma = arx;
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord1[ch] > (int) size - 1)
+ icoord1[ch] = size - 1;
+ w[ch] = frac(u);
+ }
+}
+
+
+
+/**
+ * Examine the quad's texture coordinates to compute the partial
+ * derivatives w.r.t X and Y, then compute lambda (level of detail).
+ */
+static float
+compute_lambda_1d(const struct sp_sampler_varient *samp,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias)
+{
+ const struct pipe_texture *texture = samp->texture;
+ const struct pipe_sampler_state *sampler = samp->sampler;
+ float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+ float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
+ float rho = MAX2(dsdx, dsdy) * texture->width[0];
+ float lambda;
+
+ lambda = util_fast_log2(rho);
+ lambda += lodbias + sampler->lod_bias;
+ lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+
+ return lambda;
+}
+
+
+static float
+compute_lambda_2d(const struct sp_sampler_varient *samp,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias)
+{
+ const struct pipe_texture *texture = samp->texture;
+ const struct pipe_sampler_state *sampler = samp->sampler;
+ float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+ float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
+ float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+ float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
+ float maxx = MAX2(dsdx, dsdy) * texture->width[0];
+ float maxy = MAX2(dtdx, dtdy) * texture->height[0];
+ float rho = MAX2(maxx, maxy);
+ float lambda;
+
+ lambda = util_fast_log2(rho);
+ lambda += lodbias + sampler->lod_bias;
+ lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+
+ return lambda;
+}
+
+
+static float
+compute_lambda_3d(const struct sp_sampler_varient *samp,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias)
+{
+ const struct pipe_texture *texture = samp->texture;
+ const struct pipe_sampler_state *sampler = samp->sampler;
+ float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+ float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
+ float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+ float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
+ float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
+ float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]);
+ float maxx = MAX2(dsdx, dsdy) * texture->width[0];
+ float maxy = MAX2(dtdx, dtdy) * texture->height[0];
+ float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
+ float rho, lambda;
+
+ rho = MAX2(maxx, maxy);
+ rho = MAX2(rho, maxz);
+
+ lambda = util_fast_log2(rho);
+ lambda += lodbias + sampler->lod_bias;
+ lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+
+ return lambda;
+}
+
+
+/**
+ * Compute lambda for a vertex texture sampler.
+ * Since there aren't derivatives to use, just return the LOD bias.
+ */
+static float
+compute_lambda_vert(const struct sp_sampler_varient *samp,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias)
+{
+ return lodbias;
+}
+
+
+
+/**
+ * Get a texel from a texture, using the texture tile cache.
+ *
+ * \param addr the template tex address containing cube, z, face info.
+ * \param x the x coord of texel within 2D image
+ * \param y the y coord of texel within 2D image
+ * \param rgba the quad to put the texel/color into
+ *
+ * XXX maybe move this into sp_tex_tile_cache.c and merge with the
+ * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
+ */
+
+
+
+
+static INLINE const float *
+get_texel_2d_no_border(const struct sp_sampler_varient *samp,
+ union tex_tile_address addr, int x, int y)
+{
+ const struct softpipe_tex_cached_tile *tile;
+
+ addr.bits.x = x / TILE_SIZE;
+ addr.bits.y = y / TILE_SIZE;
+ y %= TILE_SIZE;
+ x %= TILE_SIZE;
+
+ tile = sp_get_cached_tile_tex(samp->cache, addr);
+
+ return &tile->data.color[y][x][0];
+}
+
+
+static INLINE const float *
+get_texel_2d(const struct sp_sampler_varient *samp,
+ union tex_tile_address addr, int x, int y)
+{
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level = addr.bits.level;
+
+ if (x < 0 || x >= (int) texture->width[level] ||
+ y < 0 || y >= (int) texture->height[level]) {
+ return samp->sampler->border_color;
+ }
+ else {
+ return get_texel_2d_no_border( samp, addr, x, y );
+ }
+}
+
+
+/* Gather a quad of adjacent texels within a tile:
+ */
+static INLINE void
+get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
+ union tex_tile_address addr,
+ unsigned x, unsigned y,
+ const float *out[4])
+{
+ const struct softpipe_tex_cached_tile *tile;
+
+ addr.bits.x = x / TILE_SIZE;
+ addr.bits.y = y / TILE_SIZE;
+ y %= TILE_SIZE;
+ x %= TILE_SIZE;
+
+ tile = sp_get_cached_tile_tex(samp->cache, addr);
+
+ out[0] = &tile->data.color[y ][x ][0];
+ out[1] = &tile->data.color[y ][x+1][0];
+ out[2] = &tile->data.color[y+1][x ][0];
+ out[3] = &tile->data.color[y+1][x+1][0];
+}
+
+
+/* Gather a quad of potentially non-adjacent texels:
+ */
+static INLINE void
+get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
+ union tex_tile_address addr,
+ int x0, int y0,
+ int x1, int y1,
+ const float *out[4])
+{
+ out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
+ out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
+ out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
+ out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
+}
+
+/* Can involve a lot of unnecessary checks for border color:
+ */
+static INLINE void
+get_texel_quad_2d(const struct sp_sampler_varient *samp,
+ union tex_tile_address addr,
+ int x0, int y0,
+ int x1, int y1,
+ const float *out[4])
+{
+ out[0] = get_texel_2d( samp, addr, x0, y0 );
+ out[1] = get_texel_2d( samp, addr, x1, y0 );
+ out[3] = get_texel_2d( samp, addr, x1, y1 );
+ out[2] = get_texel_2d( samp, addr, x0, y1 );
+}
+
+
+
+/* 3d varients:
+ */
+static INLINE const float *
+get_texel_3d_no_border(const struct sp_sampler_varient *samp,
+ union tex_tile_address addr, int x, int y, int z)
+{
+ const struct softpipe_tex_cached_tile *tile;
+
+ addr.bits.x = x / TILE_SIZE;
+ addr.bits.y = y / TILE_SIZE;
+ addr.bits.z = z;
+ y %= TILE_SIZE;
+ x %= TILE_SIZE;
+
+ tile = sp_get_cached_tile_tex(samp->cache, addr);
+
+ return &tile->data.color[y][x][0];
+}
+
+
+static INLINE const float *
+get_texel_3d(const struct sp_sampler_varient *samp,
+ union tex_tile_address addr, int x, int y, int z)
+{
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level = addr.bits.level;
+
+ if (x < 0 || x >= (int) texture->width[level] ||
+ y < 0 || y >= (int) texture->height[level] ||
+ z < 0 || z >= (int) texture->depth[level]) {
+ return samp->sampler->border_color;
+ }
+ else {
+ return get_texel_3d_no_border( samp, addr, x, y, z );
+ }
+}
+
+
+/**
+ * Given the logbase2 of a mipmap's base level size and a mipmap level,
+ * return the size (in texels) of that mipmap level.
+ * For example, if level[0].width = 256 then base_pot will be 8.
+ * If level = 2, then we'll return 64 (the width at level=2).
+ * Return 1 if level > base_pot.
+ */
+static INLINE unsigned
+pot_level_size(unsigned base_pot, unsigned level)
+{
+ return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
+}
+
+
+/* Some image-filter fastpaths:
+ */
+static INLINE void
+img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ unsigned j;
+ unsigned level = samp->level;
+ unsigned xpot = pot_level_size(samp->xpot, level);
+ unsigned ypot = pot_level_size(samp->ypot, level);
+ unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
+ unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
+ union tex_tile_address addr;
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int c;
+
+ float u = s[j] * xpot - 0.5F;
+ float v = t[j] * ypot - 0.5F;
+
+ int uflr = util_ifloor(u);
+ int vflr = util_ifloor(v);
+
+ float xw = u - (float)uflr;
+ float yw = v - (float)vflr;
+
+ int x0 = uflr & (xpot - 1);
+ int y0 = vflr & (ypot - 1);
+
+ const float *tx[4];
+
+ /* Can we fetch all four at once:
+ */
+ if (x0 < xmax && y0 < ymax) {
+ get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
}
else {
- face = PIPE_TEX_FACE_NEG_X;
- sc = rz;
- tc = -ry;
- ma = arx;
+ unsigned x1 = (x0 + 1) & (xpot - 1);
+ unsigned y1 = (y0 + 1) & (ypot - 1);
+ get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
+ }
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw, yw,
+ tx[0][c], tx[1][c],
+ tx[2][c], tx[3][c]);
}
}
- else if (ary >= arx && ary >= arz) {
- if (ry >= 0.0F) {
- face = PIPE_TEX_FACE_POS_Y;
- sc = rx;
- tc = rz;
- ma = ary;
+}
+
+
+static INLINE void
+img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ unsigned j;
+ unsigned level = samp->level;
+ unsigned xpot = pot_level_size(samp->xpot, level);
+ unsigned ypot = pot_level_size(samp->ypot, level);
+ union tex_tile_address addr;
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int c;
+
+ float u = s[j] * xpot;
+ float v = t[j] * ypot;
+
+ int uflr = util_ifloor(u);
+ int vflr = util_ifloor(v);
+
+ int x0 = uflr & (xpot - 1);
+ int y0 = vflr & (ypot - 1);
+
+ const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
+
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
}
- else {
- face = PIPE_TEX_FACE_NEG_Y;
- sc = rx;
- tc = -rz;
- ma = ary;
+ }
+}
+
+
+static INLINE void
+img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ unsigned j;
+ unsigned level = samp->level;
+ unsigned xpot = pot_level_size(samp->xpot, level);
+ unsigned ypot = pot_level_size(samp->ypot, level);
+ union tex_tile_address addr;
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int c;
+
+ float u = s[j] * xpot;
+ float v = t[j] * ypot;
+
+ int x0, y0;
+ const float *out;
+
+ x0 = util_ifloor(u);
+ if (x0 < 0)
+ x0 = 0;
+ else if (x0 > xpot - 1)
+ x0 = xpot - 1;
+
+ y0 = util_ifloor(v);
+ if (y0 < 0)
+ y0 = 0;
+ else if (y0 > ypot - 1)
+ y0 = ypot - 1;
+
+ out = get_texel_2d_no_border(samp, addr, x0, y0);
+
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
}
}
- else {
- if (rz > 0.0F) {
- face = PIPE_TEX_FACE_POS_Z;
- sc = rx;
- tc = -ry;
- ma = arz;
+}
+
+
+static void
+img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level0, j;
+ int width;
+ int x[4];
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = texture->width[level0];
+
+ assert(width > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->nearest_texcoord_s(s, width, x);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_2d(samp, addr, x[j], 0);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
}
- else {
- face = PIPE_TEX_FACE_NEG_Z;
- sc = -rx;
- tc = -ry;
- ma = arz;
+ }
+}
+
+
+static void
+img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level0, j;
+ int width, height;
+ int x[4], y[4];
+ union tex_tile_address addr;
+
+
+ level0 = samp->level;
+ width = texture->width[level0];
+ height = texture->height[level0];
+
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->nearest_texcoord_s(s, width, x);
+ samp->nearest_texcoord_t(t, height, y);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_2d(samp, addr, x[j], y[j]);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
}
}
+}
- *newS = ( sc / ma + 1.0F ) * 0.5F;
- *newT = ( tc / ma + 1.0F ) * 0.5F;
- return face;
+static INLINE union tex_tile_address
+face(union tex_tile_address addr, unsigned face )
+{
+ addr.bits.face = face;
+ return addr;
}
-/**
- * Examine the quad's texture coordinates to compute the partial
- * derivatives w.r.t X and Y, then compute lambda (level of detail).
- *
- * This is only done for fragment shaders, not vertex shaders.
- */
-static float
-compute_lambda(const struct pipe_texture *tex,
- const struct pipe_sampler_state *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+static void
+img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- float rho, lambda;
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ const unsigned *faces = samp->faces; /* zero when not cube-mapping */
+ unsigned level0, j;
+ int width, height;
+ int x[4], y[4];
+ union tex_tile_address addr;
- assert(sampler->normalized_coords);
+ level0 = samp->level;
+ width = texture->width[level0];
+ height = texture->height[level0];
- assert(s);
- {
- float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
- float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
- dsdx = fabsf(dsdx);
- dsdy = fabsf(dsdy);
- rho = MAX2(dsdx, dsdy) * tex->width[0];
- }
- if (t) {
- float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
- float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT];
- float max;
- dtdx = fabsf(dtdx);
- dtdy = fabsf(dtdy);
- max = MAX2(dtdx, dtdy) * tex->height[0];
- rho = MAX2(rho, max);
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->nearest_texcoord_s(s, width, x);
+ samp->nearest_texcoord_t(t, height, y);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
+ }
}
- if (p) {
- float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
- float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT];
- float max;
- dpdx = fabsf(dpdx);
- dpdy = fabsf(dpdy);
- max = MAX2(dpdx, dpdy) * tex->depth[0];
- rho = MAX2(rho, max);
+}
+
+
+static void
+img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level0, j;
+ int width, height, depth;
+ int x[4], y[4], z[4];
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = texture->width[level0];
+ height = texture->height[level0];
+ depth = texture->depth[level0];
+
+ assert(width > 0);
+ assert(height > 0);
+ assert(depth > 0);
+
+ samp->nearest_texcoord_s(s, width, x);
+ samp->nearest_texcoord_t(t, height, y);
+ samp->nearest_texcoord_p(p, depth, z);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
+ }
}
+}
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
- return lambda;
+static void
+img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level0, j;
+ int width;
+ int x0[4], x1[4];
+ float xw[4]; /* weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = texture->width[level0];
+
+ assert(width > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
+ const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
+ int c;
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
+ }
+ }
}
-/**
- * Do several things here:
- * 1. Compute lambda from the texcoords, if needed
- * 2. Determine if we're minifying or magnifying
- * 3. If minifying, choose mipmap levels
- * 4. Return image filter to use within mipmap images
- * \param level0 Returns first mipmap level to sample from
- * \param level1 Returns second mipmap level to sample from
- * \param levelBlend Returns blend factor between levels, in [0,1]
- * \param imgFilter Returns either the min or mag filter, depending on lambda
- */
static void
-choose_mipmap_levels(const struct pipe_texture *texture,
- const struct pipe_sampler_state *sampler,
+img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- boolean computeLambda,
float lodbias,
- unsigned *level0, unsigned *level1, float *levelBlend,
- unsigned *imgFilter)
-{
- if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
- /* no mipmap selection needed */
- *level0 = *level1 = CLAMP((int) sampler->min_lod,
- 0, (int) texture->last_level);
-
- if (sampler->min_img_filter != sampler->mag_img_filter) {
- /* non-mipmapped texture, but still need to determine if doing
- * minification or magnification.
- */
- float lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
- if (lambda <= 0.0) {
- *imgFilter = sampler->mag_img_filter;
- }
- else {
- *imgFilter = sampler->min_img_filter;
- }
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level0, j;
+ int width, height;
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = texture->width[level0];
+ height = texture->height[level0];
+
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+ samp->linear_texcoord_t(t, height, y0, y1, yw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
+ const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
+ const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
+ const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
+ int c;
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx0[c], tx1[c],
+ tx2[c], tx3[c]);
}
- else {
- *imgFilter = sampler->mag_img_filter;
+ }
+}
+
+
+static void
+img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ const unsigned *faces = samp->faces; /* zero when not cube-mapping */
+ unsigned level0, j;
+ int width, height;
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = texture->width[level0];
+ height = texture->height[level0];
+
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+ samp->linear_texcoord_t(t, height, y0, y1, yw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ union tex_tile_address addrj = face(addr, faces[j]);
+ const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
+ const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
+ const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
+ const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
+ int c;
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx0[c], tx1[c],
+ tx2[c], tx3[c]);
}
}
- else {
- float lambda;
+}
- if (computeLambda)
- /* fragment shader */
- lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
- else
- /* vertex shader */
- lambda = lodbias; /* not really a bias, but absolute LOD */
- if (lambda <= 0.0) { /* XXX threshold depends on the filter */
- /* magnifying */
- *imgFilter = sampler->mag_img_filter;
- *level0 = *level1 = 0;
+static void
+img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ unsigned level0, j;
+ int width, height, depth;
+ int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
+ float xw[4], yw[4], zw[4]; /* interpolation weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = texture->width[level0];
+ height = texture->height[level0];
+ depth = texture->depth[level0];
+
+ addr.value = 0;
+ addr.bits.level = level0;
+
+ assert(width > 0);
+ assert(height > 0);
+ assert(depth > 0);
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+ samp->linear_texcoord_t(t, height, y0, y1, yw);
+ samp->linear_texcoord_p(p, depth, z0, z1, zw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int c;
+
+ const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]);
+ const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]);
+ const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]);
+ const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]);
+
+ const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]);
+ const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]);
+ const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]);
+ const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]);
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+ tx00[c], tx01[c],
+ tx02[c], tx03[c],
+ tx10[c], tx11[c],
+ tx12[c], tx13[c]);
}
- else {
- /* minifying */
- *imgFilter = sampler->min_img_filter;
-
- /* choose mipmap level(s) and compute the blend factor between them */
- if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
- /* Nearest mipmap level */
- const int lvl = (int) (lambda + 0.5);
- *level0 =
- *level1 = CLAMP(lvl, 0, (int) texture->last_level);
- }
- else {
- /* Linear interpolation between mipmap levels */
- const int lvl = (int) lambda;
- *level0 = CLAMP(lvl, 0, (int) texture->last_level);
- *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
- *levelBlend = FRAC(lambda); /* blending weight between levels */
+ }
+}
+
+
+static void
+mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ int level0;
+ float lambda;
+
+ lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+ level0 = (int)lambda;
+
+ if (lambda < 0.0) {
+ samp->level = 0;
+ samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ }
+ else if (level0 >= texture->last_level) {
+ samp->level = texture->last_level;
+ samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ }
+ else {
+ float levelBlend = lambda - level0;
+ float rgba0[4][4];
+ float rgba1[4][4];
+ int c,j;
+
+ samp->level = level0;
+ samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
+
+ samp->level = level0+1;
+ samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
}
}
}
}
-/**
- * Get a texel from a texture, using the texture tile cache.
- *
- * \param face the cube face in 0..5
- * \param level the mipmap level
- * \param x the x coord of texel within 2D image
- * \param y the y coord of texel within 2D image
- * \param z which slice of a 3D texture
- * \param rgba the quad to put the texel/color into
- * \param j which element of the rgba quad to write to
- *
- * XXX maybe move this into sp_tile_cache.c and merge with the
- * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
- */
static void
-get_texel(const struct tgsi_sampler *tgsi_sampler,
- unsigned face, unsigned level, int x, int y, int z,
- float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
+mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
- struct softpipe_context *sp = samp->sp;
- const uint unit = samp->unit;
- const struct pipe_texture *texture = sp->texture[unit];
- const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ float lambda;
- if (x < 0 || x >= (int) texture->width[level] ||
- y < 0 || y >= (int) texture->height[level] ||
- z < 0 || z >= (int) texture->depth[level]) {
- rgba[0][j] = sampler->border_color[0];
- rgba[1][j] = sampler->border_color[1];
- rgba[2][j] = sampler->border_color[2];
- rgba[3][j] = sampler->border_color[3];
+ lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+
+ if (lambda < 0.0) {
+ samp->level = 0;
+ samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
}
else {
- const int tx = x % TILE_SIZE;
- const int ty = y % TILE_SIZE;
- const struct softpipe_cached_tile *tile
- = sp_get_cached_tile_tex(sp, samp->cache,
- x, y, z, face, level);
- rgba[0][j] = tile->data.color[ty][tx][0];
- rgba[1][j] = tile->data.color[ty][tx][1];
- rgba[2][j] = tile->data.color[ty][tx][2];
- rgba[3][j] = tile->data.color[ty][tx][3];
- if (0)
- {
- debug_printf("Get texel %f %f %f %f from %s\n",
- rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
- pf_name(texture->format));
- }
+ samp->level = (int)(lambda + 0.5) ;
+ samp->level = MIN2(samp->level, (int)texture->last_level);
+ samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ }
+
+#if 0
+ printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
+ rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
+ rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
+ rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
+ rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
+#endif
+}
+
+
+static void
+mip_filter_none(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+
+ if (lambda < 0.0) {
+ samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ }
+ else {
+ samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
}
}
+
/**
- * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
- * When we sampled the depth texture, the depth value was put into all
- * RGBA channels. We look at the red channel here.
- * \param rgba quad of (depth) texel values
- * \param p texture 'P' components for four pixels in quad
- * \param j which pixel in the quad to test [0..3]
+ * Specialized version of mip_filter_linear with hard-wired calls to
+ * 2d lambda calculation and 2d_linear_repeat_POT img filters.
*/
-static INLINE void
-shadow_compare(const struct pipe_sampler_state *sampler,
- float rgba[NUM_CHANNELS][QUAD_SIZE],
- const float p[QUAD_SIZE],
- uint j)
+static void
+mip_filter_linear_2d_linear_repeat_POT(
+ struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- int k;
- switch (sampler->compare_func) {
- case PIPE_FUNC_LESS:
- k = p[j] < rgba[0][j];
- break;
- case PIPE_FUNC_LEQUAL:
- k = p[j] <= rgba[0][j];
- break;
- case PIPE_FUNC_GREATER:
- k = p[j] > rgba[0][j];
- break;
- case PIPE_FUNC_GEQUAL:
- k = p[j] >= rgba[0][j];
- break;
- case PIPE_FUNC_EQUAL:
- k = p[j] == rgba[0][j];
- break;
- case PIPE_FUNC_NOTEQUAL:
- k = p[j] != rgba[0][j];
- break;
- case PIPE_FUNC_ALWAYS:
- k = 1;
- break;
- case PIPE_FUNC_NEVER:
- k = 0;
- break;
- default:
- k = 0;
- assert(0);
- break;
+ struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_texture *texture = samp->texture;
+ int level0;
+ float lambda;
+
+ lambda = compute_lambda_2d(samp, s, t, p, lodbias);
+ level0 = (int)lambda;
+
+ /* Catches both negative and large values of level0:
+ */
+ if ((unsigned)level0 >= texture->last_level) {
+ if (level0 < 0)
+ samp->level = 0;
+ else
+ samp->level = texture->last_level;
+
+ img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
}
+ else {
+ float levelBlend = lambda - level0;
+ float rgba0[4][4];
+ float rgba1[4][4];
+ int c,j;
- /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
- rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
- rgba[3][j] = 1.0F;
+ samp->level = level0;
+ img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
+
+ samp->level = level0+1;
+ img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
+ }
+ }
+ }
}
+
/**
- * As above, but do four z/texture comparisons.
+ * Do shadow/depth comparisons.
*/
-static INLINE void
-shadow_compare4(const struct pipe_sampler_state *sampler,
- float rgba[NUM_CHANNELS][QUAD_SIZE],
- const float p[QUAD_SIZE])
+static void
+sample_compare(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
{
+ struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct pipe_sampler_state *sampler = samp->sampler;
int j, k0, k1, k2, k3;
float val;
+ samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
+
+ /**
+ * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
+ * When we sampled the depth texture, the depth value was put into all
+ * RGBA channels. We look at the red channel here.
+ */
+
/* compare four texcoords vs. four texture samples */
switch (sampler->compare_func) {
case PIPE_FUNC_LESS:
@@ -826,470 +1500,392 @@ shadow_compare4(const struct pipe_sampler_state *sampler,
/**
- * Common code for sampling 1D/2D/cube textures.
- * Could probably extend for 3D...
+ * Compute which cube face is referenced by each texcoord and put that
+ * info into the sampler faces[] array. Then sample the cube faces
*/
static void
-sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- boolean computeLambda,
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE],
- const unsigned faces[4])
-{
- const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
- const struct softpipe_context *sp = samp->sp;
- const uint unit = samp->unit;
- const struct pipe_texture *texture = sp->texture[unit];
- const struct pipe_sampler_state *sampler = sp->sampler[unit];
- unsigned level0, level1, j, imgFilter;
- int width, height;
- float levelBlend;
-
- choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
- &level0, &level1, &levelBlend, &imgFilter);
-
- assert(sampler->normalized_coords);
-
- width = texture->width[level0];
- height = texture->height[level0];
-
- assert(width > 0);
-
- switch (imgFilter) {
- case PIPE_TEX_FILTER_NEAREST:
- {
- int x[4], y[4];
- nearest_texcoord_4(sampler->wrap_s, s, width, x);
- nearest_texcoord_4(sampler->wrap_t, t, height, y);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(sampler, rgba, p, j);
- }
+sample_cube(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ unsigned j;
+ float ssss[4], tttt[4];
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x[j] /= 2;
- y[j] /= 2;
- get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
- rgba2, j);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
- shadow_compare(sampler, rgba2, p, j);
- }
-
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
- }
- }
+ /*
+ major axis
+ direction target sc tc ma
+ ---------- ------------------------------- --- --- ---
+ +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
+ -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
+ +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
+ -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
+ +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
+ -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
+ */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ float rx = s[j];
+ float ry = t[j];
+ float rz = p[j];
+ const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
+ unsigned face;
+ float sc, tc, ma;
+
+ if (arx >= ary && arx >= arz) {
+ if (rx >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_X;
+ sc = -rz;
+ tc = -ry;
+ ma = arx;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_X;
+ sc = rz;
+ tc = -ry;
+ ma = arx;
}
}
- break;
- case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
+ else if (ary >= arx && ary >= arz) {
+ if (ry >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_Y;
+ sc = rx;
+ tc = rz;
+ ma = ary;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Y;
+ sc = rx;
+ tc = -rz;
+ ma = ary;
+ }
+ }
+ else {
+ if (rz > 0.0F) {
+ face = PIPE_TEX_FACE_POS_Z;
+ sc = rx;
+ tc = -ry;
+ ma = arz;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Z;
+ sc = -rx;
+ tc = -ry;
+ ma = arz;
+ }
+ }
+
{
- int x0[4], y0[4], x1[4], y1[4];
- float xw[4], yw[4]; /* weights */
-
- linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
- linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4]; /* texels */
- int c;
- get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
- get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
- get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
- get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare4(sampler, tx, p);
- }
+ const float ima = 1.0 / ma;
+ ssss[j] = ( sc * ima + 1.0F ) * 0.5F;
+ tttt[j] = ( tc * ima + 1.0F ) * 0.5F;
+ samp->faces[j] = face;
+ }
+ }
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(xw[j], yw[j],
- tx[c][0], tx[c][1],
- tx[c][2], tx[c][3]);
- }
+ /* In our little pipeline, the compare stage is next. If compare
+ * is not active, this will point somewhere deeper into the
+ * pipeline, eg. to mip_filter or even img_filter.
+ */
+ samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
+}
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- x0[j] /= 2;
- y0[j] /= 2;
- x1[j] /= 2;
- y1[j] /= 2;
- get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
- get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
- get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
- get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
- shadow_compare4(sampler, tx, p);
- }
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba2[c][j] = lerp_2d(xw[j], yw[j],
- tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
- }
-
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
- }
- }
- }
- }
- break;
+
+
+static wrap_nearest_func
+get_nearest_unorm_wrap(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_CLAMP:
+ return wrap_nearest_unorm_clamp;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return wrap_nearest_unorm_clamp_to_border;
default:
assert(0);
+ return wrap_nearest_unorm_clamp;
}
}
-static INLINE void
-sp_get_samples_1d(const struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- boolean computeLambda,
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
+static wrap_nearest_func
+get_nearest_wrap(unsigned mode)
{
- static const unsigned faces[4] = {0, 0, 0, 0};
- static const float tzero[4] = {0, 0, 0, 0};
- sp_get_samples_2d_common(sampler, s, tzero, NULL,
- computeLambda, lodbias, rgba, faces);
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return wrap_nearest_repeat;
+ case PIPE_TEX_WRAP_CLAMP:
+ return wrap_nearest_clamp;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return wrap_nearest_clamp_to_edge;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return wrap_nearest_clamp_to_border;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return wrap_nearest_mirror_repeat;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return wrap_nearest_mirror_clamp;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return wrap_nearest_mirror_clamp_to_edge;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return wrap_nearest_mirror_clamp_to_border;
+ default:
+ assert(0);
+ return wrap_nearest_repeat;
+ }
}
-static INLINE void
-sp_get_samples_2d(const struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- boolean computeLambda,
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
+static wrap_linear_func
+get_linear_unorm_wrap(unsigned mode)
{
- static const unsigned faces[4] = {0, 0, 0, 0};
- sp_get_samples_2d_common(sampler, s, t, p,
- computeLambda, lodbias, rgba, faces);
+ switch (mode) {
+ case PIPE_TEX_WRAP_CLAMP:
+ return wrap_linear_unorm_clamp;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return wrap_linear_unorm_clamp_to_border;
+ default:
+ assert(0);
+ return wrap_linear_unorm_clamp;
+ }
}
-static INLINE void
-sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- boolean computeLambda,
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
+static wrap_linear_func
+get_linear_wrap(unsigned mode)
{
- const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
- const struct softpipe_context *sp = samp->sp;
- const uint unit = samp->unit;
- const struct pipe_texture *texture = sp->texture[unit];
- const struct pipe_sampler_state *sampler = sp->sampler[unit];
- /* get/map pipe_surfaces corresponding to 3D tex slices */
- unsigned level0, level1, j, imgFilter;
- int width, height, depth;
- float levelBlend;
- const uint face = 0;
-
- choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
- &level0, &level1, &levelBlend, &imgFilter);
-
- assert(sampler->normalized_coords);
-
- width = texture->width[level0];
- height = texture->height[level0];
- depth = texture->depth[level0];
-
- assert(width > 0);
- assert(height > 0);
- assert(depth > 0);
-
- switch (imgFilter) {
- case PIPE_TEX_FILTER_NEAREST:
- {
- int x[4], y[4], z[4];
- nearest_texcoord_4(sampler->wrap_s, s, width, x);
- nearest_texcoord_4(sampler->wrap_t, t, height, y);
- nearest_texcoord_4(sampler->wrap_r, p, depth, z);
- for (j = 0; j < QUAD_SIZE; j++) {
- get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x[j] /= 2;
- y[j] /= 2;
- z[j] /= 2;
- get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
- }
- }
- }
- }
- break;
- case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
- {
- int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
- float xw[4], yw[4], zw[4]; /* interpolation weights */
- linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
- linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
- linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int c;
- float tx0[4][4], tx1[4][4];
- get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
- get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
- get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
- get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
- get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
- get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
- get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
- get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
- tx0[c][0], tx0[c][1],
- tx0[c][2], tx0[c][3],
- tx1[c][0], tx1[c][1],
- tx1[c][2], tx1[c][3]);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- x0[j] /= 2;
- y0[j] /= 2;
- z0[j] /= 2;
- x1[j] /= 2;
- y1[j] /= 2;
- z1[j] /= 2;
- get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
- get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
- get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
- get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
- get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
- get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
- get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
- get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
- tx0[c][0], tx0[c][1],
- tx0[c][2], tx0[c][3],
- tx1[c][0], tx1[c][1],
- tx1[c][2], tx1[c][3]);
- }
-
- /* blend mipmap levels */
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
- }
- }
- }
- }
- break;
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return wrap_linear_repeat;
+ case PIPE_TEX_WRAP_CLAMP:
+ return wrap_linear_clamp;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return wrap_linear_clamp_to_edge;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return wrap_linear_clamp_to_border;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return wrap_linear_mirror_repeat;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return wrap_linear_mirror_clamp;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return wrap_linear_mirror_clamp_to_edge;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return wrap_linear_mirror_clamp_to_border;
default:
assert(0);
+ return wrap_linear_repeat;
}
}
-static void
-sp_get_samples_cube(const struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- boolean computeLambda,
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
+static compute_lambda_func
+get_lambda_func(const union sp_sampler_key key)
{
- unsigned faces[QUAD_SIZE], j;
- float ssss[4], tttt[4];
- for (j = 0; j < QUAD_SIZE; j++) {
- faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
+ if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
+ return compute_lambda_vert;
+
+ switch (key.bits.target) {
+ case PIPE_TEXTURE_1D:
+ return compute_lambda_1d;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_CUBE:
+ return compute_lambda_2d;
+ case PIPE_TEXTURE_3D:
+ return compute_lambda_3d;
+ default:
+ assert(0);
+ return compute_lambda_1d;
}
- sp_get_samples_2d_common(sampler, ssss, tttt, NULL,
- computeLambda, lodbias, rgba, faces);
}
-static void
-sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- boolean computeLambda,
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
- const struct softpipe_context *sp = samp->sp;
- const uint unit = samp->unit;
- const struct pipe_texture *texture = sp->texture[unit];
- const struct pipe_sampler_state *sampler = sp->sampler[unit];
- const uint face = 0;
- unsigned level0, level1, j, imgFilter;
- int width, height;
- float levelBlend;
-
- choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
- &level0, &level1, &levelBlend, &imgFilter);
-
- /* texture RECTS cannot be mipmapped */
- assert(level0 == level1);
-
- width = texture->width[level0];
- height = texture->height[level0];
-
- assert(width > 0);
-
- switch (imgFilter) {
- case PIPE_TEX_FILTER_NEAREST:
- {
- int x[4], y[4];
- nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
- nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
- for (j = 0; j < QUAD_SIZE; j++) {
- get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(sampler, rgba, p, j);
- }
- }
- }
+static filter_func
+get_img_filter(const union sp_sampler_key key,
+ unsigned filter,
+ const struct pipe_sampler_state *sampler)
+{
+ switch (key.bits.target) {
+ case PIPE_TEXTURE_1D:
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_1d_nearest;
+ else
+ return img_filter_1d_linear;
break;
- case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
+ case PIPE_TEXTURE_2D:
+ /* Try for fast path:
+ */
+ if (key.bits.is_pot &&
+ sampler->wrap_s == sampler->wrap_t &&
+ sampler->normalized_coords)
{
- int x0[4], y0[4], x1[4], y1[4];
- float xw[4], yw[4]; /* weights */
- linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw);
- linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4]; /* texels */
- int c;
- get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
- get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
- get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
- get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare4(sampler, tx, p);
+ switch (sampler->wrap_s) {
+ case PIPE_TEX_WRAP_REPEAT:
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return img_filter_2d_nearest_repeat_POT;
+ case PIPE_TEX_FILTER_LINEAR:
+ return img_filter_2d_linear_repeat_POT;
+ default:
+ break;
}
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(xw[j], yw[j],
- tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return img_filter_2d_nearest_clamp_POT;
+ default:
+ break;
}
}
}
+ /* Otherwise use default versions:
+ */
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_2d_nearest;
+ else
+ return img_filter_2d_linear;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_cube_nearest;
+ else
+ return img_filter_cube_linear;
+ break;
+ case PIPE_TEXTURE_3D:
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_3d_nearest;
+ else
+ return img_filter_3d_linear;
break;
default:
assert(0);
+ return img_filter_1d_nearest;
}
}
/**
- * Common code for vertex/fragment program texture sampling.
+ * Bind the given texture object and texture cache to the sampler varient.
*/
-static INLINE void
-sp_get_samples(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- boolean computeLambda,
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
+void
+sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
+ struct softpipe_tex_tile_cache *tex_cache,
+ const struct pipe_texture *texture )
{
- const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
- const struct softpipe_context *sp = samp->sp;
- const uint unit = samp->unit;
- const struct pipe_texture *texture = sp->texture[unit];
- const struct pipe_sampler_state *sampler = sp->sampler[unit];
-
- if (!texture)
- return;
+ const struct pipe_sampler_state *sampler = samp->sampler;
- switch (texture->target) {
- case PIPE_TEXTURE_1D:
- assert(sampler->normalized_coords);
- sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
- break;
- case PIPE_TEXTURE_2D:
- if (sampler->normalized_coords)
- sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
- else
- sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
- break;
- case PIPE_TEXTURE_3D:
- assert(sampler->normalized_coords);
- sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
- break;
- case PIPE_TEXTURE_CUBE:
- assert(sampler->normalized_coords);
- sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
- break;
- default:
- assert(0);
- }
-
-#if 0 /* DEBUG */
- {
- int i;
- printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]);
- for (i = 0; i < 4; i++) {
- printf("Frag %d: %f %f %f %f\n", i,
- rgba[0][i],
- rgba[1][i],
- rgba[2][i],
- rgba[3][i]);
- }
- }
-#endif
+ samp->texture = texture;
+ samp->cache = tex_cache;
+ samp->xpot = util_unsigned_logbase2( texture->width[0] );
+ samp->ypot = util_unsigned_logbase2( texture->height[0] );
+ samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
}
-/**
- * Called via tgsi_sampler::get_samples() when running a fragment shader.
- * Get four filtered RGBA values from the sampler's texture.
- */
void
-sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
+sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
{
- sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba);
+ FREE(samp);
}
/**
- * Called via tgsi_sampler::get_samples() when running a vertex shader.
- * Get four filtered RGBA values from the sampler's texture.
+ * Create a sampler varient for a given set of non-orthogonal state.
*/
-void
-sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
+struct sp_sampler_varient *
+sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
+ const union sp_sampler_key key )
{
- sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba);
+ struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
+ if (!samp)
+ return NULL;
+
+ samp->sampler = sampler;
+ samp->key = key;
+
+ /* Note that (for instance) linear_texcoord_s and
+ * nearest_texcoord_s may be active at the same time, if the
+ * sampler min_img_filter differs from its mag_img_filter.
+ */
+ if (sampler->normalized_coords) {
+ samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
+ samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
+ samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
+
+ samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
+ samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
+ samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
+ }
+ else {
+ samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
+ samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
+ samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
+
+ samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
+ samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
+ samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
+ }
+
+ samp->compute_lambda = get_lambda_func( key );
+
+ samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
+ samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
+
+ switch (sampler->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NONE:
+ if (sampler->min_img_filter == sampler->mag_img_filter)
+ samp->mip_filter = samp->min_img_filter;
+ else
+ samp->mip_filter = mip_filter_none;
+ break;
+
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ samp->mip_filter = mip_filter_nearest;
+ break;
+
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ if (key.bits.is_pot &&
+ sampler->min_img_filter == sampler->mag_img_filter &&
+ sampler->normalized_coords &&
+ sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
+ sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
+ sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
+ {
+ samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
+ }
+ else
+ {
+ samp->mip_filter = mip_filter_linear;
+ }
+ break;
+ }
+
+ if (sampler->compare_mode != FALSE) {
+ samp->compare = sample_compare;
+ }
+ else {
+ /* Skip compare operation by promoting the mip_filter function
+ * pointer:
+ */
+ samp->compare = samp->mip_filter;
+ }
+
+ if (key.bits.target == PIPE_TEXTURE_CUBE) {
+ samp->base.get_samples = sample_cube;
+ }
+ else {
+ samp->faces[0] = 0;
+ samp->faces[1] = 0;
+ samp->faces[2] = 0;
+ samp->faces[3] = 0;
+
+ /* Skip cube face determination by promoting the compare
+ * function pointer:
+ */
+ samp->base.get_samples = samp->compare;
+ }
+
+ return samp;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 40d8eb2c2a..b0797711d3 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -31,43 +31,122 @@
#include "tgsi/tgsi_exec.h"
+struct sp_sampler_varient;
+
+typedef void (*wrap_nearest_func)(const float s[4],
+ unsigned size,
+ int icoord[4]);
+
+typedef void (*wrap_linear_func)(const float s[4],
+ unsigned size,
+ int icoord0[4],
+ int icoord1[4],
+ float w[4]);
+
+typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias);
+
+typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
+
+
+union sp_sampler_key {
+ struct {
+ unsigned target:3;
+ unsigned is_pot:1;
+ unsigned processor:2;
+ unsigned unit:4;
+ unsigned pad:22;
+ } bits;
+ unsigned value;
+};
/**
* Subclass of tgsi_sampler
*/
-struct sp_shader_sampler
+struct sp_sampler_varient
{
struct tgsi_sampler base; /**< base class */
- uint unit;
- struct softpipe_context *sp;
- struct softpipe_tile_cache *cache;
+ union sp_sampler_key key;
+
+ /* The owner of this struct:
+ */
+ const struct pipe_sampler_state *sampler;
+
+
+ /* Currently bound texture:
+ */
+ const struct pipe_texture *texture;
+ struct softpipe_tex_tile_cache *cache;
+
+ unsigned processor;
+
+ /* For sp_get_samples_2d_linear_POT:
+ */
+ unsigned xpot;
+ unsigned ypot;
+ unsigned level;
+
+ unsigned faces[4];
+
+ wrap_nearest_func nearest_texcoord_s;
+ wrap_nearest_func nearest_texcoord_t;
+ wrap_nearest_func nearest_texcoord_p;
+
+ wrap_linear_func linear_texcoord_s;
+ wrap_linear_func linear_texcoord_t;
+ wrap_linear_func linear_texcoord_p;
+
+ filter_func min_img_filter;
+ filter_func mag_img_filter;
+
+ compute_lambda_func compute_lambda;
+
+ filter_func mip_filter;
+ filter_func compare;
+
+ /* Linked list:
+ */
+ struct sp_sampler_varient *next;
};
+struct sp_sampler;
+/* Create a sampler varient for a given set of non-orthogonal state. Currently the
+ */
+struct sp_sampler_varient *
+sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
+ const union sp_sampler_key key );
-static INLINE const struct sp_shader_sampler *
-sp_shader_sampler(const struct tgsi_sampler *sampler)
-{
- return (const struct sp_shader_sampler *) sampler;
-}
+void sp_sampler_varient_bind_texture( struct sp_sampler_varient *varient,
+ struct softpipe_tex_tile_cache *tex_cache,
+ const struct pipe_texture *tex );
+void sp_sampler_varient_destroy( struct sp_sampler_varient * );
-extern void
-sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
+
+
+static INLINE struct sp_sampler_varient *
+sp_sampler_varient(const struct tgsi_sampler *sampler)
+{
+ return (struct sp_sampler_varient *) sampler;
+}
extern void
-sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
+sp_get_samples(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
#endif /* SP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
new file mode 100644
index 0000000000..407a22a9f4
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -0,0 +1,273 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture tile caching.
+ *
+ * Author:
+ * Brian Paul
+ */
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_tile.h"
+#include "sp_context.h"
+#include "sp_surface.h"
+#include "sp_texture.h"
+#include "sp_tex_tile_cache.h"
+
+
+
+struct softpipe_tex_tile_cache *
+sp_create_tex_tile_cache( struct pipe_screen *screen )
+{
+ struct softpipe_tex_tile_cache *tc;
+ uint pos;
+
+ tc = CALLOC_STRUCT( softpipe_tex_tile_cache );
+ if (tc) {
+ tc->screen = screen;
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ tc->entries[pos].addr.bits.invalid = 1;
+ }
+ tc->last_tile = &tc->entries[0]; /* any tile */
+ }
+ return tc;
+}
+
+
+void
+sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc)
+{
+ struct pipe_screen *screen;
+ uint pos;
+
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ /*assert(tc->entries[pos].x < 0);*/
+ }
+ if (tc->transfer) {
+ screen = tc->transfer->texture->screen;
+ screen->tex_transfer_destroy(tc->transfer);
+ }
+ if (tc->tex_trans) {
+ screen = tc->tex_trans->texture->screen;
+ screen->tex_transfer_destroy(tc->tex_trans);
+ }
+
+ FREE( tc );
+}
+
+
+
+
+void
+sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc)
+{
+ if (tc->tex_trans && !tc->tex_trans_map)
+ tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans);
+}
+
+
+void
+sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc)
+{
+ if (tc->tex_trans_map) {
+ tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
+ tc->tex_trans_map = NULL;
+ }
+}
+
+/**
+ * Invalidate all cached tiles for the cached texture.
+ * Should be called when the texture is modified.
+ */
+void
+sp_tex_tile_cache_validate_texture(struct softpipe_tex_tile_cache *tc)
+{
+ unsigned i;
+
+ assert(tc);
+ assert(tc->texture);
+
+ for (i = 0; i < NUM_ENTRIES; i++) {
+ tc->entries[i].addr.bits.invalid = 1;
+ }
+}
+
+/**
+ * Specify the texture to cache.
+ */
+void
+sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc,
+ struct pipe_texture *texture)
+{
+ uint i;
+
+ assert(!tc->transfer);
+
+ if (tc->texture != texture) {
+ pipe_texture_reference(&tc->texture, texture);
+
+ if (tc->tex_trans) {
+ struct pipe_screen *screen = tc->tex_trans->texture->screen;
+
+ if (tc->tex_trans_map) {
+ screen->transfer_unmap(screen, tc->tex_trans);
+ tc->tex_trans_map = NULL;
+ }
+
+ screen->tex_transfer_destroy(tc->tex_trans);
+ tc->tex_trans = NULL;
+ }
+
+ /* mark as entries as invalid/empty */
+ /* XXX we should try to avoid this when the teximage hasn't changed */
+ for (i = 0; i < NUM_ENTRIES; i++) {
+ tc->entries[i].addr.bits.invalid = 1;
+ }
+
+ tc->tex_face = -1; /* any invalid value here */
+ }
+}
+
+
+
+
+/**
+ * Flush the tile cache: write all dirty tiles back to the transfer.
+ * any tiles "flagged" as cleared will be "really" cleared.
+ */
+void
+sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc)
+{
+ int pos;
+
+ if (tc->texture) {
+ /* caching a texture, mark all entries as empty */
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ tc->entries[pos].addr.bits.invalid = 1;
+ }
+ tc->tex_face = -1;
+ }
+
+}
+
+
+/**
+ * Given the texture face, level, zslice, x and y values, compute
+ * the cache entry position/index where we'd hope to find the
+ * cached texture tile.
+ * This is basically a direct-map cache.
+ * XXX There's probably lots of ways in which we can improve this.
+ */
+static INLINE uint
+tex_cache_pos( union tex_tile_address addr )
+{
+ uint entry = (addr.bits.x +
+ addr.bits.y * 9 +
+ addr.bits.z * 3 +
+ addr.bits.face +
+ addr.bits.level * 7);
+
+ return entry % NUM_ENTRIES;
+}
+
+/**
+ * Similar to sp_get_cached_tile() but for textures.
+ * Tiles are read-only and indexed with more params.
+ */
+const struct softpipe_tex_cached_tile *
+sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
+ union tex_tile_address addr )
+{
+ struct pipe_screen *screen = tc->screen;
+ struct softpipe_tex_cached_tile *tile;
+
+ tile = tc->entries + tex_cache_pos( addr );
+
+ if (addr.value != tile->addr.value) {
+
+ /* cache miss. Most misses are because we've invaldiated the
+ * texture cache previously -- most commonly on binding a new
+ * texture. Currently we effectively flush the cache on texture
+ * bind.
+ */
+#if 0
+ _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n"
+ " tile %u: x=%d y=%d z=%d face=%d level=%d\n",
+ pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level,
+ pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level);
+#endif
+
+ /* check if we need to get a new transfer */
+ if (!tc->tex_trans ||
+ tc->tex_face != addr.bits.face ||
+ tc->tex_level != addr.bits.level ||
+ tc->tex_z != addr.bits.z) {
+ /* get new transfer (view into texture) */
+
+ if (tc->tex_trans) {
+ if (tc->tex_trans_map) {
+ tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
+ tc->tex_trans_map = NULL;
+ }
+
+ screen->tex_transfer_destroy(tc->tex_trans);
+ tc->tex_trans = NULL;
+ }
+
+ tc->tex_trans =
+ screen->get_tex_transfer(screen, tc->texture,
+ addr.bits.face,
+ addr.bits.level,
+ addr.bits.z,
+ PIPE_TRANSFER_READ, 0, 0,
+ tc->texture->width[addr.bits.level],
+ tc->texture->height[addr.bits.level]);
+
+ tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
+
+ tc->tex_face = addr.bits.face;
+ tc->tex_level = addr.bits.level;
+ tc->tex_z = addr.bits.z;
+ }
+
+ /* get tile from the transfer (view into texture) */
+ pipe_get_tile_rgba(tc->tex_trans,
+ addr.bits.x * TILE_SIZE,
+ addr.bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
+ (float *) tile->data.color);
+ tile->addr = addr;
+ }
+
+ tc->last_tile = tile;
+ return tile;
+}
+
+
+
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
new file mode 100644
index 0000000000..ac6886a3df
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
@@ -0,0 +1,155 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SP_TEX_TILE_CACHE_H
+#define SP_TEX_TILE_CACHE_H
+
+
+#include "pipe/p_compiler.h"
+
+
+struct softpipe_context;
+struct softpipe_tex_tile_cache;
+
+
+/**
+ * Cache tile size (width and height). This needs to be a power of two.
+ */
+#define TILE_SIZE 64
+
+
+/* If we need to support > 4096, just expand this to be a 64 bit
+ * union, or consider tiling in Z as well.
+ */
+union tex_tile_address {
+ struct {
+ unsigned x:6; /* 4096 / TILE_SIZE */
+ unsigned y:6; /* 4096 / TILE_SIZE */
+ unsigned z:12; /* 4096 -- z not tiled */
+ unsigned face:3;
+ unsigned level:4;
+ unsigned invalid:1;
+ } bits;
+ unsigned value;
+};
+
+
+struct softpipe_tex_cached_tile
+{
+ union tex_tile_address addr;
+ union {
+ float color[TILE_SIZE][TILE_SIZE][4];
+ } data;
+};
+
+#define NUM_ENTRIES 50
+
+struct softpipe_tex_tile_cache
+{
+ struct pipe_screen *screen;
+ struct pipe_transfer *transfer;
+ void *transfer_map;
+
+ struct pipe_texture *texture; /**< if caching a texture */
+ unsigned timestamp;
+
+ struct softpipe_tex_cached_tile entries[NUM_ENTRIES];
+
+ struct pipe_transfer *tex_trans;
+ void *tex_trans_map;
+ int tex_face, tex_level, tex_z;
+
+ struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */
+};
+
+
+extern struct softpipe_tex_tile_cache *
+sp_create_tex_tile_cache( struct pipe_screen *screen );
+
+extern void
+sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc);
+
+
+extern void
+sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc);
+
+extern void
+sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc);
+
+extern void
+sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc,
+ struct pipe_texture *texture);
+
+void
+sp_tex_tile_cache_validate_texture(struct softpipe_tex_tile_cache *tc);
+
+extern void
+sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc);
+
+
+
+extern const struct softpipe_tex_cached_tile *
+sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
+ union tex_tile_address addr );
+
+static INLINE union tex_tile_address
+tex_tile_address( unsigned x,
+ unsigned y,
+ unsigned z,
+ unsigned face,
+ unsigned level )
+{
+ union tex_tile_address addr;
+
+ addr.value = 0;
+ addr.bits.x = x / TILE_SIZE;
+ addr.bits.y = y / TILE_SIZE;
+ addr.bits.z = z;
+ addr.bits.face = face;
+ addr.bits.level = level;
+
+ return addr;
+}
+
+/* Quickly retrieve tile if it matches last lookup.
+ */
+static INLINE const struct softpipe_tex_cached_tile *
+sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
+ union tex_tile_address addr )
+{
+ if (tc->last_tile->addr.value == addr.value)
+ return tc->last_tile;
+
+ return sp_find_cached_tile_tex( tc, addr );
+}
+
+
+
+
+
+#endif /* SP_TEX_TILE_CACHE_H */
+
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 70f0932431..1c64d58372 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -30,26 +30,21 @@
* Michel Dänzer <michel@tungstengraphics.com>
*/
-#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "sp_context.h"
#include "sp_state.h"
#include "sp_texture.h"
-#include "sp_tile_cache.h"
#include "sp_screen.h"
#include "sp_winsys.h"
-/* Simple, maximally packed layout.
- */
-
-
-/* Conventional allocation path for non-display textures:
+/**
+ * Conventional allocation path for non-display textures:
+ * Use a simple, maximally packed layout.
*/
static boolean
softpipe_texture_layout(struct pipe_screen *screen,
@@ -89,6 +84,10 @@ softpipe_texture_layout(struct pipe_screen *screen,
return spt->buffer != NULL;
}
+
+/**
+ * Texture layout for simple color buffers.
+ */
static boolean
softpipe_displaytarget_layout(struct pipe_screen *screen,
struct softpipe_texture * spt)
@@ -112,21 +111,22 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
}
-
-
-
static struct pipe_texture *
softpipe_texture_create(struct pipe_screen *screen,
- const struct pipe_texture *templat)
+ const struct pipe_texture *template)
{
struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture);
if (!spt)
return NULL;
- spt->base = *templat;
+ spt->base = *template;
pipe_reference_init(&spt->base.reference, 1);
spt->base.screen = screen;
+ spt->pot = (util_is_power_of_two(template->width[0]) &&
+ util_is_power_of_two(template->height[0]) &&
+ util_is_power_of_two(template->depth[0]));
+
if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
PIPE_TEXTURE_USAGE_PRIMARY)) {
if (!softpipe_displaytarget_layout(screen, spt))
@@ -222,6 +222,13 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ)
ps->usage |= PIPE_BUFFER_USAGE_CPU_READ;
+ if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_WRITE)) {
+ /* Mark the surface as dirty. The tile cache will look for this. */
+ spt->timestamp++;
+ softpipe_screen(screen)->timestamp++;
+ }
+
ps->face = face;
ps->level = level;
ps->zslice = zslice;
@@ -342,14 +349,13 @@ softpipe_transfer_map( struct pipe_screen *screen,
/* May want to different things here depending on read/write nature
* of the map:
*/
- if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ)
- {
+ if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) {
/* Do something to notify sharing contexts of a texture change.
* In softpipe, that would mean flushing the texture cache.
*/
softpipe_screen(screen)->timestamp++;
}
-
+
xfer_map = map + softpipe_transfer(transfer)->offset +
transfer->y / transfer->block.height * transfer->stride +
transfer->x / transfer->block.width * transfer->block.size;
@@ -360,7 +366,7 @@ softpipe_transfer_map( struct pipe_screen *screen,
static void
softpipe_transfer_unmap(struct pipe_screen *screen,
- struct pipe_transfer *transfer)
+ struct pipe_transfer *transfer)
{
struct softpipe_texture *spt;
@@ -371,14 +377,60 @@ softpipe_transfer_unmap(struct pipe_screen *screen,
if (transfer->usage != PIPE_TRANSFER_READ) {
/* Mark the texture as dirty to expire the tile caches. */
- spt->modified = TRUE;
+ spt->timestamp++;
}
}
+static struct pipe_video_surface*
+softpipe_video_surface_create(struct pipe_screen *screen,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height)
+{
+ struct softpipe_video_surface *sp_vsfc;
+ struct pipe_texture template;
-void
-softpipe_init_texture_funcs(struct softpipe_context *sp)
+ assert(screen);
+ assert(width && height);
+
+ sp_vsfc = CALLOC_STRUCT(softpipe_video_surface);
+ if (!sp_vsfc)
+ return NULL;
+
+ pipe_reference_init(&sp_vsfc->base.reference, 1);
+ sp_vsfc->base.screen = screen;
+ sp_vsfc->base.chroma_format = chroma_format;
+ /*sp_vsfc->base.surface_format = PIPE_VIDEO_SURFACE_FORMAT_VUYA;*/
+ sp_vsfc->base.width = width;
+ sp_vsfc->base.height = height;
+
+ memset(&template, 0, sizeof(struct pipe_texture));
+ template.target = PIPE_TEXTURE_2D;
+ template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+ template.last_level = 0;
+ /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */
+ template.width[0] = util_next_power_of_two(width);
+ template.height[0] = util_next_power_of_two(height);
+ template.depth[0] = 1;
+ pf_get_block(template.format, &template.block);
+ template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+ sp_vsfc->tex = screen->texture_create(screen, &template);
+ if (!sp_vsfc->tex) {
+ FREE(sp_vsfc);
+ return NULL;
+ }
+
+ return &sp_vsfc->base;
+}
+
+
+static void
+softpipe_video_surface_destroy(struct pipe_video_surface *vsfc)
{
+ struct softpipe_video_surface *sp_vsfc = softpipe_video_surface(vsfc);
+
+ pipe_texture_reference(&sp_vsfc->tex, NULL);
+ FREE(sp_vsfc);
}
@@ -396,6 +448,9 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
screen->tex_transfer_destroy = softpipe_tex_transfer_destroy;
screen->transfer_map = softpipe_transfer_map;
screen->transfer_unmap = softpipe_transfer_unmap;
+
+ screen->video_surface_create = softpipe_video_surface_create;
+ screen->video_surface_destroy = softpipe_video_surface_destroy;
}
@@ -404,7 +459,7 @@ softpipe_get_texture_buffer( struct pipe_texture *texture,
struct pipe_buffer **buf,
unsigned *stride )
{
- struct softpipe_texture *tex = (struct softpipe_texture *)texture;
+ struct softpipe_texture *tex = (struct softpipe_texture *) texture;
if (!tex)
return FALSE;
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
index 893aa7d11d..2ef64e1e7c 100644
--- a/src/gallium/drivers/softpipe/sp_texture.h
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -30,6 +30,7 @@
#include "pipe/p_state.h"
+#include "pipe/p_video_state.h"
struct pipe_context;
@@ -48,7 +49,11 @@ struct softpipe_texture
*/
struct pipe_buffer *buffer;
- boolean modified;
+ /* True if texture images are power-of-two in all dimensions:
+ */
+ boolean pot;
+
+ unsigned timestamp;
};
struct softpipe_transfer
@@ -58,6 +63,15 @@ struct softpipe_transfer
unsigned long offset;
};
+struct softpipe_video_surface
+{
+ struct pipe_video_surface base;
+
+ /* The data is held here:
+ */
+ struct pipe_texture *tex;
+};
+
/** cast wrappers */
static INLINE struct softpipe_texture *
@@ -72,9 +86,12 @@ softpipe_transfer(struct pipe_transfer *pt)
return (struct softpipe_transfer *) pt;
}
+static INLINE struct softpipe_video_surface *
+softpipe_video_surface(struct pipe_video_surface *pvs)
+{
+ return (struct softpipe_video_surface *) pvs;
+}
-extern void
-softpipe_init_texture_funcs( struct softpipe_context *softpipe );
extern void
softpipe_init_screen_texture_funcs(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 461cbb9f95..de47970985 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -26,7 +26,7 @@
**************************************************************************/
/**
- * Texture tile caching.
+ * Render target tile caching.
*
* Author:
* Brian Paul
@@ -35,38 +35,8 @@
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
#include "util/u_tile.h"
-#include "sp_context.h"
-#include "sp_surface.h"
-#include "sp_texture.h"
#include "sp_tile_cache.h"
-#define NUM_ENTRIES 50
-
-
-/** XXX move these */
-#define MAX_WIDTH 4096
-#define MAX_HEIGHT 4096
-
-
-struct softpipe_tile_cache
-{
- struct pipe_screen *screen;
- struct pipe_surface *surface; /**< the surface we're caching */
- struct pipe_transfer *transfer;
- void *transfer_map;
- struct pipe_texture *texture; /**< if caching a texture */
- struct softpipe_cached_tile entries[NUM_ENTRIES];
- uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32];
- float clear_color[4]; /**< for color bufs */
- uint clear_val; /**< for z+stencil, or packed color clear value */
- boolean depth_stencil; /**< Is the surface a depth/stencil format? */
-
- struct pipe_transfer *tex_trans;
- void *tex_trans_map;
- int tex_face, tex_level, tex_z;
-
- struct softpipe_cached_tile tile; /**< scratch tile for clears */
-};
/**
@@ -76,7 +46,7 @@ struct softpipe_tile_cache
* a LRU replacement policy.
*/
#define CACHE_POS(x, y) \
- (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES)
+ (((x) + (y) * 5) % NUM_ENTRIES)
@@ -84,12 +54,10 @@ struct softpipe_tile_cache
* Is the tile at (x,y) in cleared state?
*/
static INLINE uint
-is_clear_flag_set(const uint *bitvec, int x, int y)
+is_clear_flag_set(const uint *bitvec, union tile_address addr)
{
int pos, bit;
- x /= TILE_SIZE;
- y /= TILE_SIZE;
- pos = y * (MAX_WIDTH / TILE_SIZE) + x;
+ pos = addr.bits.y * (MAX_WIDTH / TILE_SIZE) + addr.bits.x;
assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32);
bit = bitvec[pos / 32] & (1 << (pos & 31));
return bit;
@@ -100,12 +68,10 @@ is_clear_flag_set(const uint *bitvec, int x, int y)
* Mark the tile at (x,y) as not cleared.
*/
static INLINE void
-clear_clear_flag(uint *bitvec, int x, int y)
+clear_clear_flag(uint *bitvec, union tile_address addr)
{
int pos;
- x /= TILE_SIZE;
- y /= TILE_SIZE;
- pos = y * (MAX_WIDTH / TILE_SIZE) + x;
+ pos = addr.bits.y * (MAX_WIDTH / TILE_SIZE) + addr.bits.x;
assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32);
bitvec[pos / 32] &= ~(1 << (pos & 31));
}
@@ -127,9 +93,9 @@ sp_create_tile_cache( struct pipe_screen *screen )
if (tc) {
tc->screen = screen;
for (pos = 0; pos < NUM_ENTRIES; pos++) {
- tc->entries[pos].x =
- tc->entries[pos].y = -1;
+ tc->entries[pos].addr.bits.invalid = 1;
}
+ tc->last_tile = &tc->entries[0]; /* any tile */
}
return tc;
}
@@ -148,10 +114,6 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc)
screen = tc->transfer->texture->screen;
screen->tex_transfer_destroy(tc->transfer);
}
- if (tc->tex_trans) {
- screen = tc->tex_trans->texture->screen;
- screen->tex_transfer_destroy(tc->tex_trans);
- }
FREE( tc );
}
@@ -164,8 +126,6 @@ void
sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
struct pipe_surface *ps)
{
- assert(!tc->texture);
-
if (tc->transfer) {
struct pipe_screen *screen = tc->transfer->texture->screen;
@@ -217,9 +177,6 @@ sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc)
{
if (tc->transfer && !tc->transfer_map)
tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer);
-
- if (tc->tex_trans && !tc->tex_trans_map)
- tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans);
}
@@ -230,47 +187,6 @@ sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc)
tc->screen->transfer_unmap(tc->screen, tc->transfer);
tc->transfer_map = NULL;
}
-
- if (tc->tex_trans_map) {
- tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
- tc->tex_trans_map = NULL;
- }
-}
-
-
-/**
- * Specify the texture to cache.
- */
-void
-sp_tile_cache_set_texture(struct pipe_context *pipe,
- struct softpipe_tile_cache *tc,
- struct pipe_texture *texture)
-{
- uint i;
-
- assert(!tc->transfer);
-
- pipe_texture_reference(&tc->texture, texture);
-
- if (tc->tex_trans) {
- struct pipe_screen *screen = tc->tex_trans->texture->screen;
-
- if (tc->tex_trans_map) {
- screen->transfer_unmap(screen, tc->tex_trans);
- tc->tex_trans_map = NULL;
- }
-
- screen->tex_transfer_destroy(tc->tex_trans);
- tc->tex_trans = NULL;
- }
-
- /* mark as entries as invalid/empty */
- /* XXX we should try to avoid this when the teximage hasn't changed */
- for (i = 0; i < NUM_ENTRIES; i++) {
- tc->entries[i].x = -1;
- }
-
- tc->tex_face = -1; /* any invalid value here */
}
@@ -314,7 +230,7 @@ clear_tile(struct softpipe_cached_tile *tile,
switch (pf_get_size(format)) {
case 1:
- memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE);
+ memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE);
break;
case 2:
if (clear_value == 0) {
@@ -350,8 +266,7 @@ clear_tile(struct softpipe_cached_tile *tile,
* Actually clear the tiles which were flagged as being in a clear state.
*/
static void
-sp_tile_cache_flush_clear(struct pipe_context *pipe,
- struct softpipe_tile_cache *tc)
+sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc)
{
struct pipe_transfer *pt = tc->transfer;
const uint w = tc->transfer->width;
@@ -365,13 +280,15 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe,
/* push the tile to all positions marked as clear */
for (y = 0; y < h; y += TILE_SIZE) {
for (x = 0; x < w; x += TILE_SIZE) {
- if (is_clear_flag_set(tc->clear_flags, x, y)) {
+ union tile_address addr = tile_address(x, y);
+
+ if (is_clear_flag_set(tc->clear_flags, addr)) {
pipe_put_tile_raw(pt,
x, y, TILE_SIZE, TILE_SIZE,
tc->tile.data.color32, 0/*STRIDE*/);
/* do this? */
- clear_clear_flag(tc->clear_flags, x, y);
+ clear_clear_flag(tc->clear_flags, addr);
numCleared++;
}
@@ -388,8 +305,7 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe,
* any tiles "flagged" as cleared will be "really" cleared.
*/
void
-sp_flush_tile_cache(struct softpipe_context *softpipe,
- struct softpipe_tile_cache *tc)
+sp_flush_tile_cache(struct softpipe_tile_cache *tc)
{
struct pipe_transfer *pt = tc->transfer;
int inuse = 0, pos;
@@ -398,33 +314,30 @@ sp_flush_tile_cache(struct softpipe_context *softpipe,
/* caching a drawing transfer */
for (pos = 0; pos < NUM_ENTRIES; pos++) {
struct softpipe_cached_tile *tile = tc->entries + pos;
- if (tile->x >= 0) {
+ if (!tile->addr.bits.invalid) {
if (tc->depth_stencil) {
pipe_put_tile_raw(pt,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->addr.bits.x * TILE_SIZE,
+ tile->addr.bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
tile->data.depth32, 0/*STRIDE*/);
}
else {
pipe_put_tile_rgba(pt,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->addr.bits.x * TILE_SIZE,
+ tile->addr.bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
(float *) tile->data.color);
}
- tile->x = tile->y = -1; /* mark as empty */
+ tile->addr.bits.invalid = 1; /* mark as empty */
inuse++;
}
}
#if TILE_CLEAR_OPTIMIZATION
- sp_tile_cache_flush_clear(&softpipe->pipe, tc);
+ sp_tile_cache_flush_clear(tc);
#endif
}
- else if (tc->texture) {
- /* caching a texture, mark all entries as empty */
- for (pos = 0; pos < NUM_ENTRIES; pos++) {
- tc->entries[pos].x = -1;
- }
- tc->tex_face = -1;
- }
#if 0
debug_printf("flushed tiles in use: %d\n", inuse);
@@ -437,40 +350,39 @@ sp_flush_tile_cache(struct softpipe_context *softpipe,
* \param x, y position of tile, in pixels
*/
struct softpipe_cached_tile *
-sp_get_cached_tile(struct softpipe_context *softpipe,
- struct softpipe_tile_cache *tc, int x, int y)
+sp_find_cached_tile(struct softpipe_tile_cache *tc,
+ union tile_address addr )
{
struct pipe_transfer *pt = tc->transfer;
-
- /* tile pos in framebuffer: */
- const int tile_x = x & ~(TILE_SIZE - 1);
- const int tile_y = y & ~(TILE_SIZE - 1);
-
+
/* cache pos/entry: */
- const int pos = CACHE_POS(x, y);
+ const int pos = CACHE_POS(addr.bits.x,
+ addr.bits.y);
struct softpipe_cached_tile *tile = tc->entries + pos;
- if (tile_x != tile->x ||
- tile_y != tile->y) {
+ if (addr.value != tile->addr.value) {
- if (tile->x != -1) {
+ if (tile->addr.bits.invalid == 0) {
/* put dirty tile back in framebuffer */
if (tc->depth_stencil) {
pipe_put_tile_raw(pt,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->addr.bits.x * TILE_SIZE,
+ tile->addr.bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
tile->data.depth32, 0/*STRIDE*/);
}
else {
pipe_put_tile_rgba(pt,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->addr.bits.x * TILE_SIZE,
+ tile->addr.bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
(float *) tile->data.color);
}
}
- tile->x = tile_x;
- tile->y = tile_y;
+ tile->addr = addr;
- if (is_clear_flag_set(tc->clear_flags, x, y)) {
+ if (is_clear_flag_set(tc->clear_flags, addr)) {
/* don't get tile from framebuffer, just clear it */
if (tc->depth_stencil) {
clear_tile(tile, pt->format, tc->clear_val);
@@ -478,125 +390,33 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
else {
clear_tile_rgba(tile, pt->format, tc->clear_color);
}
- clear_clear_flag(tc->clear_flags, x, y);
+ clear_clear_flag(tc->clear_flags, addr);
}
else {
/* get new tile data from transfer */
if (tc->depth_stencil) {
pipe_get_tile_raw(pt,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->addr.bits.x * TILE_SIZE,
+ tile->addr.bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
tile->data.depth32, 0/*STRIDE*/);
}
else {
pipe_get_tile_rgba(pt,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->addr.bits.x * TILE_SIZE,
+ tile->addr.bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
(float *) tile->data.color);
}
}
}
+ tc->last_tile = tile;
return tile;
}
-/**
- * Given the texture face, level, zslice, x and y values, compute
- * the cache entry position/index where we'd hope to find the
- * cached texture tile.
- * This is basically a direct-map cache.
- * XXX There's probably lots of ways in which we can improve this.
- */
-static INLINE uint
-tex_cache_pos(int x, int y, int z, int face, int level)
-{
- uint entry = x + y * 9 + z * 3 + face + level * 7;
- return entry % NUM_ENTRIES;
-}
-
-/**
- * Similar to sp_get_cached_tile() but for textures.
- * Tiles are read-only and indexed with more params.
- */
-const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct softpipe_context *sp,
- struct softpipe_tile_cache *tc, int x, int y, int z,
- int face, int level)
-{
- struct pipe_screen *screen = sp->pipe.screen;
- /* tile pos in framebuffer: */
- const int tile_x = x & ~(TILE_SIZE - 1);
- const int tile_y = y & ~(TILE_SIZE - 1);
- /* cache pos/entry: */
- const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z,
- face, level);
- struct softpipe_cached_tile *tile = tc->entries + pos;
-
- if (tc->texture) {
- struct softpipe_texture *spt = softpipe_texture(tc->texture);
- if (spt->modified) {
- /* texture was modified, invalidate all cached tiles */
- uint p;
- for (p = 0; p < NUM_ENTRIES; p++) {
- tile = tc->entries + p;
- tile->x = -1;
- }
- spt->modified = FALSE;
- }
- }
-
- if (tile_x != tile->x ||
- tile_y != tile->y ||
- z != tile->z ||
- face != tile->face ||
- level != tile->level) {
- /* cache miss */
-
-#if 0
- printf("miss at %u x=%d y=%d z=%d face=%d level=%d\n", pos,
- x/TILE_SIZE, y/TILE_SIZE, z, face, level);
-#endif
- /* check if we need to get a new transfer */
- if (!tc->tex_trans ||
- tc->tex_face != face ||
- tc->tex_level != level ||
- tc->tex_z != z) {
- /* get new transfer (view into texture) */
-
- if (tc->tex_trans) {
- if (tc->tex_trans_map) {
- tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
- tc->tex_trans_map = NULL;
- }
-
- screen->tex_transfer_destroy(tc->tex_trans);
- tc->tex_trans = NULL;
- }
-
- tc->tex_trans = screen->get_tex_transfer(screen, tc->texture, face, level, z,
- PIPE_TRANSFER_READ, 0, 0,
- tc->texture->width[level],
- tc->texture->height[level]);
- tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
-
- tc->tex_face = face;
- tc->tex_level = level;
- tc->tex_z = z;
- }
-
- /* get tile from the transfer (view into texture) */
- pipe_get_tile_rgba(tc->tex_trans,
- tile_x, tile_y, TILE_SIZE, TILE_SIZE,
- (float *) tile->data.color);
- tile->x = tile_x;
- tile->y = tile_y;
- tile->z = z;
- tile->face = face;
- tile->level = level;
- }
-
- return tile;
-}
/**
@@ -627,6 +447,6 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba,
for (pos = 0; pos < NUM_ENTRIES; pos++) {
struct softpipe_cached_tile *tile = tc->entries + pos;
- tile->x = tile->y = -1;
+ tile->addr.bits.invalid = 1;
}
}
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index 8f247d0e58..a12092702a 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -34,7 +34,6 @@
#include "pipe/p_compiler.h"
-struct softpipe_context;
struct softpipe_tile_cache;
@@ -44,11 +43,23 @@ struct softpipe_tile_cache;
#define TILE_SIZE 64
+/* If we need to support > 4096, just expand this to be a 64 bit
+ * union, or consider tiling in Z as well.
+ */
+union tile_address {
+ struct {
+ unsigned x:6; /* 4096 / TILE_SIZE */
+ unsigned y:6; /* 4096 / TILE_SIZE */
+ unsigned invalid:1;
+ unsigned pad:19;
+ } bits;
+ unsigned value;
+};
+
struct softpipe_cached_tile
{
- int x, y; /**< pos of tile in window coords */
- int z, face, level; /**< Extra texture indexes */
+ union tile_address addr;
union {
float color[TILE_SIZE][TILE_SIZE][4];
uint color32[TILE_SIZE][TILE_SIZE];
@@ -59,6 +70,32 @@ struct softpipe_cached_tile
} data;
};
+#define NUM_ENTRIES 50
+
+
+/** XXX move these */
+#define MAX_WIDTH 4096
+#define MAX_HEIGHT 4096
+
+
+struct softpipe_tile_cache
+{
+ struct pipe_screen *screen;
+ struct pipe_surface *surface; /**< the surface we're caching */
+ struct pipe_transfer *transfer;
+ void *transfer_map;
+
+ struct softpipe_cached_tile entries[NUM_ENTRIES];
+ uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32];
+ float clear_color[4]; /**< for color bufs */
+ uint clear_val; /**< for z+stencil, or packed color clear value */
+ boolean depth_stencil; /**< Is the surface a depth/stencil format? */
+
+ struct softpipe_cached_tile tile; /**< scratch tile for clears */
+
+ struct softpipe_cached_tile *last_tile; /**< most recently retrieved tile */
+};
+
extern struct softpipe_tile_cache *
sp_create_tile_cache( struct pipe_screen *screen );
@@ -80,26 +117,45 @@ extern void
sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc);
extern void
-sp_tile_cache_set_texture(struct pipe_context *pipe,
- struct softpipe_tile_cache *tc,
- struct pipe_texture *texture);
-
-extern void
-sp_flush_tile_cache(struct softpipe_context *softpipe,
- struct softpipe_tile_cache *tc);
+sp_flush_tile_cache(struct softpipe_tile_cache *tc);
extern void
sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba,
uint clearValue);
extern struct softpipe_cached_tile *
-sp_get_cached_tile(struct softpipe_context *softpipe,
- struct softpipe_tile_cache *tc, int x, int y);
+sp_find_cached_tile(struct softpipe_tile_cache *tc,
+ union tile_address addr );
+
+
+static INLINE union tile_address
+tile_address( unsigned x,
+ unsigned y )
+{
+ union tile_address addr;
+
+ addr.value = 0;
+ addr.bits.x = x / TILE_SIZE;
+ addr.bits.y = y / TILE_SIZE;
+
+ return addr;
+}
+
+/* Quickly retrieve tile if it matches last lookup.
+ */
+static INLINE struct softpipe_cached_tile *
+sp_get_cached_tile(struct softpipe_tile_cache *tc,
+ int x, int y )
+{
+ union tile_address addr = tile_address( x, y );
+
+ if (tc->last_tile->addr.value == addr.value)
+ return tc->last_tile;
+
+ return sp_find_cached_tile( tc, addr );
+}
+
-extern const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct softpipe_context *softpipe,
- struct softpipe_tile_cache *tc, int x, int y, int z,
- int face, int level);
#endif /* SP_TILE_CACHE_H */
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
new file mode 100644
index 0000000000..ccb29726b6
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -0,0 +1,268 @@
+#include "sp_video_context.h"
+#include <pipe/p_inlines.h>
+#include <util/u_memory.h>
+#include "softpipe/sp_winsys.h"
+#include "softpipe/sp_texture.h"
+
+static void
+sp_mpeg12_destroy(struct pipe_video_context *vpipe)
+{
+ struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+
+ assert(vpipe);
+
+ /* Asserted in softpipe_delete_fs_state() for some reason */
+ ctx->pipe->bind_vs_state(ctx->pipe, NULL);
+ ctx->pipe->bind_fs_state(ctx->pipe, NULL);
+
+ ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend);
+ ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
+ ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
+
+ pipe_video_surface_reference(&ctx->decode_target, NULL);
+ vl_compositor_cleanup(&ctx->compositor);
+ vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+ ctx->pipe->destroy(ctx->pipe);
+
+ FREE(ctx);
+}
+
+static void
+sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
+ struct pipe_video_surface *past,
+ struct pipe_video_surface *future,
+ unsigned num_macroblocks,
+ struct pipe_macroblock *macroblocks,
+ struct pipe_fence_handle **fence)
+{
+ struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+ struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
+
+ assert(vpipe);
+ assert(num_macroblocks);
+ assert(macroblocks);
+ assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+ assert(ctx->decode_target);
+
+ vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer,
+ softpipe_video_surface(ctx->decode_target)->tex,
+ past ? softpipe_video_surface(past)->tex : NULL,
+ future ? softpipe_video_surface(future)->tex : NULL,
+ num_macroblocks, mpeg12_macroblocks, fence);
+}
+
+static void
+sp_mpeg12_clear_surface(struct pipe_video_context *vpipe,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ unsigned value,
+ struct pipe_surface *surface)
+{
+ struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+
+ assert(vpipe);
+ assert(surface);
+
+ ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value);
+}
+
+static void
+sp_mpeg12_render_picture(struct pipe_video_context *vpipe,
+ /*struct pipe_surface *backround,
+ struct pipe_video_rect *backround_area,*/
+ struct pipe_video_surface *src_surface,
+ enum pipe_mpeg12_picture_type picture_type,
+ /*unsigned num_past_surfaces,
+ struct pipe_video_surface *past_surfaces,
+ unsigned num_future_surfaces,
+ struct pipe_video_surface *future_surfaces,*/
+ struct pipe_video_rect *src_area,
+ struct pipe_surface *dst_surface,
+ struct pipe_video_rect *dst_area,
+ /*unsigned num_layers,
+ struct pipe_surface *layers,
+ struct pipe_video_rect *layer_src_areas,
+ struct pipe_video_rect *layer_dst_areas*/
+ struct pipe_fence_handle **fence)
+{
+ struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+
+ assert(vpipe);
+ assert(src_surface);
+ assert(src_area);
+ assert(dst_surface);
+ assert(dst_area);
+
+ vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex,
+ picture_type, src_area, dst_surface->texture, dst_area, fence);
+}
+
+static void
+sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
+ struct pipe_video_surface *dt)
+{
+ struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+
+ assert(vpipe);
+ assert(dt);
+
+ pipe_video_surface_reference(&ctx->decode_target, dt);
+}
+
+static bool
+init_pipe_state(struct sp_mpeg12_context *ctx)
+{
+ struct pipe_rasterizer_state rast;
+ struct pipe_blend_state blend;
+ struct pipe_depth_stencil_alpha_state dsa;
+ unsigned i;
+
+ assert(ctx);
+
+ rast.flatshade = 1;
+ rast.flatshade_first = 0;
+ rast.light_twoside = 0;
+ rast.front_winding = PIPE_WINDING_CCW;
+ rast.cull_mode = PIPE_WINDING_CW;
+ rast.fill_cw = PIPE_POLYGON_MODE_FILL;
+ rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
+ rast.offset_cw = 0;
+ rast.offset_ccw = 0;
+ rast.scissor = 0;
+ rast.poly_smooth = 0;
+ rast.poly_stipple_enable = 0;
+ rast.point_sprite = 0;
+ rast.point_size_per_vertex = 0;
+ rast.multisample = 0;
+ rast.line_smooth = 0;
+ rast.line_stipple_enable = 0;
+ rast.line_stipple_factor = 0;
+ rast.line_stipple_pattern = 0;
+ rast.line_last_pixel = 0;
+ rast.bypass_vs_clip_and_viewport = 0;
+ rast.line_width = 1;
+ rast.point_smooth = 0;
+ rast.point_size = 1;
+ rast.offset_units = 1;
+ rast.offset_scale = 1;
+ /*rast.sprite_coord_mode[i] = ;*/
+ ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
+ ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
+
+ blend.blend_enable = 0;
+ blend.rgb_func = PIPE_BLEND_ADD;
+ blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+ blend.alpha_func = PIPE_BLEND_ADD;
+ blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+ blend.logicop_enable = 0;
+ blend.logicop_func = PIPE_LOGICOP_CLEAR;
+ /* Needed to allow color writes to FB, even if blending disabled */
+ blend.colormask = PIPE_MASK_RGBA;
+ blend.dither = 0;
+ ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
+ ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
+
+ dsa.depth.enabled = 0;
+ dsa.depth.writemask = 0;
+ dsa.depth.func = PIPE_FUNC_ALWAYS;
+ dsa.depth.occlusion_count = 0;
+ for (i = 0; i < 2; ++i) {
+ dsa.stencil[i].enabled = 0;
+ dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+ dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+ dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+ dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+ dsa.stencil[i].ref_value = 0;
+ dsa.stencil[i].valuemask = 0;
+ dsa.stencil[i].writemask = 0;
+ }
+ dsa.alpha.enabled = 0;
+ dsa.alpha.func = PIPE_FUNC_ALWAYS;
+ dsa.alpha.ref_value = 0;
+ ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
+ ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
+
+ return true;
+}
+
+static struct pipe_video_context *
+sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height)
+{
+ struct sp_mpeg12_context *ctx;
+
+ assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
+
+ ctx = CALLOC_STRUCT(sp_mpeg12_context);
+
+ if (!ctx)
+ return NULL;
+
+ ctx->base.profile = profile;
+ ctx->base.chroma_format = chroma_format;
+ ctx->base.width = width;
+ ctx->base.height = height;
+
+ ctx->base.screen = screen;
+ ctx->base.destroy = sp_mpeg12_destroy;
+ ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks;
+ ctx->base.clear_surface = sp_mpeg12_clear_surface;
+ ctx->base.render_picture = sp_mpeg12_render_picture;
+ ctx->base.set_decode_target = sp_mpeg12_set_decode_target;
+
+ ctx->pipe = softpipe_create(screen);
+ if (!ctx->pipe) {
+ FREE(ctx);
+ return NULL;
+ }
+
+ /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture */
+ if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
+ width, height, chroma_format,
+ VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
+ /* TODO: Use XFER_NONE when implemented */
+ VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
+ true)) {
+ ctx->pipe->destroy(ctx->pipe);
+ FREE(ctx);
+ return NULL;
+ }
+
+ if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
+ vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+ ctx->pipe->destroy(ctx->pipe);
+ FREE(ctx);
+ return NULL;
+ }
+
+ if (!init_pipe_state(ctx)) {
+ vl_compositor_cleanup(&ctx->compositor);
+ vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+ ctx->pipe->destroy(ctx->pipe);
+ FREE(ctx);
+ return NULL;
+ }
+
+ return &ctx->base;
+}
+
+struct pipe_video_context *
+sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height)
+{
+ assert(screen);
+ assert(width && height);
+
+ switch (u_reduce_video_profile(profile)) {
+ case PIPE_VIDEO_CODEC_MPEG12:
+ return sp_mpeg12_create(screen, profile,
+ chroma_format,
+ width, height);
+ default:
+ return NULL;
+ }
+}
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
new file mode 100644
index 0000000000..2c7691c7cb
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -0,0 +1,30 @@
+#ifndef SP_VIDEO_CONTEXT_H
+#define SP_VIDEO_CONTEXT_H
+
+#include <pipe/p_video_context.h>
+#include <vl/vl_mpeg12_mc_renderer.h>
+#include <vl/vl_compositor.h>
+
+struct pipe_screen;
+struct pipe_context;
+struct pipe_video_surface;
+
+struct sp_mpeg12_context
+{
+ struct pipe_video_context base;
+ struct pipe_context *pipe;
+ struct pipe_video_surface *decode_target;
+ struct vl_mpeg12_mc_renderer mc_renderer;
+ struct vl_compositor compositor;
+
+ void *rast;
+ void *dsa;
+ void *blend;
+};
+
+struct pipe_video_context *
+sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height);
+
+#endif /* SP_VIDEO_CONTEXT_H */
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index ae0af4d055..bf470b46ae 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -125,11 +125,11 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag)
} else if ((tr_ctx->draw_rule.blocker & flag) &&
(tr_ctx->draw_blocker & 4)) {
boolean block = FALSE;
- debug_printf("%s (%lu %lu) (%lu %lu) (%lu %u) (%lu %u)\n", __FUNCTION__,
- tr_ctx->draw_rule.fs, tr_ctx->curr.fs,
- tr_ctx->draw_rule.vs, tr_ctx->curr.vs,
- tr_ctx->draw_rule.surf, 0,
- tr_ctx->draw_rule.tex, 0);
+ debug_printf("%s (%p %p) (%p %p) (%p %u) (%p %u)\n", __FUNCTION__,
+ (void *) tr_ctx->draw_rule.fs, (void *) tr_ctx->curr.fs,
+ (void *) tr_ctx->draw_rule.vs, (void *) tr_ctx->curr.vs,
+ (void *) tr_ctx->draw_rule.surf, 0,
+ (void *) tr_ctx->draw_rule.tex, 0);
if (tr_ctx->draw_rule.fs &&
tr_ctx->draw_rule.fs == tr_ctx->curr.fs)
block = TRUE;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index b01ab6d137..1980831dd9 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -281,7 +281,7 @@ enum pipe_transfer_usage {
#define PIPE_CAP_NPOT_TEXTURES 2
#define PIPE_CAP_TWO_SIDED_STENCIL 3
#define PIPE_CAP_GLSL 4 /* XXX need something better */
-#define PIPE_CAP_S3TC 5
+#define PIPE_CAP_S3TC 5 /* XXX: deprecated; cap determined via supported sampler formats */
#define PIPE_CAP_ANISOTROPIC_FILTER 6
#define PIPE_CAP_POINT_SPRITE 7
#define PIPE_CAP_MAX_RENDER_TARGETS 8
@@ -315,6 +315,30 @@ enum pipe_transfer_usage {
#define PIPE_REFERENCED_FOR_READ (1 << 0)
#define PIPE_REFERENCED_FOR_WRITE (1 << 1)
+
+enum pipe_video_codec
+{
+ PIPE_VIDEO_CODEC_MPEG12, /**< MPEG1, MPEG2 */
+ PIPE_VIDEO_CODEC_MPEG4, /**< DIVX, XVID */
+ PIPE_VIDEO_CODEC_VC1, /**< WMV */
+ PIPE_VIDEO_CODEC_MPEG4_AVC /**< H.264 */
+};
+
+enum pipe_video_profile
+{
+ PIPE_VIDEO_PROFILE_MPEG1,
+ PIPE_VIDEO_PROFILE_MPEG2_SIMPLE,
+ PIPE_VIDEO_PROFILE_MPEG2_MAIN,
+ PIPE_VIDEO_PROFILE_MPEG4_SIMPLE,
+ PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE,
+ PIPE_VIDEO_PROFILE_VC1_SIMPLE,
+ PIPE_VIDEO_PROFILE_VC1_MAIN,
+ PIPE_VIDEO_PROFILE_VC1_ADVANCED,
+ PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE,
+ PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN,
+ PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index c4469d4a9e..af23080920 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -613,6 +613,24 @@ pf_has_alpha( enum pipe_format format )
}
}
+enum pipe_video_chroma_format
+{
+ PIPE_VIDEO_CHROMA_FORMAT_420,
+ PIPE_VIDEO_CHROMA_FORMAT_422,
+ PIPE_VIDEO_CHROMA_FORMAT_444
+};
+
+#if 0
+enum pipe_video_surface_format
+{
+ PIPE_VIDEO_SURFACE_FORMAT_NV12, /**< Planar; Y plane, UV plane */
+ PIPE_VIDEO_SURFACE_FORMAT_YV12, /**< Planar; Y plane, U plane, V plane */
+ PIPE_VIDEO_SURFACE_FORMAT_YUYV, /**< Interleaved; Y,U,Y,V,Y,U,Y,V */
+ PIPE_VIDEO_SURFACE_FORMAT_UYVY, /**< Interleaved; U,Y,V,Y,U,Y,V,Y */
+ PIPE_VIDEO_SURFACE_FORMAT_VUYA /**< Packed; A31-24|Y23-16|U15-8|V7-0 */
+};
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h
index a5c1e8270a..30a4aaf409 100644
--- a/src/gallium/include/pipe/p_inlines.h
+++ b/src/gallium/include/pipe/p_inlines.h
@@ -118,7 +118,7 @@ pipe_buffer_write(struct pipe_screen *screen,
unsigned offset, unsigned size,
const void *data)
{
- uint8_t *map;
+ void *map;
assert(offset < buf->size);
assert(offset + size <= buf->size);
@@ -129,7 +129,7 @@ pipe_buffer_write(struct pipe_screen *screen,
PIPE_BUFFER_USAGE_FLUSH_EXPLICIT);
assert(map);
if(map) {
- memcpy(map + offset, data, size);
+ memcpy((uint8_t *)map + offset, data, size);
pipe_buffer_flush_mapped_range(screen, buf, offset, size);
pipe_buffer_unmap(screen, buf);
}
@@ -141,7 +141,7 @@ pipe_buffer_read(struct pipe_screen *screen,
unsigned offset, unsigned size,
void *data)
{
- uint8_t *map;
+ void *map;
assert(offset < buf->size);
assert(offset + size <= buf->size);
@@ -150,11 +150,31 @@ pipe_buffer_read(struct pipe_screen *screen,
map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_READ);
assert(map);
if(map) {
- memcpy(data, map + offset, size);
+ memcpy(data, (const uint8_t *)map + offset, size);
pipe_buffer_unmap(screen, buf);
}
}
+static INLINE void *
+pipe_transfer_map( struct pipe_transfer *transf )
+{
+ struct pipe_screen *screen = transf->texture->screen;
+ return screen->transfer_map(screen, transf);
+}
+
+static INLINE void
+pipe_transfer_unmap( struct pipe_transfer *transf )
+{
+ struct pipe_screen *screen = transf->texture->screen;
+ screen->transfer_unmap(screen, transf);
+}
+
+static INLINE void
+pipe_transfer_destroy( struct pipe_transfer *transf )
+{
+ struct pipe_screen *screen = transf->texture->screen;
+ screen->tex_transfer_destroy(transf);
+}
#ifdef __cplusplus
}
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 3f30c52a16..f0a4de5df3 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -53,7 +53,10 @@ extern "C" {
struct pipe_fence_handle;
struct pipe_winsys;
struct pipe_buffer;
-
+struct pipe_texture;
+struct pipe_surface;
+struct pipe_video_surface;
+struct pipe_transfer;
/**
@@ -252,6 +255,17 @@ struct pipe_screen {
void (*buffer_destroy)( struct pipe_buffer *buf );
+ /**
+ * Create a video surface suitable for use as a decoding target by the
+ * driver's pipe_video_context.
+ */
+ struct pipe_video_surface*
+ (*video_surface_create)( struct pipe_screen *screen,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height );
+
+ void (*video_surface_destroy)( struct pipe_video_surface *vsfc );
+
/**
* Do any special operations to ensure frontbuffer contents are
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 626bedb35a..b59d6b7ae3 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -114,11 +114,29 @@ struct pipe_rasterizer_state
* the vertex shader, clipping and viewport processing. Note that
* a vertex shader is still needed though, to indicate the mapping
* from vertex elements to fragment shader input semantics.
+ *
+ * XXX: considered for removal.
*/
unsigned bypass_vs_clip_and_viewport:1;
- unsigned flatshade_first:1; /**< take color attribute from the first vertex of a primitive */
- unsigned gl_rasterization_rules:1; /**< enable tweaks for GL rasterization? */
+ /**
+ * Use the first vertex of a primitive as the provoking vertex for
+ * flat shading.
+ */
+ unsigned flatshade_first:1;
+
+ /**
+ * When true, triangle rasterization uses (0.5, 0.5) pixel centers
+ * for determining pixel ownership.
+ *
+ * When false, triangle rasterization uses (0,0) pixel centers for
+ * determining pixel ownership.
+ *
+ * Triangle rasterization always uses a 'top,left' rule for pixel
+ * ownership, this just alters which point we consider the pixel
+ * center for that test.
+ */
+ unsigned gl_rasterization_rules:1;
float line_width;
float point_size; /**< used when no per-vertex size */
@@ -307,7 +325,7 @@ struct pipe_transfer
unsigned nblocksx; /**< allocated width in blocks */
unsigned nblocksy; /**< allocated height in blocks */
unsigned stride; /**< stride in bytes between rows of blocks */
- unsigned usage; /**< PIPE_TRANSFER_* */
+ enum pipe_transfer_usage usage; /**< PIPE_TRANSFER_* */
struct pipe_texture *texture; /**< texture to transfer to/from */
unsigned face;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
new file mode 100644
index 0000000000..937705ac50
--- /dev/null
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -0,0 +1,92 @@
+#ifndef PIPE_VIDEO_CONTEXT_H
+#define PIPE_VIDEO_CONTEXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pipe/p_video_state.h>
+
+struct pipe_screen;
+struct pipe_buffer;
+struct pipe_surface;
+struct pipe_video_surface;
+struct pipe_macroblock;
+struct pipe_picture_desc;
+struct pipe_fence_handle;
+
+/**
+ * Gallium video rendering context
+ */
+struct pipe_video_context
+{
+ struct pipe_screen *screen;
+ enum pipe_video_profile profile;
+ enum pipe_video_chroma_format chroma_format;
+ unsigned width;
+ unsigned height;
+
+ void *priv; /**< context private data (for DRI for example) */
+
+ void (*destroy)(struct pipe_video_context *vpipe);
+
+ /**
+ * Picture decoding and displaying
+ */
+ /*@{*/
+ void (*decode_bitstream)(struct pipe_video_context *vpipe,
+ unsigned num_bufs,
+ struct pipe_buffer **bitstream_buf);
+
+ void (*decode_macroblocks)(struct pipe_video_context *vpipe,
+ struct pipe_video_surface *past,
+ struct pipe_video_surface *future,
+ unsigned num_macroblocks,
+ struct pipe_macroblock *macroblocks,
+ struct pipe_fence_handle **fence);
+
+ void (*clear_surface)(struct pipe_video_context *vpipe,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ unsigned value,
+ struct pipe_surface *surface);
+
+ void (*render_picture)(struct pipe_video_context *vpipe,
+ /*struct pipe_surface *backround,
+ struct pipe_video_rect *backround_area,*/
+ struct pipe_video_surface *src_surface,
+ enum pipe_mpeg12_picture_type picture_type,
+ /*unsigned num_past_surfaces,
+ struct pipe_video_surface *past_surfaces,
+ unsigned num_future_surfaces,
+ struct pipe_video_surface *future_surfaces,*/
+ struct pipe_video_rect *src_area,
+ struct pipe_surface *dst_surface,
+ struct pipe_video_rect *dst_area,
+ /*unsigned num_layers,
+ struct pipe_texture *layers,
+ struct pipe_video_rect *layer_src_areas,
+ struct pipe_video_rect *layer_dst_areas,*/
+ struct pipe_fence_handle **fence);
+ /*@}*/
+
+ /**
+ * Parameter-like states (or properties)
+ */
+ /*@{*/
+ void (*set_picture_desc)(struct pipe_video_context *vpipe,
+ const struct pipe_picture_desc *desc);
+
+ void (*set_decode_target)(struct pipe_video_context *vpipe,
+ struct pipe_video_surface *dt);
+
+ /* TODO: Interface for CSC matrix, scaling modes, post-processing, etc. */
+ /*@}*/
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_VIDEO_CONTEXT_H */
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
new file mode 100644
index 0000000000..a0128fbd48
--- /dev/null
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -0,0 +1,158 @@
+#ifndef PIPE_VIDEO_STATE_H
+#define PIPE_VIDEO_STATE_H
+
+/* u_reduce_video_profile() needs these */
+#include <assert.h>
+#include <pipe/p_compiler.h>
+
+#include <pipe/p_defines.h>
+#include <pipe/p_format.h>
+#include <pipe/p_refcnt.h>
+#include <pipe/p_screen.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_video_surface
+{
+ struct pipe_reference reference;
+ struct pipe_screen *screen;
+ enum pipe_video_chroma_format chroma_format;
+ /*enum pipe_video_surface_format surface_format;*/
+ unsigned width;
+ unsigned height;
+};
+
+static INLINE void
+pipe_video_surface_reference(struct pipe_video_surface **ptr, struct pipe_video_surface *surf)
+{
+ struct pipe_video_surface *old_surf = *ptr;
+
+ if (pipe_reference((struct pipe_reference **)ptr, &surf->reference))
+ old_surf->screen->video_surface_destroy(old_surf);
+}
+
+struct pipe_video_rect
+{
+ unsigned x, y, w, h;
+};
+
+static INLINE enum pipe_video_codec
+u_reduce_video_profile(enum pipe_video_profile profile)
+{
+ switch (profile)
+ {
+ case PIPE_VIDEO_PROFILE_MPEG1:
+ case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+ case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+ return PIPE_VIDEO_CODEC_MPEG12;
+
+ case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
+ case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
+ return PIPE_VIDEO_CODEC_MPEG4;
+
+ case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
+ case PIPE_VIDEO_PROFILE_VC1_MAIN:
+ case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
+ return PIPE_VIDEO_CODEC_VC1;
+
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+ return PIPE_VIDEO_CODEC_MPEG4_AVC;
+
+ default:
+ assert(false);
+ }
+
+ return -1;
+}
+
+enum pipe_mpeg12_picture_type
+{
+ PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP,
+ PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM,
+ PIPE_MPEG12_PICTURE_TYPE_FRAME
+};
+
+enum pipe_mpeg12_macroblock_type
+{
+ PIPE_MPEG12_MACROBLOCK_TYPE_INTRA,
+ PIPE_MPEG12_MACROBLOCK_TYPE_FWD,
+ PIPE_MPEG12_MACROBLOCK_TYPE_BKWD,
+ PIPE_MPEG12_MACROBLOCK_TYPE_BI,
+
+ PIPE_MPEG12_MACROBLOCK_NUM_TYPES
+};
+
+enum pipe_mpeg12_motion_type
+{
+ PIPE_MPEG12_MOTION_TYPE_FIELD,
+ PIPE_MPEG12_MOTION_TYPE_FRAME,
+ PIPE_MPEG12_MOTION_TYPE_DUALPRIME,
+ PIPE_MPEG12_MOTION_TYPE_16x8
+};
+
+enum pipe_mpeg12_dct_type
+{
+ PIPE_MPEG12_DCT_TYPE_FIELD,
+ PIPE_MPEG12_DCT_TYPE_FRAME
+};
+
+struct pipe_macroblock
+{
+ enum pipe_video_codec codec;
+};
+
+struct pipe_mpeg12_macroblock
+{
+ struct pipe_macroblock base;
+
+ unsigned mbx;
+ unsigned mby;
+ enum pipe_mpeg12_macroblock_type mb_type;
+ enum pipe_mpeg12_motion_type mo_type;
+ enum pipe_mpeg12_dct_type dct_type;
+ signed pmv[2][2][2];
+ unsigned cbp;
+ void *blocks;
+};
+
+#if 0
+struct pipe_picture_desc
+{
+ enum pipe_video_format format;
+};
+
+struct pipe_mpeg12_picture_desc
+{
+ struct pipe_picture_desc base;
+
+ /* TODO: Use bitfields where possible? */
+ struct pipe_surface *forward_reference;
+ struct pipe_surface *backward_reference;
+ unsigned picture_coding_type;
+ unsigned fcode;
+ unsigned intra_dc_precision;
+ unsigned picture_structure;
+ unsigned top_field_first;
+ unsigned frame_pred_frame_dct;
+ unsigned concealment_motion_vectors;
+ unsigned q_scale_type;
+ unsigned intra_vlc_format;
+ unsigned alternate_scan;
+ unsigned full_pel_forward_vector;
+ unsigned full_pel_backward_vector;
+ struct pipe_buffer *intra_quantizer_matrix;
+ struct pipe_buffer *non_intra_quantizer_matrix;
+ struct pipe_buffer *chroma_intra_quantizer_matrix;
+ struct pipe_buffer *chroma_non_intra_quantizer_matrix;
+};
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_VIDEO_STATE_H */
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index 69e2d6b708..542ac56121 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -152,7 +152,6 @@ drm_takedown_shown_screen(_EGLDisplay *dpy, struct drm_screen *screen)
pipe_surface_reference(&screen->surface, NULL);
pipe_texture_reference(&screen->tex, NULL);
- pipe_buffer_reference(&screen->buffer, NULL);
screen->shown = 0;
}
@@ -250,8 +249,8 @@ drm_show_screen_surface_mesa(_EGLDriver *drv, _EGLDisplay *dpy,
drm_create_texture(dpy, scrn, mode->Width, mode->Height);
- if (!scrn->buffer)
- return EGL_FALSE;
+ if (!scrn->tex)
+ goto err_tex;
ret = drmModeAddFB(dev->drmFD,
scrn->front.width, scrn->front.height,
@@ -325,8 +324,8 @@ err_fb:
err_bo:
pipe_surface_reference(&scrn->surface, NULL);
pipe_texture_reference(&scrn->tex, NULL);
- pipe_buffer_reference(&scrn->buffer, NULL);
+err_tex:
return EGL_FALSE;
}
diff --git a/src/gallium/state_trackers/egl/egl_tracker.h b/src/gallium/state_trackers/egl/egl_tracker.h
index dd4730f957..f280748d65 100644
--- a/src/gallium/state_trackers/egl/egl_tracker.h
+++ b/src/gallium/state_trackers/egl/egl_tracker.h
@@ -94,7 +94,6 @@ struct drm_screen
* pipe
*/
- struct pipe_buffer *buffer;
struct pipe_texture *tex;
struct pipe_surface *surface;
diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
index 20d682de3f..b1683b891b 100644
--- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
+++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
@@ -51,6 +51,7 @@ static int vlResizeFrameBuffer
struct vlBasicCSC *basic_csc;
struct pipe_context *pipe;
struct pipe_texture template;
+ float clear_color[4];
assert(csc);
@@ -68,7 +69,12 @@ static int vlResizeFrameBuffer
basic_csc->viewport.translate[1] = 0;
basic_csc->viewport.translate[2] = 0;
basic_csc->viewport.translate[3] = 0;
-
+
+ clear_color[0] = 0.0f;
+ clear_color[1] = 0.0f;
+ clear_color[2] = 0.0f;
+ clear_color[3] = 0.0f;
+
if (basic_csc->framebuffer_tex)
{
pipe_surface_reference(&basic_csc->framebuffer.cbufs[0], NULL);
@@ -98,7 +104,7 @@ static int vlResizeFrameBuffer
/* Clear to black, in case video doesn't fill the entire window */
pipe->set_framebuffer_state(pipe, &basic_csc->framebuffer);
- pipe->clear(pipe, PIPE_CLEAR_COLOR, 0, 0.0f, 0);
+ pipe->clear(pipe, PIPE_CLEAR_COLOR, clear_color, 0.0f, 0);
return 0;
}
@@ -425,7 +431,7 @@ static int vlCreateVertexShader
*/
for (i = 0; i < 2; ++i)
{
- inst = vl_inst4(TGSI_OPCODE_MADD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1);
+ inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
}
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc
index ef4a4b2add..34d93e1df0 100644
--- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc
+++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc
@@ -615,7 +615,7 @@ static int vlCreateFragmentShaderFieldPMB
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* floor t3, t3 ; Get rid of fractional part */
- inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
+ inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* mul t3, t3, c1.y ; Multiply by 2 */
@@ -632,7 +632,7 @@ static int vlCreateFragmentShaderFieldPMB
/* TODO: Move to conditional tex fetch on t3 instead of lerp */
/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
- inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* add o0, t0, t1 ; Add ref and differential to form final output */
@@ -969,7 +969,7 @@ static int vlCreateFragmentShaderFrameBMB
}
/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
- inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -1116,7 +1116,7 @@ static int vlCreateFragmentShaderFieldBMB
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* floor t3, t3 ; Get rid of fractional part */
- inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
+ inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* mul t3, t3, c1.y ; Multiply by 2 */
@@ -1143,7 +1143,7 @@ static int vlCreateFragmentShaderFieldBMB
/* TODO: Move to conditional tex fetch on t3 instead of lerp */
/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
- inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/*
@@ -1158,11 +1158,11 @@ static int vlCreateFragmentShaderFieldBMB
/* TODO: Move to conditional tex fetch on t3 instead of lerp */
/* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
- inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
+ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
- inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript
index 69b88618ec..b05944a33b 100644
--- a/src/gallium/state_trackers/wgl/SConscript
+++ b/src/gallium/state_trackers/wgl/SConscript
@@ -18,20 +18,17 @@ if env['platform'] in ['windows']:
])
sources = [
- 'icd/stw_icd.c',
-
- 'wgl/stw_wgl.c',
-
- 'shared/stw_context.c',
- 'shared/stw_device.c',
- 'shared/stw_framebuffer.c',
- 'shared/stw_pixelformat.c',
- 'shared/stw_extensionsstring.c',
- 'shared/stw_extswapinterval.c',
- 'shared/stw_getprocaddress.c',
- 'shared/stw_extgallium.c',
- 'shared/stw_arbpixelformat.c',
- 'shared/stw_tls.c',
+ 'stw_context.c',
+ 'stw_device.c',
+ 'stw_ext_extensionsstring.c',
+ 'stw_ext_gallium.c',
+ 'stw_ext_pixelformat.c',
+ 'stw_ext_swapinterval.c',
+ 'stw_framebuffer.c',
+ 'stw_getprocaddress.c',
+ 'stw_pixelformat.c',
+ 'stw_tls.c',
+ 'stw_wgl.c',
]
wgl = env.ConvenienceLibrary(
diff --git a/src/gallium/state_trackers/wgl/opengl32.def b/src/gallium/state_trackers/wgl/opengl32.def
index 596417ed84..5daa6ddd41 100644
--- a/src/gallium/state_trackers/wgl/opengl32.def
+++ b/src/gallium/state_trackers/wgl/opengl32.def
@@ -376,6 +376,7 @@ EXPORTS
DrvDescribePixelFormat
DrvGetLayerPaletteEntries
DrvGetProcAddress
+ DrvPresentBuffers
DrvRealizeLayerPalette
DrvReleaseContext
DrvSetCallbackProcs
diff --git a/src/gallium/state_trackers/wgl/opengl32.mingw.def b/src/gallium/state_trackers/wgl/opengl32.mingw.def
index 1f03ea3b37..6ebb31a6f1 100644
--- a/src/gallium/state_trackers/wgl/opengl32.mingw.def
+++ b/src/gallium/state_trackers/wgl/opengl32.mingw.def
@@ -375,6 +375,7 @@ EXPORTS
DrvDescribePixelFormat = DrvDescribePixelFormat@16
DrvGetLayerPaletteEntries = DrvGetLayerPaletteEntries@20
DrvGetProcAddress = DrvGetProcAddress@4
+ DrvPresentBuffers = DrvPresentBuffers@8
DrvRealizeLayerPalette = DrvRealizeLayerPalette@12
DrvReleaseContext = DrvReleaseContext@4
DrvSetCallbackProcs = DrvSetCallbackProcs@8
diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c
deleted file mode 100644
index 4968ecc692..0000000000
--- a/src/gallium/state_trackers/wgl/shared/stw_context.c
+++ /dev/null
@@ -1,382 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include <windows.h>
-
-#include "main/mtypes.h"
-#include "main/context.h"
-#include "pipe/p_compiler.h"
-#include "pipe/p_context.h"
-#include "state_tracker/st_context.h"
-#include "state_tracker/st_public.h"
-
-#ifdef DEBUG
-#include "trace/tr_screen.h"
-#include "trace/tr_context.h"
-#endif
-
-#include "shared/stw_device.h"
-#include "shared/stw_winsys.h"
-#include "shared/stw_framebuffer.h"
-#include "shared/stw_pixelformat.h"
-#include "stw_public.h"
-#include "stw_context.h"
-#include "stw_tls.h"
-
-
-static INLINE struct stw_context *
-stw_context(GLcontext *glctx)
-{
- if(!glctx)
- return NULL;
- assert(glctx->DriverCtx);
- return (struct stw_context *)glctx->DriverCtx;
-}
-
-static INLINE struct stw_context *
-stw_current_context(void)
-{
- /* We must check if multiple threads are being used or GET_CURRENT_CONTEXT
- * might return the current context of the thread first seen. */
- _glapi_check_multithread();
-
- {
- GET_CURRENT_CONTEXT( glctx );
- return stw_context(glctx);
- }
-}
-
-BOOL
-stw_copy_context(
- UINT_PTR hglrcSrc,
- UINT_PTR hglrcDst,
- UINT mask )
-{
- struct stw_context *src;
- struct stw_context *dst;
- BOOL ret = FALSE;
-
- pipe_mutex_lock( stw_dev->ctx_mutex );
-
- src = stw_lookup_context_locked( hglrcSrc );
- dst = stw_lookup_context_locked( hglrcDst );
-
- if (src && dst) {
- /* FIXME */
- assert(0);
- (void) src;
- (void) dst;
- (void) mask;
- }
-
- pipe_mutex_unlock( stw_dev->ctx_mutex );
-
- return ret;
-}
-
-BOOL
-stw_share_lists(
- UINT_PTR hglrc1,
- UINT_PTR hglrc2 )
-{
- struct stw_context *ctx1;
- struct stw_context *ctx2;
- BOOL ret = FALSE;
-
- pipe_mutex_lock( stw_dev->ctx_mutex );
-
- ctx1 = stw_lookup_context_locked( hglrc1 );
- ctx2 = stw_lookup_context_locked( hglrc2 );
-
- if (ctx1 && ctx2 &&
- ctx1->iPixelFormat == ctx2->iPixelFormat) {
- ret = _mesa_share_state(ctx2->st->ctx, ctx1->st->ctx);
- }
-
- pipe_mutex_unlock( stw_dev->ctx_mutex );
-
- return ret;
-}
-
-static void
-stw_viewport(GLcontext * glctx, GLint x, GLint y,
- GLsizei width, GLsizei height)
-{
- struct stw_context *ctx = (struct stw_context *)glctx->DriverCtx;
- struct stw_framebuffer *fb;
-
- fb = stw_framebuffer_from_hdc( ctx->hdc );
- if(fb) {
- stw_framebuffer_update(fb);
- stw_framebuffer_release(fb);
- }
-}
-
-UINT_PTR
-stw_create_layer_context(
- HDC hdc,
- int iLayerPlane )
-{
- int iPixelFormat;
- const struct stw_pixelformat_info *pfi;
- GLvisual visual;
- struct stw_context *ctx = NULL;
- struct pipe_screen *screen = NULL;
- struct pipe_context *pipe = NULL;
-
- if(!stw_dev)
- return 0;
-
- if (iLayerPlane != 0)
- return 0;
-
- iPixelFormat = GetPixelFormat(hdc);
- if(!iPixelFormat)
- return 0;
-
- pfi = stw_pixelformat_get_info( iPixelFormat - 1 );
- stw_pixelformat_visual(&visual, pfi);
-
- ctx = CALLOC_STRUCT( stw_context );
- if (ctx == NULL)
- goto no_ctx;
-
- ctx->hdc = hdc;
- ctx->iPixelFormat = iPixelFormat;
-
- screen = stw_dev->screen;
-
-#ifdef DEBUG
- /* Unwrap screen */
- if(stw_dev->trace_running)
- screen = trace_screen(screen)->screen;
-#endif
-
- pipe = stw_dev->stw_winsys->create_context( screen );
- if (pipe == NULL)
- goto no_pipe;
-
-#ifdef DEBUG
- /* Wrap context */
- if(stw_dev->trace_running)
- pipe = trace_context_create(stw_dev->screen, pipe);
-#endif
-
- /* pass to stw_flush_frontbuffer as context_private */
- assert(!pipe->priv);
- pipe->priv = hdc;
-
- ctx->st = st_create_context( pipe, &visual, NULL );
- if (ctx->st == NULL)
- goto no_st_ctx;
-
- ctx->st->ctx->DriverCtx = ctx;
- ctx->st->ctx->Driver.Viewport = stw_viewport;
-
- pipe_mutex_lock( stw_dev->ctx_mutex );
- ctx->hglrc = handle_table_add(stw_dev->ctx_table, ctx);
- pipe_mutex_unlock( stw_dev->ctx_mutex );
- if (!ctx->hglrc)
- goto no_hglrc;
-
- return ctx->hglrc;
-
-no_hglrc:
- st_destroy_context(ctx->st);
- goto no_pipe; /* st_context_destroy already destroys pipe */
-no_st_ctx:
- pipe->destroy( pipe );
-no_pipe:
- FREE(ctx);
-no_ctx:
- return 0;
-}
-
-BOOL
-stw_delete_context(
- UINT_PTR hglrc )
-{
- struct stw_context *ctx ;
- BOOL ret = FALSE;
-
- if (!stw_dev)
- return FALSE;
-
- pipe_mutex_lock( stw_dev->ctx_mutex );
- ctx = stw_lookup_context_locked(hglrc);
- handle_table_remove(stw_dev->ctx_table, hglrc);
- pipe_mutex_unlock( stw_dev->ctx_mutex );
-
- if (ctx) {
- struct stw_context *curctx = stw_current_context();
-
- /* Unbind current if deleting current context. */
- if (curctx == ctx)
- st_make_current( NULL, NULL, NULL );
-
- st_destroy_context(ctx->st);
- FREE(ctx);
-
- ret = TRUE;
- }
-
- return ret;
-}
-
-BOOL
-stw_release_context(
- UINT_PTR hglrc )
-{
- struct stw_context *ctx;
-
- if (!stw_dev)
- return FALSE;
-
- pipe_mutex_lock( stw_dev->ctx_mutex );
- ctx = stw_lookup_context_locked( hglrc );
- pipe_mutex_unlock( stw_dev->ctx_mutex );
-
- if (!ctx)
- return FALSE;
-
- /* The expectation is that ctx is the same context which is
- * current for this thread. We should check that and return False
- * if not the case.
- */
- if (ctx != stw_current_context())
- return FALSE;
-
- if (stw_make_current( NULL, 0 ) == FALSE)
- return FALSE;
-
- return TRUE;
-}
-
-
-UINT_PTR
-stw_get_current_context( void )
-{
- struct stw_context *ctx;
-
- ctx = stw_current_context();
- if(!ctx)
- return 0;
-
- return ctx->hglrc;
-}
-
-HDC
-stw_get_current_dc( void )
-{
- struct stw_context *ctx;
-
- ctx = stw_current_context();
- if(!ctx)
- return NULL;
-
- return ctx->hdc;
-}
-
-BOOL
-stw_make_current(
- HDC hdc,
- UINT_PTR hglrc )
-{
- struct stw_context *curctx = NULL;
- struct stw_context *ctx = NULL;
- struct stw_framebuffer *fb = NULL;
-
- if (!stw_dev)
- goto fail;
-
- curctx = stw_current_context();
- if (curctx != NULL) {
- if (curctx->hglrc != hglrc)
- st_flush(curctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
-
- /* Return if already current. */
- if (curctx->hglrc == hglrc && curctx->hdc == hdc) {
- ctx = curctx;
- fb = stw_framebuffer_from_hdc( hdc );
- goto success;
- }
- }
-
- if (hdc == NULL || hglrc == 0) {
- return st_make_current( NULL, NULL, NULL );
- }
-
- pipe_mutex_lock( stw_dev->ctx_mutex );
- ctx = stw_lookup_context_locked( hglrc );
- pipe_mutex_unlock( stw_dev->ctx_mutex );
- if(!ctx)
- goto fail;
-
- fb = stw_framebuffer_from_hdc( hdc );
- if(!fb) {
- /* Applications should call SetPixelFormat before creating a context,
- * but not all do, and the opengl32 runtime seems to use a default pixel
- * format in some cases, so we must create a framebuffer for those here
- */
- int iPixelFormat = GetPixelFormat(hdc);
- if(iPixelFormat)
- fb = stw_framebuffer_create( hdc, iPixelFormat );
- if(!fb)
- goto fail;
- }
-
- if(fb->iPixelFormat != ctx->iPixelFormat)
- goto fail;
-
- /* Lazy allocation of the frame buffer */
- if(!stw_framebuffer_allocate(fb))
- goto fail;
-
- /* Bind the new framebuffer */
- ctx->hdc = hdc;
-
- /* pass to stw_flush_frontbuffer as context_private */
- ctx->st->pipe->priv = hdc;
-
- if(!st_make_current( ctx->st, fb->stfb, fb->stfb ))
- goto fail;
-
-success:
- assert(fb);
- if(fb) {
- stw_framebuffer_update(fb);
- stw_framebuffer_release(fb);
- }
-
- return TRUE;
-
-fail:
- if(fb)
- stw_framebuffer_release(fb);
- st_make_current( NULL, NULL, NULL );
- return FALSE;
-}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_public.h b/src/gallium/state_trackers/wgl/shared/stw_public.h
deleted file mode 100644
index 7fe9cfb356..0000000000
--- a/src/gallium/state_trackers/wgl/shared/stw_public.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef STW_PUBLIC_H
-#define STW_PUBLIC_H
-
-#include <windows.h>
-
-BOOL stw_copy_context( UINT_PTR hglrcSrc,
- UINT_PTR hglrcDst,
- UINT mask );
-
-UINT_PTR stw_create_layer_context( HDC hdc,
- int iLayerPlane );
-
-BOOL stw_share_lists( UINT_PTR hglrc1, UINT_PTR hglrc2 );
-
-BOOL stw_delete_context( UINT_PTR hglrc );
-
-BOOL
-stw_release_context( UINT_PTR dhglrc );
-
-UINT_PTR stw_get_current_context( void );
-
-HDC stw_get_current_dc( void );
-
-BOOL stw_make_current( HDC hdc, UINT_PTR hglrc );
-
-BOOL stw_swap_buffers( HDC hdc );
-
-BOOL
-stw_swap_layer_buffers( HDC hdc, UINT fuPlanes );
-
-PROC stw_get_proc_address( LPCSTR lpszProc );
-
-int stw_pixelformat_describe( HDC hdc,
- int iPixelFormat,
- UINT nBytes,
- LPPIXELFORMATDESCRIPTOR ppfd );
-
-int stw_pixelformat_get( HDC hdc );
-
-BOOL stw_pixelformat_set( HDC hdc,
- int iPixelFormat );
-
-int stw_pixelformat_choose( HDC hdc,
- CONST PIXELFORMATDESCRIPTOR *ppfd );
-
-#endif
diff --git a/src/gallium/state_trackers/wgl/icd/stw_icd.c b/src/gallium/state_trackers/wgl/stw_context.c
index 347f40aa06..f2f0264844 100644
--- a/src/gallium/state_trackers/wgl/icd/stw_icd.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -26,18 +26,49 @@
**************************************************************************/
#include <windows.h>
-#include <stdio.h>
-#include "GL/gl.h"
-
-#include "util/u_debug.h"
-#include "pipe/p_thread.h"
-
-#include "shared/stw_public.h"
-#include "icd/stw_icd.h"
+#include "main/mtypes.h"
+#include "main/context.h"
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_public.h"
+
+#ifdef DEBUG
+#include "trace/tr_screen.h"
+#include "trace/tr_context.h"
+#endif
+
+#include "stw_icd.h"
+#include "stw_device.h"
+#include "stw_winsys.h"
+#include "stw_framebuffer.h"
+#include "stw_pixelformat.h"
+#include "stw_context.h"
+#include "stw_tls.h"
+
+
+static INLINE struct stw_context *
+stw_context(GLcontext *glctx)
+{
+ if(!glctx)
+ return NULL;
+ assert(glctx->DriverCtx);
+ return (struct stw_context *)glctx->DriverCtx;
+}
-#define DBG 0
+static INLINE struct stw_context *
+stw_current_context(void)
+{
+ /* We must check if multiple threads are being used or GET_CURRENT_CONTEXT
+ * might return the current context of the thread first seen. */
+ _glapi_check_multithread();
+ {
+ GET_CURRENT_CONTEXT( glctx );
+ return stw_context(glctx);
+ }
+}
BOOL APIENTRY
DrvCopyContext(
@@ -45,24 +76,64 @@ DrvCopyContext(
DHGLRC dhrcDest,
UINT fuMask )
{
- return stw_copy_context(dhrcSource, dhrcDest, fuMask);
-}
+ struct stw_context *src;
+ struct stw_context *dst;
+ BOOL ret = FALSE;
+ pipe_mutex_lock( stw_dev->ctx_mutex );
+
+ src = stw_lookup_context_locked( dhrcSource );
+ dst = stw_lookup_context_locked( dhrcDest );
+
+ if (src && dst) {
+ /* FIXME */
+ assert(0);
+ (void) src;
+ (void) dst;
+ (void) fuMask;
+ }
-DHGLRC APIENTRY
-DrvCreateLayerContext(
- HDC hdc,
- INT iLayerPlane )
+ pipe_mutex_unlock( stw_dev->ctx_mutex );
+
+ return ret;
+}
+
+BOOL APIENTRY
+DrvShareLists(
+ DHGLRC dhglrc1,
+ DHGLRC dhglrc2 )
{
- DHGLRC r;
+ struct stw_context *ctx1;
+ struct stw_context *ctx2;
+ BOOL ret = FALSE;
+
+ pipe_mutex_lock( stw_dev->ctx_mutex );
- r = stw_create_layer_context( hdc, iLayerPlane );
+ ctx1 = stw_lookup_context_locked( dhglrc1 );
+ ctx2 = stw_lookup_context_locked( dhglrc2 );
+
+ if (ctx1 && ctx2 &&
+ ctx1->iPixelFormat == ctx2->iPixelFormat) {
+ ret = _mesa_share_state(ctx2->st->ctx, ctx1->st->ctx);
+ }
+
+ pipe_mutex_unlock( stw_dev->ctx_mutex );
- if (DBG)
- debug_printf( "%s( %p, %i ) = %u\n",
- __FUNCTION__, hdc, iLayerPlane, r );
+ return ret;
+}
+
+static void
+stw_viewport(GLcontext * glctx, GLint x, GLint y,
+ GLsizei width, GLsizei height)
+{
+ struct stw_context *ctx = (struct stw_context *)glctx->DriverCtx;
+ struct stw_framebuffer *fb;
- return r;
+ fb = stw_framebuffer_from_hdc( ctx->hdc );
+ if(fb) {
+ stw_framebuffer_update(fb);
+ stw_framebuffer_release(fb);
+ }
}
DHGLRC APIENTRY
@@ -72,114 +143,253 @@ DrvCreateContext(
return DrvCreateLayerContext( hdc, 0 );
}
-BOOL APIENTRY
-DrvDeleteContext(
- DHGLRC dhglrc )
+DHGLRC APIENTRY
+DrvCreateLayerContext(
+ HDC hdc,
+ INT iLayerPlane )
{
- BOOL r;
+ int iPixelFormat;
+ const struct stw_pixelformat_info *pfi;
+ GLvisual visual;
+ struct stw_context *ctx = NULL;
+ struct pipe_screen *screen = NULL;
+ struct pipe_context *pipe = NULL;
- r = stw_delete_context( dhglrc );
+ if(!stw_dev)
+ return 0;
- if (DBG)
- debug_printf( "%s( %u ) = %u\n",
- __FUNCTION__, dhglrc, r );
+ if (iLayerPlane != 0)
+ return 0;
+
+ iPixelFormat = GetPixelFormat(hdc);
+ if(!iPixelFormat)
+ return 0;
- return r;
+ pfi = stw_pixelformat_get_info( iPixelFormat - 1 );
+ stw_pixelformat_visual(&visual, pfi);
+
+ ctx = CALLOC_STRUCT( stw_context );
+ if (ctx == NULL)
+ goto no_ctx;
+
+ ctx->hdc = hdc;
+ ctx->iPixelFormat = iPixelFormat;
+
+ screen = stw_dev->screen;
+
+#ifdef DEBUG
+ /* Unwrap screen */
+ if(stw_dev->trace_running)
+ screen = trace_screen(screen)->screen;
+#endif
+
+ pipe = stw_dev->stw_winsys->create_context( screen );
+ if (pipe == NULL)
+ goto no_pipe;
+
+#ifdef DEBUG
+ /* Wrap context */
+ if(stw_dev->trace_running)
+ pipe = trace_context_create(stw_dev->screen, pipe);
+#endif
+
+ /* pass to stw_flush_frontbuffer as context_private */
+ assert(!pipe->priv);
+ pipe->priv = hdc;
+
+ ctx->st = st_create_context( pipe, &visual, NULL );
+ if (ctx->st == NULL)
+ goto no_st_ctx;
+
+ ctx->st->ctx->DriverCtx = ctx;
+ ctx->st->ctx->Driver.Viewport = stw_viewport;
+
+ pipe_mutex_lock( stw_dev->ctx_mutex );
+ ctx->dhglrc = handle_table_add(stw_dev->ctx_table, ctx);
+ pipe_mutex_unlock( stw_dev->ctx_mutex );
+ if (!ctx->dhglrc)
+ goto no_hglrc;
+
+ return ctx->dhglrc;
+
+no_hglrc:
+ st_destroy_context(ctx->st);
+ goto no_pipe; /* st_context_destroy already destroys pipe */
+no_st_ctx:
+ pipe->destroy( pipe );
+no_pipe:
+ FREE(ctx);
+no_ctx:
+ return 0;
}
BOOL APIENTRY
-DrvDescribeLayerPlane(
- HDC hdc,
- INT iPixelFormat,
- INT iLayerPlane,
- UINT nBytes,
- LPLAYERPLANEDESCRIPTOR plpd )
+DrvDeleteContext(
+ DHGLRC dhglrc )
{
- if (DBG)
- debug_printf( "%s\n", __FUNCTION__ );
+ struct stw_context *ctx ;
+ BOOL ret = FALSE;
+
+ if (!stw_dev)
+ return FALSE;
- return FALSE;
-}
+ pipe_mutex_lock( stw_dev->ctx_mutex );
+ ctx = stw_lookup_context_locked(dhglrc);
+ handle_table_remove(stw_dev->ctx_table, dhglrc);
+ pipe_mutex_unlock( stw_dev->ctx_mutex );
-LONG APIENTRY
-DrvDescribePixelFormat(
- HDC hdc,
- INT iPixelFormat,
- ULONG cjpfd,
- PIXELFORMATDESCRIPTOR *ppfd )
-{
- LONG r;
+ if (ctx) {
+ struct stw_context *curctx = stw_current_context();
+
+ /* Unbind current if deleting current context. */
+ if (curctx == ctx)
+ st_make_current( NULL, NULL, NULL );
- r = stw_pixelformat_describe( hdc, iPixelFormat, cjpfd, ppfd );
+ st_destroy_context(ctx->st);
+ FREE(ctx);
- if (DBG)
- debug_printf( "%s( %p, %d, %u, %p ) = %d\n",
- __FUNCTION__, hdc, iPixelFormat, cjpfd, ppfd, r );
+ ret = TRUE;
+ }
- return r;
+ return ret;
}
-int APIENTRY
-DrvGetLayerPaletteEntries(
- HDC hdc,
- INT iLayerPlane,
- INT iStart,
- INT cEntries,
- COLORREF *pcr )
+BOOL APIENTRY
+DrvReleaseContext(
+ DHGLRC dhglrc )
{
- if (DBG)
- debug_printf( "%s\n", __FUNCTION__ );
+ struct stw_context *ctx;
- return 0;
-}
+ if (!stw_dev)
+ return FALSE;
-PROC APIENTRY
-DrvGetProcAddress(
- LPCSTR lpszProc )
-{
- PROC r;
+ pipe_mutex_lock( stw_dev->ctx_mutex );
+ ctx = stw_lookup_context_locked( dhglrc );
+ pipe_mutex_unlock( stw_dev->ctx_mutex );
- r = stw_get_proc_address( lpszProc );
+ if (!ctx)
+ return FALSE;
+
+ /* The expectation is that ctx is the same context which is
+ * current for this thread. We should check that and return False
+ * if not the case.
+ */
+ if (ctx != stw_current_context())
+ return FALSE;
- if (DBG)
- debug_printf( "%s( \"%s\" ) = %p\n", __FUNCTION__, lpszProc, r );
+ if (stw_make_current( NULL, 0 ) == FALSE)
+ return FALSE;
- return r;
+ return TRUE;
}
-BOOL APIENTRY
-DrvRealizeLayerPalette(
- HDC hdc,
- INT iLayerPlane,
- BOOL bRealize )
+
+DHGLRC
+stw_get_current_context( void )
{
- if (DBG)
- debug_printf( "%s\n", __FUNCTION__ );
+ struct stw_context *ctx;
- return FALSE;
+ ctx = stw_current_context();
+ if(!ctx)
+ return 0;
+
+ return ctx->dhglrc;
}
-BOOL APIENTRY
-DrvReleaseContext(
- DHGLRC dhglrc )
+HDC
+stw_get_current_dc( void )
{
- return stw_release_context(dhglrc);
+ struct stw_context *ctx;
+
+ ctx = stw_current_context();
+ if(!ctx)
+ return NULL;
+
+ return ctx->hdc;
}
-void APIENTRY
-DrvSetCallbackProcs(
- INT nProcs,
- PROC *pProcs )
+BOOL
+stw_make_current(
+ HDC hdc,
+ DHGLRC dhglrc )
{
- if (DBG)
- debug_printf( "%s( %d, %p )\n", __FUNCTION__, nProcs, pProcs );
+ struct stw_context *curctx = NULL;
+ struct stw_context *ctx = NULL;
+ struct stw_framebuffer *fb = NULL;
- return;
-}
+ if (!stw_dev)
+ goto fail;
+
+ curctx = stw_current_context();
+ if (curctx != NULL) {
+ if (curctx->dhglrc != dhglrc)
+ st_flush(curctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+ /* Return if already current. */
+ if (curctx->dhglrc == dhglrc && curctx->hdc == hdc) {
+ ctx = curctx;
+ fb = stw_framebuffer_from_hdc( hdc );
+ goto success;
+ }
+ }
+
+ if (hdc == NULL || dhglrc == 0) {
+ return st_make_current( NULL, NULL, NULL );
+ }
+
+ pipe_mutex_lock( stw_dev->ctx_mutex );
+ ctx = stw_lookup_context_locked( dhglrc );
+ pipe_mutex_unlock( stw_dev->ctx_mutex );
+ if(!ctx)
+ goto fail;
+
+ fb = stw_framebuffer_from_hdc( hdc );
+ if(!fb) {
+ /* Applications should call SetPixelFormat before creating a context,
+ * but not all do, and the opengl32 runtime seems to use a default pixel
+ * format in some cases, so we must create a framebuffer for those here
+ */
+ int iPixelFormat = GetPixelFormat(hdc);
+ if(iPixelFormat)
+ fb = stw_framebuffer_create( hdc, iPixelFormat );
+ if(!fb)
+ goto fail;
+ }
+
+ if(fb->iPixelFormat != ctx->iPixelFormat)
+ goto fail;
+
+ /* Lazy allocation of the frame buffer */
+ if(!stw_framebuffer_allocate(fb))
+ goto fail;
+
+ /* Bind the new framebuffer */
+ ctx->hdc = hdc;
+
+ /* pass to stw_flush_frontbuffer as context_private */
+ ctx->st->pipe->priv = hdc;
+
+ if(!st_make_current( ctx->st, fb->stfb, fb->stfb ))
+ goto fail;
+
+success:
+ assert(fb);
+ if(fb) {
+ stw_framebuffer_update(fb);
+ stw_framebuffer_release(fb);
+ }
+
+ return TRUE;
+fail:
+ if(fb)
+ stw_framebuffer_release(fb);
+ st_make_current( NULL, NULL, NULL );
+ return FALSE;
+}
/**
- * Although WGL allows different dispatch entrypoints per context
+ * Although WGL allows different dispatch entrypoints per context
*/
static const GLCLTPROCTABLE cpt =
{
@@ -524,7 +734,6 @@ static const GLCLTPROCTABLE cpt =
}
};
-
PGLCLTPROCTABLE APIENTRY
DrvSetContext(
HDC hdc,
@@ -532,86 +741,9 @@ DrvSetContext(
PFN_SETPROCTABLE pfnSetProcTable )
{
PGLCLTPROCTABLE r = (PGLCLTPROCTABLE)&cpt;
-
+
if (!stw_make_current( hdc, dhglrc ))
r = NULL;
-
- if (DBG)
- debug_printf( "%s( 0x%p, %u, 0x%p ) = %p\n",
- __FUNCTION__, hdc, dhglrc, pfnSetProcTable, r );
-
- return r;
-}
-
-int APIENTRY
-DrvSetLayerPaletteEntries(
- HDC hdc,
- INT iLayerPlane,
- INT iStart,
- INT cEntries,
- CONST COLORREF *pcr )
-{
- if (DBG)
- debug_printf( "%s\n", __FUNCTION__ );
-
- return 0;
-}
-
-BOOL APIENTRY
-DrvSetPixelFormat(
- HDC hdc,
- LONG iPixelFormat )
-{
- BOOL r;
-
- r = stw_pixelformat_set( hdc, iPixelFormat );
-
- if (DBG)
- debug_printf( "%s( %p, %d ) = %s\n", __FUNCTION__, hdc, iPixelFormat, r ? "TRUE" : "FALSE" );
return r;
}
-
-BOOL APIENTRY
-DrvShareLists(
- DHGLRC dhglrc1,
- DHGLRC dhglrc2 )
-{
- if (DBG)
- debug_printf( "%s\n", __FUNCTION__ );
-
- return stw_share_lists(dhglrc1, dhglrc2);
-}
-
-BOOL APIENTRY
-DrvSwapBuffers(
- HDC hdc )
-{
- if (DBG)
- debug_printf( "%s( %p )\n", __FUNCTION__, hdc );
-
- return stw_swap_buffers( hdc );
-}
-
-BOOL APIENTRY
-DrvSwapLayerBuffers(
- HDC hdc,
- UINT fuPlanes )
-{
- if (DBG)
- debug_printf( "%s\n", __FUNCTION__ );
-
- return stw_swap_layer_buffers( hdc, fuPlanes );
-}
-
-BOOL APIENTRY
-DrvValidateVersion(
- ULONG ulVersion )
-{
- if (DBG)
- debug_printf( "%s( %u )\n", __FUNCTION__, ulVersion );
-
- /* TODO: get the expected version from the winsys */
-
- return ulVersion == 1;
-}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.h b/src/gallium/state_trackers/wgl/stw_context.h
index 166471de5e..256c27e21e 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_context.h
+++ b/src/gallium/state_trackers/wgl/stw_context.h
@@ -35,9 +35,15 @@ struct st_context;
struct stw_context
{
struct st_context *st;
- UINT_PTR hglrc;
+ DHGLRC dhglrc;
int iPixelFormat;
HDC hdc;
};
+DHGLRC stw_get_current_context( void );
+
+HDC stw_get_current_dc( void );
+
+BOOL stw_make_current( HDC hdc, DHGLRC dhglrc );
+
#endif /* STW_CONTEXT_H */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c
index 0b6954915a..985b8f0456 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_device.c
+++ b/src/gallium/state_trackers/wgl/stw_device.c
@@ -29,6 +29,7 @@
#include "glapi/glthread.h"
#include "util/u_debug.h"
+#include "util/u_math.h"
#include "pipe/p_screen.h"
#include "state_tracker/st_public.h"
@@ -37,12 +38,12 @@
#include "trace/tr_texture.h"
#endif
-#include "shared/stw_device.h"
-#include "shared/stw_winsys.h"
-#include "shared/stw_pixelformat.h"
-#include "shared/stw_public.h"
-#include "shared/stw_tls.h"
-#include "shared/stw_framebuffer.h"
+#include "stw_device.h"
+#include "stw_winsys.h"
+#include "stw_pixelformat.h"
+#include "stw_icd.h"
+#include "stw_tls.h"
+#include "stw_framebuffer.h"
#ifdef WIN32_THREADS
extern _glthread_Mutex OneTimeLock;
@@ -62,38 +63,28 @@ stw_flush_frontbuffer(struct pipe_screen *screen,
struct pipe_surface *surface,
void *context_private )
{
- const struct stw_winsys *stw_winsys = stw_dev->stw_winsys;
HDC hdc = (HDC)context_private;
struct stw_framebuffer *fb;
fb = stw_framebuffer_from_hdc( hdc );
- /* fb can be NULL if window was destroyed already */
- if (fb) {
+ if (!fb) {
+ /* fb can be NULL if window was destroyed already */
+ return;
+ }
+
#if DEBUG
- {
- struct pipe_surface *surface2;
-
- if(!st_get_framebuffer_surface( fb->stfb, ST_SURFACE_FRONT_LEFT, &surface2 ))
- assert(0);
- else
- assert(surface2 == surface);
- }
-#endif
+ {
+ /* ensure that a random surface was not passed to us */
+ struct pipe_surface *surface2;
-#ifdef DEBUG
- if(stw_dev->trace_running) {
- screen = trace_screen(screen)->screen;
- surface = trace_surface(surface)->surface;
- }
-#endif
- }
-
- stw_winsys->flush_frontbuffer(screen, surface, hdc);
-
- if(fb) {
- stw_framebuffer_update(fb);
- stw_framebuffer_release(fb);
+ if(!st_get_framebuffer_surface( fb->stfb, ST_SURFACE_FRONT_LEFT, &surface2 ))
+ assert(0);
+ else
+ assert(surface2 == surface);
}
+#endif
+
+ stw_framebuffer_present_locked(hdc, fb, ST_SURFACE_FRONT_LEFT);
}
@@ -126,6 +117,9 @@ stw_init(const struct stw_winsys *stw_winsys)
if(!screen)
goto error1;
+ if(stw_winsys->get_adapter_luid)
+ stw_winsys->get_adapter_luid(screen, &stw_dev->AdapterLuid);
+
#ifdef DEBUG
stw_dev->screen = trace_screen_create(screen);
stw_dev->trace_running = stw_dev->screen != screen ? TRUE : FALSE;
@@ -182,7 +176,7 @@ stw_cleanup(void)
/* Ensure all contexts are destroyed */
i = handle_table_get_first_handle(stw_dev->ctx_table);
while (i) {
- stw_delete_context(i);
+ DrvDeleteContext(i);
i = handle_table_get_next_handle(stw_dev->ctx_table, i);
}
handle_table_destroy(stw_dev->ctx_table);
@@ -212,7 +206,7 @@ stw_cleanup(void)
struct stw_context *
-stw_lookup_context_locked( UINT_PTR dhglrc )
+stw_lookup_context_locked( DHGLRC dhglrc )
{
if (dhglrc == 0)
return NULL;
@@ -223,3 +217,28 @@ stw_lookup_context_locked( UINT_PTR dhglrc )
return (struct stw_context *) handle_table_get(stw_dev->ctx_table, dhglrc);
}
+
+void APIENTRY
+DrvSetCallbackProcs(
+ INT nProcs,
+ PROC *pProcs )
+{
+ size_t size;
+
+ if (stw_dev == NULL)
+ return;
+
+ size = MIN2(nProcs * sizeof *pProcs, sizeof stw_dev->callbacks);
+ memcpy(&stw_dev->callbacks, pProcs, size);
+
+ return;
+}
+
+
+BOOL APIENTRY
+DrvValidateVersion(
+ ULONG ulVersion )
+{
+ /* TODO: get the expected version from the winsys */
+ return ulVersion == 1;
+}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.h b/src/gallium/state_trackers/wgl/stw_device.h
index e1bb9518dd..0bf3b0da82 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_device.h
+++ b/src/gallium/state_trackers/wgl/stw_device.h
@@ -29,11 +29,10 @@
#define STW_DEVICE_H_
-#include <windows.h>
-
#include "pipe/p_compiler.h"
#include "pipe/p_thread.h"
#include "util/u_handle_table.h"
+#include "stw_icd.h"
#include "stw_pixelformat.h"
@@ -53,10 +52,14 @@ struct stw_device
boolean trace_running;
#endif
+ LUID AdapterLuid;
+
struct stw_pixelformat_info pixelformats[STW_MAX_PIXELFORMATS];
unsigned pixelformat_count;
unsigned pixelformat_extended_count;
+ GLCALLBACKTABLE callbacks;
+
pipe_mutex ctx_mutex;
struct handle_table *ctx_table;
@@ -69,7 +72,7 @@ struct stw_device
};
struct stw_context *
-stw_lookup_context_locked( UINT_PTR hglrc );
+stw_lookup_context_locked( DHGLRC hglrc );
extern struct stw_device *stw_dev;
diff --git a/src/gallium/state_trackers/wgl/shared/stw_extensionsstring.c b/src/gallium/state_trackers/wgl/stw_ext_extensionsstring.c
index 62c859e1f9..62c859e1f9 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_extensionsstring.c
+++ b/src/gallium/state_trackers/wgl/stw_ext_extensionsstring.c
diff --git a/src/gallium/state_trackers/wgl/shared/stw_extgallium.c b/src/gallium/state_trackers/wgl/stw_ext_gallium.c
index fc22737d7e..fb30ec5dba 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_extgallium.c
+++ b/src/gallium/state_trackers/wgl/stw_ext_gallium.c
@@ -27,9 +27,9 @@
#include "pipe/p_screen.h"
-#include "stw_public.h"
#include "stw_device.h"
#include "stw_winsys.h"
+#include "stw_ext_gallium.h"
#ifdef DEBUG
#include "trace/tr_screen.h"
diff --git a/src/gallium/state_trackers/wgl/shared/stw_extgallium.h b/src/gallium/state_trackers/wgl/stw_ext_gallium.h
index cc35f2bb7f..cc35f2bb7f 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_extgallium.h
+++ b/src/gallium/state_trackers/wgl/stw_ext_gallium.h
diff --git a/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c
index 0e2d407699..8a9995aba8 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c
+++ b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c
@@ -43,7 +43,6 @@
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
-#include "stw_public.h"
#include "stw_pixelformat.h"
diff --git a/src/gallium/state_trackers/wgl/shared/stw_extswapinterval.c b/src/gallium/state_trackers/wgl/stw_ext_swapinterval.c
index 9eac6a1d09..9eac6a1d09 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_extswapinterval.c
+++ b/src/gallium/state_trackers/wgl/stw_ext_swapinterval.c
diff --git a/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index b8956bb550..8a3e11b6b4 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ *
+ * Copyright 2008-2009 Vmware, Inc.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,19 +10,19 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
#include <windows.h>
@@ -38,9 +38,9 @@
#include "trace/tr_texture.h"
#endif
+#include "stw_icd.h"
#include "stw_framebuffer.h"
#include "stw_device.h"
-#include "stw_public.h"
#include "stw_winsys.h"
#include "stw_tls.h"
@@ -83,6 +83,9 @@ stw_framebuffer_destroy_locked(
*link = fb->next;
fb->next = NULL;
+ if(fb->shared_surface)
+ stw_dev->stw_winsys->shared_surface_close(stw_dev->screen, fb->shared_surface);
+
st_unreference_framebuffer(fb->stfb);
pipe_mutex_unlock( fb->mutex );
@@ -106,13 +109,18 @@ static INLINE void
stw_framebuffer_get_size( struct stw_framebuffer *fb )
{
unsigned width, height;
- RECT rect;
+ RECT client_rect;
+ RECT window_rect;
+ POINT client_pos;
assert(fb->hWnd);
- GetClientRect( fb->hWnd, &rect );
- width = rect.right - rect.left;
- height = rect.bottom - rect.top;
+ /* Get the client area size. */
+ GetClientRect( fb->hWnd, &client_rect );
+ assert(client_rect.left == 0);
+ assert(client_rect.top == 0);
+ width = client_rect.right - client_rect.left;
+ height = client_rect.bottom - client_rect.top;
if(width < 1)
width = 1;
@@ -124,6 +132,31 @@ stw_framebuffer_get_size( struct stw_framebuffer *fb )
fb->width = width;
fb->height = height;
}
+
+ client_pos.x = 0;
+ client_pos.y = 0;
+ ClientToScreen(fb->hWnd, &client_pos);
+
+ GetWindowRect(fb->hWnd, &window_rect);
+
+ fb->client_rect.left = client_pos.x - window_rect.left;
+ fb->client_rect.top = client_pos.y - window_rect.top;
+ fb->client_rect.right = fb->client_rect.left + fb->width;
+ fb->client_rect.bottom = fb->client_rect.top + fb->height;
+
+#if 0
+ debug_printf("\n");
+ debug_printf("%s: client_position = (%i, %i)\n",
+ __FUNCTION__, client_pos.x, client_pos.y);
+ debug_printf("%s: window_rect = (%i, %i) - (%i, %i)\n",
+ __FUNCTION__,
+ window_rect.left, window_rect.top,
+ window_rect.right, window_rect.bottom);
+ debug_printf("%s: client_rect = (%i, %i) - (%i, %i)\n",
+ __FUNCTION__,
+ fb->client_rect.left, fb->client_rect.top,
+ fb->client_rect.right, fb->client_rect.bottom);
+#endif
}
@@ -155,6 +188,7 @@ stw_call_window_proc(
* can be masked out by the application. */
LPWINDOWPOS lpWindowPos = (LPWINDOWPOS)pParams->lParam;
if((lpWindowPos->flags & SWP_SHOWWINDOW) ||
+ !(lpWindowPos->flags & SWP_NOMOVE) ||
!(lpWindowPos->flags & SWP_NOSIZE)) {
fb = stw_framebuffer_from_hwnd( pParams->hwnd );
if(fb) {
@@ -379,10 +413,10 @@ stw_framebuffer_from_hwnd(
}
-BOOL
-stw_pixelformat_set(
+BOOL APIENTRY
+DrvSetPixelFormat(
HDC hdc,
- int iPixelFormat )
+ LONG iPixelFormat )
{
uint count;
uint index;
@@ -435,35 +469,24 @@ stw_pixelformat_get(
}
-BOOL
-stw_swap_buffers(
- HDC hdc )
+BOOL APIENTRY
+DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data)
{
struct stw_framebuffer *fb;
struct pipe_screen *screen;
struct pipe_surface *surface;
+ unsigned surface_index;
+ BOOL ret = FALSE;
fb = stw_framebuffer_from_hdc( hdc );
if (fb == NULL)
return FALSE;
- if (!(fb->pfi->pfd.dwFlags & PFD_DOUBLEBUFFER)) {
- stw_framebuffer_release(fb);
- return TRUE;
- }
-
- /* If we're swapping the buffer associated with the current context
- * we have to flush any pending rendering commands first.
- */
- st_notify_swapbuffers( fb->stfb );
-
screen = stw_dev->screen;
-
- if(!st_get_framebuffer_surface( fb->stfb, ST_SURFACE_BACK_LEFT, &surface )) {
- /* FIXME: this shouldn't happen, but does on glean */
- stw_framebuffer_release(fb);
- return FALSE;
- }
+
+ surface_index = (unsigned)(uintptr_t)data->pPrivateData;
+ if(!st_get_framebuffer_surface( fb->stfb, surface_index, &surface ))
+ goto fail;
#ifdef DEBUG
if(stw_dev->trace_running) {
@@ -472,22 +495,127 @@ stw_swap_buffers(
}
#endif
- stw_dev->stw_winsys->flush_frontbuffer( screen, surface, hdc );
-
+ if(data->hSharedSurface != fb->hSharedSurface) {
+ if(fb->shared_surface) {
+ stw_dev->stw_winsys->shared_surface_close(screen, fb->shared_surface);
+ fb->shared_surface = NULL;
+ }
+
+ fb->hSharedSurface = data->hSharedSurface;
+
+ if(data->hSharedSurface &&
+ stw_dev->stw_winsys->shared_surface_open) {
+ fb->shared_surface = stw_dev->stw_winsys->shared_surface_open(screen, fb->hSharedSurface);
+ }
+ }
+
+ if(fb->shared_surface) {
+ stw_dev->stw_winsys->compose(screen,
+ surface,
+ fb->shared_surface,
+ &fb->client_rect,
+ data->PresentHistoryToken);
+ }
+ else {
+ stw_dev->stw_winsys->present( screen, surface, hdc );
+ }
+
+ ret = TRUE;
+
+fail:
+
stw_framebuffer_update(fb);
+
stw_framebuffer_release(fb);
-
- return TRUE;
+
+ return ret;
}
+/**
+ * Queue a composition.
+ *
+ * It will drop the lock on success.
+ */
BOOL
-stw_swap_layer_buffers(
+stw_framebuffer_present_locked(HDC hdc,
+ struct stw_framebuffer *fb,
+ unsigned surface_index)
+{
+ if(stw_dev->callbacks.wglCbPresentBuffers &&
+ stw_dev->stw_winsys->compose) {
+ GLCBPRESENTBUFFERSDATA data;
+
+ memset(&data, 0, sizeof data);
+ data.magic1 = 2;
+ data.magic2 = 0;
+ data.AdapterLuid = stw_dev->AdapterLuid;
+ data.rect = fb->client_rect;
+ data.pPrivateData = (void *)(uintptr_t)surface_index;
+
+ stw_framebuffer_release(fb);
+
+ return stw_dev->callbacks.wglCbPresentBuffers(hdc, &data);
+ }
+ else {
+ struct pipe_screen *screen = stw_dev->screen;
+ struct pipe_surface *surface;
+
+ if(!st_get_framebuffer_surface( fb->stfb, surface_index, &surface )) {
+ /* FIXME: this shouldn't happen, but does on glean */
+ stw_framebuffer_release(fb);
+ return FALSE;
+ }
+
+#ifdef DEBUG
+ if(stw_dev->trace_running) {
+ screen = trace_screen(screen)->screen;
+ surface = trace_surface(surface)->surface;
+ }
+#endif
+
+ stw_dev->stw_winsys->present( screen, surface, hdc );
+
+ stw_framebuffer_update(fb);
+
+ stw_framebuffer_release(fb);
+
+ return TRUE;
+ }
+}
+
+
+BOOL APIENTRY
+DrvSwapBuffers(
+ HDC hdc )
+{
+ struct stw_framebuffer *fb;
+
+ fb = stw_framebuffer_from_hdc( hdc );
+ if (fb == NULL)
+ return FALSE;
+
+ if (!(fb->pfi->pfd.dwFlags & PFD_DOUBLEBUFFER)) {
+ stw_framebuffer_release(fb);
+ return TRUE;
+ }
+
+ /* If we're swapping the buffer associated with the current context
+ * we have to flush any pending rendering commands first.
+ */
+ st_notify_swapbuffers( fb->stfb );
+
+ return stw_framebuffer_present_locked(hdc, fb, ST_SURFACE_BACK_LEFT);
+}
+
+
+BOOL APIENTRY
+DrvSwapLayerBuffers(
HDC hdc,
UINT fuPlanes )
{
if(fuPlanes & WGL_SWAP_MAIN_PLANE)
- return stw_swap_buffers(hdc);
+ return DrvSwapBuffers(hdc);
return FALSE;
}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h
index 13d29f37e4..5afbe74908 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_framebuffer.h
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h
@@ -73,9 +73,20 @@ struct stw_framebuffer
/* FIXME: Make this work for multiple contexts bound to the same framebuffer */
boolean must_resize;
+
unsigned width;
unsigned height;
+ /**
+ * Client area rectangle, relative to the window upper-left corner.
+ *
+ * @sa GLCBPRESENTBUFFERSDATA::rect.
+ */
+ RECT client_rect;
+
+ HANDLE hSharedSurface;
+ struct stw_shared_surface *shared_surface;
+
/**
* This is protected by stw_device::fb_mutex, not the mutex above.
*
@@ -126,6 +137,11 @@ BOOL
stw_framebuffer_allocate(
struct stw_framebuffer *fb );
+BOOL
+stw_framebuffer_present_locked(HDC hdc,
+ struct stw_framebuffer *fb,
+ unsigned surface_index);
+
void
stw_framebuffer_update(
struct stw_framebuffer *fb);
diff --git a/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c b/src/gallium/state_trackers/wgl/stw_getprocaddress.c
index 879ced925a..8875dc22f3 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c
+++ b/src/gallium/state_trackers/wgl/stw_getprocaddress.c
@@ -33,8 +33,7 @@
#include <GL/wglext.h>
#include "glapi/glapi.h"
-#include "stw_public.h"
-#include "stw_extgallium.h"
+#include "stw_ext_gallium.h"
struct stw_extension_entry
{
@@ -68,8 +67,8 @@ static const struct stw_extension_entry stw_extension_entries[] = {
{ NULL, NULL }
};
-PROC
-stw_get_proc_address(
+PROC APIENTRY
+DrvGetProcAddress(
LPCSTR lpszProc )
{
const struct stw_extension_entry *entry;
diff --git a/src/gallium/state_trackers/wgl/icd/stw_icd.h b/src/gallium/state_trackers/wgl/stw_icd.h
index cbc1a66548..02eb543fef 100644
--- a/src/gallium/state_trackers/wgl/icd/stw_icd.h
+++ b/src/gallium/state_trackers/wgl/stw_icd.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2008-2009 Vmware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -388,6 +388,113 @@ typedef struct _GLCLTPROCTABLE
typedef VOID (APIENTRY * PFN_SETPROCTABLE)(PGLCLTPROCTABLE);
+/**
+ * Presentation data passed to opengl32!wglCbPresentBuffers.
+ *
+ * Pure software drivers don't need to worry about this -- if they stick to the
+ * GDI API then will integrate with the Desktop Window Manager (DWM) without
+ * problems. Hardware drivers, however, cannot present directly to the primary
+ * surface while the DWM is active, as DWM gets exclusive access to the primary
+ * surface.
+ *
+ * Proper DWM integration requires:
+ * - advertise the PFD_SUPPORT_COMPOSITION flag
+ * - redirect glFlush/glfinish/wglSwapBuffers into a surface shared with the
+ * DWM process.
+ *
+ * @sa http://www.opengl.org/pipeline/article/vol003_7/
+ * @sa http://blogs.msdn.com/greg_schechter/archive/2006/05/02/588934.aspx
+ */
+typedef struct _GLCBPRESENTBUFFERSDATA
+{
+ /**
+ * wglCbPresentBuffers enforces this to be 2.
+ */
+ DWORD magic1;
+
+ /**
+ * wglCbPresentBuffers enforces to be 0 or 1, but it is most commonly
+ * set to 0.
+ */
+ DWORD magic2;
+
+ /**
+ * Locally unique identifier (LUID) of the graphics adapter.
+ *
+ * This should contain the value returned by D3DKMTOpenAdapterFromHdc. It
+ * is passed to dwmapi!DwmpDxGetWindowSharedSurface in order to obtain
+ * the shared surface handle for the bound drawable (window).
+ *
+ * @sa http://msdn.microsoft.com/en-us/library/ms799177.aspx
+ */
+ LUID AdapterLuid;
+
+ /**
+ * This is passed unmodified to DrvPresentBuffers
+ */
+ LPVOID pPrivateData;
+
+ /**
+ * Client area rectangle to update, relative to the window upper-left corner.
+ */
+ RECT rect;
+} GLCBPRESENTBUFFERSDATA, *PGLCBPRESENTBUFFERSDATA;
+
+/**
+ * Callbacks supplied to DrvSetCallbackProcs by the OpenGL runtime.
+ *
+ * Pointers to several callback functions in opengl32.dll.
+ */
+typedef struct _GLCALLBACKTABLE
+{
+ /** Unused */
+ PROC wglCbSetCurrentValue;
+
+ /** Unused */
+ PROC wglCbGetCurrentValue;
+
+ /** Unused */
+ PROC wglCbGetDhglrc;
+
+ /** Unused */
+ PROC wglCbGetDdHandle;
+
+ /**
+ * Queue a present composition.
+ *
+ * Makes the runtime call DrvPresentBuffers with the composition information.
+ */
+ BOOL (APIENTRY *wglCbPresentBuffers)(HDC hdc, PGLCBPRESENTBUFFERSDATA data);
+
+} GLCALLBACKTABLE;
+
+typedef struct _GLPRESENTBUFFERSDATA
+{
+ /**
+ * The shared surface handle.
+ *
+ * Return by dwmapi!DwmpDxGetWindowSharedSurface.
+ *
+ * @sa http://channel9.msdn.com/forums/TechOff/251261-Help-Getting-the-shared-window-texture-out-of-DWM-/
+ */
+ HANDLE hSharedSurface;
+
+ LUID AdapterLuid;
+
+ /**
+ * Present history token.
+ *
+ * This is returned by dwmapi!DwmpDxGetWindowSharedSurface and
+ * should be passed to D3DKMTRender in D3DKMT_RENDER::PresentHistoryToken.
+ *
+ * @sa http://msdn.microsoft.com/en-us/library/ms799176.aspx
+ */
+ ULONGLONG PresentHistoryToken;
+
+ /** Same as GLCBPRESENTBUFFERSDATA::pPrivateData */
+ LPVOID pPrivateData;
+} GLPRESENTBUFFERSDATA, *PGLPRESENTBUFFERSDATA;
+
BOOL APIENTRY
DrvCopyContext(
DHGLRC dhrcSource,
@@ -435,6 +542,9 @@ DrvGetProcAddress(
LPCSTR lpszProc );
BOOL APIENTRY
+DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data);
+
+BOOL APIENTRY
DrvRealizeLayerPalette(
HDC hdc,
INT iLayerPlane,
diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c
index c296744838..7abe5d9f7f 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c
@@ -34,9 +34,9 @@
#include "util/u_debug.h"
+#include "stw_icd.h"
#include "stw_device.h"
#include "stw_pixelformat.h"
-#include "stw_public.h"
#include "stw_tls.h"
@@ -154,8 +154,11 @@ stw_pixelformat_add(
pfi->pfd.dwFlags = PFD_SUPPORT_OPENGL;
/* TODO: also support non-native pixel formats */
- pfi->pfd.dwFlags |= PFD_DRAW_TO_WINDOW ;
-
+ pfi->pfd.dwFlags |= PFD_DRAW_TO_WINDOW;
+
+ /* See http://www.opengl.org/pipeline/article/vol003_7/ */
+ pfi->pfd.dwFlags |= PFD_SUPPORT_COMPOSITION;
+
if (doublebuffer)
pfi->pfd.dwFlags |= PFD_DOUBLEBUFFER | PFD_SWAP_COPY;
@@ -288,12 +291,12 @@ stw_pixelformat_visual(GLvisual *visual,
}
-int
-stw_pixelformat_describe(
+LONG APIENTRY
+DrvDescribePixelFormat(
HDC hdc,
- int iPixelFormat,
- UINT nBytes,
- LPPIXELFORMATDESCRIPTOR ppfd )
+ INT iPixelFormat,
+ ULONG cjpfd,
+ PIXELFORMATDESCRIPTOR *ppfd )
{
uint count;
uint index;
@@ -306,7 +309,7 @@ stw_pixelformat_describe(
if (ppfd == NULL)
return count;
- if (index >= count || nBytes != sizeof( PIXELFORMATDESCRIPTOR ))
+ if (index >= count || cjpfd != sizeof( PIXELFORMATDESCRIPTOR ))
return 0;
pfi = stw_pixelformat_get_info( index );
@@ -316,6 +319,52 @@ stw_pixelformat_describe(
return count;
}
+BOOL APIENTRY
+DrvDescribeLayerPlane(
+ HDC hdc,
+ INT iPixelFormat,
+ INT iLayerPlane,
+ UINT nBytes,
+ LPLAYERPLANEDESCRIPTOR plpd )
+{
+ assert(0);
+ return FALSE;
+}
+
+int APIENTRY
+DrvGetLayerPaletteEntries(
+ HDC hdc,
+ INT iLayerPlane,
+ INT iStart,
+ INT cEntries,
+ COLORREF *pcr )
+{
+ assert(0);
+ return 0;
+}
+
+int APIENTRY
+DrvSetLayerPaletteEntries(
+ HDC hdc,
+ INT iLayerPlane,
+ INT iStart,
+ INT cEntries,
+ CONST COLORREF *pcr )
+{
+ assert(0);
+ return 0;
+}
+
+BOOL APIENTRY
+DrvRealizeLayerPalette(
+ HDC hdc,
+ INT iLayerPlane,
+ BOOL bRealize )
+{
+ assert(0);
+ return FALSE;
+}
+
/* Only used by the wgl code, but have it here to avoid exporting the
* pixelformat.h functionality.
*/
diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h b/src/gallium/state_trackers/wgl/stw_pixelformat.h
index bec429231b..3a690b35ba 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.h
@@ -30,6 +30,10 @@
#include <windows.h>
+#ifndef PFD_SUPPORT_COMPOSITION
+#define PFD_SUPPORT_COMPOSITION 0x00008000
+#endif
+
#include "main/mtypes.h"
#include "pipe/p_compiler.h"
@@ -62,4 +66,11 @@ void
stw_pixelformat_visual(GLvisual *visual,
const struct stw_pixelformat_info *pfi );
+int
+stw_pixelformat_choose( HDC hdc,
+ CONST PIXELFORMATDESCRIPTOR *ppfd );
+
+int
+stw_pixelformat_get(HDC hdc);
+
#endif /* STW_PIXELFORMAT_H */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.c b/src/gallium/state_trackers/wgl/stw_tls.c
index 4bd6a9289c..4bd6a9289c 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_tls.c
+++ b/src/gallium/state_trackers/wgl/stw_tls.c
diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.h b/src/gallium/state_trackers/wgl/stw_tls.h
index fbf8b1cbee..fbf8b1cbee 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_tls.h
+++ b/src/gallium/state_trackers/wgl/stw_tls.h
diff --git a/src/gallium/state_trackers/wgl/wgl/stw_wgl.c b/src/gallium/state_trackers/wgl/stw_wgl.c
index a131292f7a..bb199fdd25 100644
--- a/src/gallium/state_trackers/wgl/wgl/stw_wgl.c
+++ b/src/gallium/state_trackers/wgl/stw_wgl.c
@@ -28,7 +28,9 @@
#include <windows.h>
#include "util/u_debug.h"
-#include "shared/stw_public.h"
+#include "stw_icd.h"
+#include "stw_context.h"
+#include "stw_pixelformat.h"
#include "stw_wgl.h"
@@ -38,16 +40,16 @@ wglCopyContext(
HGLRC hglrcDst,
UINT mask )
{
- return stw_copy_context( (UINT_PTR)hglrcSrc,
- (UINT_PTR)hglrcDst,
- mask );
+ return DrvCopyContext( (DHGLRC)(UINT_PTR)hglrcSrc,
+ (DHGLRC)(UINT_PTR)hglrcDst,
+ mask );
}
WINGDIAPI HGLRC APIENTRY
wglCreateContext(
HDC hdc )
{
- return wglCreateLayerContext(hdc, 0);
+ return (HGLRC) DrvCreateContext(hdc);
}
WINGDIAPI HGLRC APIENTRY
@@ -55,21 +57,21 @@ wglCreateLayerContext(
HDC hdc,
int iLayerPlane )
{
- return (HGLRC) stw_create_layer_context( hdc, iLayerPlane );
+ return (HGLRC) DrvCreateLayerContext( hdc, iLayerPlane );
}
WINGDIAPI BOOL APIENTRY
wglDeleteContext(
HGLRC hglrc )
{
- return stw_delete_context( (UINT_PTR)hglrc );
+ return DrvDeleteContext((DHGLRC)(UINT_PTR)hglrc );
}
WINGDIAPI HGLRC APIENTRY
wglGetCurrentContext( VOID )
{
- return (HGLRC)stw_get_current_context();
+ return (HGLRC)(UINT_PTR)stw_get_current_context();
}
WINGDIAPI HDC APIENTRY
@@ -83,7 +85,7 @@ wglMakeCurrent(
HDC hdc,
HGLRC hglrc )
{
- return stw_make_current( hdc, (UINT_PTR)hglrc );
+ return DrvSetContext( hdc, (DHGLRC)(UINT_PTR)hglrc, NULL ) ? TRUE : FALSE;
}
@@ -91,7 +93,7 @@ WINGDIAPI BOOL APIENTRY
wglSwapBuffers(
HDC hdc )
{
- return stw_swap_buffers( hdc );
+ return DrvSwapBuffers( hdc );
}
@@ -100,14 +102,14 @@ wglSwapLayerBuffers(
HDC hdc,
UINT fuPlanes )
{
- return stw_swap_layer_buffers( hdc, fuPlanes );
+ return DrvSwapLayerBuffers( hdc, fuPlanes );
}
WINGDIAPI PROC APIENTRY
wglGetProcAddress(
LPCSTR lpszProc )
{
- return stw_get_proc_address( lpszProc );
+ return DrvGetProcAddress( lpszProc );
}
@@ -141,7 +143,7 @@ wglDescribePixelFormat(
UINT nBytes,
LPPIXELFORMATDESCRIPTOR ppfd )
{
- return stw_pixelformat_describe( hdc, iPixelFormat, nBytes, ppfd );
+ return DrvDescribePixelFormat( hdc, iPixelFormat, nBytes, ppfd );
}
WINGDIAPI int APIENTRY
@@ -160,7 +162,7 @@ wglSetPixelFormat(
if (ppfd->nSize != sizeof( PIXELFORMATDESCRIPTOR ))
return FALSE;
- return stw_pixelformat_set( hdc, iPixelFormat );
+ return DrvSetPixelFormat( hdc, iPixelFormat );
}
@@ -186,7 +188,8 @@ wglShareLists(
HGLRC hglrc1,
HGLRC hglrc2 )
{
- return stw_share_lists( (UINT_PTR)hglrc1, (UINT_PTR)hglrc2);;
+ return DrvShareLists((DHGLRC)(UINT_PTR)hglrc1,
+ (DHGLRC)(UINT_PTR)hglrc2);
}
WINGDIAPI BOOL APIENTRY
@@ -264,15 +267,7 @@ wglDescribeLayerPlane(
UINT nBytes,
LPLAYERPLANEDESCRIPTOR plpd )
{
- (void) hdc;
- (void) iPixelFormat;
- (void) iLayerPlane;
- (void) nBytes;
- (void) plpd;
-
- assert( 0 );
-
- return FALSE;
+ return DrvDescribeLayerPlane(hdc, iPixelFormat, iLayerPlane, nBytes, plpd);
}
WINGDIAPI int APIENTRY
@@ -283,15 +278,7 @@ wglSetLayerPaletteEntries(
int cEntries,
CONST COLORREF *pcr )
{
- (void) hdc;
- (void) iLayerPlane;
- (void) iStart;
- (void) cEntries;
- (void) pcr;
-
- assert( 0 );
-
- return 0;
+ return DrvSetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr);
}
WINGDIAPI int APIENTRY
@@ -302,15 +289,7 @@ wglGetLayerPaletteEntries(
int cEntries,
COLORREF *pcr )
{
- (void) hdc;
- (void) iLayerPlane;
- (void) iStart;
- (void) cEntries;
- (void) pcr;
-
- assert( 0 );
-
- return 0;
+ return DrvGetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr);
}
WINGDIAPI BOOL APIENTRY
diff --git a/src/gallium/state_trackers/wgl/wgl/stw_wgl.h b/src/gallium/state_trackers/wgl/stw_wgl.h
index a98179944a..a98179944a 100644
--- a/src/gallium/state_trackers/wgl/wgl/stw_wgl.h
+++ b/src/gallium/state_trackers/wgl/stw_wgl.h
diff --git a/src/gallium/state_trackers/wgl/shared/stw_winsys.h b/src/gallium/state_trackers/wgl/stw_winsys.h
index c0bf82c9ed..1ead47d6e6 100644
--- a/src/gallium/state_trackers/wgl/shared/stw_winsys.h
+++ b/src/gallium/state_trackers/wgl/stw_winsys.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2008-2009 Vmware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -36,6 +36,8 @@ struct pipe_screen;
struct pipe_context;
struct pipe_surface;
+struct stw_shared_surface;
+
struct stw_winsys
{
struct pipe_screen *
@@ -44,10 +46,52 @@ struct stw_winsys
struct pipe_context *
(*create_context)( struct pipe_screen *screen );
+ /**
+ * Present the color buffer to the window associated with the device context.
+ */
+ void
+ (*present)( struct pipe_screen *screen,
+ struct pipe_surface *surf,
+ HDC hDC );
+
+ /**
+ * Locally unique identifier (LUID) of the graphics adapter.
+ *
+ * @sa GLCBPRESENTBUFFERSDATA::AdapterLuid;
+ */
+ boolean
+ (*get_adapter_luid)( struct pipe_screen *screen,
+ LUID *pAdapterLuid );
+
+ /**
+ * Open a shared surface (optional).
+ *
+ * @sa GLCBPRESENTBUFFERSDATA::hSharedSurface;
+ */
+ struct stw_shared_surface *
+ (*shared_surface_open)(struct pipe_screen *screen,
+ HANDLE hSharedSurface);
+
+ /**
+ * Open a shared surface (optional).
+ */
+ void
+ (*shared_surface_close)(struct pipe_screen *screen,
+ struct stw_shared_surface *surface);
+
+ /**
+ * Compose into a shared (optional).
+ *
+ * Blit the color buffer into a shared surface.
+ *
+ * @sa GLPRESENTBUFFERSDATA::PresentHistoryToken.
+ */
void
- (*flush_frontbuffer)( struct pipe_screen *screen,
- struct pipe_surface *surf,
- HDC hDC );
+ (*compose)( struct pipe_screen *screen,
+ struct pipe_surface *src,
+ struct stw_shared_surface *dest,
+ LPCRECT pRect,
+ ULONGLONG PresentHistoryToken );
};
boolean
diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c
index c708ac3170..9d15a615f1 100644
--- a/src/gallium/state_trackers/xorg/xorg_composite.c
+++ b/src/gallium/state_trackers/xorg/xorg_composite.c
@@ -4,6 +4,7 @@
#include "cso_cache/cso_context.h"
#include "util/u_draw_quad.h"
+#include "util/u_math.h"
#include "pipe/p_inlines.h"
@@ -11,9 +12,9 @@ struct xorg_composite_blend {
int op:8;
unsigned rgb_src_factor:5; /**< PIPE_BLENDFACTOR_x */
- unsigned rgb_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
-
unsigned alpha_src_factor:5; /**< PIPE_BLENDFACTOR_x */
+
+ unsigned rgb_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
unsigned alpha_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
};
@@ -40,22 +41,20 @@ static const struct xorg_composite_blend xorg_blends[] = {
PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_INV_SRC_ALPHA },
};
+
static INLINE void
-pixel_to_float4(PictFormatPtr format,
- CARD32 pixel, float *color)
+pixel_to_float4(Pixel pixel, float *color)
{
CARD32 r, g, b, a;
- debug_assert(format->type == PictTypeDirect);
-
- r = (pixel >> format->direct.red) & format->direct.redMask;
- g = (pixel >> format->direct.green) & format->direct.greenMask;
- b = (pixel >> format->direct.blue) & format->direct.blueMask;
- a = (pixel >> format->direct.alpha) & format->direct.alphaMask;
- color[0] = ((float)r) / ((float)format->direct.redMask);
- color[1] = ((float)g) / ((float)format->direct.greenMask);
- color[2] = ((float)b) / ((float)format->direct.blueMask);
- color[3] = ((float)a) / ((float)format->direct.alphaMask);
+ a = (pixel >> 24) & 0xff;
+ r = (pixel >> 16) & 0xff;
+ g = (pixel >> 8) & 0xff;
+ b = (pixel >> 0) & 0xff;
+ color[0] = ((float)r) / 255.;
+ color[1] = ((float)g) / 255.;
+ color[2] = ((float)b) / 255.;
+ color[3] = ((float)a) / 255.;
}
struct acceleration_info {
@@ -133,24 +132,22 @@ setup_vertex_data0(struct exa_context *ctx,
int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int width, int height)
{
- float vertices[4][2][4];
-
/* 1st vertex */
- setup_vertex0(vertices[0], dstX, dstY,
+ setup_vertex0(ctx->vertices2[0], dstX, dstY,
ctx->solid_color);
/* 2nd vertex */
- setup_vertex0(vertices[1], dstX + width, dstY,
+ setup_vertex0(ctx->vertices2[1], dstX + width, dstY,
ctx->solid_color);
/* 3rd vertex */
- setup_vertex0(vertices[2], dstX + width, dstY + height,
+ setup_vertex0(ctx->vertices2[2], dstX + width, dstY + height,
ctx->solid_color);
/* 4th vertex */
- setup_vertex0(vertices[3], dstX, dstY + height,
+ setup_vertex0(ctx->vertices2[3], dstX, dstY + height,
ctx->solid_color);
- return pipe_user_buffer_create(ctx->ctx->screen,
- vertices,
- sizeof(vertices));
+ return pipe_user_buffer_create(ctx->pipe->screen,
+ ctx->vertices2,
+ sizeof(ctx->vertices2));
}
static INLINE void
@@ -172,7 +169,6 @@ setup_vertex_data1(struct exa_context *ctx,
int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int width, int height)
{
- float vertices[4][2][4];
float s0, t0, s1, t1;
struct pipe_texture *src = ctx->bound_textures[0];
@@ -182,23 +178,48 @@ setup_vertex_data1(struct exa_context *ctx,
t1 = srcY + height / src->height[0];
/* 1st vertex */
- setup_vertex1(vertices[0], dstX, dstY,
+ setup_vertex1(ctx->vertices2[0], dstX, dstY,
s0, t0);
/* 2nd vertex */
- setup_vertex1(vertices[1], dstX + width, dstY,
+ setup_vertex1(ctx->vertices2[1], dstX + width, dstY,
s1, t0);
/* 3rd vertex */
- setup_vertex1(vertices[2], dstX + width, dstY + height,
+ setup_vertex1(ctx->vertices2[2], dstX + width, dstY + height,
s1, t1);
/* 4th vertex */
- setup_vertex1(vertices[3], dstX, dstY + height,
+ setup_vertex1(ctx->vertices2[3], dstX, dstY + height,
s0, t1);
- return pipe_user_buffer_create(ctx->ctx->screen,
- vertices,
- sizeof(vertices));
+ return pipe_user_buffer_create(ctx->pipe->screen,
+ ctx->vertices2,
+ sizeof(ctx->vertices2));
}
+static struct pipe_buffer *
+setup_vertex_data_tex(struct exa_context *ctx,
+ float x0, float y0, float x1, float y1,
+ float s0, float t0, float s1, float t1,
+ float z)
+{
+ /* 1st vertex */
+ setup_vertex1(ctx->vertices2[0], x0, y0,
+ s0, t0);
+ /* 2nd vertex */
+ setup_vertex1(ctx->vertices2[1], x1, y0,
+ s1, t0);
+ /* 3rd vertex */
+ setup_vertex1(ctx->vertices2[2], x1, y1,
+ s1, t1);
+ /* 4th vertex */
+ setup_vertex1(ctx->vertices2[3], x0, y1,
+ s0, t1);
+
+ return pipe_user_buffer_create(ctx->pipe->screen,
+ ctx->vertices2,
+ sizeof(ctx->vertices2));
+}
+
+
static INLINE void
setup_vertex2(float vertex[3][4], float x, float y,
@@ -225,7 +246,6 @@ setup_vertex_data2(struct exa_context *ctx,
int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int width, int height)
{
- float vertices[4][3][4];
float st0[4], st1[4];
struct pipe_texture *src = ctx->bound_textures[0];
struct pipe_texture *mask = ctx->bound_textures[0];
@@ -241,21 +261,21 @@ setup_vertex_data2(struct exa_context *ctx,
st1[3] = maskY + height / mask->height[0];
/* 1st vertex */
- setup_vertex2(vertices[0], dstX, dstY,
+ setup_vertex2(ctx->vertices3[0], dstX, dstY,
st0[0], st0[1], st1[0], st1[1]);
/* 2nd vertex */
- setup_vertex2(vertices[1], dstX + width, dstY,
+ setup_vertex2(ctx->vertices3[1], dstX + width, dstY,
st0[2], st0[1], st1[2], st1[1]);
/* 3rd vertex */
- setup_vertex2(vertices[2], dstX + width, dstY + height,
+ setup_vertex2(ctx->vertices3[2], dstX + width, dstY + height,
st0[2], st0[3], st1[2], st1[3]);
/* 4th vertex */
- setup_vertex2(vertices[3], dstX, dstY + height,
+ setup_vertex2(ctx->vertices3[3], dstX, dstY + height,
st0[0], st0[3], st1[0], st1[3]);
- return pipe_user_buffer_create(ctx->ctx->screen,
- vertices,
- sizeof(vertices));
+ return pipe_user_buffer_create(ctx->pipe->screen,
+ ctx->vertices3,
+ sizeof(ctx->vertices3));
}
boolean xorg_composite_accelerated(int op,
@@ -267,22 +287,25 @@ boolean xorg_composite_accelerated(int op,
unsigned accel_ops_count =
sizeof(accelerated_ops)/sizeof(struct acceleration_info);
-
- /*FIXME: currently accel is disabled */
- return FALSE;
-
- if (pSrcPicture) {
- /* component alpha not supported */
- if (pSrcPicture->componentAlpha)
+ if (pSrcPicture->pSourcePict) {
+ /* Gradients not yet supported */
+ if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
return FALSE;
- /* fills not supported */
- if (pSrcPicture->pSourcePict)
+
+ /* Solid source with mask not yet handled properly */
+ if (pMaskPicture)
return FALSE;
}
for (i = 0; i < accel_ops_count; ++i) {
if (op == accelerated_ops[i].op) {
- if (pMaskPicture && !accelerated_ops[i].with_mask)
+ /* Check for unsupported component alpha */
+ if ((pSrcPicture->componentAlpha &&
+ !accelerated_ops[i].component_alpha) ||
+ (pMaskPicture &&
+ (!accelerated_ops[i].with_mask ||
+ (pMaskPicture->componentAlpha &&
+ !accelerated_ops[i].component_alpha))))
return FALSE;
return TRUE;
}
@@ -291,16 +314,20 @@ boolean xorg_composite_accelerated(int op,
}
static void
-bind_framebuffer_state(struct exa_context *exa, PicturePtr pDstPicture,
- struct exa_pixmap_priv *pDst)
+bind_clip_state(struct exa_context *exa)
+{
+}
+
+static void
+bind_framebuffer_state(struct exa_context *exa, struct exa_pixmap_priv *pDst)
{
unsigned i;
struct pipe_framebuffer_state state;
struct pipe_surface *surface = exa_gpu_surface(exa, pDst);
memset(&state, 0, sizeof(struct pipe_framebuffer_state));
- state.width = pDstPicture->pDrawable->width;
- state.height = pDstPicture->pDrawable->height;
+ state.width = pDst->tex->width[0];
+ state.height = pDst->tex->height[0];
state.nr_cbufs = 1;
state.cbufs[0] = surface;
@@ -311,6 +338,9 @@ bind_framebuffer_state(struct exa_context *exa, PicturePtr pDstPicture,
state.zsbuf = 0;
cso_set_framebuffer(exa->cso, &state);
+
+ /* we do fire and forget for the framebuffer, this is the forget part */
+ pipe_surface_reference(&surface, NULL);
}
enum AxisOrientation {
@@ -338,10 +368,12 @@ set_viewport(struct exa_context *exa, int width, int height,
}
static void
-bind_viewport_state(struct exa_context *exa, PicturePtr pDstPicture)
+bind_viewport_state(struct exa_context *exa, struct exa_pixmap_priv *pDst)
{
- int width = pDstPicture->pDrawable->width;
- int height = pDstPicture->pDrawable->height;
+ int width = pDst->tex->width[0];
+ int height = pDst->tex->height[0];
+
+ /*debug_printf("Bind viewport (%d, %d)\n", width, height);*/
set_viewport(exa, width, height, Y0_TOP);
}
@@ -350,13 +382,9 @@ static void
bind_blend_state(struct exa_context *exa, int op,
PicturePtr pSrcPicture, PicturePtr pMaskPicture)
{
- boolean component_alpha = pSrcPicture->componentAlpha;
struct xorg_composite_blend blend_opt;
struct pipe_blend_state blend;
- if (component_alpha) {
- op = PictOpOver;
- }
blend_opt = blend_for_op(op);
memset(&blend, 0, sizeof(struct pipe_blend_state));
@@ -390,14 +418,17 @@ bind_shaders(struct exa_context *exa, int op,
unsigned vs_traits = 0, fs_traits = 0;
struct xorg_shader shader;
+ exa->has_solid_color = FALSE;
+
if (pSrcPicture) {
if (pSrcPicture->pSourcePict) {
if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
fs_traits |= FS_SOLID_FILL;
vs_traits |= VS_SOLID_FILL;
- pixel_to_float4(pSrcPicture->pFormat,
- pSrcPicture->pSourcePict->solidFill.color,
+ debug_assert(pSrcPicture->format == PICT_a8r8g8b8);
+ pixel_to_float4(pSrcPicture->pSourcePict->solidFill.color,
exa->solid_color);
+ exa->has_solid_color = TRUE;
} else {
debug_assert("!gradients not supported");
}
@@ -434,6 +465,12 @@ bind_samplers(struct exa_context *exa, int op,
memset(&src_sampler, 0, sizeof(struct pipe_sampler_state));
memset(&mask_sampler, 0, sizeof(struct pipe_sampler_state));
+ if ((pSrc && exa->pipe->is_texture_referenced(exa->pipe, pSrc->tex, 0, 0) &
+ PIPE_REFERENCED_FOR_WRITE) ||
+ (pMask && exa->pipe->is_texture_referenced(exa->pipe, pMask->tex, 0, 0) &
+ PIPE_REFERENCED_FOR_WRITE))
+ exa->pipe->flush(exa->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
if (pSrcPicture && pSrc) {
unsigned src_wrap = render_repeat_to_gallium(
pSrcPicture->repeatType);
@@ -478,15 +515,15 @@ setup_vs_constant_buffer(struct exa_context *exa,
struct pipe_constant_buffer *cbuf = &exa->vs_const_buffer;
pipe_buffer_reference(&cbuf->buffer, NULL);
- cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16,
+ cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16,
PIPE_BUFFER_USAGE_CONSTANT,
param_bytes);
if (cbuf->buffer) {
- pipe_buffer_write(exa->ctx->screen, cbuf->buffer,
+ pipe_buffer_write(exa->pipe->screen, cbuf->buffer,
0, param_bytes, vs_consts);
}
- exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_VERTEX, 0, cbuf);
+ exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_VERTEX, 0, cbuf);
}
@@ -500,22 +537,22 @@ setup_fs_constant_buffer(struct exa_context *exa)
struct pipe_constant_buffer *cbuf = &exa->fs_const_buffer;
pipe_buffer_reference(&cbuf->buffer, NULL);
- cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16,
+ cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16,
PIPE_BUFFER_USAGE_CONSTANT,
param_bytes);
if (cbuf->buffer) {
- pipe_buffer_write(exa->ctx->screen, cbuf->buffer,
+ pipe_buffer_write(exa->pipe->screen, cbuf->buffer,
0, param_bytes, fs_consts);
}
- exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_FRAGMENT, 0, cbuf);
+ exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf);
}
static void
-setup_constant_buffers(struct exa_context *exa, PicturePtr pDstPicture)
+setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst)
{
- int width = pDstPicture->pDrawable->width;
- int height = pDstPicture->pDrawable->height;
+ int width = pDst->tex->width[0];
+ int height = pDst->tex->height[0];
setup_vs_constant_buffer(exa, width, height);
setup_fs_constant_buffer(exa);
@@ -530,15 +567,15 @@ boolean xorg_composite_bind_state(struct exa_context *exa,
struct exa_pixmap_priv *pMask,
struct exa_pixmap_priv *pDst)
{
- bind_framebuffer_state(exa, pDstPicture, pDst);
- bind_viewport_state(exa, pDstPicture);
+ bind_framebuffer_state(exa, pDst);
+ bind_viewport_state(exa, pDst);
bind_blend_state(exa, op, pSrcPicture, pMaskPicture);
bind_rasterizer_state(exa);
bind_shaders(exa, op, pSrcPicture, pMaskPicture);
bind_samplers(exa, op, pSrcPicture, pMaskPicture,
pDstPicture, pSrc, pMask, pDst);
-
- setup_constant_buffers(exa, pDstPicture);
+ bind_clip_state(exa);
+ setup_constant_buffers(exa, pDst);
return FALSE;
}
@@ -548,7 +585,7 @@ void xorg_composite(struct exa_context *exa,
int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int width, int height)
{
- struct pipe_context *pipe = exa->ctx;
+ struct pipe_context *pipe = exa->pipe;
struct pipe_buffer *buf = 0;
if (exa->num_bound_samplers == 0 ) { /* solid fill */
@@ -573,12 +610,457 @@ void xorg_composite(struct exa_context *exa,
}
if (buf) {
+ int num_attribs = 1; /*pos*/
+ num_attribs += exa->num_bound_samplers;
+ if (exa->has_solid_color)
+ ++num_attribs;
+
+ util_draw_vertex_buffer(pipe, buf, 0,
+ PIPE_PRIM_TRIANGLE_FAN,
+ 4, /* verts */
+ num_attribs); /* attribs/vert */
+
+ pipe_buffer_reference(&buf, NULL);
+ }
+}
+
+boolean xorg_solid_bind_state(struct exa_context *exa,
+ struct exa_pixmap_priv *pixmap,
+ Pixel fg)
+{
+ unsigned vs_traits, fs_traits;
+ struct xorg_shader shader;
+
+ pixel_to_float4(fg, exa->solid_color);
+ exa->has_solid_color = TRUE;
+
+ exa->solid_color[3] = 1.f;
+
+#if 0
+ debug_printf("Color Pixel=(%d, %d, %d, %d), RGBA=(%f, %f, %f, %f)\n",
+ (fg >> 24) & 0xff, (fg >> 16) & 0xff,
+ (fg >> 8) & 0xff, (fg >> 0) & 0xff,
+ exa->solid_color[0], exa->solid_color[1],
+ exa->solid_color[2], exa->solid_color[3]);
+#endif
+
+ vs_traits = VS_SOLID_FILL;
+ fs_traits = FS_SOLID_FILL;
+
+ bind_framebuffer_state(exa, pixmap);
+ bind_viewport_state(exa, pixmap);
+ bind_rasterizer_state(exa);
+ bind_blend_state(exa, PictOpSrc, NULL, NULL);
+ setup_constant_buffers(exa, pixmap);
+ bind_clip_state(exa);
+
+ shader = xorg_shaders_get(exa->shaders, vs_traits, fs_traits);
+ cso_set_vertex_shader_handle(exa->cso, shader.vs);
+ cso_set_fragment_shader_handle(exa->cso, shader.fs);
+
+ return TRUE;
+}
+
+void xorg_solid(struct exa_context *exa,
+ struct exa_pixmap_priv *pixmap,
+ int x0, int y0, int x1, int y1)
+{
+ struct pipe_context *pipe = exa->pipe;
+ struct pipe_buffer *buf = 0;
+
+ /* 1st vertex */
+ setup_vertex0(exa->vertices2[0], x0, y0,
+ exa->solid_color);
+ /* 2nd vertex */
+ setup_vertex0(exa->vertices2[1], x1, y0,
+ exa->solid_color);
+ /* 3rd vertex */
+ setup_vertex0(exa->vertices2[2], x1, y1,
+ exa->solid_color);
+ /* 4th vertex */
+ setup_vertex0(exa->vertices2[3], x0, y1,
+ exa->solid_color);
+
+ buf = pipe_user_buffer_create(exa->pipe->screen,
+ exa->vertices2,
+ sizeof(exa->vertices2));
+
+
+ if (buf) {
util_draw_vertex_buffer(pipe, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
- 1 + exa->num_bound_samplers); /* attribs/vert */
+ 2); /* attribs/vert */
+
+ pipe_buffer_reference(&buf, NULL);
+ }
+}
+
+
+static INLINE void shift_rectx(float coords[4],
+ const float *bounds,
+ const float shift)
+{
+ coords[0] += shift;
+ coords[2] -= shift;
+ if (bounds) {
+ coords[2] = MIN2(coords[2], bounds[2]);
+ /* bound x/y + width/height */
+ if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) {
+ coords[2] = (bounds[0] + bounds[2]) - coords[0];
+ }
+ }
+}
+
+static INLINE void shift_recty(float coords[4],
+ const float *bounds,
+ const float shift)
+{
+ coords[1] += shift;
+ coords[3] -= shift;
+ if (bounds) {
+ coords[3] = MIN2(coords[3], bounds[3]);
+ if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) {
+ coords[3] = (bounds[1] + bounds[3]) - coords[1];
+ }
+ }
+}
+
+static INLINE void bound_rect(float coords[4],
+ const float bounds[4],
+ float shift[4])
+{
+ /* if outside the bounds */
+ if (coords[0] > (bounds[0] + bounds[2]) ||
+ coords[1] > (bounds[1] + bounds[3]) ||
+ (coords[0] + coords[2]) < bounds[0] ||
+ (coords[1] + coords[3]) < bounds[1]) {
+ coords[0] = 0.f;
+ coords[1] = 0.f;
+ coords[2] = 0.f;
+ coords[3] = 0.f;
+ shift[0] = 0.f;
+ shift[1] = 0.f;
+ return;
+ }
+
+ /* bound x */
+ if (coords[0] < bounds[0]) {
+ shift[0] = bounds[0] - coords[0];
+ coords[2] -= shift[0];
+ coords[0] = bounds[0];
+ } else
+ shift[0] = 0.f;
+
+ /* bound y */
+ if (coords[1] < bounds[1]) {
+ shift[1] = bounds[1] - coords[1];
+ coords[3] -= shift[1];
+ coords[1] = bounds[1];
+ } else
+ shift[1] = 0.f;
+
+ shift[2] = bounds[2] - coords[2];
+ shift[3] = bounds[3] - coords[3];
+ /* bound width/height */
+ coords[2] = MIN2(coords[2], bounds[2]);
+ coords[3] = MIN2(coords[3], bounds[3]);
+
+ /* bound x/y + width/height */
+ if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) {
+ coords[2] = (bounds[0] + bounds[2]) - coords[0];
+ }
+ if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) {
+ coords[3] = (bounds[1] + bounds[3]) - coords[1];
+ }
+
+ /* if outside the bounds */
+ if ((coords[0] + coords[2]) < bounds[0] ||
+ (coords[1] + coords[3]) < bounds[1]) {
+ coords[0] = 0.f;
+ coords[1] = 0.f;
+ coords[2] = 0.f;
+ coords[3] = 0.f;
+ return;
+ }
+}
+
+static INLINE void sync_size(float *src_loc, float *dst_loc)
+{
+ src_loc[2] = MIN2(src_loc[2], dst_loc[2]);
+ src_loc[3] = MIN2(src_loc[3], dst_loc[3]);
+ dst_loc[2] = src_loc[2];
+ dst_loc[3] = src_loc[3];
+}
+
+
+static void renderer_copy_texture(struct exa_context *exa,
+ struct pipe_texture *src,
+ float sx1, float sy1,
+ float sx2, float sy2,
+ struct pipe_texture *dst,
+ float dx1, float dy1,
+ float dx2, float dy2)
+{
+ struct pipe_context *pipe = exa->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_buffer *buf;
+ struct pipe_surface *dst_surf = screen->get_tex_surface(
+ screen, dst, 0, 0, 0,
+ PIPE_BUFFER_USAGE_GPU_WRITE);
+ struct pipe_framebuffer_state fb;
+ float s0, t0, s1, t1;
+ struct xorg_shader shader;
+
+ assert(src->width[0] != 0);
+ assert(src->height[0] != 0);
+ assert(dst->width[0] != 0);
+ assert(dst->height[0] != 0);
+
+#if 1
+ s0 = sx1 / src->width[0];
+ s1 = sx2 / src->width[0];
+ t0 = sy1 / src->height[0];
+ t1 = sy2 / src->height[0];
+#else
+ s0 = 0;
+ s1 = 1;
+ t0 = 0;
+ t1 = 1;
+#endif
+
+#if 1
+ debug_printf("copy texture src=[%f, %f, %f, %f], dst=[%f, %f, %f, %f], tex=[%f, %f, %f, %f]\n",
+ sx1, sy1, sx2, sy2, dx1, dy1, dx2, dy2,
+ s0, t0, s1, t1);
+#endif
+
+ assert(screen->is_format_supported(screen, dst_surf->format,
+ PIPE_TEXTURE_2D,
+ PIPE_TEXTURE_USAGE_RENDER_TARGET,
+ 0));
+
+ /* save state (restored below) */
+ cso_save_blend(exa->cso);
+ cso_save_samplers(exa->cso);
+ cso_save_sampler_textures(exa->cso);
+ cso_save_framebuffer(exa->cso);
+ cso_save_fragment_shader(exa->cso);
+ cso_save_vertex_shader(exa->cso);
+
+ cso_save_viewport(exa->cso);
+
+
+ /* set misc state we care about */
+ {
+ struct pipe_blend_state blend;
+ memset(&blend, 0, sizeof(blend));
+ blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ blend.colormask = PIPE_MASK_RGBA;
+ cso_set_blend(exa->cso, &blend);
+ }
+
+ /* sampler */
+ {
+ struct pipe_sampler_state sampler;
+ memset(&sampler, 0, sizeof(sampler));
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler.normalized_coords = 1;
+ cso_single_sampler(exa->cso, 0, &sampler);
+ cso_single_sampler_done(exa->cso);
+ }
+
+ set_viewport(exa, dst_surf->width, dst_surf->height, Y0_TOP);
+
+ /* texture */
+ cso_set_sampler_textures(exa->cso, 1, &src);
+
+ bind_rasterizer_state(exa);
+
+ /* shaders */
+ shader = xorg_shaders_get(exa->shaders,
+ VS_COMPOSITE,
+ FS_COMPOSITE);
+ cso_set_vertex_shader_handle(exa->cso, shader.vs);
+ cso_set_fragment_shader_handle(exa->cso, shader.fs);
+
+ /* drawing dest */
+ memset(&fb, 0, sizeof(fb));
+ fb.width = dst_surf->width;
+ fb.height = dst_surf->height;
+ fb.nr_cbufs = 1;
+ fb.cbufs[0] = dst_surf;
+ {
+ int i;
+ for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i)
+ fb.cbufs[i] = 0;
+ }
+ cso_set_framebuffer(exa->cso, &fb);
+ setup_vs_constant_buffer(exa, fb.width, fb.height);
+ setup_fs_constant_buffer(exa);
+
+ /* draw quad */
+ buf = setup_vertex_data_tex(exa,
+ dx1, dy1,
+ dx2, dy2,
+ s0, t0, s1, t1,
+ 0.0f);
+
+ if (buf) {
+ util_draw_vertex_buffer(exa->pipe, buf, 0,
+ PIPE_PRIM_TRIANGLE_FAN,
+ 4, /* verts */
+ 2); /* attribs/vert */
pipe_buffer_reference(&buf, NULL);
}
+
+ /* restore state we changed */
+ cso_restore_blend(exa->cso);
+ cso_restore_samplers(exa->cso);
+ cso_restore_sampler_textures(exa->cso);
+ cso_restore_framebuffer(exa->cso);
+ cso_restore_vertex_shader(exa->cso);
+ cso_restore_fragment_shader(exa->cso);
+ cso_restore_viewport(exa->cso);
+
+ pipe_surface_reference(&dst_surf, NULL);
+}
+
+
+static struct pipe_texture *
+create_sampler_texture(struct exa_context *ctx,
+ struct pipe_texture *src)
+{
+ enum pipe_format format;
+ struct pipe_context *pipe = ctx->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_texture *pt;
+ struct pipe_texture templ;
+
+ pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+ /* the coming in texture should already have that invariance */
+ debug_assert(screen->is_format_supported(screen, src->format,
+ PIPE_TEXTURE_2D,
+ PIPE_TEXTURE_USAGE_SAMPLER, 0));
+
+ format = src->format;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.target = PIPE_TEXTURE_2D;
+ templ.format = format;
+ templ.last_level = 0;
+ templ.width[0] = src->width[0];
+ templ.height[0] = src->height[0];
+ templ.depth[0] = 1;
+ pf_get_block(format, &templ.block);
+ templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
+
+ pt = screen->texture_create(screen, &templ);
+
+ debug_assert(!pt || pipe_is_referenced(&pt->reference));
+
+ if (!pt)
+ return NULL;
+
+ {
+ /* copy source framebuffer surface into texture */
+ struct pipe_surface *ps_read = screen->get_tex_surface(
+ screen, src, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ);
+ struct pipe_surface *ps_tex = screen->get_tex_surface(
+ screen, pt, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE );
+ pipe->surface_copy(pipe,
+ ps_tex, /* dest */
+ 0, 0, /* destx/y */
+ ps_read,
+ 0, 0, src->width[0], src->height[0]);
+ pipe_surface_reference(&ps_read, NULL);
+ pipe_surface_reference(&ps_tex, NULL);
+ }
+
+ return pt;
+}
+
+void xorg_copy_pixmap(struct exa_context *ctx,
+ struct exa_pixmap_priv *dst_priv, int dx, int dy,
+ struct exa_pixmap_priv *src_priv, int sx, int sy,
+ int width, int height)
+{
+ float dst_loc[4], src_loc[4];
+ float dst_bounds[4], src_bounds[4];
+ float src_shift[4], dst_shift[4], shift[4];
+ struct pipe_texture *dst = dst_priv->tex;
+ struct pipe_texture *src = src_priv->tex;
+
+ if (ctx->pipe->is_texture_referenced(ctx->pipe, src, 0, 0) &
+ PIPE_REFERENCED_FOR_WRITE)
+ ctx->pipe->flush(ctx->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+ dst_loc[0] = dx;
+ dst_loc[1] = dy;
+ dst_loc[2] = width;
+ dst_loc[3] = height;
+ dst_bounds[0] = 0.f;
+ dst_bounds[1] = 0.f;
+ dst_bounds[2] = dst->width[0];
+ dst_bounds[3] = dst->height[0];
+
+ src_loc[0] = sx;
+ src_loc[1] = sy;
+ src_loc[2] = width;
+ src_loc[3] = height;
+ src_bounds[0] = 0.f;
+ src_bounds[1] = 0.f;
+ src_bounds[2] = src->width[0];
+ src_bounds[3] = src->height[0];
+
+ bound_rect(src_loc, src_bounds, src_shift);
+ bound_rect(dst_loc, dst_bounds, dst_shift);
+ shift[0] = src_shift[0] - dst_shift[0];
+ shift[1] = src_shift[1] - dst_shift[1];
+
+ if (shift[0] < 0)
+ shift_rectx(src_loc, src_bounds, -shift[0]);
+ else
+ shift_rectx(dst_loc, dst_bounds, shift[0]);
+
+ if (shift[1] < 0)
+ shift_recty(src_loc, src_bounds, -shift[1]);
+ else
+ shift_recty(dst_loc, dst_bounds, shift[1]);
+
+ sync_size(src_loc, dst_loc);
+
+ if (src_loc[2] >= 0 && src_loc[3] >= 0 &&
+ dst_loc[2] >= 0 && dst_loc[3] >= 0) {
+ struct pipe_texture *temp_src = src;
+
+ if (src == dst)
+ temp_src = create_sampler_texture(ctx, src);
+
+ renderer_copy_texture(ctx,
+ temp_src,
+ src_loc[0],
+ src_loc[1],
+ src_loc[0] + src_loc[2],
+ src_loc[1] + src_loc[3],
+ dst,
+ dst_loc[0],
+ dst_loc[1],
+ dst_loc[0] + dst_loc[2],
+ dst_loc[1] + dst_loc[3]);
+
+ if (src == dst)
+ pipe_texture_reference(&temp_src, NULL);
+ }
}
diff --git a/src/gallium/state_trackers/xorg/xorg_composite.h b/src/gallium/state_trackers/xorg/xorg_composite.h
index 17dfcb199e..e73f1c704a 100644
--- a/src/gallium/state_trackers/xorg/xorg_composite.h
+++ b/src/gallium/state_trackers/xorg/xorg_composite.h
@@ -22,4 +22,16 @@ void xorg_composite(struct exa_context *exa,
int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int width, int height);
+boolean xorg_solid_bind_state(struct exa_context *exa,
+ struct exa_pixmap_priv *pixmap,
+ Pixel fg);
+void xorg_solid(struct exa_context *exa,
+ struct exa_pixmap_priv *pixmap,
+ int x0, int y0, int x1, int y1);
+
+void xorg_copy_pixmap(struct exa_context *ctx,
+ struct exa_pixmap_priv *dst, int dx, int dy,
+ struct exa_pixmap_priv *src, int sx, int sy,
+ int width, int height);
+
#endif
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index fe08bde9ef..67fe29a69d 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -42,8 +42,12 @@
#include "xorg_tracker.h"
#include "xf86Modes.h"
+#ifdef HAVE_XEXTPROTO_71
+#include <X11/extensions/dpmsconst.h>
+#else
#define DPMS_SERVER
#include <X11/extensions/dpms.h>
+#endif
#include "pipe/p_inlines.h"
#include "util/u_rect.h"
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index 6431a0fe25..8a362596c7 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -118,6 +118,7 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format)
}
if (!tex) {
+ exaMoveInPixmap(private->pPixmap);
xorg_exa_set_shared_usage(private->pPixmap);
pScreen->ModifyPixmapHeader(private->pPixmap, 0, 0, 0, 0, 0, NULL);
tex = xorg_exa_get_texture(private->pPixmap);
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index a17a71f23a..3f48ab98ac 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -47,6 +47,9 @@
#include "util/u_rect.h"
+#define DEBUG_SOLID 0
+#define DISABLE_ACCEL 0
+
/*
* Helper functions
*/
@@ -72,6 +75,9 @@ exa_get_pipe_format(int depth, enum pipe_format *format, int *bbp)
assert(*bbp == 16);
break;
case 8:
+ *format = PIPE_FORMAT_I8_UNORM;
+ assert(*bbp == 8);
+ break;
case 4:
case 1:
*format = PIPE_FORMAT_A8R8G8B8_UNORM; /* bad bad bad */
@@ -82,6 +88,25 @@ exa_get_pipe_format(int depth, enum pipe_format *format, int *bbp)
}
}
+static void
+xorg_exa_init_state(struct exa_context *exa)
+{
+ struct pipe_depth_stencil_alpha_state dsa;
+
+ /* set common initial clip state */
+ memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state));
+ cso_set_depth_stencil_alpha(exa->cso, &dsa);
+}
+
+static void
+xorg_exa_common_done(struct exa_context *exa)
+{
+ exa->copy.src = NULL;
+ exa->copy.dst = NULL;
+ exa->has_solid_color = FALSE;
+ exa->num_bound_samplers = 0;
+}
+
/*
* Static exported EXA functions
*/
@@ -111,9 +136,9 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst,
if (!priv || !priv->tex)
return FALSE;
- if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) &
+ if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) &
PIPE_REFERENCED_FOR_WRITE)
- exa->ctx->flush(exa->ctx, 0, NULL);
+ exa->pipe->flush(exa->pipe, 0, NULL);
transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0,
PIPE_TRANSFER_READ, x, y, w, h);
@@ -178,9 +203,9 @@ ExaPrepareAccess(PixmapPtr pPix, int index)
if (priv->map_count++ == 0)
{
- if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) &
+ if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) &
PIPE_REFERENCED_FOR_WRITE)
- exa->ctx->flush(exa->ctx, 0, NULL);
+ exa->pipe->flush(exa->pipe, 0, NULL);
priv->map_transfer =
exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0,
@@ -231,15 +256,22 @@ ExaDone(PixmapPtr pPixmap)
if (!priv)
return;
- if (priv->src_surf)
- exa->scrn->tex_surface_destroy(priv->src_surf);
- priv->src_surf = NULL;
+#if 1
+ xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL);
+#else
+ xorg_exa_finish(exa);
+#endif
+ xorg_exa_common_done(exa);
}
static void
ExaDoneComposite(PixmapPtr pPixmap)
{
+ ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
+ modesettingPtr ms = modesettingPTR(pScrn);
+ struct exa_context *exa = ms->exa;
+ xorg_exa_common_done(exa);
}
static Bool
@@ -250,24 +282,36 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg)
struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap);
struct exa_context *exa = ms->exa;
- if (pPixmap->drawable.depth < 15)
- return FALSE;
-
+#if 1
+ debug_printf("ExaPrepareSolid(0x%x)\n", fg);
+#endif
if (!EXA_PM_IS_SOLID(&pPixmap->drawable, planeMask))
return FALSE;
if (!priv || !priv->tex)
return FALSE;
+ if (!exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
+ priv->tex->target,
+ PIPE_TEXTURE_USAGE_RENDER_TARGET, 0))
+ return FALSE;
+
if (alu != GXcopy)
return FALSE;
- if (!exa->ctx || !exa->ctx->surface_fill)
+ if (!exa->pipe)
return FALSE;
- priv->color = fg;
- return TRUE;
+#if DEBUG_SOLID
+ fg = 0xffff0000;
+#endif
+
+#if DISABLE_ACCEL
+ return FALSE;
+#else
+ return xorg_solid_bind_state(exa, priv, fg);
+#endif
}
static void
@@ -277,12 +321,49 @@ ExaSolid(PixmapPtr pPixmap, int x0, int y0, int x1, int y1)
modesettingPtr ms = modesettingPTR(pScrn);
struct exa_context *exa = ms->exa;
struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap);
- struct pipe_surface *surf = exa_gpu_surface(exa, priv);
- exa->ctx->surface_fill(exa->ctx, surf, x0, y0, x1 - x0, y1 - y0,
- priv->color);
+ debug_printf("\tExaSolid(%d, %d, %d, %d)\n", x0, y0, x1, y1);
- exa->scrn->tex_surface_destroy(surf);
+#if 0
+ if (x0 == 0 && y0 == 0 &&
+ x1 == priv->tex->width[0] &&
+ y1 == priv->tex->height[0]) {
+ exa->ctx->clear(exa->pipe, PIPE_CLEAR_COLOR,
+ exa->solid_color, 1., 0);
+ } else
+#endif
+
+#if DEBUG_SOLID
+ exa->solid_color[0] = 0.f;
+ exa->solid_color[1] = 1.f;
+ exa->solid_color[2] = 0.f;
+ exa->solid_color[3] = 1.f;
+ xorg_solid(exa, priv, 0, 0, 1024, 768);
+ exa->solid_color[0] = 1.f;
+ exa->solid_color[1] = 0.f;
+ exa->solid_color[2] = 0.f;
+ exa->solid_color[3] = 1.f;
+ xorg_solid(exa, priv, 0, 0, 300, 300);
+ xorg_solid(exa, priv, 300, 300, 350, 350);
+ xorg_solid(exa, priv, 350, 350, 500, 500);
+
+ xorg_solid(exa, priv,
+ priv->tex->width[0] - 10,
+ priv->tex->height[0] - 10,
+ priv->tex->width[0],
+ priv->tex->height[0]);
+
+ exa->solid_color[0] = 0.f;
+ exa->solid_color[1] = 0.f;
+ exa->solid_color[2] = 1.f;
+ exa->solid_color[3] = 1.f;
+
+ exa->has_solid_color = FALSE;
+ ExaPrepareCopy(pPixmap, pPixmap, 0, 0, GXcopy, 0xffffffff);
+ ExaCopy(pPixmap, 0, 0, 50, 50, 500, 500);
+#else
+ xorg_solid(exa, priv, x0, y0, x1, y1) ;
+#endif
}
static Bool
@@ -295,10 +376,9 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap);
struct exa_pixmap_priv *src_priv = exaGetPixmapDriverPrivate(pSrcPixmap);
- if (alu != GXcopy)
- return FALSE;
+ debug_printf("ExaPrepareCopy\n");
- if (pSrcPixmap->drawable.depth < 15 || pDstPixmap->drawable.depth < 15)
+ if (alu != GXcopy)
return FALSE;
if (!EXA_PM_IS_SOLID(&pSrcPixmap->drawable, planeMask))
@@ -307,30 +387,47 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
if (!priv || !src_priv)
return FALSE;
+ if (!exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
+ priv->tex->target,
+ PIPE_TEXTURE_USAGE_RENDER_TARGET, 0) ||
+ !exa->scrn->is_format_supported(exa->scrn, src_priv->tex->format,
+ src_priv->tex->target,
+ PIPE_TEXTURE_USAGE_SAMPLER, 0))
+ return FALSE;
+
if (!priv->tex || !src_priv->tex)
return FALSE;
- if (!exa->ctx || !exa->ctx->surface_copy)
+ if (!exa->pipe)
return FALSE;
- priv->src_surf = exa_gpu_surface(exa, src_priv);
+ exa->copy.src = src_priv;
+ exa->copy.dst = priv;
+#if DISABLE_ACCEL
+ return FALSE;
+#else
return TRUE;
+#endif
}
static void
ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY,
int width, int height)
{
- ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
- modesettingPtr ms = modesettingPTR(pScrn);
- struct exa_context *exa = ms->exa;
- struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap);
- struct pipe_surface *surf = exa_gpu_surface(exa, priv);
+ ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
+ modesettingPtr ms = modesettingPTR(pScrn);
+ struct exa_context *exa = ms->exa;
+ struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap);
- exa->ctx->surface_copy(exa->ctx, surf, dstX, dstY, priv->src_surf,
- srcX, srcY, width, height);
- exa->scrn->tex_surface_destroy(surf);
+ debug_printf("\tExaCopy(srcx=%d, srcy=%d, dstX=%d, dstY=%d, w=%d, h=%d)\n",
+ srcX, srcY, dstX, dstY, width, height);
+
+ debug_assert(priv == exa->copy.dst);
+
+ xorg_copy_pixmap(exa, exa->copy.dst, dstX, dstY,
+ exa->copy.src, srcX, srcY,
+ width, height);
}
static Bool
@@ -341,12 +438,45 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture,
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
modesettingPtr ms = modesettingPTR(pScrn);
struct exa_context *exa = ms->exa;
+ struct exa_pixmap_priv *priv;
+
+ debug_printf("ExaPrepareComposite\n");
+
+ priv = exaGetPixmapDriverPrivate(pDst);
+ if (!priv || !priv->tex ||
+ !exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
+ priv->tex->target,
+ PIPE_TEXTURE_USAGE_RENDER_TARGET, 0))
+ return FALSE;
+
+ if (pSrc) {
+ priv = exaGetPixmapDriverPrivate(pSrc);
+ if (!priv || !priv->tex ||
+ !exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
+ priv->tex->target,
+ PIPE_TEXTURE_USAGE_SAMPLER, 0))
+ return FALSE;
+ }
+
+ if (pMask) {
+ priv = exaGetPixmapDriverPrivate(pMask);
+ if (!priv || !priv->tex ||
+ !exa->scrn->is_format_supported(exa->scrn, priv->tex->format,
+ priv->tex->target,
+ PIPE_TEXTURE_USAGE_SAMPLER, 0))
+ return FALSE;
+ }
+#if DISABLE_ACCEL
+ (void) exa;
+ return FALSE;
+#else
return xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture,
pDstPicture,
- exaGetPixmapDriverPrivate(pSrc),
- exaGetPixmapDriverPrivate(pMask),
+ pSrc ? exaGetPixmapDriverPrivate(pSrc) : NULL,
+ pMask ? exaGetPixmapDriverPrivate(pMask) : NULL,
exaGetPixmapDriverPrivate(pDst));
+#endif
}
static void
@@ -358,6 +488,8 @@ ExaComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
struct exa_context *exa = ms->exa;
struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDst);
+ debug_printf("\tExaComposite\n");
+
xorg_composite(exa, priv, srcX, srcY, maskX, maskY,
dstX, dstY, width, height);
}
@@ -367,10 +499,13 @@ ExaCheckComposite(int op,
PicturePtr pSrcPicture, PicturePtr pMaskPicture,
PicturePtr pDstPicture)
{
- return xorg_composite_accelerated(op,
- pSrcPicture,
- pMaskPicture,
- pDstPicture);
+ boolean accelerated = xorg_composite_accelerated(op,
+ pSrcPicture,
+ pMaskPicture,
+ pDstPicture);
+ debug_printf("ExaCheckComposite(%d, %p, %p, %p) = %d\n",
+ op, pSrcPicture, pMaskPicture, pDstPicture, accelerated);
+ return accelerated;
}
static void *
@@ -389,14 +524,11 @@ static void
ExaDestroyPixmap(ScreenPtr pScreen, void *dPriv)
{
struct exa_pixmap_priv *priv = (struct exa_pixmap_priv *)dPriv;
- ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
- modesettingPtr ms = modesettingPTR(pScrn);
if (!priv)
return;
- if (priv->tex)
- ms->screen->texture_destroy(priv->tex);
+ pipe_texture_reference(&priv->tex, NULL);
xfree(priv);
}
@@ -537,16 +669,16 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
if (priv->tex) {
struct pipe_surface *dst_surf;
-
- dst_surf = exa->scrn->get_tex_surface(exa->scrn, texture, 0, 0, 0,
- PIPE_BUFFER_USAGE_GPU_WRITE);
- priv->src_surf = exa_gpu_surface(exa, priv);
- exa->ctx->surface_copy(exa->ctx, dst_surf, 0, 0, priv->src_surf,
- 0, 0, min(width, texture->width[0]),
- min(height, texture->height[0]));
+ struct pipe_surface *src_surf;
+
+ dst_surf = exa->scrn->get_tex_surface(
+ exa->scrn, texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE);
+ src_surf = exa_gpu_surface(exa, priv);
+ exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf,
+ 0, 0, min(width, texture->width[0]),
+ min(height, texture->height[0]));
exa->scrn->tex_surface_destroy(dst_surf);
- exa->scrn->tex_surface_destroy(priv->src_surf);
- priv->src_surf = NULL;
+ exa->scrn->tex_surface_destroy(src_surf);
} else if (pPixmap->devPrivate.ptr) {
struct pipe_transfer *transfer;
@@ -563,6 +695,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
pPixmap->devKind, 0, 0);
exa->scrn->transfer_unmap(exa->scrn, transfer);
exa->scrn->tex_transfer_destroy(transfer);
+
+ xfree(pPixmap->devPrivate.ptr);
+ pPixmap->devPrivate.ptr = NULL;
}
}
#ifdef DRM_MODE_FEATURE_DIRTYFB
@@ -574,6 +709,8 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
#endif
pipe_texture_reference(&priv->tex, texture);
+ /* the texture we create has one reference */
+ pipe_texture_reference(&texture, NULL);
}
return TRUE;
@@ -611,8 +748,8 @@ xorg_exa_close(ScrnInfoPtr pScrn)
cso_destroy_context(exa->cso);
}
- if (exa->ctx)
- exa->ctx->destroy(exa->ctx);
+ if (exa->pipe)
+ exa->pipe->destroy(exa->pipe);
exaDriverFini(pScrn->pScreen);
xfree(exa);
@@ -645,6 +782,12 @@ xorg_exa_init(ScrnInfoPtr pScrn)
pExa->pixmapOffsetAlign = 0;
pExa->pixmapPitchAlign = 1;
pExa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_HANDLES_PIXMAPS;
+#ifdef EXA_SUPPORTS_PREPARE_AUX
+ pExa->flags |= EXA_SUPPORTS_PREPARE_AUX;
+#endif
+#ifdef EXA_MIXED_PIXMAPS
+ pExa->flags |= EXA_MIXED_PIXMAPS;
+#endif
pExa->maxX = 8191; /* FIXME */
pExa->maxY = 8191; /* FIXME */
@@ -674,13 +817,15 @@ xorg_exa_init(ScrnInfoPtr pScrn)
}
exa->scrn = ms->screen;
- exa->ctx = ms->api->create_context(ms->api, exa->scrn);
+ exa->pipe = ms->api->create_context(ms->api, exa->scrn);
/* Share context with DRI */
- ms->ctx = exa->ctx;
+ ms->ctx = exa->pipe;
- exa->cso = cso_create_context(exa->ctx);
+ exa->cso = cso_create_context(exa->pipe);
exa->shaders = xorg_shaders_create(exa);
+ xorg_exa_init_state(exa);
+
return (void *)exa;
out_err:
@@ -698,3 +843,19 @@ exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv)
}
+void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags,
+ struct pipe_fence_handle **fence)
+{
+ exa->pipe->flush(exa->pipe, pipeFlushFlags, fence);
+}
+
+void xorg_exa_finish(struct exa_context *exa)
+{
+ struct pipe_fence_handle *fence = NULL;
+
+ xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, &fence);
+
+ exa->pipe->screen->fence_finish(exa->pipe->screen, fence, 0);
+ exa->pipe->screen->fence_reference(exa->pipe->screen, &fence, NULL);
+}
+
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h
index 5b515be139..fe1f1cd103 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.h
+++ b/src/gallium/state_trackers/xorg/xorg_exa.h
@@ -14,7 +14,7 @@ struct xorg_shaders;
struct exa_context
{
ExaDriverPtr pExa;
- struct pipe_context *ctx;
+ struct pipe_context *pipe;
struct pipe_screen *scrn;
struct cso_context *cso;
struct xorg_shaders *shaders;
@@ -26,8 +26,17 @@ struct exa_context
int num_bound_samplers;
float solid_color[4];
-};
+ boolean has_solid_color;
+
+ struct {
+ struct exa_pixmap_priv *src;
+ struct exa_pixmap_priv *dst;
+ } copy;
+ /* we should combine these two */
+ float vertices2[4][2][4];
+ float vertices3[4][3][4];
+};
struct exa_pixmap_priv
{
@@ -36,8 +45,6 @@ struct exa_pixmap_priv
struct pipe_texture *tex;
struct pipe_texture *depth_stencil_tex;
- unsigned int color;
- struct pipe_surface *src_surf; /* for copies */
struct pipe_transfer *map_transfer;
unsigned map_count;
@@ -46,5 +53,8 @@ struct exa_pixmap_priv
struct pipe_surface *
exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv);
+void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags,
+ struct pipe_fence_handle **fence);
+void xorg_exa_finish(struct exa_context *exa);
#endif
diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
index cfee10c3b3..bb5a42af37 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
@@ -52,8 +52,7 @@ struct xorg_shaders {
static const char over_op[] =
"SUB TEMP[3], CONST[0].wwww, TEMP[1].wwww\n"
- "MUL TEMP[3], TEMP[0], TEMP[3]\n"
- "ADD TEMP[0], TEMP[3], TEMP[0]\n";
+ "MAD TEMP[3], TEMP[0], TEMP[3], TEMP[0]\n";
static INLINE void
@@ -79,8 +78,7 @@ vs_normalize_coords(struct ureg_program *ureg, struct ureg_src coords,
{
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
struct ureg_src ret;
- ureg_MUL(ureg, tmp, coords, const0);
- ureg_ADD(ureg, tmp, ureg_src(tmp), const1);
+ ureg_MAD(ureg, tmp, coords, const0, const1);
ret = ureg_src(tmp);
ureg_release_temporary(ureg, tmp);
return ret;
@@ -241,42 +239,39 @@ create_vs(struct pipe_context *pipe,
boolean is_fill = vs_traits & VS_FILL;
boolean is_composite = vs_traits & VS_COMPOSITE;
boolean has_mask = vs_traits & VS_MASK;
+ unsigned input_slot = 0;
ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
if (ureg == NULL)
return 0;
- const0 = ureg_DECL_constant(ureg);
- const1 = ureg_DECL_constant(ureg);
+ const0 = ureg_DECL_constant(ureg, 0);
+ const1 = ureg_DECL_constant(ureg, 1);
/* it has to be either a fill or a composite op */
debug_assert(is_fill ^ is_composite);
- src = ureg_DECL_vs_input(ureg,
- TGSI_SEMANTIC_POSITION, 0);
+ src = ureg_DECL_vs_input(ureg, input_slot++);
dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
src = vs_normalize_coords(ureg, src,
const0, const1);
ureg_MOV(ureg, dst, src);
-
if (is_composite) {
- src = ureg_DECL_vs_input(ureg,
- TGSI_SEMANTIC_GENERIC, 1);
- dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
+ src = ureg_DECL_vs_input(ureg, input_slot++);
+ dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
ureg_MOV(ureg, dst, src);
}
+
if (is_fill) {
- src = ureg_DECL_vs_input(ureg,
- TGSI_SEMANTIC_COLOR, 1);
- dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1);
+ src = ureg_DECL_vs_input(ureg, input_slot++);
+ dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
ureg_MOV(ureg, dst, src);
}
if (has_mask) {
- src = ureg_DECL_vs_input(ureg,
- TGSI_SEMANTIC_GENERIC, 2);
- dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 2);
+ src = ureg_DECL_vs_input(ureg, input_slot++);
+ dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 2);
ureg_MOV(ureg, dst, src);
}
@@ -315,11 +310,11 @@ create_fs(struct pipe_context *pipe,
if (is_composite) {
src_sampler = ureg_DECL_sampler(ureg, 0);
src_input = ureg_DECL_fs_input(ureg,
- TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC,
0,
TGSI_INTERPOLATE_PERSPECTIVE);
- }
- if (is_fill) {
+ } else {
+ debug_assert(is_fill);
if (is_solid)
src_input = ureg_DECL_fs_input(ureg,
TGSI_SEMANTIC_COLOR,
@@ -335,7 +330,7 @@ create_fs(struct pipe_context *pipe,
if (has_mask) {
mask_sampler = ureg_DECL_sampler(ureg, 1);
mask_pos = ureg_DECL_fs_input(ureg,
- TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC,
1,
TGSI_INTERPOLATE_PERSPECTIVE);
}
@@ -370,11 +365,11 @@ create_fs(struct pipe_context *pipe,
else
src = out;
- coords = ureg_DECL_constant(ureg);
- const0124 = ureg_DECL_constant(ureg);
- matrow0 = ureg_DECL_constant(ureg);
- matrow1 = ureg_DECL_constant(ureg);
- matrow2 = ureg_DECL_constant(ureg);
+ coords = ureg_DECL_constant(ureg, 0);
+ const0124 = ureg_DECL_constant(ureg, 1);
+ matrow0 = ureg_DECL_constant(ureg, 2);
+ matrow1 = ureg_DECL_constant(ureg, 3);
+ matrow2 = ureg_DECL_constant(ureg, 4);
if (is_lingrad) {
linear_gradient(ureg, src,
@@ -470,12 +465,12 @@ struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc,
unsigned vs_traits,
unsigned fs_traits)
{
- struct xorg_shader shader = {0};
+ struct xorg_shader shader = { NULL, NULL };
void *vs, *fs;
- vs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_VERTEX,
+ vs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_VERTEX,
sc->vs_hash, vs_traits);
- fs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_FRAGMENT,
+ fs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_FRAGMENT,
sc->fs_hash, fs_traits);
debug_assert(vs && fs);
diff --git a/src/gallium/state_trackers/xorg/xorg_output.c b/src/gallium/state_trackers/xorg/xorg_output.c
index 950af942f5..26f45f8d64 100644
--- a/src/gallium/state_trackers/xorg/xorg_output.c
+++ b/src/gallium/state_trackers/xorg/xorg_output.c
@@ -42,8 +42,12 @@
#include <sys/stat.h>
#include <sys/types.h>
+#ifdef HAVE_XEXTPROTO_71
+#include <X11/extensions/dpmsconst.h>
+#else
#define DPMS_SERVER
#include <X11/extensions/dpms.h>
+#endif
#include "X11/Xatom.h"
diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h
index b1ab783a15..2f7050bcb7 100644
--- a/src/gallium/state_trackers/xorg/xorg_tracker.h
+++ b/src/gallium/state_trackers/xorg/xorg_tracker.h
@@ -151,5 +151,11 @@ crtc_cursor_destroy(xf86CrtcPtr crtc);
void
output_init(ScrnInfoPtr pScrn);
+/***********************************************************************
+ * xorg_xv.c
+ */
+void
+xorg_init_video(ScreenPtr pScreen);
+
#endif /* _XORG_TRACKER_H_ */
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
new file mode 100644
index 0000000000..88955d47fd
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -0,0 +1,212 @@
+#include "xorg_tracker.h"
+
+#include <xf86xv.h>
+#include <X11/extensions/Xv.h>
+#include <fourcc.h>
+
+/*XXX get these from pipe's texture limits */
+#define IMAGE_MAX_WIDTH 2048
+#define IMAGE_MAX_HEIGHT 2048
+
+#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)
+
+static Atom xvBrightness, xvContrast;
+
+#define NUM_TEXTURED_ATTRIBUTES 2
+static XF86AttributeRec TexturedAttributes[NUM_TEXTURED_ATTRIBUTES] = {
+ {XvSettable | XvGettable, -128, 127, "XV_BRIGHTNESS"},
+ {XvSettable | XvGettable, 0, 255, "XV_CONTRAST"}
+};
+
+#define NUM_FORMATS 3
+static XF86VideoFormatRec Formats[NUM_FORMATS] = {
+ {15, TrueColor}, {16, TrueColor}, {24, TrueColor}
+};
+
+static XF86VideoEncodingRec DummyEncoding[1] = {
+ {
+ 0,
+ "XV_IMAGE",
+ IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT,
+ {1, 1}
+ }
+};
+
+#define NUM_IMAGES 2
+static XF86ImageRec Images[NUM_IMAGES] = {
+ XVIMAGE_UYVY,
+ XVIMAGE_YUY2,
+};
+
+struct xorg_xv_port_priv {
+ RegionRec clip;
+};
+
+
+static void
+stop_video(ScrnInfoPtr pScrn, pointer data, Bool shutdown)
+{
+}
+
+static int
+set_port_attribute(ScrnInfoPtr pScrn,
+ Atom attribute, INT32 value, pointer data)
+{
+ return 0;
+}
+
+static int
+get_port_attribute(ScrnInfoPtr pScrn,
+ Atom attribute, INT32 * value, pointer data)
+{
+ return 0;
+}
+
+static void
+query_best_size(ScrnInfoPtr pScrn,
+ Bool motion,
+ short vid_w, short vid_h,
+ short drw_w, short drw_h,
+ unsigned int *p_w, unsigned int *p_h, pointer data)
+{
+}
+
+static int
+put_image(ScrnInfoPtr pScrn,
+ short src_x, short src_y,
+ short drw_x, short drw_y,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ int id, unsigned char *buf,
+ short width, short height,
+ Bool sync, RegionPtr clipBoxes, pointer data,
+ DrawablePtr pDraw)
+{
+ return 0;
+}
+
+static int
+query_image_attributes(ScrnInfoPtr pScrn,
+ int id,
+ unsigned short *w, unsigned short *h,
+ int *pitches, int *offsets)
+{
+ return 0;
+}
+
+static struct xorg_xv_port_priv *
+port_priv_create(ScreenPtr pScreen)
+{
+ /*ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];*/
+ /*modesettingPtr ms = modesettingPTR(pScrn);*/
+ struct xorg_xv_port_priv *priv = NULL;
+
+ priv = calloc(1, sizeof(struct xorg_xv_port_priv));
+
+ if (!priv)
+ return NULL;
+
+ REGION_NULL(pScreen, &priv->clip);
+
+ return priv;
+}
+
+static XF86VideoAdaptorPtr
+xorg_setup_textured_adapter(ScreenPtr pScreen)
+{
+ /*ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];*/
+ /*modesettingPtr ms = modesettingPTR(pScrn);*/
+ XF86VideoAdaptorPtr adapt;
+ XF86AttributePtr attrs;
+ DevUnion *dev_unions;
+ int nports = 16, i;
+ int nattributes;
+
+ nattributes = NUM_TEXTURED_ATTRIBUTES;
+
+ adapt = calloc(1, sizeof(XF86VideoAdaptorRec));
+ dev_unions = calloc(nports, sizeof(DevUnion));
+ attrs = calloc(nattributes, sizeof(XF86AttributeRec));
+ if (adapt == NULL || dev_unions == NULL || attrs == NULL) {
+ free(adapt);
+ free(dev_unions);
+ free(attrs);
+ return NULL;
+ }
+
+ adapt->type = XvWindowMask | XvInputMask | XvImageMask;
+ adapt->flags = 0;
+ adapt->name = "Gallium3D Textured Video";
+ adapt->nEncodings = 1;
+ adapt->pEncodings = DummyEncoding;
+ adapt->nFormats = NUM_FORMATS;
+ adapt->pFormats = Formats;
+ adapt->nPorts = 0;
+ adapt->pPortPrivates = dev_unions;
+ adapt->nAttributes = nattributes;
+ adapt->pAttributes = attrs;
+ memcpy(attrs, TexturedAttributes, nattributes * sizeof(XF86AttributeRec));
+ adapt->nImages = NUM_IMAGES;
+ adapt->pImages = Images;
+ adapt->PutVideo = NULL;
+ adapt->PutStill = NULL;
+ adapt->GetVideo = NULL;
+ adapt->GetStill = NULL;
+ adapt->StopVideo = stop_video;
+ adapt->SetPortAttribute = set_port_attribute;
+ adapt->GetPortAttribute = get_port_attribute;
+ adapt->QueryBestSize = query_best_size;
+ adapt->PutImage = put_image;
+ adapt->QueryImageAttributes = query_image_attributes;
+
+ for (i = 0; i < nports; i++) {
+ struct xorg_xv_port_priv *priv =
+ port_priv_create(pScreen);
+
+ adapt->pPortPrivates[i].ptr = (pointer) (priv);
+ adapt->nPorts++;
+ }
+
+ return adapt;
+}
+
+void
+xorg_init_video(ScreenPtr pScreen)
+{
+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+ /*modesettingPtr ms = modesettingPTR(pScrn);*/
+ XF86VideoAdaptorPtr *adaptors, *new_adaptors = NULL;
+ XF86VideoAdaptorPtr textured_adapter;
+ int num_adaptors;
+
+ num_adaptors = xf86XVListGenericAdaptors(pScrn, &adaptors);
+ new_adaptors = malloc((num_adaptors + 1) * sizeof(XF86VideoAdaptorPtr *));
+ if (new_adaptors == NULL)
+ return;
+
+ memcpy(new_adaptors, adaptors, num_adaptors * sizeof(XF86VideoAdaptorPtr));
+ adaptors = new_adaptors;
+
+ /* Add the adaptors supported by our hardware. First, set up the atoms
+ * that will be used by both output adaptors.
+ */
+ xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
+ xvContrast = MAKE_ATOM("XV_CONTRAST");
+
+ textured_adapter = xorg_setup_textured_adapter(pScreen);
+
+ debug_assert(textured_adapter);
+
+ if (textured_adapter) {
+ adaptors[num_adaptors++] = textured_adapter;
+ }
+
+ if (num_adaptors) {
+ xf86XVScreenInit(pScreen, adaptors, num_adaptors);
+ } else {
+ xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
+ "Disabling Xv because no adaptors could be initialized.\n");
+ }
+
+ free(adaptors);
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/Makefile b/src/gallium/state_trackers/xorg/xvmc/Makefile
new file mode 100644
index 0000000000..126dc6d58f
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/Makefile
@@ -0,0 +1,16 @@
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = xvmctracker
+
+LIBRARY_INCLUDES = \
+ $(shell pkg-config --cflags-only-I xvmc) \
+ -I$(TOP)/src/gallium/winsys/g3dvl
+
+C_SOURCES = block.c \
+ surface.c \
+ context.c \
+ subpicture.c \
+ attributes.c
+
+include ../../../Makefile.template
diff --git a/src/gallium/state_trackers/xorg/xvmc/SConscript b/src/gallium/state_trackers/xorg/xvmc/SConscript
new file mode 100644
index 0000000000..cb25d68bd8
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/SConscript
@@ -0,0 +1,27 @@
+#######################################################################
+# SConscript for xvmc state_tracker
+
+Import('*')
+
+if 'xorg/xvmc' in env['statetrackers']:
+
+ env = env.Clone()
+
+ env.Append(CPPPATH = [
+ '#/src/gallium/include',
+ '#/src/gallium/auxiliary',
+ '#/src/gallium/winsys/g3dvl',
+ ])
+
+ env.ParseConfig('pkg-config --cflags --libs xvmc')
+
+ st_xvmc = env.ConvenienceLibrary(
+ target = 'st_xvmc',
+ source = [ 'block.c',
+ 'surface.c',
+ 'context.c',
+ 'subpicture.c',
+ 'attributes.c',
+ ]
+ )
+ Export('st_xvmc')
diff --git a/src/gallium/state_trackers/xorg/xvmc/attributes.c b/src/gallium/state_trackers/xorg/xvmc/attributes.c
new file mode 100644
index 0000000000..638da0b577
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/attributes.c
@@ -0,0 +1,19 @@
+#include <assert.h>
+#include <X11/Xlib.h>
+#include <X11/extensions/Xvlib.h>
+#include <X11/extensions/XvMClib.h>
+
+XvAttribute* XvMCQueryAttributes(Display *dpy, XvMCContext *context, int *number)
+{
+ return NULL;
+}
+
+Status XvMCSetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int value)
+{
+ return BadImplementation;
+}
+
+Status XvMCGetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int *value)
+{
+ return BadImplementation;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/block.c b/src/gallium/state_trackers/xorg/xvmc/block.c
new file mode 100644
index 0000000000..78fddfb79e
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/block.c
@@ -0,0 +1,61 @@
+#include <assert.h>
+#include <X11/Xlib.h>
+#include <X11/extensions/XvMClib.h>
+#include <util/u_memory.h>
+#include "xvmc_private.h"
+
+Status XvMCCreateBlocks(Display *dpy, XvMCContext *context, unsigned int num_blocks, XvMCBlockArray *blocks)
+{
+ assert(dpy);
+
+ if (!context)
+ return XvMCBadContext;
+ if (num_blocks == 0)
+ return BadValue;
+
+ assert(blocks);
+
+ blocks->context_id = context->context_id;
+ blocks->num_blocks = num_blocks;
+ blocks->blocks = MALLOC(BLOCK_SIZE_BYTES * num_blocks);
+ blocks->privData = NULL;
+
+ return Success;
+}
+
+Status XvMCDestroyBlocks(Display *dpy, XvMCBlockArray *blocks)
+{
+ assert(dpy);
+ assert(blocks);
+ FREE(blocks->blocks);
+
+ return Success;
+}
+
+Status XvMCCreateMacroBlocks(Display *dpy, XvMCContext *context, unsigned int num_blocks, XvMCMacroBlockArray *blocks)
+{
+ assert(dpy);
+
+ if (!context)
+ return XvMCBadContext;
+ if (num_blocks == 0)
+ return BadValue;
+
+ assert(blocks);
+
+ blocks->context_id = context->context_id;
+ blocks->num_blocks = num_blocks;
+ blocks->macro_blocks = MALLOC(sizeof(XvMCMacroBlock) * num_blocks);
+ blocks->privData = NULL;
+
+ return Success;
+}
+
+Status XvMCDestroyMacroBlocks(Display *dpy, XvMCMacroBlockArray *blocks)
+{
+ assert(dpy);
+ assert(blocks);
+ FREE(blocks->macro_blocks);
+
+ return Success;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
new file mode 100644
index 0000000000..6d90dfc367
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -0,0 +1,214 @@
+#include <assert.h>
+#include <X11/Xlibint.h>
+#include <X11/extensions/XvMClib.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_video_context.h>
+#include <pipe/p_video_state.h>
+#include <pipe/p_state.h>
+#include <vl_winsys.h>
+#include <util/u_memory.h>
+#include <util/u_debug.h>
+#include "xvmc_private.h"
+
+static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
+ unsigned int width, unsigned int height, int flags,
+ bool *found_port, int *screen, int *chroma_format,
+ int *mc_type, int *surface_flags)
+{
+ bool found_surface = false;
+ XvAdaptorInfo *adaptor_info;
+ unsigned int num_adaptors;
+ int num_types;
+ unsigned int max_width, max_height;
+ Status ret;
+
+ assert(dpy);
+ assert(found_port);
+ assert(screen);
+ assert(chroma_format);
+ assert(mc_type);
+ assert(surface_flags);
+
+ *found_port = false;
+
+ for (unsigned int i = 0; i < XScreenCount(dpy); ++i) {
+ ret = XvQueryAdaptors(dpy, XRootWindow(dpy, i), &num_adaptors, &adaptor_info);
+ if (ret != Success)
+ return ret;
+
+ for (unsigned int j = 0; j < num_adaptors && !*found_port; ++j) {
+ for (unsigned int k = 0; k < adaptor_info[j].num_ports && !*found_port; ++k) {
+ XvMCSurfaceInfo *surface_info;
+
+ if (adaptor_info[j].base_id + k != port)
+ continue;
+
+ *found_port = true;
+
+ surface_info = XvMCListSurfaceTypes(dpy, adaptor_info[j].base_id, &num_types);
+ if (!surface_info) {
+ XvFreeAdaptorInfo(adaptor_info);
+ return BadAlloc;
+ }
+
+ for (unsigned int l = 0; l < num_types && !found_surface; ++l) {
+ if (surface_info[l].surface_type_id != surface_type_id)
+ continue;
+
+ found_surface = true;
+ max_width = surface_info[l].max_width;
+ max_height = surface_info[l].max_height;
+ *chroma_format = surface_info[l].chroma_format;
+ *mc_type = surface_info[l].mc_type;
+ *surface_flags = surface_info[l].flags;
+ *screen = i;
+ }
+
+ XFree(surface_info);
+ }
+ }
+
+ XvFreeAdaptorInfo(adaptor_info);
+ }
+
+ if (!*found_port)
+ return XvBadPort;
+ if (!found_surface)
+ return BadMatch;
+ if (width > max_width || height > max_height)
+ return BadValue;
+ if (flags != XVMC_DIRECT && flags != 0)
+ return BadValue;
+
+ return Success;
+}
+
+static enum pipe_video_profile ProfileToPipe(int xvmc_profile)
+{
+ if (xvmc_profile & XVMC_MPEG_1)
+ assert(0);
+ if (xvmc_profile & XVMC_MPEG_2)
+ return PIPE_VIDEO_PROFILE_MPEG2_MAIN;
+ if (xvmc_profile & XVMC_H263)
+ assert(0);
+ if (xvmc_profile & XVMC_MPEG_4)
+ assert(0);
+
+ assert(0);
+
+ return -1;
+}
+
+static enum pipe_video_chroma_format FormatToPipe(int xvmc_format)
+{
+ switch (xvmc_format) {
+ case XVMC_CHROMA_FORMAT_420:
+ return PIPE_VIDEO_CHROMA_FORMAT_420;
+ case XVMC_CHROMA_FORMAT_422:
+ return PIPE_VIDEO_CHROMA_FORMAT_422;
+ case XVMC_CHROMA_FORMAT_444:
+ return PIPE_VIDEO_CHROMA_FORMAT_444;
+ default:
+ assert(0);
+ }
+
+ return -1;
+}
+
+Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
+ int width, int height, int flags, XvMCContext *context)
+{
+ bool found_port;
+ int scrn;
+ int chroma_format;
+ int mc_type;
+ int surface_flags;
+ Status ret;
+ struct pipe_screen *screen;
+ struct pipe_video_context *vpipe;
+ XvMCContextPrivate *context_priv;
+
+ assert(dpy);
+
+ if (!context)
+ return XvMCBadContext;
+
+ ret = Validate(dpy, port, surface_type_id, width, height, flags,
+ &found_port, &scrn, &chroma_format, &mc_type, &surface_flags);
+
+ /* Success and XvBadPort have the same value */
+ if (ret != Success || !found_port)
+ return ret;
+
+ /* XXX: Current limits */
+ if (chroma_format != XVMC_CHROMA_FORMAT_420) {
+ debug_printf("[XvMCg3dvl] Cannot decode requested surface type. Unsupported chroma format.\n");
+ return BadImplementation;
+ }
+ if (mc_type != (XVMC_MOCOMP | XVMC_MPEG_2)) {
+ debug_printf("[XvMCg3dvl] Cannot decode requested surface type. Non-MPEG2/Mocomp acceleration unsupported.\n");
+ return BadImplementation;
+ }
+ if (!(surface_flags & XVMC_INTRA_UNSIGNED)) {
+ debug_printf("[XvMCg3dvl] Cannot decode requested surface type. Signed intra unsupported.\n");
+ return BadImplementation;
+ }
+
+ context_priv = CALLOC(1, sizeof(XvMCContextPrivate));
+ if (!context_priv)
+ return BadAlloc;
+
+ /* TODO: Reuse screen if process creates another context */
+ screen = vl_screen_create(dpy, scrn);
+
+ if (!screen) {
+ FREE(context_priv);
+ return BadAlloc;
+ }
+
+ vpipe = vl_video_create(screen, ProfileToPipe(mc_type),
+ FormatToPipe(chroma_format), width, height);
+
+ if (!vpipe) {
+ screen->destroy(screen);
+ FREE(context_priv);
+ return BadAlloc;
+ }
+
+ context_priv->vpipe = vpipe;
+
+ context->context_id = XAllocID(dpy);
+ context->surface_type_id = surface_type_id;
+ context->width = width;
+ context->height = height;
+ context->flags = flags;
+ context->port = port;
+ context->privData = context_priv;
+
+ SyncHandle();
+
+ return Success;
+}
+
+Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
+{
+ struct pipe_screen *screen;
+ struct pipe_video_context *vpipe;
+ XvMCContextPrivate *context_priv;
+
+ assert(dpy);
+
+ if (!context || !context->privData)
+ return XvMCBadContext;
+
+ context_priv = context->privData;
+ vpipe = context_priv->vpipe;
+ pipe_surface_reference(&context_priv->backbuffer, NULL);
+ screen = vpipe->screen;
+ vpipe->destroy(vpipe);
+ screen->destroy(screen);
+ FREE(context_priv);
+ context->privData = NULL;
+
+ return Success;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
new file mode 100644
index 0000000000..78ba618f5a
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -0,0 +1,168 @@
+#include <assert.h>
+#include <X11/Xlibint.h>
+#include <X11/extensions/XvMClib.h>
+
+Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *subpicture,
+ unsigned short width, unsigned short height, int xvimage_id)
+{
+ assert(dpy);
+
+ if (!context)
+ return XvMCBadContext;
+
+ assert(subpicture);
+
+ /*if (width > || height > )
+ return BadValue;*/
+
+ /*if (xvimage_id != )
+ return BadMatch;*/
+
+ subpicture->subpicture_id = XAllocID(dpy);
+ subpicture->context_id = context->context_id;
+ subpicture->xvimage_id = xvimage_id;
+ subpicture->width = width;
+ subpicture->height = height;
+ subpicture->num_palette_entries = 0;
+ subpicture->entry_bytes = 0;
+ subpicture->component_order[0] = 0;
+ subpicture->component_order[1] = 0;
+ subpicture->component_order[2] = 0;
+ subpicture->component_order[3] = 0;
+ /* TODO: subpicture->privData = ;*/
+
+ SyncHandle();
+
+ return Success;
+}
+
+Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, short y,
+ unsigned short width, unsigned short height, unsigned int color)
+{
+ assert(dpy);
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ /* TODO: Assert clear rect is within bounds? Or clip? */
+
+ return Success;
+}
+
+Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage *image,
+ short srcx, short srcy, unsigned short width, unsigned short height,
+ short dstx, short dsty)
+{
+ assert(dpy);
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ assert(image);
+
+ if (subpicture->xvimage_id != image->id)
+ return BadMatch;
+
+ /* TODO: Assert rects are within bounds? Or clip? */
+
+ return Success;
+}
+
+Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
+{
+ assert(dpy);
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ return BadImplementation;
+}
+
+Status XvMCSetSubpicturePalette(Display *dpy, XvMCSubpicture *subpicture, unsigned char *palette)
+{
+ assert(dpy);
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ assert(palette);
+
+ /* We don't support paletted subpictures */
+ return BadMatch;
+}
+
+Status XvMCBlendSubpicture(Display *dpy, XvMCSurface *target_surface, XvMCSubpicture *subpicture,
+ short subx, short suby, unsigned short subw, unsigned short subh,
+ short surfx, short surfy, unsigned short surfw, unsigned short surfh)
+{
+ assert(dpy);
+
+ if (!target_surface)
+ return XvMCBadSurface;
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ if (target_surface->context_id != subpicture->context_id)
+ return BadMatch;
+
+ /* TODO: Assert rects are within bounds? Or clip? */
+ return Success;
+}
+
+Status XvMCBlendSubpicture2(Display *dpy, XvMCSurface *source_surface, XvMCSurface *target_surface,
+ XvMCSubpicture *subpicture, short subx, short suby, unsigned short subw, unsigned short subh,
+ short surfx, short surfy, unsigned short surfw, unsigned short surfh)
+{
+ assert(dpy);
+
+ if (!source_surface || !target_surface)
+ return XvMCBadSurface;
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ if (source_surface->context_id != subpicture->context_id)
+ return BadMatch;
+
+ if (source_surface->context_id != subpicture->context_id)
+ return BadMatch;
+
+ /* TODO: Assert rects are within bounds? Or clip? */
+ return Success;
+}
+
+Status XvMCSyncSubpicture(Display *dpy, XvMCSubpicture *subpicture)
+{
+ assert(dpy);
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ return Success;
+}
+
+Status XvMCFlushSubpicture(Display *dpy, XvMCSubpicture *subpicture)
+{
+ assert(dpy);
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ return Success;
+}
+
+Status XvMCGetSubpictureStatus(Display *dpy, XvMCSubpicture *subpicture, int *status)
+{
+ assert(dpy);
+
+ if (!subpicture)
+ return XvMCBadSubpicture;
+
+ assert(status);
+
+ /* TODO */
+ *status = 0;
+
+ return Success;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
new file mode 100644
index 0000000000..6b7dbf11dc
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -0,0 +1,382 @@
+#include <assert.h>
+#include <X11/Xlibint.h>
+#include <pipe/p_video_context.h>
+#include <pipe/p_video_state.h>
+#include <pipe/p_state.h>
+#include <util/u_memory.h>
+#include "xvmc_private.h"
+
+static enum pipe_mpeg12_macroblock_type TypeToPipe(int xvmc_mb_type)
+{
+ if (xvmc_mb_type & XVMC_MB_TYPE_INTRA)
+ return PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
+ if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_FORWARD)
+ return PIPE_MPEG12_MACROBLOCK_TYPE_FWD;
+ if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_BACKWARD)
+ return PIPE_MPEG12_MACROBLOCK_TYPE_BKWD;
+ if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD))
+ return PIPE_MPEG12_MACROBLOCK_TYPE_BI;
+
+ assert(0);
+
+ return -1;
+}
+
+static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
+{
+ switch (xvmc_pic) {
+ case XVMC_TOP_FIELD:
+ return PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP;
+ case XVMC_BOTTOM_FIELD:
+ return PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM;
+ case XVMC_FRAME_PICTURE:
+ return PIPE_MPEG12_PICTURE_TYPE_FRAME;
+ default:
+ assert(0);
+ }
+
+ return -1;
+}
+
+static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_dct_type)
+{
+ switch (xvmc_motion_type) {
+ case XVMC_PREDICTION_FRAME:
+ return xvmc_dct_type == XVMC_DCT_TYPE_FIELD ?
+ PIPE_MPEG12_MOTION_TYPE_16x8 : PIPE_MPEG12_MOTION_TYPE_FRAME;
+ case XVMC_PREDICTION_FIELD:
+ return PIPE_MPEG12_MOTION_TYPE_FIELD;
+ case XVMC_PREDICTION_DUAL_PRIME:
+ return PIPE_MPEG12_MOTION_TYPE_DUALPRIME;
+ default:
+ assert(0);
+ }
+
+ return -1;
+}
+
+static bool
+CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, unsigned int height,
+ struct pipe_surface **backbuffer)
+{
+ struct pipe_texture template;
+ struct pipe_texture *tex;
+
+ assert(vpipe);
+
+ if (*backbuffer) {
+ if ((*backbuffer)->width != width || (*backbuffer)->height != height)
+ pipe_surface_reference(backbuffer, NULL);
+ else
+ return true;
+ }
+
+ memset(&template, 0, sizeof(struct pipe_texture));
+ template.target = PIPE_TEXTURE_2D;
+ /* XXX: Needs to match the drawable's format? */
+ template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+ template.last_level = 0;
+ template.width[0] = width;
+ template.height[0] = height;
+ template.depth[0] = 1;
+ pf_get_block(template.format, &template.block);
+ template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
+
+ tex = vpipe->screen->texture_create(vpipe->screen, &template);
+ if (!tex)
+ return false;
+
+ *backbuffer = vpipe->screen->get_tex_surface(vpipe->screen, tex, 0, 0, 0,
+ PIPE_BUFFER_USAGE_GPU_READ |
+ PIPE_BUFFER_USAGE_GPU_WRITE);
+ pipe_texture_reference(&tex, NULL);
+
+ if (!*backbuffer)
+ return false;
+
+ /* Clear the backbuffer in case the video doesn't cover the whole window */
+ /* FIXME: Need to clear every time a frame moves and leaves dirty rects */
+ vpipe->clear_surface(vpipe, 0, 0, width, height, 0, *backbuffer);
+
+ return true;
+}
+
+static void
+MacroBlocksToPipe(const XvMCMacroBlockArray *xvmc_macroblocks,
+ const XvMCBlockArray *xvmc_blocks,
+ unsigned int first_macroblock,
+ unsigned int num_macroblocks,
+ struct pipe_mpeg12_macroblock *pipe_macroblocks)
+{
+ unsigned int i, j, k, l;
+ XvMCMacroBlock *xvmc_mb;
+
+ assert(xvmc_macroblocks);
+ assert(xvmc_blocks);
+ assert(pipe_macroblocks);
+ assert(num_macroblocks);
+
+ xvmc_mb = xvmc_macroblocks->macro_blocks + first_macroblock;
+
+ for (i = 0; i < num_macroblocks; ++i) {
+ pipe_macroblocks->base.codec = PIPE_VIDEO_CODEC_MPEG12;
+ pipe_macroblocks->mbx = xvmc_mb->x;
+ pipe_macroblocks->mby = xvmc_mb->y;
+ pipe_macroblocks->mb_type = TypeToPipe(xvmc_mb->macroblock_type);
+ if (pipe_macroblocks->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA)
+ pipe_macroblocks->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_mb->dct_type);
+ /* Get rid of Valgrind 'undefined' warnings */
+ else
+ pipe_macroblocks->mo_type = -1;
+ pipe_macroblocks->dct_type = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
+ PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
+
+ for (j = 0; j < 2; ++j)
+ for (k = 0; k < 2; ++k)
+ for (l = 0; l < 2; ++l)
+ pipe_macroblocks->pmv[j][k][l] = xvmc_mb->PMV[j][k][l];
+
+ pipe_macroblocks->cbp = xvmc_mb->coded_block_pattern;
+ pipe_macroblocks->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+
+ ++pipe_macroblocks;
+ ++xvmc_mb;
+ }
+}
+
+Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
+{
+ XvMCContextPrivate *context_priv;
+ struct pipe_video_context *vpipe;
+ XvMCSurfacePrivate *surface_priv;
+ struct pipe_video_surface *vsfc;
+
+ assert(dpy);
+
+ if (!context)
+ return XvMCBadContext;
+ if (!surface)
+ return XvMCBadSurface;
+
+ context_priv = context->privData;
+ vpipe = context_priv->vpipe;
+
+ surface_priv = CALLOC(1, sizeof(XvMCSurfacePrivate));
+ if (!surface_priv)
+ return BadAlloc;
+
+ vsfc = vpipe->screen->video_surface_create(vpipe->screen, vpipe->chroma_format,
+ vpipe->width, vpipe->height);
+ if (!vsfc) {
+ FREE(surface_priv);
+ return BadAlloc;
+ }
+
+ surface_priv->pipe_vsfc = vsfc;
+ surface_priv->context = context;
+
+ surface->surface_id = XAllocID(dpy);
+ surface->context_id = context->context_id;
+ surface->surface_type_id = context->surface_type_id;
+ surface->width = context->width;
+ surface->height = context->height;
+ surface->privData = surface_priv;
+
+ SyncHandle();
+
+ return Success;
+}
+
+Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int picture_structure,
+ XvMCSurface *target_surface, XvMCSurface *past_surface, XvMCSurface *future_surface,
+ unsigned int flags, unsigned int num_macroblocks, unsigned int first_macroblock,
+ XvMCMacroBlockArray *macroblocks, XvMCBlockArray *blocks
+)
+{
+ struct pipe_video_context *vpipe;
+ struct pipe_surface *t_vsfc;
+ struct pipe_surface *p_vsfc;
+ struct pipe_surface *f_vsfc;
+ XvMCContextPrivate *context_priv;
+ XvMCSurfacePrivate *target_surface_priv;
+ XvMCSurfacePrivate *past_surface_priv;
+ XvMCSurfacePrivate *future_surface_priv;
+ struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
+
+ assert(dpy);
+
+ if (!context || !context->privData)
+ return XvMCBadContext;
+ if (!target_surface || !target_surface->privData)
+ return XvMCBadSurface;
+
+ if (picture_structure != XVMC_TOP_FIELD &&
+ picture_structure != XVMC_BOTTOM_FIELD &&
+ picture_structure != XVMC_FRAME_PICTURE)
+ return BadValue;
+ /* Bkwd pred equivalent to fwd (past && !future) */
+ if (future_surface && !past_surface)
+ return BadMatch;
+
+ assert(context->context_id == target_surface->context_id);
+ assert(!past_surface || context->context_id == past_surface->context_id);
+ assert(!future_surface || context->context_id == future_surface->context_id);
+
+ assert(macroblocks);
+ assert(blocks);
+
+ assert(macroblocks->context_id == context->context_id);
+ assert(blocks->context_id == context->context_id);
+
+ assert(flags == 0 || flags == XVMC_SECOND_FIELD);
+
+ target_surface_priv = target_surface->privData;
+ past_surface_priv = past_surface ? past_surface->privData : NULL;
+ future_surface_priv = future_surface ? future_surface->privData : NULL;
+
+ assert(target_surface_priv->context == context);
+ assert(!past_surface || past_surface_priv->context == context);
+ assert(!future_surface || future_surface_priv->context == context);
+
+ context_priv = context->privData;
+ vpipe = context_priv->vpipe;
+
+ t_vsfc = target_surface_priv->pipe_vsfc;
+ p_vsfc = past_surface ? past_surface_priv->pipe_vsfc : NULL;
+ f_vsfc = future_surface ? future_surface_priv->pipe_vsfc : NULL;
+
+ MacroBlocksToPipe(macroblocks, blocks, first_macroblock,
+ num_macroblocks, pipe_macroblocks);
+
+ vpipe->set_decode_target(vpipe, t_vsfc);
+ vpipe->decode_macroblocks(vpipe, p_vsfc, f_vsfc, num_macroblocks,
+ &pipe_macroblocks->base, target_surface_priv->render_fence);
+
+ return Success;
+}
+
+Status XvMCFlushSurface(Display *dpy, XvMCSurface *surface)
+{
+ assert(dpy);
+
+ if (!surface)
+ return XvMCBadSurface;
+
+ return Success;
+}
+
+Status XvMCSyncSurface(Display *dpy, XvMCSurface *surface)
+{
+ assert(dpy);
+
+ if (!surface)
+ return XvMCBadSurface;
+
+ return Success;
+}
+
+Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
+ short srcx, short srcy, unsigned short srcw, unsigned short srch,
+ short destx, short desty, unsigned short destw, unsigned short desth,
+ int flags)
+{
+ Window root;
+ int x, y;
+ unsigned int width, height;
+ unsigned int border_width;
+ unsigned int depth;
+ struct pipe_video_context *vpipe;
+ XvMCSurfacePrivate *surface_priv;
+ XvMCContextPrivate *context_priv;
+ XvMCContext *context;
+ struct pipe_video_rect src_rect = {srcx, srcy, srcw, srch};
+ struct pipe_video_rect dst_rect = {destx, desty, destw, desth};
+
+ assert(dpy);
+
+ if (!surface || !surface->privData)
+ return XvMCBadSurface;
+
+ if (XGetGeometry(dpy, drawable, &root, &x, &y, &width, &height, &border_width, &depth) == BadDrawable)
+ return BadDrawable;
+
+ assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
+ assert(srcx + srcw - 1 < surface->width);
+ assert(srcy + srch - 1 < surface->height);
+ /*
+ * Some apps (mplayer) hit these asserts because they call
+ * this function after the window has been resized by the WM
+ * but before they've handled the corresponding XEvent and
+ * know about the new dimensions. The output should be clipped
+ * until the app updates destw and desth.
+ */
+ /*
+ assert(destx + destw - 1 < width);
+ assert(desty + desth - 1 < height);
+ */
+
+ surface_priv = surface->privData;
+ context = surface_priv->context;
+ context_priv = context->privData;
+ vpipe = context_priv->vpipe;
+
+ if (!CreateOrResizeBackBuffer(vpipe, width, height, &context_priv->backbuffer))
+ return BadAlloc;
+
+ vpipe->render_picture(vpipe, surface_priv->pipe_vsfc, PictureToPipe(flags), &src_rect,
+ context_priv->backbuffer, &dst_rect, surface_priv->disp_fence);
+
+ vl_video_bind_drawable(vpipe, drawable);
+
+ vpipe->screen->flush_frontbuffer
+ (
+ vpipe->screen,
+ context_priv->backbuffer,
+ vpipe->priv
+ );
+
+ return Success;
+}
+
+Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
+{
+ assert(dpy);
+
+ if (!surface)
+ return XvMCBadSurface;
+
+ assert(status);
+
+ *status = 0;
+
+ return Success;
+}
+
+Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
+{
+ XvMCSurfacePrivate *surface_priv;
+
+ assert(dpy);
+
+ if (!surface || !surface->privData)
+ return XvMCBadSurface;
+
+ surface_priv = surface->privData;
+ pipe_video_surface_reference(&surface_priv->pipe_vsfc, NULL);
+ FREE(surface_priv);
+ surface->privData = NULL;
+
+ return Success;
+}
+
+Status XvMCHideSurface(Display *dpy, XvMCSurface *surface)
+{
+ assert(dpy);
+
+ if (!surface || !surface->privData)
+ return XvMCBadSurface;
+
+ /* No op, only for overlaid rendering */
+
+ return Success;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/.gitignore b/src/gallium/state_trackers/xorg/xvmc/tests/.gitignore
new file mode 100644
index 0000000000..e1d2f9023d
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/.gitignore
@@ -0,0 +1,5 @@
+test_context
+test_surface
+test_blocks
+test_rendering
+xvmc_bench
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/Makefile b/src/gallium/state_trackers/xorg/xvmc/tests/Makefile
new file mode 100644
index 0000000000..c875dd7605
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/Makefile
@@ -0,0 +1,28 @@
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBS = -lXvMCW -lXvMC -lXv -lX11
+
+#############################################
+
+.PHONY: default clean
+
+default: test_context test_surface test_blocks test_rendering xvmc_bench
+
+test_context: test_context.o testlib.o
+ $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+
+test_surface: test_surface.o testlib.o
+ $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+
+test_blocks: test_blocks.o testlib.o
+ $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+
+test_rendering: test_rendering.o testlib.o
+ $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+
+xvmc_bench: xvmc_bench.o testlib.o
+ $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+
+clean:
+ $(RM) -rf *.o test_context test_surface test_blocks test_rendering xvmc_bench
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c
new file mode 100644
index 0000000000..dc80adfa65
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c
@@ -0,0 +1,84 @@
+#include <assert.h>
+#include <error.h>
+#include "testlib.h"
+
+int main(int argc, char **argv)
+{
+ const unsigned int width = 16, height = 16;
+ const unsigned int min_required_blocks = 1, min_required_macroblocks = 1;
+ const unsigned int mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+
+ Display *display;
+ XvPortID port_num;
+ int surface_type_id;
+ unsigned int is_overlay, intra_unsigned;
+ int colorkey;
+ XvMCContext context;
+ XvMCSurface surface;
+ XvMCBlockArray blocks = {0};
+ XvMCMacroBlockArray macroblocks = {0};
+
+ display = XOpenDisplay(NULL);
+
+ if (!GetPort
+ (
+ display,
+ width,
+ height,
+ XVMC_CHROMA_FORMAT_420,
+ mc_types,
+ 2,
+ &port_num,
+ &surface_type_id,
+ &is_overlay,
+ &intra_unsigned
+ ))
+ {
+ XCloseDisplay(display);
+ error(1, 0, "Error, unable to find a good port.\n");
+ }
+
+ if (is_overlay)
+ {
+ Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+ XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+ }
+
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, &context) == Success);
+ assert(XvMCCreateSurface(display, &context, &surface) == Success);
+
+ /* Test NULL context */
+ assert(XvMCCreateBlocks(display, NULL, 1, &blocks) == XvMCBadContext);
+ /* Test 0 blocks */
+ assert(XvMCCreateBlocks(display, &context, 0, &blocks) == BadValue);
+ /* Test valid params */
+ assert(XvMCCreateBlocks(display, &context, min_required_blocks, &blocks) == Success);
+ /* Test context id assigned and correct */
+ assert(blocks.context_id == context.context_id);
+ /* Test number of blocks assigned and correct */
+ assert(blocks.num_blocks == min_required_blocks);
+ /* Test block pointer valid */
+ assert(blocks.blocks != NULL);
+ /* Test NULL context */
+ assert(XvMCCreateMacroBlocks(display, NULL, 1, &macroblocks) == XvMCBadContext);
+ /* Test 0 macroblocks */
+ assert(XvMCCreateMacroBlocks(display, &context, 0, &macroblocks) == BadValue);
+ /* Test valid params */
+ assert(XvMCCreateMacroBlocks(display, &context, min_required_macroblocks, &macroblocks) == Success);
+ /* Test context id assigned and correct */
+ assert(macroblocks.context_id == context.context_id);
+ /* Test macroblock pointer valid */
+ assert(macroblocks.macro_blocks != NULL);
+ /* Test valid params */
+ assert(XvMCDestroyMacroBlocks(display, &macroblocks) == Success);
+ /* Test valid params */
+ assert(XvMCDestroyBlocks(display, &blocks) == Success);
+
+ assert(XvMCDestroySurface(display, &surface) == Success);
+ assert(XvMCDestroyContext(display, &context) == Success);
+
+ XvUngrabPort(display, port_num, CurrentTime);
+ XCloseDisplay(display);
+
+ return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c
new file mode 100644
index 0000000000..53f7449cd0
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c
@@ -0,0 +1,92 @@
+#include <assert.h>
+#include <error.h>
+#include "testlib.h"
+
+int main(int argc, char **argv)
+{
+ const unsigned int width = 16, height = 16;
+ const unsigned int mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+
+ Display *display;
+ XvPortID port_num;
+ int surface_type_id;
+ unsigned int is_overlay, intra_unsigned;
+ int colorkey;
+ XvMCContext context = {0};
+
+ display = XOpenDisplay(NULL);
+
+ if (!GetPort
+ (
+ display,
+ width,
+ height,
+ XVMC_CHROMA_FORMAT_420,
+ mc_types,
+ 2,
+ &port_num,
+ &surface_type_id,
+ &is_overlay,
+ &intra_unsigned
+ ))
+ {
+ XCloseDisplay(display);
+ error(1, 0, "Error, unable to find a good port.\n");
+ }
+
+ if (is_overlay)
+ {
+ Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+ XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+ }
+
+ /* Test NULL context */
+ /* XXX: XvMCBadContext not a valid return for XvMCCreateContext in the XvMC API, but openChrome driver returns it */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, NULL) == XvMCBadContext);
+ /* Test invalid port */
+ /* XXX: Success and XvBadPort have the same value, if this call actually gets passed the validation step as of now we'll crash later */
+ assert(XvMCCreateContext(display, -1, surface_type_id, width, height, XVMC_DIRECT, &context) == XvBadPort);
+ /* Test invalid surface */
+ assert(XvMCCreateContext(display, port_num, -1, width, height, XVMC_DIRECT, &context) == BadMatch);
+ /* Test invalid flags */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, -1, &context) == BadValue);
+ /* Test huge width */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, 16384, height, XVMC_DIRECT, &context) == BadValue);
+ /* Test huge height */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width, 16384, XVMC_DIRECT, &context) == BadValue);
+ /* Test huge width & height */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, 16384, 16384, XVMC_DIRECT, &context) == BadValue);
+ /* Test valid params */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, &context) == Success);
+ /* Test context id assigned */
+ assert(context.context_id != 0);
+ /* Test surface type id assigned and correct */
+ assert(context.surface_type_id == surface_type_id);
+ /* Test width & height assigned and correct */
+ assert(context.width == width && context.height == height);
+ /* Test port assigned and correct */
+ assert(context.port == port_num);
+ /* Test flags assigned and correct */
+ assert(context.flags == XVMC_DIRECT);
+ /* Test NULL context */
+ assert(XvMCDestroyContext(display, NULL) == XvMCBadContext);
+ /* Test valid params */
+ assert(XvMCDestroyContext(display, &context) == Success);
+ /* Test awkward but valid width */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width + 1, height, XVMC_DIRECT, &context) == Success);
+ assert(context.width >= width + 1);
+ assert(XvMCDestroyContext(display, &context) == Success);
+ /* Test awkward but valid height */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width, height + 1, XVMC_DIRECT, &context) == Success);
+ assert(context.height >= height + 1);
+ assert(XvMCDestroyContext(display, &context) == Success);
+ /* Test awkward but valid width & height */
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width + 1, height + 1, XVMC_DIRECT, &context) == Success);
+ assert(context.width >= width + 1 && context.height >= height + 1);
+ assert(XvMCDestroyContext(display, &context) == Success);
+
+ XvUngrabPort(display, port_num, CurrentTime);
+ XCloseDisplay(display);
+
+ return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c
new file mode 100644
index 0000000000..6d720dfcdc
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c
@@ -0,0 +1,290 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <error.h>
+#include "testlib.h"
+
+#define BLOCK_WIDTH 8
+#define BLOCK_HEIGHT 8
+#define BLOCK_SIZE (BLOCK_WIDTH * BLOCK_HEIGHT)
+#define MACROBLOCK_WIDTH 16
+#define MACROBLOCK_HEIGHT 16
+#define MACROBLOCK_WIDTH_IN_BLOCKS (MACROBLOCK_WIDTH / BLOCK_WIDTH)
+#define MACROBLOCK_HEIGHT_IN_BLOCKS (MACROBLOCK_HEIGHT / BLOCK_HEIGHT)
+#define BLOCKS_PER_MACROBLOCK 6
+
+#define INPUT_WIDTH 16
+#define INPUT_HEIGHT 16
+#define INPUT_WIDTH_IN_MACROBLOCKS (INPUT_WIDTH / MACROBLOCK_WIDTH)
+#define INPUT_HEIGHT_IN_MACROBLOCKS (INPUT_HEIGHT / MACROBLOCK_HEIGHT)
+#define NUM_MACROBLOCKS (INPUT_WIDTH_IN_MACROBLOCKS * INPUT_HEIGHT_IN_MACROBLOCKS)
+
+#define DEFAULT_OUTPUT_WIDTH INPUT_WIDTH
+#define DEFAULT_OUTPUT_HEIGHT INPUT_HEIGHT
+#define DEFAULT_ACCEPTABLE_ERR 0.01
+
+void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int *output_height, double *acceptable_error, int *prompt);
+void Gradient(short *block, unsigned int start, unsigned int stop, int horizontal);
+
+void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int *output_height, double *acceptable_error, int *prompt)
+{
+ int fail = 0;
+ int i;
+
+ *output_width = DEFAULT_OUTPUT_WIDTH;
+ *output_height = DEFAULT_OUTPUT_WIDTH;
+ *acceptable_error = DEFAULT_ACCEPTABLE_ERR;
+ *prompt = 1;
+
+ for (i = 1; i < argc && !fail; ++i)
+ {
+ if (!strcmp(argv[i], "-w"))
+ {
+ if (sscanf(argv[++i], "%u", output_width) != 1)
+ fail = 1;
+ }
+ else if (!strcmp(argv[i], "-h"))
+ {
+ if (sscanf(argv[++i], "%u", output_height) != 1)
+ fail = 1;
+ }
+ else if (!strcmp(argv[i], "-e"))
+ {
+ if (sscanf(argv[++i], "%lf", acceptable_error) != 1)
+ fail = 1;
+ }
+ else if (strcmp(argv[i], "-n"))
+ *prompt = 0;
+ else
+ fail = 1;
+ }
+
+ if (fail)
+ error
+ (
+ 1, 0,
+ "Bad argument.\n"
+ "\n"
+ "Usage: %s [options]\n"
+ "\t-w <width>\tOutput width\n"
+ "\t-h <height>\tOutput height\n"
+ "\t-e <error>\tAcceptable margin of error per pixel, from 0 to 1\n"
+ "\t-n\tDon't prompt for quit\n",
+ argv[0]
+ );
+}
+
+void Gradient(short *block, unsigned int start, unsigned int stop, int horizontal)
+{
+ unsigned int x, y;
+ unsigned int range = stop - start;
+
+ if (horizontal)
+ {
+ for (y = 0; y < BLOCK_HEIGHT; ++y)
+ for (x = 0; x < BLOCK_WIDTH; ++x)
+ block[y * BLOCK_WIDTH + x] = (short)(start + range * (x / (float)(BLOCK_WIDTH - 1)));
+ }
+ else
+ {
+ for (y = 0; y < BLOCK_HEIGHT; ++y)
+ for (x = 0; x < BLOCK_WIDTH; ++x)
+ block[y * BLOCK_WIDTH + x] = (short)(start + range * (y / (float)(BLOCK_HEIGHT - 1)));
+ }
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int output_width;
+ unsigned int output_height;
+ double acceptable_error;
+ int prompt;
+ Display *display;
+ Window root, window;
+ const unsigned int mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+ XvPortID port_num;
+ int surface_type_id;
+ unsigned int is_overlay, intra_unsigned;
+ int colorkey;
+ XvMCContext context;
+ XvMCSurface surface;
+ XvMCBlockArray block_array;
+ XvMCMacroBlockArray mb_array;
+ int mbx, mby, bx, by;
+ XvMCMacroBlock *mb;
+ short *blocks;
+ int quit = 0;
+
+ ParseArgs(argc, argv, &output_width, &output_height, &acceptable_error, &prompt);
+
+ display = XOpenDisplay(NULL);
+
+ if (!GetPort
+ (
+ display,
+ INPUT_WIDTH,
+ INPUT_HEIGHT,
+ XVMC_CHROMA_FORMAT_420,
+ mc_types,
+ 2,
+ &port_num,
+ &surface_type_id,
+ &is_overlay,
+ &intra_unsigned
+ ))
+ {
+ XCloseDisplay(display);
+ error(1, 0, "Error, unable to find a good port.\n");
+ }
+
+ if (is_overlay)
+ {
+ Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+ XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+ }
+
+ root = XDefaultRootWindow(display);
+ window = XCreateSimpleWindow(display, root, 0, 0, output_width, output_height, 0, 0, colorkey);
+
+ assert(XvMCCreateContext(display, port_num, surface_type_id, INPUT_WIDTH, INPUT_HEIGHT, XVMC_DIRECT, &context) == Success);
+ assert(XvMCCreateSurface(display, &context, &surface) == Success);
+ assert(XvMCCreateBlocks(display, &context, NUM_MACROBLOCKS * BLOCKS_PER_MACROBLOCK, &block_array) == Success);
+ assert(XvMCCreateMacroBlocks(display, &context, NUM_MACROBLOCKS, &mb_array) == Success);
+
+ mb = mb_array.macro_blocks;
+ blocks = block_array.blocks;
+
+ for (mby = 0; mby < INPUT_HEIGHT_IN_MACROBLOCKS; ++mby)
+ for (mbx = 0; mbx < INPUT_WIDTH_IN_MACROBLOCKS; ++mbx)
+ {
+ mb->x = mbx;
+ mb->y = mby;
+ mb->macroblock_type = XVMC_MB_TYPE_INTRA;
+ /*mb->motion_type = ;*/
+ /*mb->motion_vertical_field_select = ;*/
+ mb->dct_type = XVMC_DCT_TYPE_FRAME;
+ /*mb->PMV[0][0][0] = ;
+ mb->PMV[0][0][1] = ;
+ mb->PMV[0][1][0] = ;
+ mb->PMV[0][1][1] = ;
+ mb->PMV[1][0][0] = ;
+ mb->PMV[1][0][1] = ;
+ mb->PMV[1][1][0] = ;
+ mb->PMV[1][1][1] = ;*/
+ mb->index = (mby * INPUT_WIDTH_IN_MACROBLOCKS + mbx) * BLOCKS_PER_MACROBLOCK;
+ mb->coded_block_pattern = 0x3F;
+
+ mb++;
+
+ for (by = 0; by < MACROBLOCK_HEIGHT_IN_BLOCKS; ++by)
+ for (bx = 0; bx < MACROBLOCK_WIDTH_IN_BLOCKS; ++bx)
+ {
+ const int start = 16, stop = 235, range = stop - start;
+
+ Gradient
+ (
+ blocks,
+ (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))),
+ (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))),
+ 1
+ );
+
+ blocks += BLOCK_SIZE;
+ }
+
+ for (by = 0; by < MACROBLOCK_HEIGHT_IN_BLOCKS / 2; ++by)
+ for (bx = 0; bx < MACROBLOCK_WIDTH_IN_BLOCKS / 2; ++bx)
+ {
+ const int start = 16, stop = 240, range = stop - start;
+
+ Gradient
+ (
+ blocks,
+ (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))),
+ (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))),
+ 1
+ );
+
+ blocks += BLOCK_SIZE;
+
+ Gradient
+ (
+ blocks,
+ (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))),
+ (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))),
+ 1
+ );
+
+ blocks += BLOCK_SIZE;
+ }
+ }
+
+ XSelectInput(display, window, ExposureMask | KeyPressMask);
+ XMapWindow(display, window);
+ XSync(display, 0);
+
+ /* Test NULL context */
+ assert(XvMCRenderSurface(display, NULL, XVMC_FRAME_PICTURE, &surface, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == XvMCBadContext);
+ /* Test NULL surface */
+ assert(XvMCRenderSurface(display, &context, XVMC_FRAME_PICTURE, NULL, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == XvMCBadSurface);
+ /* Test bad picture structure */
+ assert(XvMCRenderSurface(display, &context, 0, &surface, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == BadValue);
+ /* Test valid params */
+ assert(XvMCRenderSurface(display, &context, XVMC_FRAME_PICTURE, &surface, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == Success);
+
+ /* Test NULL surface */
+ assert(XvMCPutSurface(display, NULL, window, 0, 0, INPUT_WIDTH, INPUT_HEIGHT, 0, 0, output_width, output_height, XVMC_FRAME_PICTURE) == XvMCBadSurface);
+ /* Test bad window */
+ /* XXX: X halts with a bad drawable for some reason, doesn't return BadDrawable as expected */
+ /*assert(XvMCPutSurface(display, &surface, 0, 0, 0, width, height, 0, 0, width, height, XVMC_FRAME_PICTURE) == BadDrawable);*/
+
+ if (prompt)
+ {
+ puts("Press any button to quit...");
+
+ while (!quit)
+ {
+ if (XPending(display) > 0)
+ {
+ XEvent event;
+
+ XNextEvent(display, &event);
+
+ switch (event.type)
+ {
+ case Expose:
+ {
+ /* Test valid params */
+ assert
+ (
+ XvMCPutSurface
+ (
+ display, &surface, window,
+ 0, 0, INPUT_WIDTH, INPUT_HEIGHT,
+ 0, 0, output_width, output_height,
+ XVMC_FRAME_PICTURE
+ ) == Success
+ );
+ break;
+ }
+ case KeyPress:
+ {
+ quit = 1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ assert(XvMCDestroyBlocks(display, &block_array) == Success);
+ assert(XvMCDestroyMacroBlocks(display, &mb_array) == Success);
+ assert(XvMCDestroySurface(display, &surface) == Success);
+ assert(XvMCDestroyContext(display, &context) == Success);
+
+ XvUngrabPort(display, port_num, CurrentTime);
+ XDestroyWindow(display, window);
+ XCloseDisplay(display);
+
+ return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c
new file mode 100644
index 0000000000..06948201ac
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c
@@ -0,0 +1,71 @@
+#include <assert.h>
+#include <error.h>
+#include "testlib.h"
+
+int main(int argc, char **argv)
+{
+ const unsigned int width = 16, height = 16;
+ const unsigned int mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+
+ Display *display;
+ XvPortID port_num;
+ int surface_type_id;
+ unsigned int is_overlay, intra_unsigned;
+ int colorkey;
+ XvMCContext context;
+ XvMCSurface surface = {0};
+
+ display = XOpenDisplay(NULL);
+
+ if (!GetPort
+ (
+ display,
+ width,
+ height,
+ XVMC_CHROMA_FORMAT_420,
+ mc_types,
+ 2,
+ &port_num,
+ &surface_type_id,
+ &is_overlay,
+ &intra_unsigned
+ ))
+ {
+ XCloseDisplay(display);
+ error(1, 0, "Error, unable to find a good port.\n");
+ }
+
+ if (is_overlay)
+ {
+ Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+ XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+ }
+
+ assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, &context) == Success);
+
+ /* Test NULL context */
+ assert(XvMCCreateSurface(display, NULL, &surface) == XvMCBadContext);
+ /* Test NULL surface */
+ assert(XvMCCreateSurface(display, &context, NULL) == XvMCBadSurface);
+ /* Test valid params */
+ assert(XvMCCreateSurface(display, &context, &surface) == Success);
+ /* Test surface id assigned */
+ assert(surface.surface_id != 0);
+ /* Test context id assigned and correct */
+ assert(surface.context_id == context.context_id);
+ /* Test surface type id assigned and correct */
+ assert(surface.surface_type_id == surface_type_id);
+ /* Test width & height assigned and correct */
+ assert(surface.width == width && surface.height == height);
+ /* Test valid params */
+ assert(XvMCDestroySurface(display, &surface) == Success);
+ /* Test NULL surface */
+ assert(XvMCDestroySurface(display, NULL) == XvMCBadSurface);
+
+ assert(XvMCDestroyContext(display, &context) == Success);
+
+ XvUngrabPort(display, port_num, CurrentTime);
+ XCloseDisplay(display);
+
+ return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c
new file mode 100644
index 0000000000..59a03ca813
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c
@@ -0,0 +1,119 @@
+#include "testlib.h"
+#include <stdio.h>
+
+/*
+void test(int pred, const char *pred_string, const char *doc_string, const char *file, unsigned int line)
+{
+ fputs(doc_string, stderr);
+ if (!pred)
+ fprintf(stderr, " FAIL!\n\t\"%s\" at %s:%u\n", pred_string, file, line);
+ else
+ fputs(" PASS!\n", stderr);
+}
+*/
+
+int GetPort
+(
+ Display *display,
+ unsigned int width,
+ unsigned int height,
+ unsigned int chroma_format,
+ const unsigned int *mc_types,
+ unsigned int num_mc_types,
+ XvPortID *port_id,
+ int *surface_type_id,
+ unsigned int *is_overlay,
+ unsigned int *intra_unsigned
+)
+{
+ unsigned int found_port = 0;
+ XvAdaptorInfo *adaptor_info;
+ unsigned int num_adaptors;
+ int num_types;
+ int ev_base, err_base;
+ unsigned int i, j, k, l;
+
+ if (!XvMCQueryExtension(display, &ev_base, &err_base))
+ return 0;
+ if (XvQueryAdaptors(display, XDefaultRootWindow(display), &num_adaptors, &adaptor_info) != Success)
+ return 0;
+
+ for (i = 0; i < num_adaptors && !found_port; ++i)
+ {
+ if (adaptor_info[i].type & XvImageMask)
+ {
+ XvMCSurfaceInfo *surface_info = XvMCListSurfaceTypes(display, adaptor_info[i].base_id, &num_types);
+
+ if (surface_info)
+ {
+ for (j = 0; j < num_types && !found_port; ++j)
+ {
+ if
+ (
+ surface_info[j].chroma_format == chroma_format &&
+ surface_info[j].max_width >= width &&
+ surface_info[j].max_height >= height
+ )
+ {
+ for (k = 0; k < num_mc_types && !found_port; ++k)
+ {
+ if (surface_info[j].mc_type == mc_types[k])
+ {
+ for (l = 0; l < adaptor_info[i].num_ports && !found_port; ++l)
+ {
+ if (XvGrabPort(display, adaptor_info[i].base_id + l, CurrentTime) == Success)
+ {
+ *port_id = adaptor_info[i].base_id + l;
+ *surface_type_id = surface_info[j].surface_type_id;
+ *is_overlay = surface_info[j].flags & XVMC_OVERLAID_SURFACE;
+ *intra_unsigned = surface_info[j].flags & XVMC_INTRA_UNSIGNED;
+ found_port = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ XFree(surface_info);
+ }
+ }
+ }
+
+ XvFreeAdaptorInfo(adaptor_info);
+
+ return found_port;
+}
+
+unsigned int align(unsigned int value, unsigned int alignment)
+{
+ return (value + alignment - 1) & ~(alignment - 1);
+}
+
+/* From the glibc manual */
+int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y)
+{
+ /* Perform the carry for the later subtraction by updating y. */
+ if (x->tv_usec < y->tv_usec)
+ {
+ int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
+ y->tv_usec -= 1000000 * nsec;
+ y->tv_sec += nsec;
+ }
+ if (x->tv_usec - y->tv_usec > 1000000)
+ {
+ int nsec = (x->tv_usec - y->tv_usec) / 1000000;
+ y->tv_usec += 1000000 * nsec;
+ y->tv_sec -= nsec;
+ }
+
+ /*
+ * Compute the time remaining to wait.
+ * tv_usec is certainly positive.
+ */
+ result->tv_sec = x->tv_sec - y->tv_sec;
+ result->tv_usec = x->tv_usec - y->tv_usec;
+
+ /* Return 1 if result is negative. */
+ return x->tv_sec < y->tv_sec;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h
new file mode 100644
index 0000000000..af71ad74e1
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h
@@ -0,0 +1,42 @@
+#ifndef testlib_h
+#define testlib_h
+
+/*
+#define TEST(pred, doc) test(pred, #pred, doc, __FILE__, __LINE__)
+
+void test(int pred, const char *pred_string, const char *doc_string, const char *file, unsigned int line);
+*/
+
+#include <sys/time.h>
+#include <X11/Xlib.h>
+#include <X11/extensions/XvMClib.h>
+
+/*
+ * display: IN A valid X display
+ * width, height: IN Surface size that the port must display
+ * chroma_format: IN Chroma format that the port must display
+ * mc_types, num_mc_types: IN List of MC types that the port must support, first port that matches the first mc_type will be returned
+ * port_id: OUT Your port's ID
+ * surface_type_id: OUT Your port's surface ID
+ * is_overlay: OUT If 1, port uses overlay surfaces, you need to set a colorkey
+ * intra_unsigned: OUT If 1, port uses unsigned values for intra-coded blocks
+ */
+int GetPort
+(
+ Display *display,
+ unsigned int width,
+ unsigned int height,
+ unsigned int chroma_format,
+ const unsigned int *mc_types,
+ unsigned int num_mc_types,
+ XvPortID *port_id,
+ int *surface_type_id,
+ unsigned int *is_overlay,
+ unsigned int *intra_unsigned
+);
+
+unsigned int align(unsigned int value, unsigned int alignment);
+
+int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y);
+
+#endif
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c b/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c
new file mode 100644
index 0000000000..97adcfc58a
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c
@@ -0,0 +1,273 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <error.h>
+#include <sys/time.h>
+#include "testlib.h"
+
+#define MACROBLOCK_WIDTH 16
+#define MACROBLOCK_HEIGHT 16
+#define BLOCKS_PER_MACROBLOCK 6
+
+#define DEFAULT_INPUT_WIDTH 720
+#define DEFAULT_INPUT_HEIGHT 480
+#define DEFAULT_REPS 100
+
+#define PIPELINE_STEP_MC 1
+#define PIPELINE_STEP_CSC 2
+#define PIPELINE_STEP_SWAP 4
+
+#define MB_TYPE_I 1
+#define MB_TYPE_P 2
+#define MB_TYPE_B 4
+
+struct Config
+{
+ unsigned int input_width;
+ unsigned int input_height;
+ unsigned int output_width;
+ unsigned int output_height;
+ unsigned int pipeline;
+ unsigned int mb_types;
+ unsigned int reps;
+};
+
+void ParseArgs(int argc, char **argv, struct Config *config);
+
+void ParseArgs(int argc, char **argv, struct Config *config)
+{
+ int fail = 0;
+ int i;
+
+ config->input_width = DEFAULT_INPUT_WIDTH;
+ config->input_height = DEFAULT_INPUT_HEIGHT;
+ config->output_width = 0;
+ config->output_height = 0;
+ config->pipeline = 0;
+ config->mb_types = 0;
+ config->reps = DEFAULT_REPS;
+
+ for (i = 1; i < argc && !fail; ++i)
+ {
+ if (!strcmp(argv[i], "-iw"))
+ {
+ if (sscanf(argv[++i], "%u", &config->input_width) != 1)
+ fail = 1;
+ }
+ else if (!strcmp(argv[i], "-ih"))
+ {
+ if (sscanf(argv[++i], "%u", &config->input_height) != 1)
+ fail = 1;
+ }
+ else if (!strcmp(argv[i], "-ow"))
+ {
+ if (sscanf(argv[++i], "%u", &config->output_width) != 1)
+ fail = 1;
+ }
+ else if (!strcmp(argv[i], "-oh"))
+ {
+ if (sscanf(argv[++i], "%u", &config->output_height) != 1)
+ fail = 1;
+ }
+ else if (!strcmp(argv[i], "-p"))
+ {
+ char *token = strtok(argv[++i], ",");
+
+ while (token && !fail)
+ {
+ if (!strcmp(token, "mc"))
+ config->pipeline |= PIPELINE_STEP_MC;
+ else if (!strcmp(token, "csc"))
+ config->pipeline |= PIPELINE_STEP_CSC;
+ else if (!strcmp(token, "swp"))
+ config->pipeline |= PIPELINE_STEP_SWAP;
+ else
+ fail = 1;
+
+ if (!fail)
+ token = strtok(NULL, ",");
+ }
+ }
+ else if (!strcmp(argv[i], "-mb"))
+ {
+ char *token = strtok(argv[++i], ",");
+
+ while (token && !fail)
+ {
+ if (strcmp(token, "i"))
+ config->mb_types |= MB_TYPE_I;
+ else if (strcmp(token, "p"))
+ config->mb_types |= MB_TYPE_P;
+ else if (strcmp(token, "b"))
+ config->mb_types |= MB_TYPE_B;
+ else
+ fail = 1;
+
+ if (!fail)
+ token = strtok(NULL, ",");
+ }
+ }
+ else if (!strcmp(argv[i], "-r"))
+ {
+ if (sscanf(argv[++i], "%u", &config->reps) != 1)
+ fail = 1;
+ }
+ else
+ fail = 1;
+ }
+
+ if (fail)
+ error
+ (
+ 1, 0,
+ "Bad argument.\n"
+ "\n"
+ "Usage: %s [options]\n"
+ "\t-iw <width>\tInput width\n"
+ "\t-ih <height>\tInput height\n"
+ "\t-ow <width>\tOutput width\n"
+ "\t-oh <height>\tOutput height\n"
+ "\t-p <pipeline>\tPipeline to test\n"
+ "\t-mb <mb type>\tMacroBlock types to use\n"
+ "\t-r <reps>\tRepetitions\n\n"
+ "\tPipeline steps: mc,csc,swap\n"
+ "\tMB types: i,p,b\n",
+ argv[0]
+ );
+
+ if (config->output_width == 0)
+ config->output_width = config->input_width;
+ if (config->output_height == 0)
+ config->output_height = config->input_height;
+ if (!config->pipeline)
+ config->pipeline = PIPELINE_STEP_MC | PIPELINE_STEP_CSC | PIPELINE_STEP_SWAP;
+ if (!config->mb_types)
+ config->mb_types = MB_TYPE_I | MB_TYPE_P | MB_TYPE_B;
+}
+
+int main(int argc, char **argv)
+{
+ struct Config config;
+ Display *display;
+ Window root, window;
+ const unsigned int mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+ XvPortID port_num;
+ int surface_type_id;
+ unsigned int is_overlay, intra_unsigned;
+ int colorkey;
+ XvMCContext context;
+ XvMCSurface surface;
+ XvMCBlockArray block_array;
+ XvMCMacroBlockArray mb_array;
+ unsigned int mbw, mbh;
+ unsigned int mbx, mby;
+ unsigned int reps;
+ struct timeval start, stop, diff;
+ double diff_secs;
+
+ ParseArgs(argc, argv, &config);
+
+ mbw = align(config.input_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
+ mbh = align(config.input_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
+
+ display = XOpenDisplay(NULL);
+
+ if (!GetPort
+ (
+ display,
+ config.input_width,
+ config.input_height,
+ XVMC_CHROMA_FORMAT_420,
+ mc_types,
+ 2,
+ &port_num,
+ &surface_type_id,
+ &is_overlay,
+ &intra_unsigned
+ ))
+ {
+ XCloseDisplay(display);
+ error(1, 0, "Error, unable to find a good port.\n");
+ }
+
+ if (is_overlay)
+ {
+ Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+ XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+ }
+
+ root = XDefaultRootWindow(display);
+ window = XCreateSimpleWindow(display, root, 0, 0, config.output_width, config.output_height, 0, 0, colorkey);
+
+ assert(XvMCCreateContext(display, port_num, surface_type_id, config.input_width, config.input_height, XVMC_DIRECT, &context) == Success);
+ assert(XvMCCreateSurface(display, &context, &surface) == Success);
+ assert(XvMCCreateBlocks(display, &context, mbw * mbh * BLOCKS_PER_MACROBLOCK, &block_array) == Success);
+ assert(XvMCCreateMacroBlocks(display, &context, mbw * mbh, &mb_array) == Success);
+
+ for (mby = 0; mby < mbh; ++mby)
+ for (mbx = 0; mbx < mbw; ++mbx)
+ {
+ mb_array.macro_blocks[mby * mbw + mbx].x = mbx;
+ mb_array.macro_blocks[mby * mbw + mbx].y = mby;
+ mb_array.macro_blocks[mby * mbw + mbx].macroblock_type = XVMC_MB_TYPE_INTRA;
+ /*mb->motion_type = ;*/
+ /*mb->motion_vertical_field_select = ;*/
+ mb_array.macro_blocks[mby * mbw + mbx].dct_type = XVMC_DCT_TYPE_FRAME;
+ /*mb->PMV[0][0][0] = ;
+ mb->PMV[0][0][1] = ;
+ mb->PMV[0][1][0] = ;
+ mb->PMV[0][1][1] = ;
+ mb->PMV[1][0][0] = ;
+ mb->PMV[1][0][1] = ;
+ mb->PMV[1][1][0] = ;
+ mb->PMV[1][1][1] = ;*/
+ mb_array.macro_blocks[mby * mbw + mbx].index = (mby * mbw + mbx) * BLOCKS_PER_MACROBLOCK;
+ mb_array.macro_blocks[mby * mbw + mbx].coded_block_pattern = 0x3F;
+ }
+
+ XSelectInput(display, window, ExposureMask | KeyPressMask);
+ XMapWindow(display, window);
+ XSync(display, 0);
+
+ gettimeofday(&start, NULL);
+
+ for (reps = 0; reps < config.reps; ++reps)
+ {
+ if (config.pipeline & PIPELINE_STEP_MC)
+ {
+ assert(XvMCRenderSurface(display, &context, XVMC_FRAME_PICTURE, &surface, NULL, NULL, 0, mbw * mbh, 0, &mb_array, &block_array) == Success);
+ assert(XvMCFlushSurface(display, &surface) == Success);
+ }
+ if (config.pipeline & PIPELINE_STEP_CSC)
+ assert(XvMCPutSurface(display, &surface, window, 0, 0, config.input_width, config.input_height, 0, 0, config.output_width, config.output_height, XVMC_FRAME_PICTURE) == Success);
+ }
+
+ gettimeofday(&stop, NULL);
+
+ timeval_subtract(&diff, &stop, &start);
+ diff_secs = (double)diff.tv_sec + (double)diff.tv_usec / 1000000.0;
+
+ printf("XvMC Benchmark\n");
+ printf("Input: %u,%u\nOutput: %u,%u\n", config.input_width, config.input_height, config.output_width, config.output_height);
+ printf("Pipeline: ");
+ if (config.pipeline & PIPELINE_STEP_MC)
+ printf("|mc|");
+ if (config.pipeline & PIPELINE_STEP_CSC)
+ printf("|csc|");
+ if (config.pipeline & PIPELINE_STEP_SWAP)
+ printf("|swap|");
+ printf("\n");
+ printf("Reps: %u\n", config.reps);
+ printf("Total time: %.2lf (%.2lf reps / sec)\n", diff_secs, config.reps / diff_secs);
+
+ assert(XvMCDestroyBlocks(display, &block_array) == Success);
+ assert(XvMCDestroyMacroBlocks(display, &mb_array) == Success);
+ assert(XvMCDestroySurface(display, &surface) == Success);
+ assert(XvMCDestroyContext(display, &context) == Success);
+
+ XvUngrabPort(display, port_num, CurrentTime);
+ XDestroyWindow(display, window);
+ XCloseDisplay(display);
+
+ return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
new file mode 100644
index 0000000000..1e3dd561c6
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -0,0 +1,31 @@
+#ifndef xvmc_private_h
+#define xvmc_private_h
+
+#include <X11/Xlib.h>
+#include <X11/extensions/XvMClib.h>
+
+#define BLOCK_SIZE_SAMPLES 64
+#define BLOCK_SIZE_BYTES (BLOCK_SIZE_SAMPLES * 2)
+
+struct pipe_video_context;
+struct pipe_surface;
+struct pipe_fence_handle;
+
+typedef struct
+{
+ struct pipe_video_context *vpipe;
+ struct pipe_surface *backbuffer;
+} XvMCContextPrivate;
+
+typedef struct
+{
+ struct pipe_video_surface *pipe_vsfc;
+ struct pipe_fence_handle *render_fence;
+ struct pipe_fence_handle *disp_fence;
+
+ /* Some XvMC functions take a surface but not a context,
+ so we keep track of which context each surface belongs to. */
+ XvMCContext *context;
+} XvMCSurfacePrivate;
+
+#endif /* xvmc_private_h */
diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript
index 6c00861f51..f973811072 100644
--- a/src/gallium/winsys/drm/intel/dri/SConscript
+++ b/src/gallium/winsys/drm/intel/dri/SConscript
@@ -12,8 +12,9 @@ drivers = [
trace,
]
-env.SharedLibrary(
+env.LoadableModule(
target ='i915_dri.so',
source = COMMON_GALLIUM_SOURCES,
LIBS = drivers + mesa + auxiliaries + env['LIBS'],
+ SHLIBPREFIX = '',
)
diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
index 4c5a1d2ea8..8b647a769b 100644
--- a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
+++ b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
@@ -7,6 +7,7 @@
#include "i915simple/i915_context.h"
#include "i915simple/i915_screen.h"
+#include "trace/tr_drm.h"
/*
* Helper functions
@@ -40,6 +41,7 @@ intel_drm_buffer_from_handle(struct intel_drm_winsys *idws,
const char* name, unsigned handle)
{
struct intel_drm_buffer *buf = CALLOC_STRUCT(intel_drm_buffer);
+ uint32_t tile = 0, swizzle = 0;
if (!buf)
return NULL;
@@ -52,6 +54,10 @@ intel_drm_buffer_from_handle(struct intel_drm_winsys *idws,
if (!buf->bo)
goto err;
+ drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle);
+ if (tile != INTEL_TILE_NONE)
+ buf->map_gtt = TRUE;
+
return (struct intel_buffer *)buf;
err:
@@ -166,6 +172,7 @@ intel_drm_create_screen(struct drm_api *api, int drmFD,
idws->base.destroy = intel_drm_winsys_destroy;
idws->pools.gem = drm_intel_bufmgr_gem_init(idws->fd, idws->max_batch_size);
+ drm_intel_bufmgr_gem_enable_reuse(idws->pools.gem);
idws->softpipe = FALSE;
idws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE);
@@ -198,5 +205,5 @@ struct drm_api intel_drm_api =
struct drm_api *
drm_api_create()
{
- return &intel_drm_api;
+ return trace_drm_create(&intel_drm_api);
}
diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c
index 5ca3ad9762..ebd1b607b7 100644
--- a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c
+++ b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c
@@ -13,6 +13,7 @@
#define INTEL_BATCH_CLIPRECTS 0x2
#undef INTEL_RUN_SYNC
+#undef INTEL_MAP_BATCHBUFFER
struct intel_drm_batchbuffer
{
@@ -40,8 +41,11 @@ intel_drm_batchbuffer_reset(struct intel_drm_batchbuffer *batch)
"gallium3d_batchbuffer",
batch->actual_size,
4096);
+
+#ifdef INTEL_MAP_BATCHBUFFER
drm_intel_bo_map(batch->bo, TRUE);
batch->base.map = batch->bo->virtual;
+#endif
memset(batch->base.map, 0, batch->actual_size);
batch->base.ptr = batch->base.map;
@@ -55,7 +59,13 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws)
struct intel_drm_winsys *idws = intel_drm_winsys(iws);
struct intel_drm_batchbuffer *batch = CALLOC_STRUCT(intel_drm_batchbuffer);
+ batch->actual_size = idws->max_batch_size;
+
+#ifdef INTEL_MAP_BATCHBUFFER
batch->base.map = NULL;
+#else
+ batch->base.map = MALLOC(batch->actual_size);
+#endif
batch->base.ptr = NULL;
batch->base.size = 0;
@@ -64,8 +74,6 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws)
batch->base.iws = iws;
- batch->actual_size = idws->max_batch_size;
-
intel_drm_batchbuffer_reset(batch);
return &batch->base;
@@ -156,7 +164,11 @@ intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch,
used = batch->base.ptr - batch->base.map;
+#ifdef INTEL_MAP_BATCHBUFFER
drm_intel_bo_unmap(batch->bo);
+#else
+ drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map);
+#endif
/* Do the sending to HW */
ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0);
@@ -202,7 +214,10 @@ intel_drm_batchbuffer_destroy(struct intel_batchbuffer *ibatch)
if (batch->bo)
drm_intel_bo_unreference(batch->bo);
- free(batch);
+#ifndef INTEL_MAP_BATCHBUFFER
+ FREE(batch->base.map);
+#endif
+ FREE(batch);
}
void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws)
diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c
index e017cd2e98..327e19fcd6 100644
--- a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c
+++ b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c
@@ -28,6 +28,7 @@ intel_drm_buffer_create(struct intel_winsys *iws,
} else if (type == INTEL_NEW_VERTEX) {
name = "gallium3d_vertex";
pool = idws->pools.gem;
+ buf->map_gtt = TRUE;
} else if (type == INTEL_NEW_SCANOUT) {
name = "gallium3d_scanout";
pool = idws->pools.gem;
@@ -57,11 +58,17 @@ intel_drm_buffer_set_fence_reg(struct intel_winsys *iws,
unsigned stride,
enum intel_buffer_tile tile)
{
+ struct intel_drm_buffer *buf = intel_drm_buffer(buffer);
assert(I915_TILING_NONE == INTEL_TILE_NONE);
assert(I915_TILING_X == INTEL_TILE_X);
assert(I915_TILING_Y == INTEL_TILE_Y);
- return drm_intel_bo_set_tiling(intel_bo(buffer), &tile, stride);
+ if (tile != INTEL_TILE_NONE) {
+ assert(buf->map_count == 0);
+ buf->map_gtt = TRUE;
+ }
+
+ return drm_intel_bo_set_tiling(buf->bo, &tile, stride);
}
static void *
@@ -109,6 +116,18 @@ intel_drm_buffer_unmap(struct intel_winsys *iws,
drm_intel_bo_unmap(intel_bo(buffer));
}
+static int
+intel_drm_buffer_write(struct intel_winsys *iws,
+ struct intel_buffer *buffer,
+ const void *data,
+ size_t size,
+ size_t offset)
+{
+ struct intel_drm_buffer *buf = intel_drm_buffer(buffer);
+
+ return drm_intel_bo_subdata(buf->bo, offset, size, (void*)data);
+}
+
static void
intel_drm_buffer_destroy(struct intel_winsys *iws,
struct intel_buffer *buffer)
@@ -130,5 +149,6 @@ intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws)
idws->base.buffer_set_fence_reg = intel_drm_buffer_set_fence_reg;
idws->base.buffer_map = intel_drm_buffer_map;
idws->base.buffer_unmap = intel_drm_buffer_unmap;
+ idws->base.buffer_write = intel_drm_buffer_write;
idws->base.buffer_destroy = intel_drm_buffer_destroy;
}
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index 091cbbcfed..117ca6059b 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -112,7 +112,7 @@ nouveau_drm_create_screen(struct drm_api *api, int fd,
return NULL;
}
- if (arg->mode == DRM_CREATE_DRI1) {
+ if (arg && arg->mode == DRM_CREATE_DRI1) {
struct nouveau_dri *nvdri = dri1->ddx_info;
enum pipe_format format;
diff --git a/src/gallium/winsys/drm/nouveau/xorg/Makefile b/src/gallium/winsys/drm/nouveau/xorg/Makefile
new file mode 100644
index 0000000000..f0d3b337e8
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/xorg/Makefile
@@ -0,0 +1,61 @@
+TARGET = modesetting_drv.so
+CFILES = $(wildcard ./*.c)
+OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES))
+TOP = ../../../../../..
+
+include $(TOP)/configs/current
+
+INCLUDES = \
+ $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \
+ -I../gem \
+ -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/gallium/drivers \
+ -I$(TOP)/src/gallium/auxiliary \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/include \
+ -I$(TOP)/src/egl/main
+
+LIBS = \
+ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
+ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \
+ $(TOP)/src/gallium/drivers/nv04/libnv04.a \
+ $(TOP)/src/gallium/drivers/nv10/libnv10.a \
+ $(TOP)/src/gallium/drivers/nv20/libnv20.a \
+ $(TOP)/src/gallium/drivers/nv30/libnv30.a \
+ $(TOP)/src/gallium/drivers/nv40/libnv40.a \
+ $(TOP)/src/gallium/drivers/nv50/libnv50.a \
+ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \
+ $(GALLIUM_AUXILIARIES)
+
+DRIVER_DEFINES = \
+ -DHAVE_CONFIG_H
+
+
+#############################################
+
+
+
+all default: $(TARGET)
+
+$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS)
+ $(TOP)/bin/mklib -noprefix -o $@ \
+ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_nouveau
+
+clean:
+ rm -rf $(OBJECTS) $(TARGET)
+
+install:
+ $(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+ $(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+
+
+##############################################
+
+
+.c.o:
+ $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@
+
+
+##############################################
+
+.PHONY = all clean install
diff --git a/src/gallium/winsys/drm/nouveau/xorg/nouveau_xorg.c b/src/gallium/winsys/drm/nouveau/xorg/nouveau_xorg.c
new file mode 100644
index 0000000000..a669b3080a
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/xorg/nouveau_xorg.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ * Author: Alan Hourihane <alanh@tungstengraphics.com>
+ * Author: Jakob Bornecrantz <wallbraker@gmail.com>
+ *
+ */
+
+#include "../../../../state_trackers/xorg/xorg_winsys.h"
+
+static void nouveau_xorg_identify(int flags);
+static Bool nouveau_xorg_pci_probe(DriverPtr driver, int entity_num,
+ struct pci_device *device,
+ intptr_t match_data);
+
+static const struct pci_id_match nouveau_xorg_device_match[] = {
+ { 0x10de, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY,
+ 0x00030000, 0x00ffffff, 0 },
+ { 0x12d2, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY,
+ 0x00030000, 0x00ffffff, 0 },
+ {0, 0, 0},
+};
+
+static SymTabRec nouveau_xorg_chipsets[] = {
+ {PCI_MATCH_ANY, "NVIDIA Graphics Device"},
+ {-1, NULL}
+};
+
+static PciChipsets nouveau_xorg_pci_devices[] = {
+ {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL},
+ {-1, -1, NULL}
+};
+
+static XF86ModuleVersionInfo nouveau_xorg_version = {
+ "modesetting",
+ MODULEVENDORSTRING,
+ MODINFOSTRING1,
+ MODINFOSTRING2,
+ XORG_VERSION_CURRENT,
+ 0, 1, 0, /* major, minor, patch */
+ ABI_CLASS_VIDEODRV,
+ ABI_VIDEODRV_VERSION,
+ MOD_CLASS_VIDEODRV,
+ {0, 0, 0, 0}
+};
+
+/*
+ * Xorg driver exported structures
+ */
+
+_X_EXPORT DriverRec modesetting = {
+ 1,
+ "modesetting",
+ nouveau_xorg_identify,
+ NULL,
+ xorg_tracker_available_options,
+ NULL,
+ 0,
+ NULL,
+ nouveau_xorg_device_match,
+ nouveau_xorg_pci_probe
+};
+
+static MODULESETUPPROTO(nouveau_xorg_setup);
+
+_X_EXPORT XF86ModuleData modesettingModuleData = {
+ &nouveau_xorg_version,
+ nouveau_xorg_setup,
+ NULL
+};
+
+/*
+ * Xorg driver functions
+ */
+
+static pointer
+nouveau_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
+{
+ static Bool setupDone = 0;
+
+ /* This module should be loaded only once, but check to be sure.
+ */
+ if (!setupDone) {
+ setupDone = 1;
+ xf86AddDriver(&modesetting, module, HaveDriverFuncs);
+
+ /*
+ * The return value must be non-NULL on success even though there
+ * is no TearDownProc.
+ */
+ return (pointer) 1;
+ } else {
+ if (errmaj)
+ *errmaj = LDR_ONCEONLY;
+ return NULL;
+ }
+}
+
+static void
+nouveau_xorg_identify(int flags)
+{
+ xf86PrintChipsets("modesetting", "Driver for Modesetting Kernel Drivers",
+ nouveau_xorg_chipsets);
+}
+
+static Bool
+nouveau_xorg_pci_probe(DriverPtr driver,
+ int entity_num, struct pci_device *device, intptr_t match_data)
+{
+ ScrnInfoPtr scrn = NULL;
+ EntityInfoPtr entity;
+
+ scrn = xf86ConfigPciEntity(scrn, 0, entity_num, nouveau_xorg_pci_devices,
+ NULL, NULL, NULL, NULL, NULL);
+ if (scrn != NULL) {
+ scrn->driverVersion = 1;
+ scrn->driverName = "i915";
+ scrn->name = "modesetting";
+ scrn->Probe = NULL;
+
+ entity = xf86GetEntityInfo(entity_num);
+
+ /* Use all the functions from the xorg tracker */
+ xorg_tracker_set_functions(scrn);
+ }
+ return scrn != NULL;
+}
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 07551e7cd1..7bf23cba23 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -32,6 +32,8 @@
#include "radeon_buffer.h"
+#include "radeon_bo_gem.h"
+
static const char *radeon_get_name(struct pipe_winsys *ws)
{
return "Radeon/GEM+KMS";
@@ -99,6 +101,7 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws,
unsigned height,
enum pipe_format format,
unsigned usage,
+ unsigned tex_usage,
unsigned *stride)
{
struct pipe_format_block block;
@@ -134,8 +137,11 @@ static void *radeon_buffer_map(struct pipe_winsys *ws,
(struct radeon_pipe_buffer*)buffer;
int write = 0;
- if (!(flags & PIPE_BUFFER_USAGE_DONTBLOCK)) {
- radeon_bo_wait(radeon_buffer->bo);
+ if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) {
+ uint32_t domain;
+
+ if (radeon_bo_is_busy(radeon_buffer->bo, &domain))
+ return NULL;
}
if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) {
write = 1;
@@ -187,7 +193,6 @@ static void radeon_flush_frontbuffer(struct pipe_winsys *pipe_winsys,
struct radeon_winsys* radeon_pipe_winsys(int fd)
{
struct radeon_winsys* radeon_ws;
- struct radeon_bo_manager* bom;
radeon_ws = CALLOC_STRUCT(radeon_winsys);
if (radeon_ws == NULL) {
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index 47376a0f07..a4011db0b8 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -98,7 +98,7 @@ struct pipe_buffer* radeon_buffer_from_handle(struct drm_api* api,
return &radeon_buffer->base;
}
-struct pipe_texture*
+static struct pipe_texture*
radeon_texture_from_shared_handle(struct drm_api *api,
struct pipe_screen *screen,
struct pipe_texture *templ,
@@ -116,20 +116,22 @@ radeon_texture_from_shared_handle(struct drm_api *api,
return screen->texture_blanket(screen, templ, &stride, buffer);
}
-boolean radeon_shared_handle_from_texture(struct drm_api *api,
- struct pipe_screen *screen,
- struct pipe_texture *texture,
- unsigned *stride,
- unsigned *handle)
+static boolean radeon_shared_handle_from_texture(struct drm_api *api,
+ struct pipe_screen *screen,
+ struct pipe_texture *texture,
+ unsigned *stride,
+ unsigned *handle)
{
int retval, fd;
struct drm_gem_flink flink;
struct radeon_pipe_buffer* radeon_buffer;
- struct pipe_buffer* buffer = &radeon_buffer->base;
- if (!radeon_buffer_from_texture(api, texture, buffer, stride)) {
+ struct pipe_buffer *buffer;
+
+ if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) {
return FALSE;
}
+ radeon_buffer = (struct radeon_pipe_buffer*)buffer;
if (!radeon_buffer->flinked) {
fd = ((struct radeon_winsys*)screen->winsys)->priv->fd;
@@ -150,11 +152,11 @@ boolean radeon_shared_handle_from_texture(struct drm_api *api,
return TRUE;
}
-boolean radeon_local_handle_from_texture(struct drm_api *api,
- struct pipe_screen *screen,
- struct pipe_texture *texture,
- unsigned *stride,
- unsigned *handle)
+static boolean radeon_local_handle_from_texture(struct drm_api *api,
+ struct pipe_screen *screen,
+ struct pipe_texture *texture,
+ unsigned *stride,
+ unsigned *handle)
{
struct pipe_buffer *buffer;
if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) {
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index d723876221..d2d84f1a8f 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -137,7 +137,7 @@ static void do_ioctls(struct r300_winsys* winsys, int fd)
int target = 0;
int retval;
- info.value = &target;
+ info.value = (unsigned long)&target;
/* First, get the number of pixel pipes */
info.request = RADEON_INFO_NUM_GB_PIPES;
diff --git a/src/gallium/winsys/g3dvl/Makefile b/src/gallium/winsys/g3dvl/Makefile
new file mode 100644
index 0000000000..424ddea87a
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/Makefile
@@ -0,0 +1,11 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+SUBDIRS = $(GALLIUM_WINSYS_DIRS)
+
+default install clean:
+ @for dir in $(SUBDIRS) ; do \
+ if [ -d $$dir ] ; then \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
+ fi \
+ done
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
index c83db28dd9..4f7a243361 100644
--- a/src/gallium/winsys/g3dvl/vl_winsys.h
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -2,13 +2,22 @@
#define vl_winsys_h
#include <X11/Xlib.h>
+#include <pipe/p_defines.h>
+#include <pipe/p_format.h>
-struct pipe_context;
+struct pipe_screen;
+struct pipe_video_context;
-struct pipe_context* create_pipe_context(Display *display, int screen);
-int destroy_pipe_context(struct pipe_context *pipe);
-int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable);
-int unbind_pipe_drawable(struct pipe_context *pipe);
+struct pipe_screen*
+vl_screen_create(Display *display, int screen);
-#endif
+struct pipe_video_context*
+vl_video_create(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height);
+
+Drawable
+vl_video_bind_drawable(struct pipe_video_context *vpipe, Drawable drawable);
+#endif
diff --git a/src/gallium/winsys/g3dvl/xlib/Makefile b/src/gallium/winsys/g3dvl/xlib/Makefile
new file mode 100644
index 0000000000..cf765ef51a
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/xlib/Makefile
@@ -0,0 +1,74 @@
+# This makefile produces a "stand-alone" libXvMCg3dvl.so which is
+# based on Xlib (no DRI HW acceleration)
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+XVMC_MAJOR = 1
+XVMC_MINOR = 0
+XVMC_LIB = XvMCg3dvl
+XVMC_LIB_NAME = lib$(XVMC_LIB).so
+XVMC_LIB_DEPS = $(EXTRA_LIB_PATH) -lXvMC -lXv -lX11 -lm
+
+INCLUDES = -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/gallium/auxiliary \
+ -I$(TOP)/src/gallium/drivers \
+ -I$(TOP)/src/gallium/winsys/g3dvl
+
+DEFINES += -DGALLIUM_SOFTPIPE \
+ -DGALLIUM_TRACE
+
+SOURCES = xsp_winsys.c
+
+# XXX: Hack, if we include libxvmctracker.a in LIBS none of the symbols are
+# pulled in by the linker because xsp_winsys.c doesn't refer to them
+OBJECTS = $(SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
+
+LIBS = $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+ $(TOP)/src/gallium/auxiliary/vl/libvl.a \
+ $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \
+ $(TOP)/src/gallium/auxiliary/draw/libdraw.a \
+ $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \
+ $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \
+ $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \
+ $(TOP)/src/gallium/auxiliary/util/libutil.a
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
+
+.PHONY: default $(TOP)/$(LIB_DIR)/gallium clean
+
+default: depend $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
+
+$(TOP)/$(LIB_DIR)/gallium:
+ @mkdir -p $(TOP)/$(LIB_DIR)/gallium
+
+# Make the libXvMCg3dvl.so library
+$(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME): $(OBJECTS) $(LIBS) Makefile
+ $(MKLIB) -o $(XVMC_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+ -major $(XVMC_MAJOR) -minor $(XVMC_MINOR) $(MKLIB_OPTIONS) \
+ -install $(TOP)/$(LIB_DIR)/gallium -id $(INSTALL_LIB_DIR)/lib$(XVMC_LIB).1.dylib \
+ $(XVMC_LIB_DEPS) $(OBJECTS) $(LIBS)
+
+depend: $(SOURCES) Makefile
+ $(RM) depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(SOURCES)
+
+#install: default
+# $(INSTALL) -d $(INSTALL_DIR)/include/GL
+# $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
+# $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
+# @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
+# $(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
+# fi
+
+clean: Makefile
+ $(RM) $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
+ $(RM) *.o *~
+ $(RM) depend depend.bak
+
+-include depend
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
new file mode 100644
index 0000000000..37eee79c5d
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -0,0 +1,307 @@
+#include <vl_winsys.h>
+#include <X11/Xutil.h>
+#include <pipe/internal/p_winsys_screen.h>
+#include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
+#include <util/u_memory.h>
+#include <util/u_math.h>
+#include <softpipe/sp_winsys.h>
+#include <softpipe/sp_video_context.h>
+#include <softpipe/sp_texture.h>
+
+/* pipe_winsys implementation */
+
+struct xsp_pipe_winsys
+{
+ struct pipe_winsys base;
+ Display *display;
+ int screen;
+ XImage *fbimage;
+};
+
+struct xsp_context
+{
+ Drawable drawable;
+
+ void (*pipe_destroy)(struct pipe_video_context *vpipe);
+};
+
+struct xsp_buffer
+{
+ struct pipe_buffer base;
+ boolean is_user_buffer;
+ void *data;
+ void *mapped_data;
+};
+
+static struct pipe_buffer* xsp_buffer_create(struct pipe_winsys *pws, unsigned alignment, unsigned usage, unsigned size)
+{
+ struct xsp_buffer *buffer;
+
+ assert(pws);
+
+ buffer = calloc(1, sizeof(struct xsp_buffer));
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.alignment = alignment;
+ buffer->base.usage = usage;
+ buffer->base.size = size;
+ buffer->data = align_malloc(size, alignment);
+
+ return (struct pipe_buffer*)buffer;
+}
+
+static struct pipe_buffer* xsp_user_buffer_create(struct pipe_winsys *pws, void *data, unsigned size)
+{
+ struct xsp_buffer *buffer;
+
+ assert(pws);
+
+ buffer = calloc(1, sizeof(struct xsp_buffer));
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.size = size;
+ buffer->is_user_buffer = TRUE;
+ buffer->data = data;
+
+ return (struct pipe_buffer*)buffer;
+}
+
+static void* xsp_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buffer, unsigned flags)
+{
+ struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer;
+
+ assert(pws);
+ assert(buffer);
+
+ xsp_buf->mapped_data = xsp_buf->data;
+
+ return xsp_buf->mapped_data;
+}
+
+static void xsp_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buffer)
+{
+ struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer;
+
+ assert(pws);
+ assert(buffer);
+
+ xsp_buf->mapped_data = NULL;
+}
+
+static void xsp_buffer_destroy(struct pipe_buffer *buffer)
+{
+ struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer;
+
+ assert(buffer);
+
+ if (!xsp_buf->is_user_buffer)
+ align_free(xsp_buf->data);
+
+ free(xsp_buf);
+}
+
+static struct pipe_buffer* xsp_surface_buffer_create
+(
+ struct pipe_winsys *pws,
+ unsigned width,
+ unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned tex_usage,
+ unsigned *stride
+)
+{
+ const unsigned int ALIGNMENT = 1;
+ struct pipe_format_block block;
+ unsigned nblocksx, nblocksy;
+
+ pf_get_block(format, &block);
+ nblocksx = pf_get_nblocksx(&block, width);
+ nblocksy = pf_get_nblocksy(&block, height);
+ *stride = align(nblocksx * block.size, ALIGNMENT);
+
+ return pws->buffer_create(pws, ALIGNMENT,
+ usage,
+ *stride * nblocksy);
+}
+
+static void xsp_fence_reference(struct pipe_winsys *pws, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence)
+{
+ assert(pws);
+ assert(ptr);
+ assert(fence);
+}
+
+static int xsp_fence_signalled(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag)
+{
+ assert(pws);
+ assert(fence);
+
+ return 0;
+}
+
+static int xsp_fence_finish(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag)
+{
+ assert(pws);
+ assert(fence);
+
+ return 0;
+}
+
+static void xsp_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surface, void *context_private)
+{
+ struct xsp_pipe_winsys *xsp_winsys;
+ struct xsp_context *xsp_context;
+
+ assert(pws);
+ assert(surface);
+ assert(context_private);
+
+ xsp_winsys = (struct xsp_pipe_winsys*)pws;
+ xsp_context = (struct xsp_context*)context_private;
+ xsp_winsys->fbimage->width = surface->width;
+ xsp_winsys->fbimage->height = surface->height;
+ xsp_winsys->fbimage->bytes_per_line = surface->width * (xsp_winsys->fbimage->bits_per_pixel >> 3);
+ xsp_winsys->fbimage->data = (char*)((struct xsp_buffer *)softpipe_texture(surface->texture)->buffer)->data + surface->offset;
+
+ XPutImage
+ (
+ xsp_winsys->display, xsp_context->drawable,
+ XDefaultGC(xsp_winsys->display, xsp_winsys->screen),
+ xsp_winsys->fbimage, 0, 0, 0, 0,
+ surface->width, surface->height
+ );
+ XFlush(xsp_winsys->display);
+}
+
+static const char* xsp_get_name(struct pipe_winsys *pws)
+{
+ assert(pws);
+ return "X11 SoftPipe";
+}
+
+static void xsp_destroy(struct pipe_winsys *pws)
+{
+ struct xsp_pipe_winsys *xsp_winsys = (struct xsp_pipe_winsys*)pws;
+
+ assert(pws);
+
+ /* XDestroyImage() wants to free the data as well */
+ xsp_winsys->fbimage->data = NULL;
+
+ XDestroyImage(xsp_winsys->fbimage);
+ FREE(xsp_winsys);
+}
+
+/* Called through pipe_video_context::destroy() */
+static void xsp_pipe_destroy(struct pipe_video_context *vpipe)
+{
+ struct xsp_context *xsp_context;
+
+ assert(vpipe);
+
+ xsp_context = vpipe->priv;
+
+ /* Call the original destroy */
+ xsp_context->pipe_destroy(vpipe);
+
+ FREE(xsp_context);
+}
+
+/* Show starts here */
+
+Drawable
+vl_video_bind_drawable(struct pipe_video_context *vpipe, Drawable drawable)
+{
+ struct xsp_context *xsp_context;
+ Drawable old_drawable;
+
+ assert(vpipe);
+
+ xsp_context = vpipe->priv;
+ old_drawable = xsp_context->drawable;
+ xsp_context->drawable = drawable;
+
+ return old_drawable;
+}
+
+struct pipe_screen*
+vl_screen_create(Display *display, int screen)
+{
+ struct xsp_pipe_winsys *xsp_winsys;
+
+ assert(display);
+
+ xsp_winsys = CALLOC_STRUCT(xsp_pipe_winsys);
+ if (!xsp_winsys)
+ return NULL;
+
+ xsp_winsys->base.buffer_create = xsp_buffer_create;
+ xsp_winsys->base.user_buffer_create = xsp_user_buffer_create;
+ xsp_winsys->base.buffer_map = xsp_buffer_map;
+ xsp_winsys->base.buffer_unmap = xsp_buffer_unmap;
+ xsp_winsys->base.buffer_destroy = xsp_buffer_destroy;
+ xsp_winsys->base.surface_buffer_create = xsp_surface_buffer_create;
+ xsp_winsys->base.fence_reference = xsp_fence_reference;
+ xsp_winsys->base.fence_signalled = xsp_fence_signalled;
+ xsp_winsys->base.fence_finish = xsp_fence_finish;
+ xsp_winsys->base.flush_frontbuffer = xsp_flush_frontbuffer;
+ xsp_winsys->base.get_name = xsp_get_name;
+ xsp_winsys->base.destroy = xsp_destroy;
+ xsp_winsys->display = display;
+ xsp_winsys->screen = screen;
+ xsp_winsys->fbimage = XCreateImage
+ (
+ display,
+ XDefaultVisual(display, screen),
+ XDefaultDepth(display, screen),
+ ZPixmap,
+ 0,
+ NULL,
+ 0, /* Don't know the width and height until flush_frontbuffer */
+ 0,
+ 32,
+ 0
+ );
+
+ if (!xsp_winsys->fbimage)
+ {
+ FREE(xsp_winsys);
+ return NULL;
+ }
+
+ XInitImage(xsp_winsys->fbimage);
+
+ return softpipe_create_screen(&xsp_winsys->base);
+}
+
+struct pipe_video_context*
+vl_video_create(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height)
+{
+ struct pipe_video_context *vpipe;
+ struct xsp_context *xsp_context;
+
+ assert(screen);
+ assert(width && height);
+
+ vpipe = sp_video_create(screen, profile, chroma_format, width, height);
+ if (!vpipe)
+ return NULL;
+
+ xsp_context = CALLOC_STRUCT(xsp_context);
+ if (!xsp_context)
+ {
+ vpipe->destroy(vpipe);
+ return NULL;
+ }
+
+ /* Override this so we can free our xsp_context when the pipe is freed */
+ xsp_context->pipe_destroy = vpipe->destroy;
+ vpipe->destroy = xsp_pipe_destroy;
+
+ vpipe->priv = xsp_context;
+
+ return vpipe;
+}
diff --git a/src/gallium/winsys/g3dvl/xsp_winsys.c b/src/gallium/winsys/g3dvl/xsp_winsys.c
index 698c2856a4..37d60ce540 100644
--- a/src/gallium/winsys/g3dvl/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xsp_winsys.c
@@ -105,6 +105,7 @@ static struct pipe_buffer* xsp_surface_buffer_create
unsigned height,
enum pipe_format format,
unsigned usage,
+ unsigned tex_usage,
unsigned *stride
)
{
diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript
index 86eb9ef55e..f5e6d36d89 100644
--- a/src/gallium/winsys/gdi/SConscript
+++ b/src/gallium/winsys/gdi/SConscript
@@ -5,35 +5,39 @@ Import('*')
if env['platform'] == 'windows':
- env = env.Clone()
+ env = env.Clone()
- env.Append(CPPPATH = [
- '#src/gallium/state_trackers/wgl',
- ])
+ env.Append(CPPPATH = [
+ '#src/gallium/state_trackers/wgl',
+ ])
- env.Append(LIBS = [
- 'gdi32',
- 'user32',
- 'kernel32',
- 'ws2_32',
- ])
+ env.Append(LIBS = [
+ 'gdi32',
+ 'user32',
+ 'kernel32',
+ 'ws2_32',
+ ])
- sources = [
- 'gdi_softpipe_winsys.c',
- ]
-
- if env['gcc']:
- sources += ['#src/gallium/state_trackers/wgl/opengl32.mingw.def']
- else:
- sources += ['#src/gallium/state_trackers/wgl/opengl32.def']
-
- drivers = [
- trace,
- softpipe,
- ]
+ if 'llvmpipe' in env['drivers']:
+ sources = ['gdi_llvmpipe_winsys.c']
+ drivers = [llvmpipe]
+ env.Tool('llvm')
+ elif 'softpipe' in env['drivers']:
+ sources = ['gdi_softpipe_winsys.c']
+ drivers = [softpipe]
+ else:
+ print 'warning: softpipe or llvmpipe not selected, gdi winsys disabled'
+ Return()
+
+ if env['gcc']:
+ sources += ['#src/gallium/state_trackers/wgl/opengl32.mingw.def']
+ else:
+ sources += ['#src/gallium/state_trackers/wgl/opengl32.def']
+
+ drivers += [trace]
- env.SharedLibrary(
- target ='opengl32',
- source = sources,
- LIBS = wgl + glapi + mesa + drivers + auxiliaries + env['LIBS'],
- )
+ env.SharedLibrary(
+ target ='opengl32',
+ source = sources,
+ LIBS = wgl + glapi + mesa + drivers + auxiliaries + env['LIBS'],
+ )
diff --git a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
new file mode 100644
index 0000000000..e8bc0f55ac
--- /dev/null
+++ b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
@@ -0,0 +1,288 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * LLVMpipe support.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include <windows.h>
+
+#include "pipe/p_format.h"
+#include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "llvmpipe/lp_winsys.h"
+#include "llvmpipe/lp_texture.h"
+#include "stw_winsys.h"
+
+
+struct gdi_llvmpipe_displaytarget
+{
+ enum pipe_format format;
+ struct pipe_format_block block;
+ unsigned width;
+ unsigned height;
+ unsigned stride;
+
+ unsigned size;
+
+ void *data;
+
+ BITMAPINFO bmi;
+};
+
+
+/** Cast wrapper */
+static INLINE struct gdi_llvmpipe_displaytarget *
+gdi_llvmpipe_displaytarget( struct llvmpipe_displaytarget *buf )
+{
+ return (struct gdi_llvmpipe_displaytarget *)buf;
+}
+
+
+static boolean
+gdi_llvmpipe_is_displaytarget_format_supported( struct llvmpipe_winsys *ws,
+ enum pipe_format format )
+{
+ switch(format) {
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return TRUE;
+
+ /* TODO: Support other formats possible with BMPs, as described in
+ * http://msdn.microsoft.com/en-us/library/dd183376(VS.85).aspx */
+
+ default:
+ return FALSE;
+ }
+}
+
+
+static void *
+gdi_llvmpipe_displaytarget_map(struct llvmpipe_winsys *ws,
+ struct llvmpipe_displaytarget *dt,
+ unsigned flags )
+{
+ struct gdi_llvmpipe_displaytarget *gdt = gdi_llvmpipe_displaytarget(dt);
+
+ return gdt->data;
+}
+
+
+static void
+gdi_llvmpipe_displaytarget_unmap(struct llvmpipe_winsys *ws,
+ struct llvmpipe_displaytarget *dt )
+{
+
+}
+
+
+static void
+gdi_llvmpipe_displaytarget_destroy(struct llvmpipe_winsys *winsys,
+ struct llvmpipe_displaytarget *dt)
+{
+ struct gdi_llvmpipe_displaytarget *gdt = gdi_llvmpipe_displaytarget(dt);
+
+ align_free(gdt->data);
+ FREE(gdt);
+}
+
+
+/**
+ * Round n up to next multiple.
+ */
+static INLINE unsigned
+round_up(unsigned n, unsigned multiple)
+{
+ return (n + multiple - 1) & ~(multiple - 1);
+}
+
+
+static struct llvmpipe_displaytarget *
+gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys,
+ enum pipe_format format,
+ unsigned width, unsigned height,
+ unsigned alignment,
+ unsigned *stride)
+{
+ struct gdi_llvmpipe_displaytarget *gdt;
+ unsigned cpp;
+ unsigned bpp;
+
+ gdt = CALLOC_STRUCT(gdi_llvmpipe_displaytarget);
+ if(!gdt)
+ goto no_gdt;
+
+ gdt->format = format;
+ gdt->width = width;
+ gdt->height = height;
+
+ bpp = pf_get_bits(format);
+ cpp = pf_get_size(format);
+
+ gdt->stride = round_up(width * cpp, alignment);
+ gdt->size = gdt->stride * height;
+
+ gdt->data = align_malloc(gdt->size, alignment);
+ if(!gdt->data)
+ goto no_data;
+
+ gdt->bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
+ gdt->bmi.bmiHeader.biWidth = gdt->stride / cpp;
+ gdt->bmi.bmiHeader.biHeight= -(long)height;
+ gdt->bmi.bmiHeader.biPlanes = 1;
+ gdt->bmi.bmiHeader.biBitCount = bpp;
+ gdt->bmi.bmiHeader.biCompression = BI_RGB;
+ gdt->bmi.bmiHeader.biSizeImage = 0;
+ gdt->bmi.bmiHeader.biXPelsPerMeter = 0;
+ gdt->bmi.bmiHeader.biYPelsPerMeter = 0;
+ gdt->bmi.bmiHeader.biClrUsed = 0;
+ gdt->bmi.bmiHeader.biClrImportant = 0;
+
+ *stride = gdt->stride;
+ return (struct llvmpipe_displaytarget *)gdt;
+
+no_data:
+ FREE(gdt);
+no_gdt:
+ return NULL;
+}
+
+
+static void
+gdi_llvmpipe_displaytarget_display(struct llvmpipe_winsys *winsys,
+ struct llvmpipe_displaytarget *dt,
+ void *context_private)
+{
+ assert(0);
+}
+
+
+static void
+gdi_llvmpipe_destroy(struct llvmpipe_winsys *winsys)
+{
+ FREE(winsys);
+}
+
+
+static struct pipe_screen *
+gdi_llvmpipe_screen_create(void)
+{
+ static struct llvmpipe_winsys *winsys;
+ struct pipe_screen *screen;
+
+ winsys = CALLOC_STRUCT(llvmpipe_winsys);
+ if(!winsys)
+ goto no_winsys;
+
+ winsys->destroy = gdi_llvmpipe_destroy;
+ winsys->is_displaytarget_format_supported = gdi_llvmpipe_is_displaytarget_format_supported;
+ winsys->displaytarget_create = gdi_llvmpipe_displaytarget_create;
+ winsys->displaytarget_map = gdi_llvmpipe_displaytarget_map;
+ winsys->displaytarget_unmap = gdi_llvmpipe_displaytarget_unmap;
+ winsys->displaytarget_display = gdi_llvmpipe_displaytarget_display;
+ winsys->displaytarget_destroy = gdi_llvmpipe_displaytarget_destroy;
+
+ screen = llvmpipe_create_screen(winsys);
+ if(!screen)
+ goto no_screen;
+
+ return screen;
+
+no_screen:
+ FREE(winsys);
+no_winsys:
+ return NULL;
+}
+
+
+static struct pipe_context *
+gdi_llvmpipe_context_create(struct pipe_screen *screen)
+{
+ return llvmpipe_create(screen);
+}
+
+
+static void
+gdi_llvmpipe_present(struct pipe_screen *screen,
+ struct pipe_surface *surface,
+ HDC hDC)
+{
+ struct llvmpipe_texture *texture;
+ struct gdi_llvmpipe_displaytarget *gdt;
+
+ texture = llvmpipe_texture(surface->texture);
+ gdt = gdi_llvmpipe_displaytarget(texture->dt);
+
+ StretchDIBits(hDC,
+ 0, 0, gdt->width, gdt->height,
+ 0, 0, gdt->width, gdt->height,
+ gdt->data, &gdt->bmi, 0, SRCCOPY);
+}
+
+
+static const struct stw_winsys stw_winsys = {
+ &gdi_llvmpipe_screen_create,
+ &gdi_llvmpipe_context_create,
+ &gdi_llvmpipe_present,
+ NULL, /* get_adapter_luid */
+ NULL, /* shared_surface_open */
+ NULL, /* shared_surface_close */
+ NULL /* compose */
+};
+
+
+BOOL WINAPI
+DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
+{
+ switch (fdwReason) {
+ case DLL_PROCESS_ATTACH:
+ if (!stw_init(&stw_winsys)) {
+ return FALSE;
+ }
+ return stw_init_thread();
+
+ case DLL_THREAD_ATTACH:
+ return stw_init_thread();
+
+ case DLL_THREAD_DETACH:
+ stw_cleanup_thread();
+ break;
+
+ case DLL_PROCESS_DETACH:
+ stw_cleanup_thread();
+ stw_cleanup();
+ break;
+ }
+ return TRUE;
+}
diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
index 33826524d7..5e0ccf32f4 100644
--- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
@@ -46,7 +46,7 @@
#include "util/u_memory.h"
#include "softpipe/sp_winsys.h"
#include "softpipe/sp_texture.h"
-#include "shared/stw_winsys.h"
+#include "stw_winsys.h"
struct gdi_softpipe_buffer
@@ -166,6 +166,7 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
enum pipe_format format,
unsigned usage,
+ unsigned tex_usage,
unsigned *stride)
{
const unsigned alignment = 64;
@@ -268,9 +269,9 @@ gdi_softpipe_context_create(struct pipe_screen *screen)
static void
-gdi_softpipe_flush_frontbuffer(struct pipe_screen *screen,
- struct pipe_surface *surface,
- HDC hDC)
+gdi_softpipe_present(struct pipe_screen *screen,
+ struct pipe_surface *surface,
+ HDC hDC)
{
struct softpipe_texture *texture;
struct gdi_softpipe_buffer *buffer;
@@ -303,7 +304,11 @@ gdi_softpipe_flush_frontbuffer(struct pipe_screen *screen,
static const struct stw_winsys stw_winsys = {
&gdi_softpipe_screen_create,
&gdi_softpipe_context_create,
- &gdi_softpipe_flush_frontbuffer
+ &gdi_softpipe_present,
+ NULL, /* get_adapter_luid */
+ NULL, /* shared_surface_open */
+ NULL, /* shared_surface_close */
+ NULL /* compose */
};
diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c
index 67fea023a3..260b39e2a0 100644
--- a/src/gallium/winsys/xlib/xlib_softpipe.c
+++ b/src/gallium/winsys/xlib/xlib_softpipe.c
@@ -75,9 +75,6 @@ struct xmesa_pipe_winsys
{
struct pipe_winsys base;
/* struct xmesa_visual *xm_visual; */
-#ifdef USE_XSHM
- int shm;
-#endif
};
@@ -93,11 +90,6 @@ xm_buffer( struct pipe_buffer *buf )
/**
* X Shared Memory Image extension code
*/
-#ifdef USE_XSHM
-#define XSHM_ENABLED(b) ((b)->shm)
-#else
-#define XSHM_ENABLED(b) 0
-#endif
#ifdef USE_XSHM
@@ -116,23 +108,23 @@ mesaHandleXError(Display *dpy, XErrorEvent *event)
}
-static GLboolean alloc_shm(struct xm_buffer *buf, unsigned size)
+static char *alloc_shm(struct xm_buffer *buf, unsigned size)
{
XShmSegmentInfo *const shminfo = & buf->shminfo;
shminfo->shmid = shmget(IPC_PRIVATE, size, IPC_CREAT|0777);
if (shminfo->shmid < 0) {
- return GL_FALSE;
+ return NULL;
}
shminfo->shmaddr = (char *) shmat(shminfo->shmid, 0, 0);
if (shminfo->shmaddr == (char *) -1) {
shmctl(shminfo->shmid, IPC_RMID, 0);
- return GL_FALSE;
+ return NULL;
}
shminfo->readOnly = False;
- return GL_TRUE;
+ return shminfo->shmaddr;
}
@@ -258,25 +250,30 @@ xlib_softpipe_display_surface(struct xmesa_buffer *b,
return;
#ifdef USE_XSHM
- if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) {
- assert(surf->texture->block.width == 1);
- assert(surf->texture->block.height == 1);
- alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] /
- surf->texture->block.size, surf->height);
- }
-#endif
+ if (xm_buf->shm)
+ {
+ if (xm_buf->tempImage == NULL)
+ {
+ assert(surf->texture->block.width == 1);
+ assert(surf->texture->block.height == 1);
+ alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] /
+ surf->texture->block.size, surf->height);
+ }
- ximage = (XSHM_ENABLED(xm_buf)) ? xm_buf->tempImage : b->tempImage;
- ximage->data = xm_buf->data;
+ ximage = xm_buf->tempImage;
+ ximage->data = xm_buf->data;
- /* display image in Window */
-#ifdef USE_XSHM
- if (XSHM_ENABLED(xm_buf)) {
+ /* _debug_printf("XSHM\n"); */
XShmPutImage(b->xm_visual->display, b->drawable, b->gc,
ximage, 0, 0, 0, 0, surf->width, surf->height, False);
- } else
+ }
+ else
#endif
{
+ /* display image in Window */
+ ximage = b->tempImage;
+ ximage->data = xm_buf->data;
+
/* check that the XImage has been previously initialized */
assert(ximage->format);
assert(ximage->bitmap_unit);
@@ -286,6 +283,7 @@ xlib_softpipe_display_surface(struct xmesa_buffer *b,
ximage->height = surf->height;
ximage->bytes_per_line = spt->stride[surf->level];
+ /* _debug_printf("XPUT\n"); */
XPutImage(b->xm_visual->display, b->drawable, b->gc,
ximage, 0, 0, 0, 0, surf->width, surf->height);
}
@@ -322,21 +320,6 @@ xm_buffer_create(struct pipe_winsys *pws,
unsigned size)
{
struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer);
-#ifdef USE_XSHM
- struct xmesa_pipe_winsys *xpws = (struct xmesa_pipe_winsys *) pws;
-
- buffer->shminfo.shmid = -1;
- buffer->shminfo.shmaddr = (char *) -1;
-
- if (xpws->shm && (usage & PIPE_BUFFER_USAGE_PIXEL) != 0) {
- buffer->shm = xpws->shm;
-
- if (alloc_shm(buffer, size)) {
- buffer->data = buffer->shminfo.shmaddr;
- buffer->shm = 1;
- }
- }
-#endif
pipe_reference_init(&buffer->base.reference, 1);
buffer->base.alignment = alignment;
@@ -363,9 +346,6 @@ xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes)
buffer->base.size = bytes;
buffer->userBuffer = TRUE;
buffer->data = ptr;
-#ifdef USE_XSHM
- buffer->shm = 0;
-#endif
return &buffer->base;
}
@@ -381,16 +361,44 @@ xm_surface_buffer_create(struct pipe_winsys *winsys,
{
const unsigned alignment = 64;
struct pipe_format_block block;
- unsigned nblocksx, nblocksy;
+ unsigned nblocksx, nblocksy, size;
pf_get_block(format, &block);
nblocksx = pf_get_nblocksx(&block, width);
nblocksy = pf_get_nblocksy(&block, height);
*stride = align(nblocksx * block.size, alignment);
+ size = *stride * nblocksy;
+
+#ifdef USE_XSHM
+ if (!debug_get_bool_option("XLIB_NO_SHM", FALSE))
+ {
+ struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer);
+
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.alignment = alignment;
+ buffer->base.usage = usage;
+ buffer->base.size = size;
+ buffer->userBuffer = FALSE;
+ buffer->shminfo.shmid = -1;
+ buffer->shminfo.shmaddr = (char *) -1;
+ buffer->shm = TRUE;
+
+ buffer->data = alloc_shm(buffer, size);
+ if (!buffer->data)
+ goto out;
+
+ return &buffer->base;
+
+ out:
+ if (buffer)
+ FREE(buffer);
+ }
+#endif
+
return winsys->buffer_create(winsys, alignment,
usage,
- *stride * nblocksy);
+ size);
}