summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/Makefile36
-rw-r--r--src/gallium/auxiliary/SConscript37
-rw-r--r--src/gallium/auxiliary/draw/draw_cliptest_tmp.h114
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c113
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h23
-rw-r--r--src/gallium/auxiliary/draw/draw_decompose_tmp.h26
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_gs_tmp.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c74
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h54
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c32
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h44
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c139
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h51
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c89
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c11
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c29
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c29
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c27
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c35
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c288
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_so_emit.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c7
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c200
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp.h238
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h103
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c610
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h19
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit.c208
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h309
-rw-r--r--src/gallium/auxiliary/draw/draw_so_emit_tmp.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_split_tmp.h176
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c48
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp14
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c103
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h11
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c307
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h8
-rw-r--r--src/gallium/auxiliary/os/os_stream.c58
-rw-r--r--src/gallium/auxiliary/os/os_stream.h25
-rw-r--r--src/gallium/auxiliary/os/os_stream_log.c3
-rw-r--r--src/gallium/auxiliary/os/os_stream_null.c8
-rw-r--r--src/gallium/auxiliary/os/os_stream_stdc.c9
-rw-r--r--src/gallium/auxiliary/os/os_stream_str.c1
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h3
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c6
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c499
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h100
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c5
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c27
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h12
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.h5
-rw-r--r--src/gallium/auxiliary/translate/translate.c2
-rw-r--r--src/gallium/auxiliary/translate/translate.h12
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c213
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c1378
-rw-r--r--src/gallium/auxiliary/util/u_bitmask.h3
-rw-r--r--src/gallium/auxiliary/util/u_blit.c130
-rw-r--r--src/gallium/auxiliary/util/u_blit.h12
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c54
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h36
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c177
-rw-r--r--src/gallium/auxiliary/util/u_debug_describe.c81
-rw-r--r--src/gallium/auxiliary/util/u_debug_describe.h49
-rw-r--r--src/gallium/auxiliary/util/u_debug_refcnt.c181
-rw-r--r--src/gallium/auxiliary/util/u_debug_refcnt.h63
-rw-r--r--src/gallium/auxiliary/util/u_debug_symbol.c97
-rw-r--r--src/gallium/auxiliary/util/u_debug_symbol.h7
-rw-r--r--src/gallium/auxiliary/util/u_dirty_surfaces.h30
-rw-r--r--src/gallium/auxiliary/util/u_draw.h1
-rw-r--r--src/gallium/auxiliary/util/u_dynarray.h3
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c1
-rw-r--r--src/gallium/auxiliary/util/u_inlines.h25
-rw-r--r--src/gallium/auxiliary/util/u_linkage.c149
-rw-r--r--src/gallium/auxiliary/util/u_linkage.h66
-rw-r--r--src/gallium/auxiliary/util/u_math.h13
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h8
-rw-r--r--src/gallium/auxiliary/util/u_rect.c51
-rw-r--r--src/gallium/auxiliary/util/u_rect.h60
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c1
-rw-r--r--src/gallium/auxiliary/util/u_split_prim.h13
-rw-r--r--src/gallium/auxiliary/util/u_staging.c38
-rw-r--r--src/gallium/auxiliary/util/u_staging.h34
-rw-r--r--src/gallium/auxiliary/util/u_surface.c46
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.c26
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.h30
-rw-r--r--src/gallium/auxiliary/util/u_tile.h3
-rw-r--r--src/gallium/auxiliary/util/u_transfer.h1
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.h5
-rw-r--r--src/gallium/docs/source/context.rst4
-rw-r--r--src/gallium/docs/source/debugging.rst4
-rw-r--r--src/gallium/docs/source/distro.rst9
-rw-r--r--src/gallium/docs/source/index.rst1
-rw-r--r--src/gallium/docs/source/resources.rst195
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c15
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c2
-rw-r--r--src/gallium/drivers/galahad/glhd_context.c27
-rw-r--r--src/gallium/drivers/galahad/glhd_screen.c30
-rw-r--r--src/gallium/drivers/i915/i915_context.c22
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c5
-rw-r--r--src/gallium/drivers/i915/i915_state.c8
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.c2
-rw-r--r--src/gallium/drivers/i965/brw_resource_texture.c4
-rw-r--r--src/gallium/drivers/i965/brw_wm_debug.c8
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile2
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_debug.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c93
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c39
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c26
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c37
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c223
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h35
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c61
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c61
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.c258
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.h61
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c207
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h37
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c664
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_point.c292
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c568
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c41
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_rasterizer.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vertex.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c4
-rw-r--r--src/gallium/drivers/nouveau/nouveau_class.h17
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h91
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c3
-rw-r--r--src/gallium/drivers/nv50/nv50_push.c5
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c3
-rw-r--r--src/gallium/drivers/nvfx/Makefile3
-rw-r--r--src/gallium/drivers/nvfx/SConscript3
-rw-r--r--src/gallium/drivers/nvfx/nv04_2d.c1341
-rw-r--r--src/gallium/drivers/nvfx/nv04_2d.h87
-rw-r--r--src/gallium/drivers/nvfx/nv04_2d_loops.h70
-rw-r--r--src/gallium/drivers/nvfx/nv04_surface_2d.c532
-rw-r--r--src/gallium/drivers/nvfx/nv04_surface_2d.h43
-rw-r--r--src/gallium/drivers/nvfx/nv30_fragtex.c192
-rw-r--r--src/gallium/drivers/nvfx/nv30_vertprog.h9
-rw-r--r--src/gallium/drivers/nvfx/nv40_fragtex.c183
-rw-r--r--src/gallium/drivers/nvfx/nv40_vertprog.h4
-rw-r--r--src/gallium/drivers/nvfx/nvfx_buffer.c171
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.c35
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.h166
-rw-r--r--src/gallium/drivers/nvfx/nvfx_draw.c73
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragprog.c1177
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragtex.c301
-rw-r--r--src/gallium/drivers/nvfx/nvfx_miptree.c387
-rw-r--r--src/gallium/drivers/nvfx/nvfx_push.c414
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.c48
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.h194
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.c125
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.h23
-rw-r--r--src/gallium/drivers/nvfx/nvfx_shader.h194
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state.c290
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state.h45
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_emit.c231
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_fb.c256
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_stipple.c21
-rw-r--r--src/gallium/drivers/nvfx/nvfx_surface.c448
-rw-r--r--src/gallium/drivers/nvfx/nvfx_tex.h90
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.c328
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.h10
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vbo.c1022
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vertprog.c798
-rw-r--r--src/gallium/drivers/r300/r300_context.c97
-rw-r--r--src/gallium/drivers/r300/r300_context.h10
-rw-r--r--src/gallium/drivers/r300/r300_emit.c42
-rw-r--r--src/gallium/drivers/r300/r300_flush.c5
-rw-r--r--src/gallium/drivers/r300/r300_fs.c23
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c28
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h2
-rw-r--r--src/gallium/drivers/r300/r300_reg.h1
-rw-r--r--src/gallium/drivers/r300/r300_render.c55
-rw-r--r--src/gallium/drivers/r300/r300_state.c64
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c26
-rw-r--r--src/gallium/drivers/r300/r300_texture.c49
-rw-r--r--src/gallium/drivers/r300/r300_texture.h7
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c6
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.h1
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c4
-rw-r--r--src/gallium/drivers/r300/r300_vs.c17
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h1
-rw-r--r--src/gallium/drivers/r600/r600_asm.c138
-rw-r--r--src/gallium/drivers/r600/r600_asm.h39
-rw-r--r--src/gallium/drivers/r600/r600_blit.c484
-rw-r--r--src/gallium/drivers/r600/r600_context.c182
-rw-r--r--src/gallium/drivers/r600/r600_context.h79
-rw-r--r--src/gallium/drivers/r600/r600_draw.c105
-rw-r--r--src/gallium/drivers/r600/r600_query.c200
-rw-r--r--src/gallium/drivers/r600/r600_resource.h12
-rw-r--r--src/gallium/drivers/r600/r600_screen.c25
-rw-r--r--src/gallium/drivers/r600/r600_screen.h18
-rw-r--r--src/gallium/drivers/r600/r600_shader.c1602
-rw-r--r--src/gallium/drivers/r600/r600_shader.h1
-rw-r--r--src/gallium/drivers/r600/r600_sq.h27
-rw-r--r--src/gallium/drivers/r600/r600_state.c476
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h7
-rw-r--r--src/gallium/drivers/r600/r600_texture.c461
-rw-r--r--src/gallium/drivers/r600/r600d.h8
-rw-r--r--src/gallium/drivers/r600/radeon.h198
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c26
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_state_vertex.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c2
-rw-r--r--src/gallium/drivers/svga/svga_cmd.c2
-rw-r--r--src/gallium/drivers/svga/svga_context.c5
-rw-r--r--src/gallium/drivers/svga/svga_context.h1
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c82
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c3
-rw-r--r--src/gallium/drivers/svga/svga_state_framebuffer.c15
-rw-r--r--src/gallium/drivers/svga/svga_state_tss.c27
-rw-r--r--src/gallium/drivers/svga/svga_swtnl.h11
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_draw.c39
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_emit.h1
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c19
-rw-r--r--src/gallium/drivers/trace/tr_context.c9
-rw-r--r--src/gallium/include/pipe/p_compiler.h8
-rw-r--r--src/gallium/include/pipe/p_context.h30
-rw-r--r--src/gallium/include/pipe/p_defines.h6
-rw-r--r--src/gallium/include/pipe/p_format.h2
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h2
-rw-r--r--src/gallium/include/pipe/p_state.h1
-rw-r--r--src/gallium/include/state_tracker/graw.h28
-rw-r--r--src/gallium/state_trackers/dri/common/dri_context.h1
-rw-r--r--src/gallium/state_trackers/dri/common/dri_screen.c5
-rw-r--r--src/gallium/state_trackers/dri/common/dri_screen.h1
-rw-r--r--src/gallium/state_trackers/dri/drm/dri2.c4
-rw-r--r--src/gallium/state_trackers/dri/sw/drisw.c2
-rw-r--r--src/gallium/state_trackers/egl/Makefile2
-rw-r--r--src/gallium/state_trackers/egl/SConscript1
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.c12
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.h20
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_api.c33
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_image.c245
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_image.h8
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_sync.c284
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_sync.h53
-rw-r--r--src/gallium/state_trackers/egl/kms/native_kms.c79
-rw-r--r--src/gallium/state_trackers/glx/xlib/glx_api.c8
-rw-r--r--src/gallium/state_trackers/glx/xlib/glx_usefont.c3
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.c78
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.h4
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_st.c24
-rw-r--r--src/gallium/state_trackers/wgl/stw_context.c2
-rw-r--r--src/gallium/state_trackers/wgl/stw_device.c2
-rw-r--r--src/gallium/state_trackers/wgl/stw_pixelformat.c4
-rw-r--r--src/gallium/state_trackers/wgl/stw_pixelformat.h2
-rw-r--r--src/gallium/targets/Makefile.dri13
-rw-r--r--src/gallium/targets/SConscript15
-rw-r--r--src/gallium/targets/dri-r300/Makefile (renamed from src/gallium/targets/dri-radeong/Makefile)2
-rw-r--r--src/gallium/targets/dri-r300/SConscript (renamed from src/gallium/targets/dri-radeong/SConscript)4
-rw-r--r--src/gallium/targets/dri-r300/target.c (renamed from src/gallium/targets/dri-radeong/target.c)2
-rw-r--r--src/gallium/targets/dri-r600/Makefile6
-rw-r--r--src/gallium/targets/egl-gdi/egl-static.c39
-rw-r--r--src/gallium/targets/egl/Makefile33
-rw-r--r--src/gallium/targets/egl/egl.c27
-rw-r--r--src/gallium/targets/egl/pipe_r300.c (renamed from src/gallium/targets/egl/pipe_radeon.c)2
-rw-r--r--src/gallium/targets/egl/pipe_r600.c27
-rw-r--r--src/gallium/targets/graw-xlib/graw_util.c1
-rw-r--r--src/gallium/targets/graw-xlib/graw_xlib.c1
-rw-r--r--src/gallium/targets/libgl-gdi/SConscript1
-rw-r--r--src/gallium/targets/libgl-xlib/Makefile3
-rw-r--r--src/gallium/targets/libgl-xlib/SConscript1
-rw-r--r--src/gallium/targets/libgl-xlib/xlib.c1
-rwxr-xr-xsrc/gallium/tests/python/tests/texture_blit.py2
-rwxr-xr-xsrc/gallium/tools/addr2line.sh26
-rw-r--r--src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c3
-rw-r--r--src/gallium/winsys/r600/drm/r600_state.c182
-rw-r--r--src/gallium/winsys/r600/drm/r600_states.h161
-rw-r--r--src/gallium/winsys/r600/drm/r600d.h3
-rw-r--r--src/gallium/winsys/r600/drm/radeon.c70
-rw-r--r--src/gallium/winsys/r600/drm/radeon_bo.c4
-rw-r--r--src/gallium/winsys/r600/drm/radeon_ctx.c218
-rw-r--r--src/gallium/winsys/r600/drm/radeon_draw.c108
-rw-r--r--src/gallium/winsys/r600/drm/radeon_priv.h37
-rw-r--r--src/gallium/winsys/r600/drm/radeon_state.c145
-rw-r--r--src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c8
303 files changed, 18498 insertions, 8515 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 9544e90a96..eb86d83d2a 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
LIBNAME = gallium
C_SOURCES = \
- cso_cache/cso_context.c \
cso_cache/cso_cache.c \
+ cso_cache/cso_context.c \
cso_cache/cso_hash.c \
draw/draw_context.c \
draw/draw_gs.c \
@@ -26,7 +26,6 @@ C_SOURCES = \
draw/draw_pipe_wide_line.c \
draw/draw_pipe_wide_point.c \
draw/draw_pt.c \
- draw/draw_pt_elts.c \
draw/draw_pt_emit.c \
draw/draw_pt_fetch.c \
draw/draw_pt_fetch_emit.c \
@@ -35,24 +34,24 @@ C_SOURCES = \
draw/draw_pt_post_vs.c \
draw/draw_pt_so_emit.c \
draw/draw_pt_util.c \
- draw/draw_pt_varray.c \
- draw/draw_pt_vcache.c \
+ draw/draw_pt_vsplit.c \
draw/draw_vertex.c \
draw/draw_vs.c \
- draw/draw_vs_varient.c \
draw/draw_vs_aos.c \
draw/draw_vs_aos_io.c \
draw/draw_vs_aos_machine.c \
draw/draw_vs_exec.c \
draw/draw_vs_ppc.c \
draw/draw_vs_sse.c \
+ draw/draw_vs_varient.c \
indices/u_indices_gen.c \
indices/u_unfilled_gen.c \
os/os_misc.c \
+ os/os_stream.c \
os/os_stream_log.c \
+ os/os_stream_null.c \
os/os_stream_stdc.c \
os/os_stream_str.c \
- os/os_stream_null.c \
os/os_time.c \
pipebuffer/pb_buffer_fenced.c \
pipebuffer/pb_buffer_malloc.c \
@@ -65,17 +64,16 @@ C_SOURCES = \
pipebuffer/pb_bufmgr_slab.c \
pipebuffer/pb_validate.c \
rbug/rbug_connection.c \
+ rbug/rbug_context.c \
rbug/rbug_core.c \
+ rbug/rbug_demarshal.c \
rbug/rbug_texture.c \
- rbug/rbug_context.c \
rbug/rbug_shader.c \
- rbug/rbug_demarshal.c \
rtasm/rtasm_cpu.c \
rtasm/rtasm_execmem.c \
- rtasm/rtasm_x86sse.c \
rtasm/rtasm_ppc.c \
rtasm/rtasm_ppc_spe.c \
- tgsi/tgsi_sanity.c \
+ rtasm/rtasm_x86sse.c \
tgsi/tgsi_build.c \
tgsi/tgsi_dump.c \
tgsi/tgsi_exec.c \
@@ -83,19 +81,22 @@ C_SOURCES = \
tgsi/tgsi_iterate.c \
tgsi/tgsi_parse.c \
tgsi/tgsi_ppc.c \
+ tgsi/tgsi_sanity.c \
tgsi/tgsi_scan.c \
tgsi/tgsi_sse2.c \
tgsi/tgsi_text.c \
tgsi/tgsi_transform.c \
tgsi/tgsi_ureg.c \
tgsi/tgsi_util.c \
- translate/translate_generic.c \
- translate/translate_sse.c \
translate/translate.c \
translate/translate_cache.c \
+ translate/translate_generic.c \
+ translate/translate_sse.c \
util/u_debug.c \
- util/u_debug_symbol.c \
+ util/u_debug_describe.c \
+ util/u_debug_refcnt.c \
util/u_debug_stack.c \
+ util/u_debug_symbol.c \
util/u_dump_defines.c \
util/u_dump_state.c \
util/u_bitmask.c \
@@ -118,10 +119,11 @@ C_SOURCES = \
util/u_gen_mipmap.c \
util/u_half.c \
util/u_handle_table.c \
- util/u_hash_table.c \
util/u_hash.c \
+ util/u_hash_table.c \
util/u_keymap.c \
util/u_linear.c \
+ util/u_linkage.c \
util/u_network.c \
util/u_math.c \
util/u_mempool.c \
@@ -172,10 +174,10 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
- draw/draw_vs_llvm.c \
- draw/draw_pt_fetch_shade_pipeline_llvm.c \
+ draw/draw_llvm_sample.c \
draw/draw_llvm_translate.c \
- draw/draw_llvm_sample.c
+ draw/draw_vs_llvm.c \
+ draw/draw_pt_fetch_shade_pipeline_llvm.c
GALLIVM_CPP_SOURCES = \
gallivm/lp_bld_misc.cpp
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 3124e20ce8..6210ada990 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -50,10 +50,11 @@ env.Depends('util/u_format_table.c', [
])
source = [
- 'cso_cache/cso_context.c',
'cso_cache/cso_cache.c',
+ 'cso_cache/cso_context.c',
'cso_cache/cso_hash.c',
'draw/draw_context.c',
+ 'draw/draw_gs.c',
'draw/draw_pipe.c',
'draw/draw_pipe_aaline.c',
'draw/draw_pipe_aapoint.c',
@@ -71,7 +72,6 @@ source = [
'draw/draw_pipe_wide_line.c',
'draw/draw_pipe_wide_point.c',
'draw/draw_pt.c',
- 'draw/draw_pt_elts.c',
'draw/draw_pt_emit.c',
'draw/draw_pt_fetch.c',
'draw/draw_pt_fetch_emit.c',
@@ -80,8 +80,7 @@ source = [
'draw/draw_pt_post_vs.c',
'draw/draw_pt_so_emit.c',
'draw/draw_pt_util.c',
- 'draw/draw_pt_varray.c',
- 'draw/draw_pt_vcache.c',
+ 'draw/draw_pt_vsplit.c',
'draw/draw_vertex.c',
'draw/draw_vs.c',
'draw/draw_vs_aos.c',
@@ -91,16 +90,16 @@ source = [
'draw/draw_vs_ppc.c',
'draw/draw_vs_sse.c',
'draw/draw_vs_varient.c',
- 'draw/draw_gs.c',
#'indices/u_indices.c',
#'indices/u_unfilled_indices.c',
'indices/u_indices_gen.c',
'indices/u_unfilled_gen.c',
'os/os_misc.c',
+ 'os/os_stream.c',
'os/os_stream_log.c',
+ 'os/os_stream_null.c',
'os/os_stream_stdc.c',
'os/os_stream_str.c',
- 'os/os_stream_null.c',
'os/os_time.c',
'pipebuffer/pb_buffer_fenced.c',
'pipebuffer/pb_buffer_malloc.c',
@@ -112,35 +111,35 @@ source = [
'pipebuffer/pb_bufmgr_pool.c',
'pipebuffer/pb_bufmgr_slab.c',
'pipebuffer/pb_validate.c',
+ 'rbug/rbug_connection.c',
+ 'rbug/rbug_context.c',
'rbug/rbug_core.c',
+ 'rbug/rbug_demarshal.c',
'rbug/rbug_shader.c',
- 'rbug/rbug_context.c',
'rbug/rbug_texture.c',
- 'rbug/rbug_demarshal.c',
- 'rbug/rbug_connection.c',
'rtasm/rtasm_cpu.c',
'rtasm/rtasm_execmem.c',
- 'rtasm/rtasm_x86sse.c',
'rtasm/rtasm_ppc.c',
'rtasm/rtasm_ppc_spe.c',
+ 'rtasm/rtasm_x86sse.c',
'tgsi/tgsi_build.c',
'tgsi/tgsi_dump.c',
'tgsi/tgsi_exec.c',
'tgsi/tgsi_info.c',
'tgsi/tgsi_iterate.c',
'tgsi/tgsi_parse.c',
+ 'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sanity.c',
'tgsi/tgsi_scan.c',
- 'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sse2.c',
'tgsi/tgsi_text.c',
'tgsi/tgsi_transform.c',
'tgsi/tgsi_ureg.c',
'tgsi/tgsi_util.c',
- 'translate/translate_generic.c',
- 'translate/translate_sse.c',
'translate/translate.c',
'translate/translate_cache.c',
+ 'translate/translate_generic.c',
+ 'translate/translate_sse.c',
'util/u_bitmask.c',
'util/u_blit.c',
'util/u_blitter.c',
@@ -148,7 +147,9 @@ source = [
'util/u_caps.c',
'util/u_cpu_detect.c',
'util/u_debug.c',
+ 'util/u_debug_describe.c',
'util/u_debug_memory.c',
+ 'util/u_debug_refcnt.c',
'util/u_debug_stack.c',
'util/u_debug_symbol.c',
'util/u_dump_defines.c',
@@ -170,6 +171,8 @@ source = [
'util/u_hash.c',
'util/u_hash_table.c',
'util/u_keymap.c',
+ 'util/u_linear.c',
+ 'util/u_linkage.c',
'util/u_network.c',
'util/u_math.c',
'util/u_mempool.c',
@@ -208,9 +211,9 @@ if env['llvm']:
'gallivm/lp_bld_format_soa.c',
'gallivm/lp_bld_format_yuv.c',
'gallivm/lp_bld_gather.c',
+ 'gallivm/lp_bld_init.c',
'gallivm/lp_bld_intr.c',
'gallivm/lp_bld_logic.c',
- 'gallivm/lp_bld_init.c',
'gallivm/lp_bld_misc.cpp',
'gallivm/lp_bld_pack.c',
'gallivm/lp_bld_printf.c',
@@ -222,10 +225,10 @@ if env['llvm']:
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
- 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+ 'draw/draw_llvm_sample.c',
'draw/draw_llvm_translate.c',
- 'draw/draw_vs_llvm.c',
- 'draw/draw_llvm_sample.c'
+ 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+ 'draw/draw_vs_llvm.c'
]
gallium = env.ConvenienceLibrary(
diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
new file mode 100644
index 0000000000..958ed20dc8
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2010, VMware, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
+ struct draw_vertex_info *info )
+{
+ struct vertex_header *out = info->verts;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ /* const */ float (*plane)[4] = pvs->draw->plane;
+ const unsigned pos = draw_current_shader_position_output(pvs->draw);
+ const unsigned ef = pvs->draw->vs.edgeflag_output;
+ const unsigned nr = pvs->draw->nr_planes;
+ const unsigned flags = (FLAGS);
+ unsigned need_pipeline = 0;
+ unsigned j;
+
+ for (j = 0; j < info->count; j++) {
+ float *position = out->data[pos];
+ unsigned mask = 0x0;
+
+ initialize_vertex_header(out);
+
+ if (flags & (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_HALF_Z | DO_CLIP_USER)) {
+ out->clip[0] = position[0];
+ out->clip[1] = position[1];
+ out->clip[2] = position[2];
+ out->clip[3] = position[3];
+
+ /* Do the hardwired planes first:
+ */
+ if (flags & DO_CLIP_XY) {
+ if (-position[0] + position[3] < 0) mask |= (1<<0);
+ if ( position[0] + position[3] < 0) mask |= (1<<1);
+ if (-position[1] + position[3] < 0) mask |= (1<<2);
+ if ( position[1] + position[3] < 0) mask |= (1<<3);
+ }
+
+ /* Clip Z planes according to full cube, half cube or none.
+ */
+ if (flags & DO_CLIP_FULL_Z) {
+ if ( position[2] + position[3] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+ else if (flags & DO_CLIP_HALF_Z) {
+ if ( position[2] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+
+ if (flags & DO_CLIP_USER) {
+ unsigned i;
+ for (i = 6; i < nr; i++) {
+ if (dot4(position, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+ }
+
+ out->clipmask = mask;
+ need_pipeline |= out->clipmask;
+ }
+
+ if ((flags & DO_VIEWPORT) && mask == 0)
+ {
+ /* divide by w */
+ float w = 1.0f / position[3];
+
+ /* Viewport mapping */
+ position[0] = position[0] * w * scale[0] + trans[0];
+ position[1] = position[1] * w * scale[1] + trans[1];
+ position[2] = position[2] * w * scale[2] + trans[2];
+ position[3] = w;
+ }
+
+ if ((flags & DO_EDGEFLAG) && ef) {
+ const float *edgeflag = out->data[ef];
+ out->edgeflag = !(edgeflag[0] != 1.0f);
+ need_pipeline |= !out->edgeflag;
+ }
+
+ out = (struct vertex_header *)( (char *)out + info->stride );
+ }
+
+ return need_pipeline != 0;
+}
+
+
+#undef FLAGS
+#undef TAG
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 995b675b9a..937b093479 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -34,6 +34,7 @@
#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
@@ -41,6 +42,25 @@
#if HAVE_LLVM
#include "gallivm/lp_bld_init.h"
#include "draw_llvm.h"
+
+static boolean
+draw_get_option_use_llvm(void)
+{
+ static boolean first = TRUE;
+ static boolean value;
+ if (first) {
+ first = FALSE;
+ value = debug_get_bool_option("DRAW_USE_LLVM", TRUE);
+
+#ifdef PIPE_ARCH_X86
+ util_cpu_detect();
+ /* require SSE2 due to LLVM PR6960. */
+ if (!util_cpu_caps.has_sse2)
+ value = FALSE;
+#endif
+ }
+ return value;
+}
#endif
struct draw_context *draw_create( struct pipe_context *pipe )
@@ -50,10 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe )
goto fail;
#if HAVE_LLVM
- lp_build_init();
- assert(lp_build_engine);
- draw->engine = lp_build_engine;
- draw->llvm = draw_llvm_create(draw);
+ if(draw_get_option_use_llvm())
+ {
+ lp_build_init();
+ assert(lp_build_engine);
+ draw->engine = lp_build_engine;
+ draw->llvm = draw_llvm_create(draw);
+ }
#endif
if (!draw_init(draw))
@@ -83,6 +106,8 @@ boolean draw_init(struct draw_context *draw)
ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
draw->nr_planes = 6;
+ draw->clip_xy = 1;
+ draw->clip_z = 1;
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
@@ -135,7 +160,8 @@ void draw_destroy( struct draw_context *draw )
draw_vs_destroy( draw );
draw_gs_destroy( draw );
#ifdef HAVE_LLVM
- draw_llvm_destroy( draw->llvm );
+ if(draw->llvm)
+ draw_llvm_destroy( draw->llvm );
#endif
FREE( draw );
@@ -162,6 +188,14 @@ void draw_set_mrd(struct draw_context *draw, double mrd)
}
+static void update_clip_flags( struct draw_context *draw )
+{
+ draw->clip_xy = !draw->driver.bypass_clip_xy;
+ draw->clip_z = (!draw->driver.bypass_clip_z &&
+ !draw->depth_clamp);
+ draw->clip_user = (draw->nr_planes > 6);
+}
+
/**
* Register new primitive rasterization/rendering state.
* This causes the drawing pipeline to be rebuilt.
@@ -176,18 +210,25 @@ void draw_set_rasterizer_state( struct draw_context *draw,
draw->rasterizer = raster;
draw->rast_handle = rast_handle;
- draw->bypass_clipping = draw->driver.bypass_clipping;
- }
+ }
}
-
+/* With a little more work, llvmpipe will be able to turn this off and
+ * do its own x/y clipping.
+ *
+ * Some hardware can turn off clipping altogether - in particular any
+ * hardware with a TNL unit can do its own clipping, even if it is
+ * relying on the draw module for some other reason.
+ */
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping )
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z )
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->driver.bypass_clipping = bypass_clipping;
- draw->bypass_clipping = draw->driver.bypass_clipping;
+ draw->driver.bypass_clip_xy = bypass_clip_xy;
+ draw->driver.bypass_clip_z = bypass_clip_z;
+ update_clip_flags(draw);
}
@@ -217,6 +258,8 @@ void draw_set_clip_state( struct draw_context *draw,
memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
draw->nr_planes = 6 + clip->nr;
draw->depth_clamp = clip->depth_clamp;
+
+ update_clip_flags(draw);
}
@@ -472,47 +515,28 @@ void draw_set_render( struct draw_context *draw,
}
-
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw the drawing context
- * \param eltSize size of each element (1, 2 or 4 bytes)
- * \param elements the element buffer ptr
- */
void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements )
+draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = min_index;
- draw->pt.user.max_index = max_index;
+ if (ib)
+ memcpy(&draw->pt.index_buffer, ib, sizeof(draw->pt.index_buffer));
+ else
+ memset(&draw->pt.index_buffer, 0, sizeof(draw->pt.index_buffer));
}
+/**
+ * Tell drawing context where to find mapped index/element buffer.
+ */
void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements )
+draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = 0;
- draw->pt.user.max_index = 0xffffffff;
+ draw->pt.user.elts = elements;
}
-
+
/* Revamp me please:
*/
void draw_do_flush( struct draw_context *draw, unsigned flags )
@@ -659,7 +683,8 @@ draw_set_mapped_texture(struct draw_context *draw,
const void *data[DRAW_MAX_TEXTURE_LEVELS])
{
#ifdef HAVE_LLVM
- draw_llvm_set_mapped_texture(draw,
+ if(draw->llvm)
+ draw_llvm_set_mapped_texture(draw,
sampler_idx,
width, height, depth, last_level,
row_stride, img_stride, data);
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 116716af6f..4c780e4dcb 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -160,18 +160,11 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
-void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements );
-
-void draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements );
+void draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib);
+
+void draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements);
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
@@ -196,6 +189,9 @@ draw_set_so_state(struct draw_context *draw,
* draw_pt.c
*/
+void draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info);
+
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
@@ -216,7 +212,8 @@ void draw_set_render( struct draw_context *draw,
struct vbuf_render *render );
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping );
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z );
void draw_set_force_passthrough( struct draw_context *draw,
boolean enable );
diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
index a52d2b5058..a142563af9 100644
--- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -54,10 +54,10 @@ FUNC(FUNC_VARS)
FUNC_ENTER;
- /* prim, count, and last_vertex_last should have been defined */
+ /* prim, prim_flags, count, and last_vertex_last should have been defined */
if (0) {
- debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n",
- __FUNCTION__, prim, count, last_vertex_last);
+ debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n",
+ __FUNCTION__, prim, prim_flags, count, last_vertex_last);
}
switch (prim) {
@@ -80,7 +80,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_LOOP:
case PIPE_PRIM_LINE_STRIP:
if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = idx[1];
@@ -90,7 +90,7 @@ FUNC(FUNC_VARS)
LINE(flags, idx[0], idx[1]);
}
/* close the loop */
- if (prim == PIPE_PRIM_LINE_LOOP)
+ if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags)
LINE(flags, idx[1], idx[2]);
}
break;
@@ -255,17 +255,23 @@ FUNC(FUNC_VARS)
if (last_vertex_last) {
flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_2 |
DRAW_PIPE_EDGE_FLAG_0);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_2;
+
edge_next = DRAW_PIPE_EDGE_FLAG_0;
- edge_finish = DRAW_PIPE_EDGE_FLAG_1;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_1;
}
else {
flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
DRAW_PIPE_EDGE_FLAG_1);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_0;
+
edge_next = DRAW_PIPE_EDGE_FLAG_1;
- edge_finish = DRAW_PIPE_EDGE_FLAG_2;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2;
}
idx[0] = GET_ELT(0);
@@ -300,7 +306,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
if (count >= 4) {
- flags = DRAW_PIPE_RESET_STIPPLE;
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = GET_ELT(1);
idx[3] = GET_ELT(2);
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 4a1013e79a..50a03ac95a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
#define FUNC gs_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[idx])
#include "draw_gs_tmp.h"
@@ -457,6 +457,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
output_prims->start = 0;
output_prims->count = shader->emitted_vertices;
output_prims->prim = shader->output_primitive;
+ output_prims->flags = 0x0;
output_prims->primitive_lengths = shader->primitive_lengths;
output_prims->primitive_count = shader->emitted_primitives;
output_verts->count = shader->emitted_vertices;
diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
index 4a17af0dea..de7b02655a 100644
--- a/src/gallium/auxiliary/draw/draw_gs_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -6,12 +6,10 @@
#define FUNC_ENTER \
/* declare more local vars */ \
- struct draw_context *draw = gs->draw; \
const unsigned prim = input_prims->prim; \
+ const unsigned prim_flags = input_prims->flags; \
const unsigned count = input_prims->count; \
- const boolean last_vertex_last = \
- !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
+ const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8d53601d19..8759c38cab 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -210,13 +210,6 @@ draw_llvm_create(struct draw_context *draw)
{
struct draw_llvm *llvm;
-#ifdef PIPE_ARCH_X86
- util_cpu_detect();
- /* require SSE2 due to LLVM PR6960. */
- if (!util_cpu_caps.has_sse2)
- return NULL;
-#endif
-
llvm = CALLOC_STRUCT( draw_llvm );
if (!llvm)
return NULL;
@@ -292,15 +285,23 @@ draw_llvm_destroy(struct draw_llvm *llvm)
}
struct draw_llvm_variant *
-draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_inputs,
+ const struct draw_llvm_variant_key *key)
{
- struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+ struct draw_llvm_variant *variant;
struct llvm_vertex_shader *shader =
llvm_vertex_shader(llvm->draw->vs.vertex_shader);
+ variant = MALLOC(sizeof *variant +
+ shader->variant_key_size -
+ sizeof variant->key);
+ if (variant == NULL)
+ return NULL;
+
variant->llvm = llvm;
- draw_llvm_make_variant_key(llvm, &variant->key);
+ memcpy(&variant->key, key, shader->variant_key_size);
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
@@ -738,8 +739,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
/* code generated texture sampling */
- sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
- context_ptr);
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
#if DEBUG_STORE
lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
@@ -901,8 +903,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
/* code generated texture sampling */
- sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
- context_ptr);
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
fetch_max = LLVMBuildSub(builder, fetch_count,
LLVMConstInt(LLVMInt32Type(), 1, 0),
@@ -1002,35 +1005,42 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
lp_func_delete_body(variant->function_elts);
}
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key)
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
{
unsigned i;
+ struct draw_llvm_variant_key *key;
+ struct lp_sampler_static_state *sampler;
- memset(key, 0, sizeof(struct draw_llvm_variant_key));
+ key = (struct draw_llvm_variant_key *)store;
+ /* Presumably all variants of the shader should have the same
+ * number of vertex elements - ie the number of shader inputs.
+ */
key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
+ /* All variants of this shader will have the same value for
+ * nr_samplers. Not yet trying to compact away holes in the
+ * sampler array.
+ */
+ key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ sampler = draw_llvm_variant_key_samplers(key);
+
memcpy(key->vertex_element,
llvm->draw->pt.vertex_element,
sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+
+ memset(sampler, 0, key->nr_samplers * sizeof *sampler);
- memcpy(&key->vs,
- &llvm->draw->vs.vertex_shader->state,
- sizeof(struct pipe_shader_state));
-
- /* if the driver implemented the sampling hooks then
- * setup our sampling state */
- if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) {
- for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) {
- struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader;
- if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
- lp_sampler_static_state(&key->sampler[i],
- llvm->draw->sampler_views[i],
- llvm->draw->samplers[i]);
- }
+ for (i = 0 ; i < key->nr_samplers; i++) {
+ lp_sampler_static_state(&sampler[i],
+ llvm->draw->sampler_views[i],
+ llvm->draw->samplers[i]);
}
+
+ return key;
}
void
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 4addb47d2d..6196b2f983 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -151,12 +151,43 @@ typedef void
struct draw_llvm_variant_key
{
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- unsigned nr_vertex_elements;
- struct pipe_shader_state vs;
- struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned nr_vertex_elements:16;
+ unsigned nr_samplers:16;
+
+ /* Variable number of vertex elements:
+ */
+ struct pipe_vertex_element vertex_element[1];
+
+ /* Followed by variable number of samplers:
+ */
+/* struct lp_sampler_static_state sampler; */
};
+#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
+ (sizeof(struct draw_llvm_variant_key) + \
+ PIPE_MAX_VERTEX_SAMPLERS * sizeof(struct lp_sampler_static_state) + \
+ (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element))
+
+
+static INLINE size_t
+draw_llvm_variant_key_size(unsigned nr_vertex_elements,
+ unsigned nr_samplers)
+{
+ return (sizeof(struct draw_llvm_variant_key) +
+ nr_samplers * sizeof(struct lp_sampler_static_state) +
+ (nr_vertex_elements - 1) * sizeof(struct pipe_vertex_element));
+}
+
+
+static INLINE struct lp_sampler_static_state *
+draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
+{
+ return (struct lp_sampler_static_state *)
+ &key->vertex_element[key->nr_vertex_elements];
+}
+
+
+
struct draw_llvm_variant_list_item
{
struct draw_llvm_variant *base;
@@ -165,7 +196,6 @@ struct draw_llvm_variant_list_item
struct draw_llvm_variant
{
- struct draw_llvm_variant_key key;
LLVMValueRef function;
LLVMValueRef function_elts;
draw_jit_vert_func jit_func;
@@ -176,11 +206,16 @@ struct draw_llvm_variant
struct draw_llvm *llvm;
struct draw_llvm_variant_list_item list_item_global;
struct draw_llvm_variant_list_item list_item_local;
+
+ /* key is variable-sized, must be last */
+ struct draw_llvm_variant_key key;
+ /* key is variable-sized, must be last */
};
struct llvm_vertex_shader {
struct draw_vertex_shader base;
+ unsigned variant_key_size;
struct draw_llvm_variant_list_item variants;
unsigned variants_created;
unsigned variants_cached;
@@ -220,14 +255,15 @@ void
draw_llvm_destroy(struct draw_llvm *llvm);
struct draw_llvm_variant *
-draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs);
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_vertex_header_attribs,
+ const struct draw_llvm_variant_key *key);
void
draw_llvm_destroy_variant(struct draw_llvm_variant *variant);
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key);
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
LLVMValueRef
draw_llvm_translate_from(LLVMBuilderRef builder,
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 58995e0724..6206197dae 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -169,35 +169,27 @@ static void do_triangle( struct draw_context *draw,
/*
* Set up macros for draw_pt_decompose.h template code.
* This code uses vertex indexes / elements.
- *
- * Flags are needed by the stipple and unfilled stages. When the two stages
- * are active, vcache_run_extras is called and the flags are stored in the
- * higher bits of i0. Otherwise, flags do not matter.
*/
#define TRIANGLE(flags,i0,i1,i2) \
do { \
- assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
- assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \
do_triangle( draw, \
- i0, /* flags */ \
- verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
+ flags, \
+ verts + stride * (i0), \
verts + stride * (i1), \
verts + stride * (i2) ); \
} while (0)
#define LINE(flags,i0,i1) \
do { \
- assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
do_line( draw, \
- i0, /* flags */ \
- verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
+ flags, \
+ verts + stride * (i0), \
verts + stride * (i1) ); \
} while (0)
#define POINT(i0) \
do { \
- assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \
do_point( draw, verts + stride * (i0) ); \
} while (0)
@@ -207,6 +199,7 @@ static void do_triangle( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
const ushort *elts, \
@@ -245,22 +238,27 @@ void draw_pipeline_run( struct draw_context *draw,
const unsigned count = prim_info->primitive_lengths[i];
#if DEBUG
- /* make sure none of the element indexes go outside the vertex buffer */
+ /* Warn if one of the element indexes go outside the vertex buffer */
{
unsigned max_index = 0x0, i;
/* find the largest element index */
for (i = 0; i < count; i++) {
- unsigned int index = (prim_info->elts[start + i]
- & ~DRAW_PIPE_FLAG_MASK);
+ unsigned int index = prim_info->elts[start + i];
if (index > max_index)
max_index = index;
}
- assert(max_index <= vert_info->count);
+ if (max_index >= vert_info->count) {
+ debug_printf("%s: max_index (%u) outside vertex buffer (%u)\n",
+ __FUNCTION__,
+ max_index,
+ vert_info->count);
+ }
}
#endif
pipe_run_elts(draw,
prim_info->prim,
+ prim_info->flags,
vert_info->verts,
vert_info->stride,
prim_info->elts + start,
@@ -298,6 +296,7 @@ void draw_pipeline_run( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
unsigned count
@@ -330,6 +329,7 @@ void draw_pipeline_run_linear( struct draw_context *draw,
pipe_run_linear(draw,
prim_info->prim,
+ prim_info->flags,
(struct vertex_header*)verts,
vert_info->stride,
count);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index eafa29276f..8b92543987 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -265,7 +265,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
/* Clip stage
*/
- if (!draw->bypass_clipping)
+ if (draw->clip_xy || draw->clip_z || draw->clip_user)
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 3c93c9014a..58c5858734 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -353,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
- /* even number */
- vbuf->max_vertices = vbuf->max_vertices & ~1;
-
if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 397d4bf653..362f563ba6 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -140,8 +140,7 @@ struct draw_context
} middle;
struct {
- struct draw_pt_front_end *vcache;
- struct draw_pt_front_end *varray;
+ struct draw_pt_front_end *vsplit;
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -150,6 +149,8 @@ struct draw_context
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_elements;
+ struct pipe_index_buffer index_buffer;
+
/* user-space vertex data, buffers */
struct {
/** vertex element/index buffer (ex: glDrawElements) */
@@ -175,13 +176,19 @@ struct draw_context
} pt;
struct {
- boolean bypass_clipping;
- boolean bypass_vs;
+ boolean bypass_clip_xy;
+ boolean bypass_clip_z;
} driver;
boolean flushing; /**< debugging/sanity */
boolean suspend_flushing; /**< internally set */
- boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
+
+ /* Flags set if API requires clipping in these planes and the
+ * driver doesn't indicate that it can do it for us.
+ */
+ boolean clip_xy;
+ boolean clip_z;
+ boolean clip_user;
boolean force_passthrough; /**< never clip or shade */
@@ -296,6 +303,10 @@ struct draw_vertex_info {
unsigned count;
};
+/* these flags are set if the primitive is a segment of a larger one */
+#define DRAW_SPLIT_BEFORE 0x1
+#define DRAW_SPLIT_AFTER 0x2
+
struct draw_prim_info {
boolean linear;
unsigned start;
@@ -304,6 +315,7 @@ struct draw_prim_info {
unsigned count;
unsigned prim;
+ unsigned flags;
unsigned *primitive_lengths;
unsigned primitive_count;
};
@@ -369,21 +381,15 @@ void draw_pipeline_destroy( struct draw_context *draw );
-/* We use the top few bits in the elts[] parameter to convey a little
- * API information. This limits the number of vertices we can address
- * to only 4096 -- if that becomes a problem, we can switch to 32-bit
- * draw indices.
- *
- * These flags expected at first vertex of lines & triangles when
- * unfilled and/or line stipple modes are operational.
+/*
+ * These flags are used by the pipeline when unfilled and/or line stipple modes
+ * are operational.
*/
-#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
-#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
-#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
-#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
-#define DRAW_PIPE_FLAG_MASK (0xf<<12)
+#define DRAW_PIPE_EDGE_FLAG_0 0x1
+#define DRAW_PIPE_EDGE_FLAG_1 0x2
+#define DRAW_PIPE_EDGE_FLAG_2 0x4
+#define DRAW_PIPE_EDGE_FLAG_ALL 0x7
+#define DRAW_PIPE_RESET_STIPPLE 0x8
void draw_pipeline_run( struct draw_context *draw,
const struct draw_vertex_info *vert,
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 248927505d..f44bf2507c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -39,25 +39,14 @@
#include "util/u_math.h"
#include "util/u_prim.h"
#include "util/u_format.h"
+#include "util/u_draw.h"
DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
-#ifdef HAVE_LLVM
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
-#endif
-
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- if (count < first)
- return 0;
- return count - (count - first) % incr;
-}
-
-
/* Overall we split things into:
- * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - frontend -- prepare fetch_elts, draw_elts - eg vsplit
* - middle -- fetch, shade, cliptest, viewport
* - pipeline -- the prim pipeline: clipping, wide lines, etc
* - backend -- the vbuf_render provided by the driver.
@@ -77,7 +66,7 @@ draw_pt_arrays(struct draw_context *draw,
{
unsigned first, incr;
draw_pt_split_prim(prim, &first, &incr);
- count = trim(count, first, incr);
+ count = draw_pt_trim_count(count, first, incr);
if (count < first)
return TRUE;
}
@@ -97,7 +86,9 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_PIPELINE;
}
- if (!draw->bypass_clipping && !draw->pt.test_fse) {
+ if ((draw->clip_xy ||
+ draw->clip_z ||
+ draw->clip_user) && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
@@ -115,22 +106,11 @@ draw_pt_arrays(struct draw_context *draw,
middle = draw->pt.middle.general;
}
-
- /* Pick the right frontend
- */
- if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
- frontend = draw->pt.front.vcache;
- } else {
- frontend = draw->pt.front.varray;
- }
+ frontend = draw->pt.front.vsplit;
frontend->prepare( frontend, prim, middle, opt );
- frontend->run(frontend,
- draw_pt_elt_func(draw),
- draw_pt_elt_ptr(draw, start),
- draw->pt.user.eltBias,
- count);
+ frontend->run(frontend, start, count);
frontend->finish( frontend );
@@ -143,12 +123,8 @@ boolean draw_pt_init( struct draw_context *draw )
draw->pt.test_fse = debug_get_option_draw_fse();
draw->pt.no_fse = debug_get_option_draw_no_fse();
- draw->pt.front.vcache = draw_pt_vcache( draw );
- if (!draw->pt.front.vcache)
- return FALSE;
-
- draw->pt.front.varray = draw_pt_varray(draw);
- if (!draw->pt.front.varray)
+ draw->pt.front.vsplit = draw_pt_vsplit(draw);
+ if (!draw->pt.front.vsplit)
return FALSE;
draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
@@ -164,7 +140,7 @@ boolean draw_pt_init( struct draw_context *draw )
return FALSE;
#if HAVE_LLVM
- if (debug_get_option_draw_use_llvm())
+ if (draw->llvm)
draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
#endif
@@ -194,14 +170,9 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = NULL;
}
- if (draw->pt.front.vcache) {
- draw->pt.front.vcache->destroy( draw->pt.front.vcache );
- draw->pt.front.vcache = NULL;
- }
-
- if (draw->pt.front.varray) {
- draw->pt.front.varray->destroy( draw->pt.front.varray );
- draw->pt.front.varray = NULL;
+ if (draw->pt.front.vsplit) {
+ draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
+ draw->pt.front.vsplit = NULL;
}
}
@@ -221,24 +192,29 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
uint ii = 0;
uint j;
- if (draw->pt.user.elts) {
+ if (draw->pt.user.eltSize) {
+ const char *elts;
+
/* indexed arrays */
+ elts = (const char *) draw->pt.user.elts;
+ elts += draw->pt.index_buffer.offset;
+
switch (draw->pt.user.eltSize) {
case 1:
{
- const ubyte *elem = (const ubyte *) draw->pt.user.elts;
+ const ubyte *elem = (const ubyte *) elts;
ii = elem[start + i];
}
break;
case 2:
{
- const ushort *elem = (const ushort *) draw->pt.user.elts;
+ const ushort *elem = (const ushort *) elts;
ii = elem[start + i];
}
break;
case 4:
{
- const uint *elem = (const uint *) draw->pt.user.elts;
+ const uint *elem = (const uint *) elts;
ii = elem[start + i];
}
break;
@@ -324,17 +300,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
/**
- * Draw vertex arrays.
- * This is the main entrypoint into the drawing module.
- * If drawing an indexed primitive, the draw_set_mapped_element_buffer_range()
- * function should have already been called to specify the element/index buffer
- * information.
- *
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
- * \param startInstance number for the first primitive instance (usually 0).
- * \param instanceCount number of instances to draw (1=non-instanced)
+ * Instanced drawing.
+ * \sa draw_vbo
*/
void
draw_arrays_instanced(struct draw_context *draw,
@@ -344,10 +311,50 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount)
{
- unsigned reduced_prim = u_reduced_prim(mode);
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.start_instance = startInstance;
+ info.instance_count = instanceCount;
+
+ info.indexed = (draw->pt.user.elts != NULL);
+ if (!info.indexed) {
+ info.min_index = start;
+ info.max_index = start + count - 1;
+ }
+
+ draw_vbo(draw, &info);
+}
+
+
+/**
+ * Draw vertex arrays.
+ * This is the main entrypoint into the drawing module. If drawing an indexed
+ * primitive, the draw_set_index_buffer() and draw_set_mapped_index_buffer()
+ * functions should have already been called to specify the element/index
+ * buffer information.
+ */
+void
+draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info)
+{
+ unsigned reduced_prim = u_reduced_prim(info->mode);
unsigned instance;
- assert(instanceCount > 0);
+ assert(info->instance_count > 0);
+ if (info->indexed)
+ assert(draw->pt.user.elts);
+
+ draw->pt.user.eltSize =
+ (info->indexed) ? draw->pt.index_buffer.index_size : 0;
+
+ draw->pt.user.eltBias = info->index_bias;
+ draw->pt.user.min_index = info->min_index;
+ draw->pt.user.max_index = info->max_index;
if (reduced_prim != draw->reduced_prim) {
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
@@ -355,8 +362,8 @@ draw_arrays_instanced(struct draw_context *draw,
}
if (0)
- debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
- mode, start, count);
+ debug_printf("draw_vbo(mode=%u start=%u count=%u):\n",
+ info->mode, info->start, info->count);
if (0)
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
@@ -384,10 +391,10 @@ draw_arrays_instanced(struct draw_context *draw,
}
if (0)
- draw_print_arrays(draw, mode, start, MIN2(count, 20));
+ draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20));
- for (instance = 0; instance < instanceCount; instance++) {
- draw->instance_id = instance + startInstance;
- draw_pt_arrays(draw, mode, start, count);
+ for (instance = 0; instance < info->instance_count; instance++) {
+ draw->instance_id = instance + info->start_instance;
+ draw_pt_arrays(draw, info->mode, info->start, info->count);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 44356fba4c..5fbb424291 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -35,8 +35,6 @@
#include "pipe/p_compiler.h"
-typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
-
struct draw_pt_middle_end;
struct draw_context;
struct draw_prim_info;
@@ -52,13 +50,18 @@ struct draw_vertex_info;
/* The "front end" - prepare sets of fetch, draw elements for the
* middle end.
*
- * Currenly one version of this:
- * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
- * Later:
- * - varray, varray_split
- * - velement, velement_split
+ * The fetch elements are indices to the vertices. The draw elements are
+ * indices to the fetched vertices. When both arrays of elements are both
+ * linear, middle->run_linear is called; When only the fetch elements are
+ * linear, middle->run_linear_elts is called; Otherwise, middle->run is
+ * called.
+ *
+ * When the number of the draw elements exceeds max_vertex of the middle end,
+ * the draw elements (as well as the fetch elements) are splitted and the
+ * middle end is called multiple times.
*
- * Currenly only using the vcache version.
+ * Currenly there is:
+ * - vsplit - catchall implementation, splits big prims
*/
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
@@ -67,9 +70,7 @@ struct draw_pt_front_end {
unsigned opt );
void (*run)( struct draw_pt_front_end *,
- pt_elt_func elt_func,
- const void *elt_ptr,
- int elt_bias,
+ unsigned start,
unsigned count );
void (*finish)( struct draw_pt_front_end * );
@@ -80,6 +81,8 @@ struct draw_pt_front_end {
/* The "middle end" - prepares actual hardware vertices for the
* hardware backend.
*
+ * prim_flags is as defined by pipe_draw_info::flags.
+ *
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
@@ -94,11 +97,13 @@ struct draw_pt_middle_end {
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
void (*run_linear)(struct draw_pt_middle_end *,
unsigned start,
- unsigned count);
+ unsigned count,
+ unsigned prim_flags );
/* Transform all vertices in a linear range and then draw them with
* the supplied element list. May fail and return FALSE.
@@ -107,7 +112,8 @@ struct draw_pt_middle_end {
unsigned fetch_start,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
int (*get_max_vertex_count)( struct draw_pt_middle_end * );
@@ -122,19 +128,11 @@ struct vbuf_render;
struct vertex_header;
-/* Helper functions.
- */
-pt_elt_func draw_pt_elt_func( struct draw_context *draw );
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start );
-
/* Frontends:
*
- * Currently only the general-purpose vcache implementation, could add
- * a special case for tiny vertex buffers.
+ * Currently only the general-purpose vsplit implementation.
*/
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
/* Middle-ends:
@@ -223,7 +221,9 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
struct draw_vertex_info *info );
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags );
@@ -237,6 +237,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
deleted file mode 100644
index 88f4d9f495..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "draw/draw_pt.h"
-#include "draw/draw_private.h"
-
-/* Neat get_elt func that also works for varrays drawing by encoding
- * the start value into a pointer.
- */
-
-static unsigned elt_uint( const void *elts, unsigned idx )
-{
- return *(((const uint *)elts) + idx);
-}
-
-static unsigned elt_ushort( const void *elts, unsigned idx )
-{
- return *(((const ushort *)elts) + idx);
-}
-
-static unsigned elt_ubyte( const void *elts, unsigned idx )
-{
- return *(((const ubyte *)elts) + idx);
-}
-
-static unsigned elt_vert( const void *elts, unsigned idx )
-{
- /* unsigned index is packed in the pointer */
- return (unsigned)(uintptr_t)elts + idx;
-}
-
-pt_elt_func draw_pt_elt_func( struct draw_context *draw )
-{
- switch (draw->pt.user.eltSize) {
- case 0: return &elt_vert;
- case 1: return &elt_ubyte;
- case 2: return &elt_ushort;
- case 4: return &elt_uint;
- default: return NULL;
- }
-}
-
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start )
-{
- const char *elts = draw->pt.user.elts;
-
- switch (draw->pt.user.eltSize) {
- case 0:
- return (const void *)(((const ubyte *)NULL) + start);
- case 1:
- return (const void *)(((const ubyte *)elts) + start);
- case 2:
- return (const void *)(((const ushort *)elts) + start);
- case 4:
- return (const void *)(((const uint *)elts) + start);
- default:
- return NULL;
- }
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 5568fbb9f8..c8dfc16911 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -120,9 +120,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* even number */
- *max_vertices = *max_vertices & ~1;
}
@@ -147,11 +144,6 @@ void draw_pt_emit( struct pt_emit *emit,
if (vertex_count == 0)
return;
- if (vertex_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -226,9 +218,6 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 5c8af17c8e..e706b7796f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -191,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
}
@@ -210,7 +201,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -220,11 +212,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)fetch_count );
@@ -273,7 +260,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -283,9 +271,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -334,7 +319,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -344,9 +330,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index b8270280b6..7c198c6026 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -102,7 +102,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->key.nr_inputs); /* inputs - fetch from api format */
fse->key.viewport = !draw->identity_viewport;
- fse->key.clip = !draw->bypass_clipping;
+ fse->key.clip = draw->clip_xy || draw->clip_z || draw->clip_user;
fse->key.const_vbuffers = 0;
memset(fse->key.element, 0,
@@ -175,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
-
/* Probably need to do this somewhere (or fix exec shader not to
* need it):
*/
@@ -197,7 +188,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
static void fse_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -207,9 +199,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -265,7 +254,8 @@ fse_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -275,9 +265,6 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)fetch_count ))
@@ -327,7 +314,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -337,9 +325,6 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 5b16c3788e..b72fd61245 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -100,8 +100,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)draw->identity_viewport,
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
@@ -112,16 +114,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
gs_out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
- /* return even number */
- *max_vertices = *max_vertices & ~1;
-
/* No need to prepare the shader.
*/
vs->prepare(vs, draw);
@@ -295,7 +294,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -311,6 +311,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -320,7 +321,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -336,6 +338,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +351,8 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +368,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 4b99bee86a..77291e304e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -66,7 +66,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
struct draw_context *draw = fpme->draw;
struct llvm_vertex_shader *shader =
llvm_vertex_shader(draw->vs.vertex_shader);
- struct draw_llvm_variant_key key;
+ char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
+ struct draw_llvm_variant_key *key;
struct draw_llvm_variant *variant = NULL;
struct draw_llvm_variant_list_item *li;
unsigned i;
@@ -106,8 +107,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)(draw->identity_viewport),
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
@@ -118,21 +121,21 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
/* return even number */
*max_vertices = *max_vertices & ~1;
-
- draw_llvm_make_variant_key(fpme->llvm, &key);
+
+ key = draw_llvm_make_variant_key(fpme->llvm, store);
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
- if(memcmp(&li->base->key, &key, sizeof key) == 0) {
+ if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
variant = li->base;
break;
}
@@ -155,7 +158,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
}
}
- variant = draw_llvm_create_variant(fpme->llvm, nr);
+ variant = draw_llvm_create_variant(fpme->llvm, nr, key);
if (variant) {
insert_at_head(&shader->variants, &variant->list_item_local);
@@ -294,7 +297,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -310,6 +314,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -319,7 +324,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -335,6 +341,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +355,8 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +372,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 308f927b77..769409cfd6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -26,14 +26,26 @@
**************************************************************************/
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_context.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+
+#define DO_CLIP_XY 0x1
+#define DO_CLIP_FULL_Z 0x2
+#define DO_CLIP_HALF_Z 0x4
+#define DO_CLIP_USER 0x8
+#define DO_VIEWPORT 0x10
+#define DO_EDGEFLAG 0x20
+
+
struct pt_post_vs {
struct draw_context *draw;
+ unsigned flags;
+
boolean (*run)( struct pt_post_vs *pvs,
struct draw_vertex_info *info );
};
@@ -56,186 +68,47 @@ dot4(const float *a, const float *b)
a[3]*b[3]);
}
-static INLINE unsigned
-compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr,
- boolean clip_depth)
-{
- unsigned mask = 0x0;
- unsigned i;
+#define FLAGS (0)
+#define TAG(x) x##_none
+#include "draw_cliptest_tmp.h"
-#if 0
- debug_printf("compute clipmask %f %f %f %f\n",
- clip[0], clip[1], clip[2], clip[3]);
- assert(clip[3] != 0.0);
-#endif
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_viewport
+#include "draw_cliptest_tmp.h"
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= (1<<0);
- if ( clip[0] + clip[3] < 0) mask |= (1<<1);
- if (-clip[1] + clip[3] < 0) mask |= (1<<2);
- if ( clip[1] + clip[3] < 0) mask |= (1<<3);
- if (clip_depth) {
- if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
- if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
- }
+#define FLAGS (DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_halfz_viewport
+#include "draw_cliptest_tmp.h"
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
+#define FLAGS (DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_fullz_viewport
+#include "draw_cliptest_tmp.h"
- return mask;
-}
+#define FLAGS (DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_halfz_viewport
+#include "draw_cliptest_tmp.h"
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_user_viewport
+#include "draw_cliptest_tmp.h"
-/* The normal case - cliptest, rhw divide, viewport transform.
- *
- * Also handle identity viewport here at the expense of a few wasted
- * instructions
- */
-static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
- struct draw_vertex_info *info )
-{
- struct vertex_header *out = info->verts;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned clipped = 0;
- unsigned j;
-
- if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count);
-
- for (j = 0; j < info->count; j++) {
- float *position = out->data[pos];
-
- initialize_vertex_header(out);
-#if 0
- debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
- j, out, position, position[0], position[1], position[2], position[3]);
-#endif
-
- out->clip[0] = position[0];
- out->clip[1] = position[1];
- out->clip[2] = position[2];
- out->clip[3] = position[3];
-
- out->vertex_id = 0xffff;
- /* Disable depth clipping if depth clamping is enabled. */
- out->clipmask = compute_clipmask_gl(out->clip,
- pvs->draw->plane,
- pvs->draw->nr_planes,
- !pvs->draw->depth_clamp);
- clipped += out->clipmask;
-
- if (out->clipmask == 0)
- {
- /* divide by w */
- float w = 1.0f / position[3];
-
- /* Viewport mapping */
- position[0] = position[0] * w * scale[0] + trans[0];
- position[1] = position[1] * w * scale[1] + trans[1];
- position[2] = position[2] * w * scale[2] + trans[2];
- position[3] = w;
-#if 0
- debug_printf("post viewport: %f %f %f %f\n",
- position[0],
- position[1],
- position[2],
- position[3]);
-#endif
- }
-
- out = (struct vertex_header *)( (char *)out + info->stride );
- }
-
- return clipped != 0;
-}
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT | DO_EDGEFLAG)
+#define TAG(x) x##_xy_fullz_user_viewport_edgeflag
+#include "draw_cliptest_tmp.h"
-/* As above plus edgeflags
+/* Don't want to create 64 versions of this function, so catch the
+ * less common ones here. This is looking like something which should
+ * be code-generated, perhaps appended to the end of the vertex
+ * shader.
*/
-static boolean
-post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
- struct draw_vertex_info *info)
-{
- unsigned j;
- boolean needpipe;
-
- needpipe = post_vs_cliptest_viewport_gl(pvs, info);
-
- /* If present, copy edgeflag VS output into vertex header.
- * Otherwise, leave header as is.
- */
- if (pvs->draw->vs.edgeflag_output) {
- struct vertex_header *out = info->verts;
- int ef = pvs->draw->vs.edgeflag_output;
-
- for (j = 0; j < info->count; j++) {
- const float *edgeflag = out->data[ef];
- out->edgeflag = !(edgeflag[0] != 1.0f);
- needpipe |= !out->edgeflag;
- out = (struct vertex_header *)( (char *)out + info->stride );
- }
- }
- return needpipe;
-}
-
+#define FLAGS (pvs->flags)
+#define TAG(x) x##_generic
+#include "draw_cliptest_tmp.h"
-/* If bypass_clipping is set, skip cliptest and rhw divide.
- */
-static boolean post_vs_viewport( struct pt_post_vs *pvs,
- struct draw_vertex_info *info )
-{
- struct vertex_header *out = info->verts;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
- for (j = 0; j < info->count; j++) {
- float *position = out->data[pos];
-
- initialize_vertex_header(out);
- /* Viewport mapping only, no cliptest/rhw divide
- */
- position[0] = position[0] * scale[0] + trans[0];
- position[1] = position[1] * scale[1] + trans[1];
- position[2] = position[2] * scale[2] + trans[2];
-
- out = (struct vertex_header *)((char *)out + info->stride);
- }
-
- return FALSE;
-}
-
-
-/* If bypass_clipping is set and we have an identity viewport, nothing
- * to do.
- */
-static boolean post_vs_none( struct pt_post_vs *pvs,
- struct draw_vertex_info *info )
-{
- struct vertex_header *out = info->verts;
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
- /* just initialize the vertex_id in all headers */
- for (j = 0; j < info->count; j++) {
- initialize_vertex_header(out);
-
- out = (struct vertex_header *)((char *)out + info->stride);
- }
- return FALSE;
-}
-
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
struct draw_vertex_info *info )
{
@@ -244,31 +117,72 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags )
{
- if (!need_edgeflags) {
- if (bypass_clipping) {
- if (bypass_viewport)
- pvs->run = post_vs_none;
- else
- pvs->run = post_vs_viewport;
- }
- else {
- /* if (opengl) */
- pvs->run = post_vs_cliptest_viewport_gl;
- }
+ pvs->flags = 0;
+
+ if (clip_xy)
+ pvs->flags |= DO_CLIP_XY;
+
+ if (clip_z && opengl) {
+ pvs->flags |= DO_CLIP_FULL_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 1 );
+ }
+
+ if (clip_z && !opengl) {
+ pvs->flags |= DO_CLIP_HALF_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 0 );
}
- else {
- /* If we need to copy edgeflags to the vertex header, it should
- * mean we're running the primitive pipeline. Hence the bypass
- * flags should be false.
- */
- assert(!bypass_clipping);
- assert(!bypass_viewport);
- pvs->run = post_vs_cliptest_viewport_gl_edgeflag;
+
+ if (clip_user)
+ pvs->flags |= DO_CLIP_USER;
+
+ if (!bypass_viewport)
+ pvs->flags |= DO_VIEWPORT;
+
+ if (need_edgeflags)
+ pvs->flags |= DO_EDGEFLAG;
+
+ /* Now select the relevant function:
+ */
+ switch (pvs->flags) {
+ case 0:
+ pvs->run = do_cliptest_none;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_halfz_viewport;
+ break;
+
+ case DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_fullz_viewport;
+ break;
+
+ case DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_halfz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_user_viewport;
+ break;
+
+ case (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER |
+ DO_VIEWPORT | DO_EDGEFLAG):
+ pvs->run = do_cliptest_xy_fullz_user_viewport_edgeflag;
+ break;
+
+ default:
+ pvs->run = do_cliptest_generic;
+ break;
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index f7f4f24d35..c86bdd99a3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
#define FUNC so_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[start + (idx)])
#include "draw_so_emit_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 182a597cca..513bbbed21 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
}
}
+
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr)
+{
+ if (count < first)
+ return 0;
+ return count - (count - first) % incr;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
deleted file mode 100644
index cd7bb7bf25..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-#define FETCH_MAX 256
-#define DRAW_MAX (FETCH_MAX+8)
-
-struct varray_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned driver_fetch_max;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-};
-
-
-static void varray_flush_linear(struct varray_frontend *varray,
- unsigned start, unsigned count)
-{
- if (count) {
- assert(varray->middle->run_linear);
- varray->middle->run_linear(varray->middle, start, count);
- }
-}
-
-static void varray_line_loop_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count,
- boolean end )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 1) {
- unsigned nr = 0, i;
-
- for (i = 0; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- if (end)
- varray->fetch_elts[nr++] = start;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-static void varray_fan_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 2) {
- unsigned nr = 0, i;
-
- if (segment_start != 0)
- varray->fetch_elts[nr++] = start;
-
- for (i = 0 ; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-
-#define FUNC varray_run
-#include "draw_pt_varray_tmp_linear.h"
-
-static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
- PIPE_PRIM_LINE_STRIP,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLE_STRIP,
- PIPE_PRIM_TRIANGLE_FAN,
- PIPE_PRIM_QUADS,
- PIPE_PRIM_QUAD_STRIP,
- PIPE_PRIM_POLYGON,
- PIPE_PRIM_LINES_ADJACENCY,
- PIPE_PRIM_LINE_STRIP_ADJACENCY,
- PIPE_PRIM_TRIANGLES_ADJACENCY,
- PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
-};
-
-
-
-static void varray_prepare(struct draw_pt_front_end *frontend,
- unsigned in_prim,
- struct draw_pt_middle_end *middle,
- unsigned opt)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
-
- varray->base.run = varray_run;
-
- varray->input_prim = in_prim;
- assert(in_prim < Elements(decompose_prim));
- varray->output_prim = decompose_prim[in_prim];
-
- varray->middle = middle;
- middle->prepare(middle,
- varray->output_prim,
- opt, &varray->driver_fetch_max );
-
- /* check that the max is even */
- assert((varray->driver_fetch_max & 1) == 0);
-
- varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
-}
-
-
-
-
-static void varray_finish(struct draw_pt_front_end *frontend)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- varray->middle->finish(varray->middle);
- varray->middle = NULL;
-}
-
-static void varray_destroy(struct draw_pt_front_end *frontend)
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
-{
- ushort i;
- struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
- if (varray == NULL)
- return NULL;
-
- varray->base.prepare = varray_prepare;
- varray->base.run = NULL;
- varray->base.finish = varray_finish;
- varray->base.destroy = varray_destroy;
- varray->draw = draw;
-
- for (i = 0; i < DRAW_MAX; i++) {
- varray->draw_elts[i] = i;
- }
-
- return &varray->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
deleted file mode 100644
index 7c722457c3..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ /dev/null
@@ -1,238 +0,0 @@
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- struct draw_context *draw = varray->draw;
- unsigned start = (unsigned)elts;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i, j;
- ushort flags;
- unsigned first, incr;
-
- varray->fetch_start = start;
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i < end; i++) {
- POINT(varray, i + 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+1 < end; i += 2) {
- LINE(varray, DRAW_PIPE_RESET_STIPPLE,
- i + 0, i + 1);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- LINE(varray, flags, i - 1, 0);
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i += 3) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- else {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i + 2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- }
- break;
-
- case PIPE_PRIM_QUADS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- varray_flush(varray);
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
deleted file mode 100644
index 55e43b2a71..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ /dev/null
@@ -1,103 +0,0 @@
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- /*
- * count either has been trimmed in draw_pt_arrays or is set to
- * (driver)_fetch_max which is hopefully always larger than first.
- */
- assert(count >= first);
- return count - (count - first) % incr;
-}
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- unsigned start = (unsigned) ((char *) elts - (char *) NULL);
-
- unsigned j;
- unsigned first, incr;
-
- assert(elt_bias == 0);
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
- /* Sanitize primitive length:
- */
- count = trim(count, first, incr);
- if (count < first)
- return;
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_QUADS:
- case PIPE_PRIM_QUAD_STRIP:
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
- varray_flush_linear(varray, start + j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- /* Always have to decompose as we've stated that this will be
- * emitted as a line-strip.
- */
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_line_loop_segment(varray, start, j, nr, nr == remaining);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
-
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count < varray->driver_fetch_max) {
- varray_flush_linear(varray, start, count);
- }
- else {
- for ( j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_fan_segment(varray, start, j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
deleted file mode 100644
index a848b54f7d..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ /dev/null
@@ -1,610 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "util/u_memory.h"
-#include "util/u_prim.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-
-#define CACHE_MAX 256
-#define FETCH_MAX 256
-#define DRAW_MAX (16*1024)
-
-
-struct vcache_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- unsigned in[CACHE_MAX];
- ushort out[CACHE_MAX];
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned draw_count;
- unsigned fetch_count;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-
- unsigned middle_prim;
- unsigned opt;
-};
-
-
-static INLINE void
-vcache_flush( struct vcache_frontend *vcache )
-{
- if (vcache->middle_prim != vcache->output_prim) {
- vcache->middle_prim = vcache->output_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- if (vcache->draw_count) {
- vcache->middle->run( vcache->middle,
- vcache->fetch_elts,
- vcache->fetch_count,
- vcache->draw_elts,
- vcache->draw_count );
- }
-
- memset(vcache->in, ~0, sizeof(vcache->in));
- vcache->fetch_count = 0;
- vcache->draw_count = 0;
-}
-
-
-static INLINE void
-vcache_check_flush( struct vcache_frontend *vcache )
-{
- if (vcache->draw_count + 6 >= DRAW_MAX ||
- vcache->fetch_count + 6 >= FETCH_MAX) {
- vcache_flush( vcache );
- }
-}
-
-
-static INLINE void
-vcache_elt( struct vcache_frontend *vcache,
- unsigned felt,
- ushort flags )
-{
- unsigned idx = felt % CACHE_MAX;
-
- if (vcache->in[idx] != felt) {
- assert(vcache->fetch_count < FETCH_MAX);
-
- vcache->in[idx] = felt;
- vcache->out[idx] = (ushort)vcache->fetch_count;
- vcache->fetch_elts[vcache->fetch_count++] = felt;
- }
-
- vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
-}
-
-
-
-static INLINE void
-vcache_triangle( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_point( struct vcache_frontend *vcache,
- unsigned i0 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj_flags( struct vcache_frontend *vcache,
- unsigned flags,
- unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj( struct vcache_frontend *vcache,
- unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj_flags( struct vcache_frontend *vcache,
- unsigned flags,
- unsigned i0, unsigned a0,
- unsigned i1, unsigned a1,
- unsigned i2, unsigned a2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_elt(vcache, a2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj( struct vcache_frontend *vcache,
- unsigned i0, unsigned a0,
- unsigned i1, unsigned a1,
- unsigned i2, unsigned a2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_elt(vcache, a2, 0);
- vcache_check_flush(vcache);
-}
-
-
-/* At least for now, we're back to using a template include file for
- * this. The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2)
-#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1)
-#define POINT(i0) vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
- vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
- vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run_extras
-#include "draw_pt_vcache_tmp.h"
-
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2)
-#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1)
-#define POINT(i0) vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
- vcache_line_adj(vcache,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
- vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run
-#include "draw_pt_vcache_tmp.h"
-
-static INLINE void
-rebase_uint_elts( const unsigned *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-rebase_ushort_elts( const ushort *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-rebase_ubyte_elts( const ubyte *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-translate_uint_elts( const unsigned *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void
-translate_ushort_elts( const ushort *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void
-translate_ubyte_elts( const ubyte *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-
-
-#if 0
-static INLINE enum pipe_format
-format_from_get_elt( pt_elt_func get_elt )
-{
- switch (draw->pt.user.eltSize) {
- case 1: return PIPE_FORMAT_R8_UNORM;
- case 2: return PIPE_FORMAT_R16_UNORM;
- case 4: return PIPE_FORMAT_R32_UNORM;
- default: return PIPE_FORMAT_NONE;
- }
-}
-#endif
-
-
-/**
- * Check if any vertex attributes use instance divisors.
- * Note that instance divisors complicate vertex fetching so we need
- * to take the vcache path when they're in use.
- */
-static boolean
-any_instance_divisors(const struct draw_context *draw)
-{
- uint i;
-
- for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- uint div = draw->pt.vertex_element[i].instance_divisor;
- if (div)
- return TRUE;
- }
- return FALSE;
-}
-
-
-static INLINE void
-vcache_check_run( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned draw_count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
- const unsigned min_index = draw->pt.user.min_index;
- const unsigned max_index = draw->pt.user.max_index;
- const unsigned index_size = draw->pt.user.eltSize;
- unsigned fetch_count;
- const ushort *transformed_elts;
- ushort *storage = NULL;
- boolean ok = FALSE;
-
- /* debug: verify indexes are in range [min_index, max_index] */
- if (0) {
- unsigned i;
- for (i = 0; i < draw_count; i++) {
- if (index_size == 1) {
- assert( ((const ubyte *) elts)[i] >= min_index);
- assert( ((const ubyte *) elts)[i] <= max_index);
- }
- else if (index_size == 2) {
- assert( ((const ushort *) elts)[i] >= min_index);
- assert( ((const ushort *) elts)[i] <= max_index);
- }
- else {
- assert(index_size == 4);
- assert( ((const uint *) elts)[i] >= min_index);
- assert( ((const uint *) elts)[i] <= max_index);
- }
- }
- }
-
- /* Note: max_index is frequently 0xffffffff so we have to be sure
- * that any arithmetic involving max_index doesn't overflow!
- */
- if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
- goto fail;
-
- if (any_instance_divisors(draw))
- goto fail;
-
- fetch_count = max_index + 1 - min_index;
-
- if (0)
- debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
- vcache->fetch_max,
- draw_count);
-
- if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
- fetch_count >= UNDEFINED_VERTEX_ID ||
- fetch_count > draw_count) {
- if (0) debug_printf("fail\n");
- goto fail;
- }
-
- if (vcache->middle_prim != vcache->input_prim) {
- vcache->middle_prim = vcache->input_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
- (elt_bias < 0 && min_index + elt_bias < min_index));
-
- if (min_index == 0 &&
- index_size == 2) {
- transformed_elts = (const ushort *)elts;
- }
- else {
- storage = MALLOC( draw_count * sizeof(ushort) );
- if (!storage)
- goto fail;
-
- if (min_index == 0) {
- switch(index_size) {
- case 1:
- translate_ubyte_elts( (const ubyte *)elts,
- draw_count,
- storage );
- break;
-
- case 2:
- translate_ushort_elts( (const ushort *)elts,
- draw_count,
- storage );
- break;
-
- case 4:
- translate_uint_elts( (const uint *)elts,
- draw_count,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- else {
- switch(index_size) {
- case 1:
- rebase_ubyte_elts( (const ubyte *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 2:
- rebase_ushort_elts( (const ushort *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 4:
- rebase_uint_elts( (const uint *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- transformed_elts = storage;
- }
-
- if (fetch_count < UNDEFINED_VERTEX_ID)
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index + elt_bias, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
-
- FREE(storage);
-
- if (ok)
- return;
-
- debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
- fetch_count, draw_count);
-
-fail:
- vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
-}
-
-
-
-
-static void
-vcache_prepare( struct draw_pt_front_end *frontend,
- unsigned in_prim,
- struct draw_pt_middle_end *middle,
- unsigned opt )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-
- if (opt & PT_PIPELINE) {
- vcache->base.run = vcache_run_extras;
- }
- else {
- vcache->base.run = vcache_check_run;
- }
-
- /* VCache will always emit the reduced version of its input
- * primitive, ie STRIP/FANS become TRIS, etc.
- *
- * This is not to be confused with what the GS might be up to,
- * which is a separate issue.
- */
- vcache->input_prim = in_prim;
- switch (in_prim) {
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY;
- break;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY;
- break;
- default:
- vcache->output_prim = u_reduced_prim(in_prim);
- }
-
- vcache->middle = middle;
- vcache->opt = opt;
-
- /* Have to run prepare here, but try and guess a good prim for
- * doing so:
- */
- vcache->middle_prim = (opt & PT_PIPELINE)
- ? vcache->output_prim : vcache->input_prim;
-
- middle->prepare( middle,
- vcache->middle_prim,
- opt, &vcache->fetch_max );
-}
-
-
-static void
-vcache_finish( struct draw_pt_front_end *frontend )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- vcache->middle->finish( vcache->middle );
- vcache->middle = NULL;
-}
-
-
-static void
-vcache_destroy( struct draw_pt_front_end *frontend )
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
-{
- struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
- if (vcache == NULL)
- return NULL;
-
- vcache->base.prepare = vcache_prepare;
- vcache->base.run = NULL;
- vcache->base.finish = vcache_finish;
- vcache->base.destroy = vcache_destroy;
- vcache->draw = draw;
-
- memset(vcache->in, ~0, sizeof(vcache->in));
-
- return &vcache->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
deleted file mode 100644
index 1a3748d5f0..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#define FUNC_VARS \
- struct draw_pt_front_end *frontend, \
- pt_elt_func get_elt, \
- const void *elts, \
- int elt_bias, \
- unsigned count
-
-#define LOCAL_VARS \
- struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \
- struct draw_context *draw = vcache->draw; \
- const unsigned prim = vcache->input_prim; \
- const boolean last_vertex_last = !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first);
-
-#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias)
-
-#define FUNC_EXIT do { vcache_flush(vcache); } while (0)
-
-#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
new file mode 100644
index 0000000000..a687525309
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -0,0 +1,208 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define SEGMENT_SIZE 1024
+#define MAP_SIZE 256
+
+struct vsplit_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ unsigned prim;
+
+ struct draw_pt_middle_end *middle;
+
+ unsigned max_vertices;
+ ushort segment_size;
+
+ /* buffers for splitting */
+ unsigned fetch_elts[SEGMENT_SIZE];
+ ushort draw_elts[SEGMENT_SIZE];
+ ushort identity_draw_elts[SEGMENT_SIZE];
+
+ struct {
+ /* map a fetch element to a draw element */
+ unsigned fetches[MAP_SIZE];
+ ushort draws[MAP_SIZE];
+ boolean has_max_fetch;
+
+ ushort num_fetch_elts;
+ ushort num_draw_elts;
+ } cache;
+};
+
+
+static void
+vsplit_clear_cache(struct vsplit_frontend *vsplit)
+{
+ memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches));
+ vsplit->cache.has_max_fetch = FALSE;
+ vsplit->cache.num_fetch_elts = 0;
+ vsplit->cache.num_draw_elts = 0;
+}
+
+static void
+vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
+{
+ vsplit->middle->run(vsplit->middle,
+ vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
+ vsplit->draw_elts, vsplit->cache.num_draw_elts, flags);
+}
+
+/**
+ * Add a fetch element and add it to the draw elements.
+ */
+static INLINE void
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ unsigned hash = fetch % MAP_SIZE;
+
+ if (vsplit->cache.fetches[hash] != fetch) {
+ /* update cache */
+ vsplit->cache.fetches[hash] = fetch;
+ vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
+
+ /* add fetch */
+ assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
+ vsplit->fetch_elts[vsplit->cache.num_fetch_elts++] = fetch;
+ }
+
+ vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash];
+}
+
+
+/**
+ * Add a fetch element and add it to the draw elements. The fetch element is
+ * in full range (uint).
+ */
+static INLINE void
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ /* special care for 0xffffffff */
+ if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
+ unsigned hash = fetch % MAP_SIZE;
+ vsplit->cache.fetches[hash] = fetch - 1; /* force update */
+ vsplit->cache.has_max_fetch = TRUE;
+ }
+
+ vsplit_add_cache(vsplit, fetch);
+}
+
+
+#define FUNC vsplit_run_linear
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ubyte
+#define ELT_TYPE ubyte
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ushort
+#define ELT_TYPE ushort
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_uint
+#define ELT_TYPE uint
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+
+static void vsplit_prepare(struct draw_pt_front_end *frontend,
+ unsigned in_prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+
+ switch (vsplit->draw->pt.user.eltSize) {
+ case 0:
+ vsplit->base.run = vsplit_run_linear;
+ break;
+ case 1:
+ vsplit->base.run = vsplit_run_ubyte;
+ break;
+ case 2:
+ vsplit->base.run = vsplit_run_ushort;
+ break;
+ case 4:
+ vsplit->base.run = vsplit_run_uint;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* split only */
+ vsplit->prim = in_prim;
+
+ vsplit->middle = middle;
+ middle->prepare(middle, vsplit->prim, opt, &vsplit->max_vertices);
+
+ vsplit->segment_size = MIN2(SEGMENT_SIZE, vsplit->max_vertices);
+}
+
+
+static void vsplit_finish(struct draw_pt_front_end *frontend)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+ vsplit->middle->finish(vsplit->middle);
+ vsplit->middle = NULL;
+}
+
+
+static void vsplit_destroy(struct draw_pt_front_end *frontend)
+{
+ FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
+{
+ struct vsplit_frontend *vsplit = CALLOC_STRUCT(vsplit_frontend);
+ ushort i;
+
+ if (!vsplit)
+ return NULL;
+
+ vsplit->base.prepare = vsplit_prepare;
+ vsplit->base.run = NULL;
+ vsplit->base.finish = vsplit_finish;
+ vsplit->base.destroy = vsplit_destroy;
+ vsplit->draw = draw;
+
+ for (i = 0; i < SEGMENT_SIZE; i++)
+ vsplit->identity_draw_elts[i] = i;
+
+ return &vsplit->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
new file mode 100644
index 0000000000..3f66f962e1
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -0,0 +1,309 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define CONCAT2(name, elt_type) name ## elt_type
+#define CONCAT(name, elt_type) CONCAT2(name, elt_type)
+
+#ifdef ELT_TYPE
+
+/**
+ * Fetch all elements in [min_index, max_index] with bias, and use the
+ * (rebased) index buffer as the draw elements.
+ */
+static boolean
+CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned istart, unsigned icount)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *)
+ ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+ const unsigned min_index = draw->pt.user.min_index;
+ const unsigned max_index = draw->pt.user.max_index;
+ const int elt_bias = draw->pt.user.eltBias;
+ unsigned fetch_start, fetch_count;
+ const ushort *draw_elts = NULL;
+ unsigned i;
+
+ /* use the ib directly */
+ if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+ if (icount > vsplit->max_vertices)
+ return FALSE;
+
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+ assert(idx >= min_index && idx <= max_index);
+ }
+ draw_elts = (const ushort *) ib;
+ }
+ else {
+ /* have to go through vsplit->draw_elts */
+ if (icount > vsplit->segment_size)
+ return FALSE;
+ }
+
+ /* this is faster only when we fetch less elements than the normal path */
+ if (max_index - min_index > icount - 1)
+ return FALSE;
+
+ if (elt_bias < 0 && min_index < -elt_bias)
+ return FALSE;
+
+ /* why this check? */
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ if (draw->pt.vertex_element[i].instance_divisor)
+ return FALSE;
+ }
+
+ fetch_start = min_index + elt_bias;
+ fetch_count = max_index - min_index + 1;
+
+ if (!draw_elts) {
+ if (min_index == 0) {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) idx;
+ }
+ }
+ else {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) (idx - min_index);
+ }
+ }
+
+ draw_elts = vsplit->draw_elts;
+ }
+
+ return vsplit->middle->run_linear_elts(vsplit->middle,
+ fetch_start, fetch_count,
+ draw_elts, icount, 0x0);
+}
+
+/**
+ * Use the cache to prepare the fetch and draw elements, and flush.
+ *
+ * When spoken is TRUE, ispoken replaces istart; When close is TRUE, iclose is
+ * appended.
+ */
+static INLINE void
+CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart, unsigned icount,
+ boolean spoken, unsigned ispoken,
+ boolean close, unsigned iclose)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *)
+ ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+ const int ibias = draw->pt.user.eltBias;
+ unsigned i;
+
+ assert(icount + !!close <= vsplit->segment_size);
+
+ vsplit_clear_cache(vsplit);
+
+ spoken = !!spoken;
+ if (ibias == 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, ib[ispoken]);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, ib[istart + i]);
+
+ if (close)
+ ADD_CACHE(vsplit, ib[iclose]);
+ }
+ else if (ibias > 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, (uint) ib[istart + i] + ibias);
+
+ if (close)
+ ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
+ }
+ else {
+ if (spoken) {
+ if (ib[ispoken] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[ispoken] + ibias);
+ }
+
+ for (i = spoken; i < icount; i++) {
+ if (ib[istart + i] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[istart + i] + ibias);
+ }
+
+ if (close) {
+ if (ib[iclose] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[iclose] + ibias);
+ }
+ }
+
+ vsplit_flush_cache(vsplit, flags);
+}
+
+static void
+CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount)
+{
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, FALSE, 0);
+}
+
+static void
+CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, close_loop, i0);
+}
+
+static void
+CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, use_spoken, i0, FALSE, 0);
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->segment_size; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) \
+ CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount)
+
+#else /* ELT_TYPE */
+
+static void
+vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount)
+{
+ assert(icount <= vsplit->max_vertices);
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+}
+
+static void
+vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean close_loop = (flags == DRAW_SPLIT_BEFORE);
+ unsigned nr;
+
+ assert(icount + !!close_loop <= vsplit->segment_size);
+
+ if (close_loop) {
+ for (nr = 0; nr < icount; nr++)
+ vsplit->fetch_elts[nr] = istart + nr;
+ vsplit->fetch_elts[nr++] = i0;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+static void
+vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
+ unsigned nr = 0, i;
+
+ assert(icount + !!use_spoken <= vsplit->segment_size);
+
+ if (use_spoken) {
+ vsplit->fetch_elts[nr++] = i0;
+ for (i = 1 ; i < icount; i++)
+ vsplit->fetch_elts[nr++] = istart + i;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->max_vertices; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) FALSE
+
+#define ELT_TYPE linear
+
+#endif /* ELT_TYPE */
+
+#define FUNC_VARS \
+ struct draw_pt_front_end *frontend, \
+ unsigned start, \
+ unsigned count
+
+#define SEGMENT_SIMPLE(flags, istart, icount) \
+ CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
+
+#define SEGMENT_LOOP(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#define SEGMENT_FAN(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#include "draw_split_tmp.h"
+
+#undef CONCAT2
+#undef CONCAT
+
+#undef ELT_TYPE
+#undef ADD_CACHE
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
index 6d8937a0b4..7fafde9d5e 100644
--- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -7,11 +7,9 @@
#define FUNC_ENTER \
/* declare more local vars */ \
- struct draw_context *draw = so->draw; \
const unsigned prim = input_prims->prim; \
- const boolean last_vertex_last = \
- !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
+ const unsigned prim_flags = input_prims->flags; \
+ const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
new file mode 100644
index 0000000000..47defc62b9
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -0,0 +1,176 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned first, incr;
+ LOCAL_VARS
+
+ /*
+ * prim, start, count, and max_count_{simple,loop,fan} should have been
+ * defined
+ */
+ if (0) {
+ debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
+ "max_count_loop %d, max_count_fan %d\n",
+ __FUNCTION__, prim, start, count, max_count_simple,
+ max_count_loop, max_count_fan);
+ }
+
+ draw_pt_split_prim(prim, &first, &incr);
+ /* sanitize primitive length */
+ count = draw_pt_trim_count(count, first, incr);
+ if (count < first)
+ return;
+
+ /* try flushing the entire primitive */
+ if (PRIMITIVE(start, count))
+ return;
+
+ /* must be able to at least flush two complete primitives */
+ assert(max_count_simple >= first + incr &&
+ max_count_loop >= first + incr &&
+ max_count_fan >= first + incr);
+
+ /* no splitting required */
+ if (count <= max_count_simple) {
+ SEGMENT_SIMPLE(0x0, start, count);
+ }
+ else {
+ const unsigned rollback = first - incr;
+ unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
+
+ /*
+ * Both count and seg_max below are explicitly trimmed. Because
+ *
+ * seg_start = N * (seg_max - rollback) = N' * incr,
+ *
+ * we have
+ *
+ * remaining = count - seg_start = first + N'' * incr.
+ *
+ * That is, remaining is implicitly trimmed.
+ */
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
+ if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
+ prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
+ /* make sure we flush even number of triangles at a time */
+ if (seg_max < count && !(((seg_max - first) / incr) & 1))
+ seg_max -= incr;
+ }
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_SIMPLE(flags, start + seg_start, remaining);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_LOOP(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_FAN(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_FAN(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+#undef FUNC
+#undef FUNC_VARS
+#undef LOCAL_VARS
+
+#undef PRIMITIVE
+#undef SEGMENT_SIMPLE
+#undef SEGMENT_LOOP
+#undef SEGMENT_FAN
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index d13ad24fff..fa9992db78 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -28,6 +28,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_screen.h"
#include "draw_private.h"
#include "draw_context.h"
@@ -109,6 +110,11 @@ draw_create_vs_llvm(struct draw_context *draw,
tgsi_scan_shader(state->tokens, &vs->base.info);
+ vs->variant_key_size =
+ draw_llvm_variant_key_size(
+ vs->base.info.file_max[TGSI_FILE_INPUT]+1,
+ vs->base.info.file_max[TGSI_FILE_SAMPLER]+1);
+
vs->base.draw = draw;
vs->base.prepare = vs_llvm_prepare;
vs->base.run_linear = vs_llvm_run_linear;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 7b35dd4bb4..e0d30be98d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -59,14 +59,6 @@
#include "lp_bld_arit.h"
-/*
- * XXX: Increasing eliminates some artifacts, but adds others, most
- * noticeably corruption in the Earth halo in Google Earth.
- */
-#define RCP_NEWTON_STEPS 0
-
-#define RSQRT_NEWTON_STEPS 0
-
#define EXP_POLY_DEGREE 3
#define LOG_POLY_DEGREE 5
@@ -267,7 +259,7 @@ lp_build_add(struct lp_build_context *bld,
}
-/** Return the sum of the elements of a */
+/** Return the scalar sum of the elements of a */
LLVMValueRef
lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef a)
@@ -278,11 +270,9 @@ lp_build_sum_vector(struct lp_build_context *bld,
assert(lp_check_value(type, a));
- if (a == bld->zero)
- return bld->zero;
- if (a == bld->undef)
- return bld->undef;
- assert(type.length > 1);
+ if (type.length == 1) {
+ return a;
+ }
assert(!bld->type.norm);
@@ -546,7 +536,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
- if(util_is_pot(b)) {
+ if(util_is_power_of_two(b)) {
unsigned shift = ffs(b) - 1;
if(bld->type.floating) {
@@ -1266,6 +1256,11 @@ lp_build_sqrt(struct lp_build_context *bld,
*
* x_{i+1} = x_i * (2 - a * x_i)
*
+ * XXX: Unfortunately this won't give IEEE-754 conformant results for 0 or
+ * +/-Inf, giving NaN instead. Certain applications rely on this behavior,
+ * such as Google Earth, which does RCP(RSQRT(0.0) when drawing the Earth's
+ * halo. It would be necessary to clamp the argument to prevent this.
+ *
* See also:
* - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
* - http://softwarecommunity.intel.com/articles/eng/1818.htm
@@ -1306,13 +1301,27 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ /*
+ * We don't use RCPPS because:
+ * - it only has 10bits of precision
+ * - it doesn't even get the reciprocate of 1.0 exactly
+ * - doing Newton-Rapshon steps yields wrong (NaN) values for 0.0 or Inf
+ * - for recent processors the benefit over DIVPS is marginal, a case
+ * depedent
+ *
+ * We could still use it on certain processors if benchmarks show that the
+ * RCPPS plus necessary workarounds are still preferrable to DIVPS; or for
+ * particular uses that require less workarounds.
+ */
+
+ if (FALSE && util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
- for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+ for (i = 0; i < num_iterations; ++i) {
res = lp_build_rcp_refine(bld, a, res);
}
@@ -1363,13 +1372,14 @@ lp_build_rsqrt(struct lp_build_context *bld,
assert(type.floating);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
- for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+ for (i = 0; i < num_iterations; ++i) {
res = lp_build_rsqrt_refine(bld, a, res);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index 39dfc51e50..d3a5afff8c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -46,7 +46,7 @@
boolean
lp_check_alignment(const void *ptr, unsigned alignment)
{
- assert(util_is_pot(alignment));
+ assert(util_is_power_of_two(alignment));
return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 247cb83ce6..92123e09d3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -388,7 +388,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
if (format_matches_type(format_desc, type) &&
format_desc->block.bits <= type.width * 4 &&
- util_is_pot(format_desc->block.bits)) {
+ util_is_power_of_two(format_desc->block.bits)) {
LLVMValueRef packed;
/*
@@ -416,7 +416,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
format_desc->block.width == 1 &&
format_desc->block.height == 1 &&
- util_is_pot(format_desc->block.bits) &&
+ util_is_power_of_two(format_desc->block.bits) &&
format_desc->block.bits <= 32 &&
format_desc->is_bitmask &&
!format_desc->is_mixed &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 6d5410d970..48baf7c425 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -40,6 +40,7 @@
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITEventListener.h>
#include <llvm/Support/CommandLine.h>
+#include <llvm/Support/PrettyStackTrace.h>
#include "pipe/p_config.h"
#include "util/u_debug.h"
@@ -143,7 +144,6 @@ lp_set_target_options(void)
llvm::UnsafeFPMath = true;
#endif
-#if 0
/*
* LLVM will generate MMX instructions for vectors <= 64 bits, leading to
* innefficient code, and in 32bit systems, to the corruption of the FPU
@@ -152,10 +152,8 @@ lp_set_target_options(void)
* See also:
* - http://llvm.org/bugs/show_bug.cgi?id=3287
* - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
- *
- * XXX: Unfortunately this is not working.
*/
- static boolean first = FALSE;
+ static boolean first = TRUE;
if (first) {
static const char* options[] = {
"prog",
@@ -164,7 +162,13 @@ lp_set_target_options(void)
llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
first = FALSE;
}
-#endif
+
+ /*
+ * By default LLVM adds a signal handler to output a pretty stack trace.
+ * This signal handler is never removed, causing problems when unloading the
+ * shared object where the gallium driver resides.
+ */
+ llvm::DisablePrettyStackTrace = true;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index e470082b97..e947b90d16 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -37,6 +37,8 @@
#define LP_BLD_PACK_H
+#include "pipe/p_compiler.h"
+
#include "gallivm/lp_bld.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 0fd014ab9b..259b1142e3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -82,9 +82,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->swizzle_a = view->swizzle_a;
state->target = texture->target;
- state->pot_width = util_is_pot(texture->width0);
- state->pot_height = util_is_pot(texture->height0);
- state->pot_depth = util_is_pot(texture->depth0);
+ state->pot_width = util_is_power_of_two(texture->width0);
+ state->pot_height = util_is_power_of_two(texture->height0);
+ state->pot_depth = util_is_power_of_two(texture->depth0);
state->wrap_s = sampler->wrap_s;
state->wrap_t = sampler->wrap_t;
@@ -124,6 +124,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
+ * Compute the partial offset of a pixel block along an arbitrary axis.
+ *
+ * @param coord coordinate in pixels
+ * @param stride number of bytes between rows of successive pixel blocks
+ * @param block_length number of pixels in a pixels block along the coordinate
+ * axis
+ * @param out_offset resulting relative offset of the pixel block in bytes
+ * @param out_subcoord resulting sub-block pixel coordinate
+ */
+void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_subcoord)
+{
+ LLVMValueRef offset;
+ LLVMValueRef subcoord;
+
+ if (block_length == 1) {
+ subcoord = bld->zero;
+ }
+ else {
+ /*
+ * Pixel blocks have power of two dimensions. LLVM should convert the
+ * rem/div to bit arithmetic.
+ * TODO: Verify this.
+ */
+
+ LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
+ subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
+ coord = LLVMBuildUDiv(bld->builder, coord, block_width, "");
+ }
+
+ offset = lp_build_mul(bld, coord, stride);
+
+ assert(out_offset);
+ assert(out_subcoord);
+
+ *out_offset = offset;
+ *out_subcoord = subcoord;
+}
+
+
+/**
* Compute the offset of a pixel block.
*
* x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
@@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld,
{
LLVMValueRef x_stride;
LLVMValueRef offset;
- LLVMValueRef i;
- LLVMValueRef j;
-
- /*
- * Describe the coordinates in terms of pixel blocks.
- *
- * TODO: pixel blocks are power of two. LLVM should convert rem/div to
- * bit arithmetic. Verify this.
- */
-
- if (format_desc->block.width == 1) {
- i = bld->zero;
- }
- else {
- LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
- i = LLVMBuildURem(bld->builder, x, block_width, "");
- x = LLVMBuildUDiv(bld->builder, x, block_width, "");
- }
-
- if (format_desc->block.height == 1) {
- j = bld->zero;
- }
- else {
- LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
- j = LLVMBuildURem(bld->builder, y, block_height, "");
- y = LLVMBuildUDiv(bld->builder, y, block_height, "");
- }
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
- offset = lp_build_mul(bld, x, x_stride);
+
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.width,
+ x, x_stride,
+ &offset, out_i);
if (y && y_stride) {
- LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+ LLVMValueRef y_offset;
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.height,
+ y, y_stride,
+ &y_offset, out_j);
offset = lp_build_add(bld, offset, y_offset);
}
+ else {
+ *out_j = bld->zero;
+ }
if (z && z_stride) {
- LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+ LLVMValueRef z_offset;
+ LLVMValueRef k;
+ lp_build_sample_partial_offset(bld,
+ 1, /* pixel blocks are always 2D */
+ z, z_stride,
+ &z_offset, &k);
offset = lp_build_add(bld, offset, z_offset);
}
*out_offset = offset;
- *out_i = i;
- *out_j = j;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 5b8f478094..caafc4eca0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -36,6 +36,8 @@
#define LP_BLD_SAMPLE_H
+#include "pipe/p_format.h"
+
#include "gallivm/lp_bld.h"
struct pipe_resource;
@@ -147,6 +149,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i);
+
+
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 806c7d56a8..1f39d9c98b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -176,6 +176,7 @@ texture_dims(enum pipe_texture_target tex)
case PIPE_TEXTURE_1D:
return 1;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return 2;
case PIPE_TEXTURE_3D:
@@ -322,59 +323,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
/**
- * Fetch the texels as <4n x i8> in AoS form.
- */
-static LLVMValueRef
-lp_build_sample_packed(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_array)
-{
- LLVMValueRef offset, i, j;
- LLVMValueRef data_ptr;
- LLVMValueRef res;
-
- /* convert x,y,z coords to linear offset from start of texture, in bytes */
- lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, NULL, y_stride, NULL,
- &offset, &i, &j);
-
- /* get pointer to mipmap level 0 data */
- data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
-
- if (util_format_is_rgba8_variant(bld->format_desc)) {
- /* Just fetch the data directly without swizzling */
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
- res = lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
- }
- else {
- struct lp_type type;
-
- assert(bld->texel_type.width == 32);
-
- memset(&type, 0, sizeof type);
- type.width = 8;
- type.length = bld->texel_type.length*4;
- type.norm = TRUE;
-
- res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
- data_ptr, offset, i, j);
- }
-
- return res;
-}
-
-
-/**
* Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
*/
static LLVMValueRef
@@ -408,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
- * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
+ * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
* Return whether the given mode is supported by that function.
*/
static boolean
@@ -430,13 +378,18 @@ is_simple_wrap_mode(unsigned mode)
* \param length the texture size along one dimension
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param i0 resulting sub-block pixel coordinate for coord0
*/
-static LLVMValueRef
-lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
- LLVMValueRef coord,
- LLVMValueRef length,
- boolean is_pot,
- unsigned wrap_mode)
+static void
+lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
@@ -469,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
assert(0);
}
- return coord;
+ lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ out_offset, out_i);
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode, for scaled integer texcoords.
+ * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride pixel stride along the coordinate axis
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param offset0 resulting relative offset for coord0
+ * \param offset1 resulting relative offset for coord0 + 1
+ * \param i0 resulting sub-block pixel coordinate for coord0
+ * \param i1 resulting sub-block pixel coordinate for coord0 + 1
+ */
+static void
+lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord0,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *offset0,
+ LLVMValueRef *offset1,
+ LLVMValueRef *i0,
+ LLVMValueRef *i1)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+ LLVMValueRef lmask, umask, mask;
+
+ if (block_length != 1) {
+ /*
+ * If the pixel block covers more than one pixel then there is no easy
+ * way to calculate offset1 relative to offset0. Instead, compute them
+ * independently.
+ */
+
+ LLVMValueRef coord1;
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord0,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset0, i0);
+
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord1,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset1, i1);
+
+ return;
+ }
+
+ /*
+ * Scalar pixels -- try to compute offset0 and offset1 with a single stride
+ * multiplication.
+ */
+
+ *i0 = uint_coord_bld->zero;
+ *i1 = uint_coord_bld->zero;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if (is_pot) {
+ coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+ }
+ else {
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+ }
+
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(uint_coord_bld, *offset0, stride),
+ mask, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
+ umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_LESS, coord0, length_minus_one);
+
+ coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
+ coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
+
+ mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(uint_coord_bld,
+ *offset0,
+ LLVMBuildAnd(bld->builder, stride, mask, ""));
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(0);
+ *offset0 = uint_coord_bld->zero;
+ *offset1 = uint_coord_bld->zero;
+ break;
+ }
}
@@ -1740,16 +1820,21 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
- LLVMValueRef x0, x1;
- LLVMValueRef y0, y1;
- LLVMValueRef neighbors[2][2];
+ LLVMValueRef data_ptr;
+ LLVMValueRef x_stride, y_stride;
+ LLVMValueRef x_offset0, x_offset1;
+ LLVMValueRef y_offset0, y_offset1;
+ LLVMValueRef offset[2][2];
+ LLVMValueRef x_subcoord[2], y_subcoord[2];
LLVMValueRef neighbors_lo[2][2];
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
- LLVMValueRef stride;
+ const unsigned level = 0;
+ unsigned i, j;
- assert(bld->static_state->target == PIPE_TEXTURE_2D);
+ assert(bld->static_state->target == PIPE_TEXTURE_2D
+ || bld->static_state->target == PIPE_TEXTURE_RECT);
assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
@@ -1793,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
- x0 = s_ipart;
- y0 = t_ipart;
-
- x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
- y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
-
- x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
-
- x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+
+ y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
+
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset0, &x_offset1,
+ &x_subcoord[0], &x_subcoord[1]);
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height, y_stride,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset0, &y_offset1,
+ &y_subcoord[0], &y_subcoord[1]);
+
+ offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
+ offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
+ offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
+ offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
/*
* Transform 4 x i32 in
@@ -1836,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
LLVMValueRef shuffle_lo;
LLVMValueRef shuffle_hi;
- unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
@@ -1864,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
- stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
+ /*
+ * get pointer to mipmap level 0 data
+ */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
@@ -1883,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* The higher 8 bits of the resulting elements will be zero.
*/
- neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
- neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
- neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
- neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
+ for (j = 0; j < 2; ++j) {
+ for (i = 0; i < 2; ++i) {
+ LLVMValueRef rgba8;
- neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
- neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
- neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
- neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is a rgba8, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset[j][i]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset[j][i],
+ x_subcoord[i],
+ y_subcoord[j]);
+ }
+
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ &neighbors_lo[j][i], &neighbors_hi[j][i]);
+ }
+ }
/*
* Linear interpolate with 8.8 fixed point.
@@ -2077,7 +2191,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
}
else if (util_format_fits_8unorm(bld.format_desc) &&
bld.format_desc->nr_channels > 1 &&
- static_state->target == PIPE_TEXTURE_2D &&
+ (static_state->target == PIPE_TEXTURE_2D ||
+ static_state->target == PIPE_TEXTURE_RECT) &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 0aa64affac..0e07f7f3f3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -200,8 +200,10 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
}
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
assert(LLVMTypeOf(val) == mask->int_vec_type);
- mask->cond_mask = val;
-
+ mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cond_mask,
+ val,
+ "");
lp_exec_mask_update(mask);
}
@@ -802,7 +804,7 @@ emit_store(
case TGSI_FILE_PREDICATE:
lp_exec_mask_store(&bld->exec_mask, pred, value,
- bld->preds[index][chan_index]);
+ bld->preds[reg->Register.Index][chan_index]);
break;
default:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 3ffe916f8e..fec1d3dfbc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -128,16 +128,16 @@ struct lp_build_context
*/
struct lp_type type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_elem_type(type) */
LLVMTypeRef elem_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_vec_type(type) */
LLVMTypeRef vec_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_int_elem_type(type) */
LLVMTypeRef int_elem_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_int_vec_type(type) */
LLVMTypeRef int_vec_type;
/** Same as lp_build_undef(type) */
diff --git a/src/gallium/auxiliary/os/os_stream.c b/src/gallium/auxiliary/os/os_stream.c
new file mode 100644
index 0000000000..3c55fc00d9
--- /dev/null
+++ b/src/gallium/auxiliary/os/os_stream.c
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_config.h"
+
+#include "os_stream.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+ char buf[1024];
+ int retval;
+ va_list ap2;
+ va_copy(ap2, ap);
+ retval = util_vsnprintf(buf, sizeof(buf), format, ap2);
+ va_end(ap2);
+ if(retval <= 0)
+ {}
+ else if(retval < sizeof(buf))
+ stream->write(stream, buf, retval);
+ else
+ {
+ char* str = MALLOC(retval + 1);
+ if(!str)
+ return -1;
+ retval = util_vsnprintf(str, retval + 1, format, ap);
+ if(retval > 0)
+ stream->write(stream, str, retval);
+ FREE(str);
+ }
+
+ return retval;
+}
diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h
index 693a0621e2..6c6050bb02 100644
--- a/src/gallium/auxiliary/os/os_stream.h
+++ b/src/gallium/auxiliary/os/os_stream.h
@@ -50,6 +50,9 @@ struct os_stream
void
(*flush)(struct os_stream *stream);
+
+ int
+ (*vprintf)(struct os_stream *stream, const char* format, va_list ap);
};
@@ -90,6 +93,27 @@ os_stream_flush(struct os_stream *stream)
stream->flush(stream);
}
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap);
+
+static INLINE int
+os_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+ return stream->vprintf(stream, format, ap);
+}
+
+static INLINE int
+os_stream_printf (struct os_stream* stream, const char *format, ...)
+{
+ int retval;
+ va_list args;
+
+ va_start (args, format);
+ retval = stream->vprintf(stream, format, args);
+ va_end (args);
+
+ return retval;
+}
struct os_stream *
os_file_stream_create(const char *filename);
@@ -118,5 +142,4 @@ os_str_stream_get_and_close(struct os_stream *stream);
#define os_file_stream_create(_filename) os_null_stream_create()
#endif
-
#endif /* _OS_STREAM_H_ */
diff --git a/src/gallium/auxiliary/os/os_stream_log.c b/src/gallium/auxiliary/os/os_stream_log.c
index 7cc2028a22..b01377c346 100644
--- a/src/gallium/auxiliary/os/os_stream_log.c
+++ b/src/gallium/auxiliary/os/os_stream_log.c
@@ -73,7 +73,8 @@ static struct os_stream
os_log_stream_struct = {
&os_log_stream_close,
&os_log_stream_write,
- &os_log_stream_flush
+ &os_log_stream_flush,
+ &os_default_stream_vprintf,
};
diff --git a/src/gallium/auxiliary/os/os_stream_null.c b/src/gallium/auxiliary/os/os_stream_null.c
index 128c4e8f0e..a549a789e6 100644
--- a/src/gallium/auxiliary/os/os_stream_null.c
+++ b/src/gallium/auxiliary/os/os_stream_null.c
@@ -56,12 +56,18 @@ os_null_stream_flush(struct os_stream *stream)
(void)stream;
}
+static int
+os_null_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+ return 0;
+}
static struct os_stream
os_null_stream = {
&os_null_stream_close,
&os_null_stream_write,
- &os_null_stream_flush
+ &os_null_stream_flush,
+ &os_null_stream_vprintf
};
diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c
index 9e7ed71107..37e7d063e2 100644
--- a/src/gallium/auxiliary/os/os_stream_stdc.c
+++ b/src/gallium/auxiliary/os/os_stream_stdc.c
@@ -83,6 +83,14 @@ os_stdc_stream_flush(struct os_stream *_stream)
fflush(stream->file);
}
+static int
+os_stdc_stream_vprintf (struct os_stream* _stream, const char *format, va_list ap)
+{
+ struct os_stdc_stream *stream = os_stdc_stream(_stream);
+
+ return vfprintf(stream->file, format, ap);
+}
+
struct os_stream *
os_file_stream_create(const char *filename)
@@ -96,6 +104,7 @@ os_file_stream_create(const char *filename)
stream->base.close = &os_stdc_stream_close;
stream->base.write = &os_stdc_stream_write;
stream->base.flush = &os_stdc_stream_flush;
+ stream->base.vprintf = &os_stdc_stream_vprintf;
stream->file = fopen(filename, "w");
if(!stream->file)
diff --git a/src/gallium/auxiliary/os/os_stream_str.c b/src/gallium/auxiliary/os/os_stream_str.c
index b5c7270d2a..be9478b2a1 100644
--- a/src/gallium/auxiliary/os/os_stream_str.c
+++ b/src/gallium/auxiliary/os/os_stream_str.c
@@ -118,6 +118,7 @@ os_str_stream_create(size_t size)
stream->base.close = &os_str_stream_close;
stream->base.write = &os_str_stream_write;
stream->base.flush = &os_str_stream_flush;
+ stream->base.vprintf = &os_default_stream_vprintf;
stream->str = os_malloc(size);
if(!stream->str)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index cec2524da2..2ef02160f2 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -50,8 +50,7 @@
#define PB_BUFMGR_H_
-#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
+#include "pb_buffer.h"
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index 2e15751e50..0461c81550 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -30,7 +30,7 @@
#include "rtasm_cpu.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
static boolean rtasm_sse_enabled(void)
{
static boolean firsttime = 1;
@@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
@@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 9f70b73698..75b0f6a68e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -22,8 +22,9 @@
**************************************************************************/
#include "pipe/p_config.h"
+#include "util/u_cpu_detect.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
@@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
assert(reg.mod == mod_REG);
+ /* TODO: support extended x86-64 registers */
+ assert(reg.idx < 8);
+ assert(regmem.idx < 8);
+
val |= regmem.mod << 6; /* mod field */
val |= reg.idx << 3; /* reg field */
val |= regmem.idx; /* r/m field */
@@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
*/
+void x64_rexw(struct x86_function *p)
+{
+ if(x86_target(p) != X86_32)
+ emit_1ub(p, 0x48);
+}
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label )
@@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
+void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ if(dst.mod == mod_REG)
+ x86_mov_reg_imm(p, dst, imm);
+ else
+ {
+ emit_1ub(p, 0xc7);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1i(p, imm);
+ }
+}
+
+void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
+{
+ DUMP_RI( dst, imm );
+ emit_1ub(p, 0x66);
+ if(dst.mod == mod_REG)
+ {
+ emit_1ub(p, 0xb8 + dst.idx);
+ emit_2ub(p, imm & 0xff, imm >> 8);
+ }
+ else
+ {
+ emit_1ub(p, 0xc7);
+ emit_modrm_noreg(p, 0, dst);
+ emit_2ub(p, imm & 0xff, imm >> 8);
+ }
+}
+
+void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
+{
+ DUMP_RI( dst, imm );
+ if(dst.mod == mod_REG)
+ {
+ emit_1ub(p, 0xb0 + dst.idx);
+ emit_1ub(p, imm);
+ }
+ else
+ {
+ emit_1ub(p, 0xc6);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1ub(p, imm);
+ }
+}
+
/**
* Immediate group 1 instructions.
*/
@@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
}
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
void x86_push_imm32( struct x86_function *p,
@@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
emit_1ub(p, 0x68);
emit_1i(p, imm32);
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
@@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
- p->stack_offset -= 4;
+ p->stack_offset -= sizeof(void*);
}
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x40 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+ {
+ emit_1ub(p, 0x40 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 0, reg);
}
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x48 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+ {
+ emit_1ub(p, 0x48 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 1, reg);
}
void x86_ret( struct x86_function *p )
@@ -583,9 +650,82 @@ void x86_mov( struct x86_function *p,
struct x86_reg src )
{
DUMP_RR( dst, src );
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ uint8_t rex = 0x40;
+ if(dst.idx >= 8)
+ {
+ rex |= 4;
+ dst.idx -= 8;
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1;
+ src.idx -= 8;
+ }
+ emit_1ub(p, rex);
+ }
+ emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov16( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_1ub(p, 0x66);
+ emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov8( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_op_modrm( p, 0x8a, 0x88, dst, src );
+}
+
+void x64_mov64( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ uint8_t rex = 0x48;
+ DUMP_RR( dst, src );
+ assert(x86_target(p) != X86_32);
+
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ if(dst.idx >= 8)
+ {
+ rex |= 4;
+ dst.idx -= 8;
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1;
+ src.idx -= 8;
+ }
+ }
+ emit_1ub(p, rex);
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb6);
+ emit_modrm(p, dst, src);
+}
+
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb7);
+ emit_modrm(p, dst, src);
+}
+
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -680,6 +820,61 @@ void x86_div( struct x86_function *p,
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
}
+void x86_bswap( struct x86_function *p, struct x86_reg reg )
+{
+ DUMP_R(reg);
+ assert(reg.file == file_REG32);
+ assert(reg.mod == mod_REG);
+ emit_2ub(p, 0x0f, 0xc8 + reg.idx);
+}
+
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 5, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 5, reg);
+ emit_1ub(p, imm);
+ }
+}
+
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 7, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 7, reg);
+ emit_1ub(p, imm);
+ }
+}
+
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 4, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 4, reg);
+ emit_1ub(p, imm);
+ }
+}
/***********************************************************************
@@ -1013,6 +1208,77 @@ void sse_movmskps( struct x86_function *p,
* SSE2 instructions
*/
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ if(dst.mod == mod_REG && dst.file == file_REG32)
+ {
+ emit_1ub(p, 0x7e);
+ emit_modrm(p, src, dst);
+ }
+ else
+ {
+ emit_op_modrm(p, 0x6e, 0x7e, dst, src);
+ }
+}
+
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ switch (dst.mod) {
+ case mod_REG:
+ emit_3ub(p, 0xf3, 0x0f, 0x7e);
+ emit_modrm(p, dst, src);
+ break;
+ case mod_INDIRECT:
+ case mod_DISP32:
+ case mod_DISP8:
+ assert(src.mod == mod_REG);
+ emit_3ub(p, 0x66, 0x0f, 0xd6);
+ emit_modrm(p, src, dst);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf3, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf2, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x28, 0x29, dst, src);
+}
+
/**
* Perform a reduced swizzle:
*/
@@ -1027,6 +1293,28 @@ void sse2_pshufd( struct x86_function *p,
emit_1ub(p, shuf);
}
+void sse2_pshuflw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf2, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src);
+ emit_1ub(p, shuf);
+}
+
+void sse2_pshufhw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf3, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src);
+ emit_1ub(p, shuf);
+}
+
void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1045,6 +1333,24 @@ void sse2_cvtps2dq( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_cvtsd2ss( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0xf2, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_cvtpd2ps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1081,6 +1387,97 @@ void sse2_punpcklbw( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x61);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x62);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x6c);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 4, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 4, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_3ub(p, 0x66, 0x0f, 0xeb);
+ emit_modrm(p, dst, src);
+}
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
@@ -1100,18 +1497,6 @@ void sse2_rcpss( struct x86_function *p,
emit_modrm( p, dst, src );
}
-void sse2_movd( struct x86_function *p,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( dst, src );
- emit_2ub(p, 0x66, X86_TWOB);
- emit_op_modrm( p, 0x6e, 0x7e, dst, src );
-}
-
-
-
-
/***********************************************************************
* x87 instructions
*/
@@ -1702,23 +2087,80 @@ void x86_cdecl_caller_pop_regs( struct x86_function *p )
}
-/* Retreive a reference to one of the function arguments, taking into
- * account any push/pop activity:
- */
struct x86_reg x86_fn_arg( struct x86_function *p,
- unsigned arg )
+ unsigned arg )
{
- return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ switch(x86_target(p))
+ {
+ case X86_64_WIN64_ABI:
+ /* Microsoft uses a different calling convention than the rest of the world */
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 2:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 3:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 4:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ /* Win64 allocates stack slots as if it pushed the first 4 arguments too */
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + arg * 8);
+ }
+ case X86_64_STD_ABI:
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_DI);
+ case 2:
+ return x86_make_reg(file_REG32, reg_SI);
+ case 3:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 4:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 5:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 6:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + (arg - 6) * 8); /* ??? */
+ }
+ case X86_32:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
+ default:
+ abort();
+ }
}
+static void x86_init_func_common( struct x86_function *p )
+{
+ util_cpu_detect();
+ p->caps = 0;
+ if(util_cpu_caps.has_mmx)
+ p->caps |= X86_MMX;
+ if(util_cpu_caps.has_mmx2)
+ p->caps |= X86_MMX2;
+ if(util_cpu_caps.has_sse)
+ p->caps |= X86_SSE;
+ if(util_cpu_caps.has_sse2)
+ p->caps |= X86_SSE2;
+ if(util_cpu_caps.has_sse3)
+ p->caps |= X86_SSE3;
+ if(util_cpu_caps.has_sse4_1)
+ p->caps |= X86_SSE4_1;
+ p->csr = p->store;
+ DUMP_START();
+}
void x86_init_func( struct x86_function *p )
{
p->size = 0;
p->store = NULL;
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p);
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
@@ -1728,8 +2170,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size )
if (p->store == NULL) {
p->store = p->error_overflow;
}
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p);
}
void x86_release_func( struct x86_function *p )
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 6208e8f707..2b9678b176 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -24,22 +24,31 @@
#ifndef _RTASM_X86SSE_H_
#define _RTASM_X86SSE_H_
+#include "pipe/p_compiler.h"
#include "pipe/p_config.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
struct x86_reg {
- unsigned file:3;
- unsigned idx:3;
+ unsigned file:2;
+ unsigned idx:4;
unsigned mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */
};
+#define X86_MMX 1
+#define X86_MMX2 2
+#define X86_SSE 4
+#define X86_SSE2 8
+#define X86_SSE3 0x10
+#define X86_SSE4_1 0x20
+
struct x86_function {
+ unsigned caps;
unsigned size;
unsigned char *store;
unsigned char *csr;
@@ -75,7 +84,15 @@ enum x86_reg_name {
reg_SP,
reg_BP,
reg_SI,
- reg_DI
+ reg_DI,
+ reg_R8,
+ reg_R9,
+ reg_R10,
+ reg_R11,
+ reg_R12,
+ reg_R13,
+ reg_R14,
+ reg_R15
};
@@ -110,6 +127,29 @@ typedef void (*x86_func)(void);
/* Begin/end/retrieve function creation:
*/
+enum x86_target
+{
+ X86_32,
+ X86_64_STD_ABI,
+ X86_64_WIN64_ABI
+};
+
+/* make this read a member of x86_function if target != host is desired */
+static INLINE enum x86_target x86_target( struct x86_function* p )
+{
+#ifdef PIPE_ARCH_X86
+ return X86_32;
+#elif defined(_WIN64)
+ return X86_64_WIN64_ABI;
+#elif defined(PIPE_ARCH_X86_64)
+ return X86_64_STD_ABI;
+#endif
+}
+
+static INLINE unsigned x86_target_caps( struct x86_function* p )
+{
+ return p->caps;
+}
void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size );
@@ -138,6 +178,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
*/
int x86_get_label( struct x86_function *p );
+void x64_rexw(struct x86_function *p);
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label );
@@ -178,18 +220,54 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
@@ -227,7 +305,6 @@ void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
-void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -237,6 +314,14 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
+void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
+void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -250,7 +335,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_div( struct x86_function *p, struct x86_reg src );
-
+void x86_bswap( struct x86_function *p, struct x86_reg src );
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index 4cd27317b3..dd78b36100 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -28,6 +28,7 @@
#ifndef TGSI_DUMP_H
#define TGSI_DUMP_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 298f3d0a8b..0757f05dfa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3239,6 +3239,8 @@ exec_instruction(
if (mach->CallStackTop == 0) {
/* returning from main() */
+ mach->CondStackTop = 0;
+ mach->LoopStackTop = 0;
*pc = -1;
return;
}
@@ -3767,6 +3769,9 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
#endif
+ /* Strictly speaking, these assertions aren't really needed but they
+ * can potentially catch some bugs in the control flow code.
+ */
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 50248884fd..1992d11bbe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -28,6 +28,7 @@
#ifndef TGSI_INFO_H
#define TGSI_INFO_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index db9a342220..1891203abe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -282,17 +282,6 @@ tgsi_parse_token(
}
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens)
-{
- struct tgsi_parse_context ctx;
- if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
- unsigned len = (ctx.FullHeader.Header.HeaderSize +
- ctx.FullHeader.Header.BodySize);
- return len;
- }
- return 0;
-}
/**
@@ -319,3 +308,19 @@ tgsi_alloc_tokens(unsigned num_tokens)
unsigned bytes = num_tokens * sizeof(struct tgsi_token);
return (struct tgsi_token *) MALLOC(bytes);
}
+
+
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens)
+{
+ const unsigned *dwords = (const unsigned *)tokens;
+ int nr = tgsi_num_tokens(tokens);
+ int i;
+
+ assert(sizeof(*tokens) == sizeof(unsigned));
+
+ debug_printf("const unsigned tokens[%d] = {\n", nr);
+ for (i = 0; i < nr; i++)
+ debug_printf("0x%08x,\n", dwords[i]);
+ debug_printf("};\n");
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 36de8807b4..d4df585176 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -28,6 +28,7 @@
#ifndef TGSI_PARSE_H
#define TGSI_PARSE_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
@@ -132,8 +133,15 @@ void
tgsi_parse_token(
struct tgsi_parse_context *ctx );
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens);
+static INLINE unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens)
+{
+ struct tgsi_header header = *(const struct tgsi_header *) tokens;
+ return header.HeaderSize + header.BodySize;
+}
+
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens);
struct tgsi_token *
tgsi_dup_tokens(const struct tgsi_token *tokens);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
index d81ee3d00e..00aa8b84fe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
@@ -32,9 +32,12 @@
extern "C" {
#endif
+#include "pipe/p_compiler.h"
+
+struct tgsi_exec_machine;
+struct tgsi_interp_coef;
struct tgsi_token;
struct x86_function;
-struct tgsi_interp_coef;
unsigned
tgsi_emit_sse2(
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index fe638e211f..73287b667d 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -38,7 +38,7 @@ struct translate *translate_create( const struct translate_key *key )
{
struct translate *translate = NULL;
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
translate = translate_sse2_create( key );
if (translate)
return translate;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index eb6f2cc486..a75380228b 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -85,6 +85,18 @@ struct translate {
unsigned instance_id,
void *output_buffer);
+ void (PIPE_CDECL *run_elts16)( struct translate *,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
+ void (PIPE_CDECL *run_elts8)( struct translate *,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 42cfd763e9..ad809db720 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -64,6 +64,14 @@ struct translate_generic {
unsigned input_stride;
unsigned max_index;
+ /* this value is set to -1 if this is a normal element with output_format != input_format:
+ * in this case, u_format is used to do a full conversion
+ *
+ * this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
+ * in this case, memcpy is used to copy this amount of bytes
+ */
+ int copy_size;
+
} attrib[PIPE_MAX_ATTRIBS];
unsigned nr_attrib;
@@ -354,7 +362,65 @@ static emit_func get_emit_func( enum pipe_format format )
}
}
+static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
+ unsigned elt,
+ unsigned instance_id,
+ void *vert )
+{
+ unsigned nr_attrs = tg->nr_attrib;
+ unsigned attr;
+
+ for (attr = 0; attr < nr_attrs; attr++) {
+ float data[4];
+ uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
+
+ if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ const uint8_t *src;
+ unsigned index;
+ int copy_size;
+
+ if (tg->attrib[attr].instance_divisor) {
+ index = instance_id / tg->attrib[attr].instance_divisor;
+ }
+ else {
+ index = elt;
+ }
+
+ /* clamp to void going out of bounds */
+ index = MIN2(index, tg->attrib[attr].max_index);
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * index;
+
+ copy_size = tg->attrib[attr].copy_size;
+ if(likely(copy_size >= 0))
+ memcpy(dst, src, copy_size);
+ else
+ {
+ tg->attrib[attr].fetch( data, src, 0, 0 );
+
+ if (0)
+ debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+ " %f, %f, %f, %f \n",
+ attr,
+ tg->attrib[attr].input_ptr,
+ tg->attrib[attr].input_stride,
+ index,
+ data[0], data[1],data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
+ } else {
+ if(likely(tg->attrib[attr].copy_size >= 0))
+ memcpy(data, &instance_id, 4);
+ else
+ {
+ data[0] = (float)instance_id;
+ tg->attrib[attr].emit( data, dst );
+ }
+ }
+ }
+}
/**
* Fetch vertex attributes for 'count' vertices.
@@ -367,62 +433,45 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
- unsigned nr_attrs = tg->nr_attrib;
- unsigned attr;
unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
for (i = 0; i < count; i++) {
- const unsigned elt = *elts++;
-
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
- char *dst = vert + tg->attrib[attr].output_offset;
-
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
- const uint8_t *src;
- unsigned index;
-
- if (tg->attrib[attr].instance_divisor) {
- index = instance_id / tg->attrib[attr].instance_divisor;
- } else {
- index = elt;
- }
-
- /* clamp to void going out of bounds */
- index = MIN2(index, tg->attrib[attr].max_index);
-
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * index;
-
- tg->attrib[attr].fetch( data, src, 0, 0 );
-
- if (0)
- debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
- " %f, %f, %f, %f \n",
- attr,
- tg->attrib[attr].input_ptr,
- tg->attrib[attr].input_stride,
- tg->attrib[attr].instance_divisor,
- tg->attrib[attr].max_index,
- index,
- data[0], data[1],data[2], data[3]);
- } else {
- data[0] = (float)instance_id;
- }
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
- if (0)
- debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
- i, elt, attr, data[0], data[1], data[2], data[3]);
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
- tg->attrib[attr].emit( data, dst );
- }
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
@@ -432,57 +481,10 @@ static void PIPE_CDECL generic_run( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
- unsigned nr_attrs = tg->nr_attrib;
- unsigned attr;
unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
for (i = 0; i < count; i++) {
- unsigned elt = start + i;
-
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
- char *dst = vert + tg->attrib[attr].output_offset;
-
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
- const uint8_t *src;
- unsigned index;
-
- if (tg->attrib[attr].instance_divisor) {
- index = instance_id / tg->attrib[attr].instance_divisor;
- }
- else {
- index = elt;
- }
-
- /* clamp to void going out of bounds */
- index = MIN2(index, tg->attrib[attr].max_index);
-
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * index;
-
- tg->attrib[attr].fetch( data, src, 0, 0 );
-
- if (0)
- debug_printf("Fetch linear attr %d from %p stride %d index %d: "
- " %f, %f, %f, %f \n",
- attr,
- tg->attrib[attr].input_ptr,
- tg->attrib[attr].input_stride,
- index,
- data[0], data[1],data[2], data[3]);
- } else {
- data[0] = (float)instance_id;
- }
-
- if (0)
- debug_printf("vert %d attr %d: %f %f %f %f\n",
- i, attr, data[0], data[1], data[2], data[3]);
-
- tg->attrib[attr].emit( data, dst );
- }
-
+ generic_run_one(tg, start + i, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -528,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
+ tg->translate.run_elts16 = generic_run_elts16;
+ tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
@@ -544,9 +548,28 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->attrib[i].input_offset = key->element[i].input_offset;
tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
- tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
+ tg->attrib[i].copy_size = -1;
+ if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
+ {
+ if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
+ || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
+ tg->attrib[i].copy_size = 4;
+ }
+ else
+ {
+ if(key->element[i].input_format == key->element[i].output_format
+ && format_desc->block.width == 1
+ && format_desc->block.height == 1
+ && !(format_desc->block.bits & 7))
+ tg->attrib[i].copy_size = format_desc->block.bits >> 3;
+ }
+
+ if(tg->attrib[i].copy_size < 0)
+ tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+ else
+ tg->attrib[i].emit = NULL;
}
tg->nr_attrib = key->nr_elements;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index ef3aa674a3..f8bf5b4669 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -30,11 +30,12 @@
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_format.h"
#include "translate.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
@@ -46,21 +47,9 @@
#define W 3
-typedef void (PIPE_CDECL *run_func)( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
-typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
struct translate_buffer {
const void *base_ptr;
- unsigned stride;
+ uintptr_t stride;
unsigned max_index;
};
@@ -73,21 +62,43 @@ struct translate_buffer_varient {
#define ELEMENT_BUFFER_INSTANCE_ID 1001
+#define NUM_CONSTS 7
+
+enum
+{
+ CONST_IDENTITY,
+ CONST_INV_127,
+ CONST_INV_255,
+ CONST_INV_32767,
+ CONST_INV_65535,
+ CONST_INV_2147483647,
+ CONST_255
+};
+
+#define C(v) {(float)(v), (float)(v), (float)(v), (float)(v)}
+static float consts[NUM_CONSTS][4] = {
+ {0, 0, 0, 1},
+ C(1.0 / 127.0),
+ C(1.0 / 255.0),
+ C(1.0 / 32767.0),
+ C(1.0 / 65535.0),
+ C(1.0 / 2147483647.0),
+ C(255.0)
+};
+#undef C
struct translate_sse {
struct translate translate;
struct x86_function linear_func;
struct x86_function elt_func;
+ struct x86_function elt16_func;
+ struct x86_function elt8_func;
struct x86_function *func;
- boolean loaded_identity;
- boolean loaded_255;
- boolean loaded_inv_255;
-
- float identity[4];
- float float_255[4];
- float inv_255[4];
+ PIPE_ALIGN_VAR(16) float consts[NUM_CONSTS][4];
+ int8_t reg_to_const[16];
+ int8_t const_to_reg[NUM_CONSTS];
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
unsigned nr_buffers;
@@ -102,17 +113,16 @@ struct translate_sse {
boolean use_instancing;
unsigned instance_id;
- run_func gen_run;
- run_elts_func gen_run_elts;
-
/* these are actually known values, but putting them in a struct
* like this is helpful to keep them in sync across the file.
*/
struct x86_reg tmp_EAX;
- struct x86_reg idx_EBX; /* either start+i or &elt[i] */
- struct x86_reg outbuf_ECX;
- struct x86_reg machine_EDX;
- struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg tmp2_EDX;
+ struct x86_reg src_ECX;
+ struct x86_reg idx_ESI; /* either start+i or &elt[i] */
+ struct x86_reg machine_EDI;
+ struct x86_reg outbuf_EBX;
+ struct x86_reg count_EBP; /* decrements to zero */
};
static int get_offset( const void *a, const void *b )
@@ -120,281 +130,950 @@ static int get_offset( const void *a, const void *b )
return (const char *)b - (const char *)a;
}
+static struct x86_reg get_const( struct translate_sse *p, unsigned id)
+{
+ struct x86_reg reg;
+ unsigned i;
+ if(p->const_to_reg[id] >= 0)
+ return x86_make_reg(file_XMM, p->const_to_reg[id]);
-static struct x86_reg get_identity( struct translate_sse *p )
-{
- struct x86_reg reg = x86_make_reg(file_XMM, 6);
-
- if (!p->loaded_identity) {
- p->loaded_identity = TRUE;
- p->identity[0] = 0;
- p->identity[1] = 0;
- p->identity[2] = 0;
- p->identity[3] = 1;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->identity[0])));
+ for(i = 2; i < 8; ++i)
+ {
+ if(p->reg_to_const[i] < 0)
+ break;
}
+ /* TODO: be smarter here */
+ if(i == 8)
+ --i;
+
+ reg = x86_make_reg(file_XMM, i);
+
+ if(p->reg_to_const[i] >= 0)
+ p->const_to_reg[p->reg_to_const[i]] = -1;
+
+ p->reg_to_const[i] = id;
+ p->const_to_reg[id] = i;
+
+ /* TODO: this should happen outside the loop, if possible */
+ sse_movaps(p->func, reg,
+ x86_make_disp(p->machine_EDI,
+ get_offset(p, &p->consts[id][0])));
+
return reg;
}
-static struct x86_reg get_255( struct translate_sse *p )
+/* load the data in a SSE2 register, padding with zeros */
+static boolean emit_load_sse2( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg src,
+ unsigned size)
{
- struct x86_reg reg = x86_make_reg(file_XMM, 7);
-
- if (!p->loaded_255) {
- p->loaded_255 = TRUE;
- p->float_255[0] =
- p->float_255[1] =
- p->float_255[2] =
- p->float_255[3] = 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->float_255[0])));
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ switch(size)
+ {
+ case 1:
+ x86_movzx8(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 2:
+ x86_movzx16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 3:
+ x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
+ x86_shl_imm(p->func, tmp, 16);
+ x86_mov16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 4:
+ sse2_movd(p->func, data, src);
+ break;
+ case 6:
+ sse2_movd(p->func, data, src);
+ x86_movzx16(p->func, tmp, x86_make_disp(src, 4));
+ sse2_movd(p->func, tmpXMM, tmp);
+ sse2_punpckldq(p->func, data, tmpXMM);
+ break;
+ case 8:
+ sse2_movq(p->func, data, src);
+ break;
+ case 12:
+ sse2_movq(p->func, data, src);
+ sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8));
+ sse2_punpcklqdq(p->func, data, tmpXMM);
+ break;
+ case 16:
+ sse2_movdqu(p->func, data, src);
+ break;
+ default:
+ return FALSE;
}
-
- return reg;
+ return TRUE;
}
-static struct x86_reg get_inv_255( struct translate_sse *p )
+/* this value can be passed for the out_chans argument */
+#define CHANNELS_0001 5
+
+/* this function will load #chans float values, and will
+ * pad the register with zeroes at least up to out_chans.
+ *
+ * If out_chans is set to CHANNELS_0001, then the fourth
+ * value will be padded with 1. Only pass this value if
+ * chans < 4 or results are undefined.
+ */
+static void emit_load_float32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
{
- struct x86_reg reg = x86_make_reg(file_XMM, 5);
-
- if (!p->loaded_inv_255) {
- p->loaded_inv_255 = TRUE;
- p->inv_255[0] =
- p->inv_255[1] =
- p->inv_255[2] =
- p->inv_255[3] = 1.0f / 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->inv_255[0])));
+ switch(chans)
+ {
+ case 1:
+ /* a 0 0 0
+ * a 0 0 1
+ */
+ sse_movss(p->func, data, arg0);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 2:
+ /* 0 0 0 1
+ * a b 0 1
+ */
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 3:
+ /* Have to jump through some hoops:
+ *
+ * c 0 0 0
+ * c 0 0 1 if out_chans == CHANNELS_0001
+ * 0 0 c 0/1
+ * a b c 0/1
+ */
+ sse_movss(p->func, data, x86_make_disp(arg0, 8));
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) );
+ sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 4:
+ sse_movups(p->func, data, arg0);
+ break;
}
-
- return reg;
}
+/* this function behaves like emit_load_float32, but loads
+ 64-bit floating point numbers, converting them to 32-bit
+ ones */
+static void emit_load_float64to32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
+{
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ switch(chans)
+ {
+ case 1:
+ sse2_movsd(p->func, data, arg0);
+ if(out_chans > 1)
+ sse2_cvtpd2ps(p->func, data, data);
+ else
+ sse2_cvtsd2ss(p->func, data, data);
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ break;
+ case 2:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 3:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ if(out_chans > 3)
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ else
+ sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 4:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ break;
+ }
+}
-static void emit_load_R32G32B32A32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- sse_movups(p->func, data, arg0);
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, dst_gpr, src_gpr);
+ else
+ {
+ /* TODO: when/on which CPUs is SSE2 actually better than SSE? */
+ if(x86_target_caps(p->func) & X86_SSE2)
+ sse2_movq(p->func, dst_xmm, src_xmm);
+ else
+ sse_movlps(p->func, dst_xmm, src_xmm);
+ }
}
-static void emit_load_R32G32B32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src)
{
- /* Have to jump through some hoops:
- *
- * c 0 0 0
- * c 0 0 1
- * 0 0 c 1
- * a b c 1
- */
- sse_movss(p->func, data, x86_make_disp(arg0, 8));
- sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
- sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
- sse_movlps(p->func, data, arg0);
+ emit_mov64(p, dst_gpr, dst_xmm, src, src);
}
-static void emit_load_R32G32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- /* 0 0 0 1
- * a b 0 1
- */
- sse_movups(p->func, data, get_identity(p) );
- sse_movlps(p->func, data, arg0);
+ emit_mov64(p, dst, dst, src_gpr, src_xmm);
}
+static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src)
+{
+ if(x86_target_caps(p->func) & X86_SSE2)
+ sse2_movdqu(p->func, dst, src);
+ else
+ sse_movups(p->func, dst, src);
+}
-static void emit_load_R32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+/* TODO: this uses unaligned accesses liberally, which is great on Nehalem,
+ * but may or may not be good on older processors
+ * TODO: may perhaps want to use non-temporal stores here if possible
+ */
+static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size)
{
- /* a 0 0 0
- * a 0 0 1
- */
- sse_movss(p->func, data, arg0);
- sse_orps(p->func, data, get_identity(p) );
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
+ struct x86_reg dataGPR = p->tmp_EAX;
+ struct x86_reg dataGPR2 = p->tmp2_EDX;
+
+ if(size < 8)
+ {
+ switch (size)
+ {
+ case 1:
+ x86_mov8(p->func, dataGPR, src);
+ x86_mov8(p->func, dst, dataGPR);
+ break;
+ case 2:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov16(p->func, dst, dataGPR);
+ break;
+ case 3:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2));
+ x86_mov16(p->func, dst, dataGPR);
+ x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2);
+ break;
+ case 4:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov(p->func, dst, dataGPR);
+ break;
+ case 6:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4));
+ x86_mov(p->func, dst, dataGPR);
+ x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2);
+ break;
+ }
+ }
+ else if(!(x86_target_caps(p->func) & X86_SSE))
+ {
+ unsigned i = 0;
+ assert((size & 3) == 0);
+ for(i = 0; i < size; i += 4)
+ {
+ x86_mov(p->func, dataGPR, x86_make_disp(src, i));
+ x86_mov(p->func, x86_make_disp(dst, i), dataGPR);
+ }
+ }
+ else
+ {
+ switch(size)
+ {
+ case 8:
+ emit_load64(p, dataGPR, dataXMM, src);
+ emit_store64(p, dst, dataGPR, dataXMM);
+ break;
+ case 12:
+ emit_load64(p, dataGPR2, dataXMM, src);
+ x86_mov(p->func, dataGPR, x86_make_disp(src, 8));
+ emit_store64(p, dst, dataGPR2, dataXMM);
+ x86_mov(p->func, x86_make_disp(dst, 8), dataGPR);
+ break;
+ case 16:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dst, dataXMM);
+ break;
+ case 24:
+ emit_mov128(p, dataXMM, src);
+ emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2);
+ break;
+ case 32:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_mov128(p, x86_make_disp(dst, 16), dataXMM2);
+ break;
+ default:
+ assert(0);
+ }
+ }
}
+static boolean translate_attr_convert( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg src,
+ struct x86_reg dst)
-static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg src )
{
+ const struct util_format_description* input_desc = util_format_description(a->input_format);
+ const struct util_format_description* output_desc = util_format_description(a->output_format);
+ unsigned i;
+ boolean id_swizzle = TRUE;
+ unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE};
+ unsigned needed_chans = 0;
+ unsigned imms[2] = {0, 0x3f800000};
- /* Load and unpack twice:
- */
- sse_movss(p->func, data, src);
- sse2_punpcklbw(p->func, data, get_identity(p));
- sse2_punpcklbw(p->func, data, get_identity(p));
+ if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE)
+ return FALSE;
- /* Convert to float:
- */
- sse2_cvtdq2ps(p->func, data, data);
+ if(input_desc->channel[0].size & 7)
+ return FALSE;
+ if(input_desc->colorspace != output_desc->colorspace)
+ return FALSE;
- /* Scale by 1/255.0
- */
- sse_mulps(p->func, data, get_inv_255(p));
-}
+ for(i = 1; i < input_desc->nr_channels; ++i)
+ {
+ if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 1; i < output_desc->nr_channels; ++i)
+ {
+ if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(output_desc->swizzle[i] < 4)
+ swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i];
+ }
+ if((x86_target_caps(p->func) & X86_SSE) && (0
+ || a->output_format == PIPE_FORMAT_R32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
-static void emit_store_R32G32B32A32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movups(p->func, dest, dataXMM);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
-static void emit_store_R32G32B32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- /* Emit two, shuffle, emit one.
- */
- sse_movlps(p->func, dest, dataXMM);
- sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
- sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
-static void emit_store_R32G32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movlps(p->func, dest, dataXMM);
-}
+ if(needed_chans > 0)
+ {
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovzx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ /* TODO: this may be inefficient due to get_identity() being used both as a float and integer register */
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case 32: /* we lose precision here */
+ sse2_psrld_imm(p->func, dataXMM, 1);
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_const(p, CONST_INV_255);
+ break;
+ case 16:
+ factor = get_const(p, CONST_INV_65535);
+ break;
+ case 32:
+ factor = get_const(p, CONST_INV_2147483647);
+ break;
+ default:
+ assert(0);
+ factor.disp = 0;
+ factor.file = 0;
+ factor.idx = 0;
+ factor.mod = 0;
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ else if(input_desc->channel[0].size == 32)
+ sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovsx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 24);
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 16);
+ break;
+ case 32: /* we lose precision here */
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_const(p, CONST_INV_127);
+ break;
+ case 16:
+ factor = get_const(p, CONST_INV_32767);
+ break;
+ case 32:
+ factor = get_const(p, CONST_INV_2147483647);
+ break;
+ default:
+ assert(0);
+ factor.disp = 0;
+ factor.file = 0;
+ factor.idx = 0;
+ factor.mod = 0;
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ break;
+
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64)
+ return FALSE;
+ if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3)
+ {
+ swizzle[3] = UTIL_FORMAT_SWIZZLE_W;
+ needed_chans = CHANNELS_0001;
+ }
+ switch(input_desc->channel[0].size)
+ {
+ case 32:
+ emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ case 64: /* we lose precision here */
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ default:
+ return FALSE;
+ }
+ break;
+ default:
+ return FALSE;
+ }
-static void emit_store_R32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movss(p->func, dest, dataXMM);
-}
+ if(!id_swizzle)
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) );
+ }
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse_movups(p->func, dst, dataXMM);
+ else
+ {
+ if(output_desc->nr_channels >= 2
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movlps(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movss(p->func, dst, dataXMM);
+ else
+ x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 2)
+ {
+ if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ if(output_desc->nr_channels >= 3)
+ {
+ if(output_desc->nr_channels >= 4
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM);
+ else
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 8), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3));
+ sse_movss(p->func, x86_make_disp(dst, 12), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ }
+ return TRUE;
+ }
+ else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16
+ && output_desc->channel[0].normalized == input_desc->channel[0].normalized
+ && (0
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ ))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned imms[2] = {0, 1};
+
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
-static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- /* Scale by 255.0
- */
- sse_mulps(p->func, dataXMM, get_255(p));
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
- /* Pack and emit:
- */
- sse2_cvtps2dq(p->func, dataXMM, dataXMM);
- sse2_packssdw(p->func, dataXMM, dataXMM);
- sse2_packuswb(p->func, dataXMM, dataXMM);
- sse_movss(p->func, dest, dataXMM);
-}
+ if(needed_chans > 0)
+ {
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ sse2_psrlw_imm(p->func, dataXMM, 1);
+ }
+ else
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY));
+ sse2_punpcklbw(p->func, tmpXMM, dataXMM);
+ sse2_psllw_imm(p->func, dataXMM, 9);
+ sse2_psrlw_imm(p->func, dataXMM, 8);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ sse2_psrlw_imm(p->func, dataXMM, 7);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ {
+ struct x86_reg t = dataXMM;
+ dataXMM = tmpXMM;
+ tmpXMM = t;
+ }
+ }
+ else
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psraw_imm(p->func, dataXMM, 8);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ if(output_desc->channel[0].normalized)
+ imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7ffff;
+ if(!id_swizzle)
+ sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6));
+ }
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse2_movq(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse2_movd(p->func, dst, dataXMM);
+ else
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, dst, tmp);
+ if(output_desc->nr_channels >= 2)
+ x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ if(output_desc->nr_channels >= 2)
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_shr_imm(p->func, tmp, 16);
+ x86_mov16(p->func, x86_make_disp(dst, 2), tmp);
+ }
+ }
+ }
+ if(output_desc->nr_channels >= 3)
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 4), tmp);
+ if(output_desc->nr_channels >= 4)
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 48);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 6), tmp);
+ }
+ }
+ }
+ }
+ }
+ return TRUE;
+ }
+ else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0])))
+ {
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned i;
+ if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4
+ && swizzle[0] == UTIL_FORMAT_SWIZZLE_W
+ && swizzle[1] == UTIL_FORMAT_SWIZZLE_Z
+ && swizzle[2] == UTIL_FORMAT_SWIZZLE_Y
+ && swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
+ {
+ /* TODO: support movbe */
+ x86_mov(p->func, tmp, src);
+ x86_bswap(p->func, tmp);
+ x86_mov(p->func, dst, tmp);
+ return TRUE;
+ }
-/* Extended swizzles? Maybe later.
- */
-static void emit_swizzle( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg src,
- unsigned char shuffle )
-{
- sse_shufps(p->func, dest, src, shuffle);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ switch(output_desc->channel[0].size)
+ {
+ case 8:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[0].normalized ? 0xff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[0].normalized ? 0x7f : 1;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v);
+ }
+ else
+ {
+ x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1));
+ x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp);
+ }
+ break;
+ case 16:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3c00;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v);
+ }
+ else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0)
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0);
+ else
+ {
+ x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2));
+ x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp);
+ }
+ break;
+ case 32:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3f800000;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 4), tmp);
+ }
+ break;
+ case 64:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned l = 0;
+ unsigned h = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ h = output_desc->channel[1].normalized ? 0xffffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ h = output_desc->channel[1].normalized ? 0x7fffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ h = 0x3ff00000;
+ l = 0;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l);
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h);
+ }
+ else
+ {
+ if(x86_target_caps(p->func) & X86_SSE)
+ {
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0);
+ emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8));
+ emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8));
+ x86_mov(p->func, x86_make_disp(dst, i * 8), tmp);
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp);
+ }
+ }
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ return TRUE;
+ }
+ /* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */
+ else if((x86_target_caps(p->func) & X86_SSE2) &&
+ a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0
+ || a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM
+ || a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM
+ ))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ /* load */
+ sse_movups(p->func, dataXMM, src);
-static boolean translate_attr( struct translate_sse *p,
- const struct translate_element *a,
- struct x86_reg srcECX,
- struct x86_reg dstEAX)
-{
- struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM)
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3));
- switch (a->input_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_load_R32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_load_R32G32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_load_R32G32B32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_load_R32G32B32A32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- break;
- default:
- return FALSE;
- }
+ /* scale by 255.0 */
+ sse_mulps(p->func, dataXMM, get_const(p, CONST_255));
- switch (a->output_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_store_R32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_store_R32G32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_store_R32G32B32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_store_R32G32B32A32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- default:
- return FALSE;
+ /* pack and emit */
+ sse2_cvtps2dq(p->func, dataXMM, dataXMM);
+ sse2_packssdw(p->func, dataXMM, dataXMM);
+ sse2_packuswb(p->func, dataXMM, dataXMM);
+ sse2_movd(p->func, dst, dataXMM);
+
+ return TRUE;
}
- return TRUE;
+ return FALSE;
}
+static boolean translate_attr( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg src,
+ struct x86_reg dst)
+{
+ if(a->input_format == a->output_format)
+ {
+ emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1));
+ return TRUE;
+ }
+
+ return translate_attr_convert(p, a, src, dst);
+}
static boolean init_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
unsigned i;
- struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
+ struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
- if (linear || varient->instance_divisor) {
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ if (!index_size || varient->instance_divisor) {
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->stride));
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->base_ptr));
- struct x86_reg elt = p->idx_EBX;
+ struct x86_reg elt = p->idx_ESI;
struct x86_reg tmp_EAX = p->tmp_EAX;
/* Calculate pointer to first attrib:
@@ -406,20 +1085,16 @@ static boolean init_inputs( struct translate_sse *p,
x86_mov(p->func, tmp_EAX, instance_id);
if (varient->instance_divisor != 1) {
- struct x86_reg tmp_EDX = p->machine_EDX;
- struct x86_reg tmp_ECX = p->outbuf_ECX;
+ struct x86_reg tmp_EDX = p->tmp2_EDX;
+ struct x86_reg tmp_ECX = p->src_ECX;
/* TODO: Add x86_shr() to rtasm and use it whenever
* instance divisor is power of two.
*/
- x86_push(p->func, tmp_EDX);
- x86_push(p->func, tmp_ECX);
x86_xor(p->func, tmp_EDX, tmp_EDX);
x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
- x86_pop(p->func, tmp_ECX);
- x86_pop(p->func, tmp_EDX);
}
} else {
x86_mov(p->func, tmp_EAX, elt);
@@ -430,16 +1105,23 @@ static boolean init_inputs( struct translate_sse *p,
*/
x86_imul(p->func, tmp_EAX, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, tmp_EAX, buf_base_ptr);
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
- if (linear && p->nr_buffer_varients == 1)
+ if (!index_size && p->nr_buffer_varients == 1)
+ {
+ x64_rexw(p->func);
x86_mov(p->func, elt, tmp_EAX);
+ }
else
+ {
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, tmp_EAX);
+ }
}
}
@@ -448,44 +1130,57 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
- boolean linear,
+ unsigned index_size,
unsigned var_idx,
struct x86_reg elt )
{
if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
- return x86_make_disp(p->machine_EDX,
+ return x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
}
- if (linear && p->nr_buffer_varients == 1) {
- return p->idx_EBX;
+ if (!index_size && p->nr_buffer_varients == 1) {
+ return p->idx_ESI;
}
- else if (linear || p->buffer_varient[var_idx].instance_divisor) {
- struct x86_reg ptr = p->tmp_EAX;
+ else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
+ struct x86_reg ptr = p->src_ECX;
struct x86_reg buf_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer_varient[var_idx].ptr));
+ x64_rexw(p->func);
x86_mov(p->func, ptr, buf_ptr);
return ptr;
}
else {
- struct x86_reg ptr = p->tmp_EAX;
+ struct x86_reg ptr = p->src_ECX;
const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
struct x86_reg buf_stride =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
struct x86_reg buf_base_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
/* Calculate pointer to current attrib:
*/
- x86_mov(p->func, ptr, buf_stride);
- x86_imul(p->func, ptr, elt);
+ switch(index_size)
+ {
+ case 1:
+ x86_movzx8(p->func, ptr, elt);
+ break;
+ case 2:
+ x86_movzx16(p->func, ptr, elt);
+ break;
+ case 4:
+ x86_mov(p->func, ptr, elt);
+ break;
+ }
+ x86_imul(p->func, ptr, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, ptr, buf_base_ptr);
return ptr;
}
@@ -494,39 +1189,43 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
- if (linear && p->nr_buffer_varients == 1) {
- struct x86_reg stride = x86_make_disp(p->machine_EDX,
+ if (!index_size && p->nr_buffer_varients == 1) {
+ struct x86_reg stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[0].stride));
if (p->buffer_varient[0].instance_divisor == 0) {
- x86_add(p->func, p->idx_EBX, stride);
- sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ x64_rexw(p->func);
+ x86_add(p->func, p->idx_ESI, stride);
+ sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
}
}
- else if (linear) {
+ else if (!index_size) {
unsigned i;
/* Is this worthwhile??
*/
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
if (varient->instance_divisor == 0) {
- x86_mov(p->func, p->tmp_EAX, buf_ptr);
- x86_add(p->func, p->tmp_EAX, buf_stride);
+ x86_mov(p->func, p->tmp_EAX, buf_stride);
+ x64_rexw(p->func);
+ x86_add(p->func, p->tmp_EAX, buf_ptr);
if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, p->tmp_EAX);
}
}
}
else {
- x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
+ x64_rexw(p->func);
+ x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
}
return TRUE;
@@ -551,35 +1250,52 @@ static boolean incr_inputs( struct translate_sse *p,
*/
static boolean build_vertex_emit( struct translate_sse *p,
struct x86_function *func,
- boolean linear )
+ unsigned index_size )
{
int fixup, label;
unsigned j;
+ memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
+ memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
+
p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
- p->idx_EBX = x86_make_reg(file_REG32, reg_BX);
- p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
- p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
- p->count_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->idx_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX);
+ p->machine_EDI = x86_make_reg(file_REG32, reg_DI);
+ p->count_EBP = x86_make_reg(file_REG32, reg_BP);
+ p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX);
+ p->src_ECX = x86_make_reg(file_REG32, reg_CX);
p->func = func;
- p->loaded_inv_255 = FALSE;
- p->loaded_255 = FALSE;
- p->loaded_identity = FALSE;
x86_init_func(p->func);
- /* Push a few regs?
- */
- x86_push(p->func, p->idx_EBX);
- x86_push(p->func, p->count_ESI);
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ /* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
+ }
- /* Load arguments into regs:
- */
- x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
- x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
- x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
- x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
+ x86_push(p->func, p->outbuf_EBX);
+ x86_push(p->func, p->count_EBP);
+
+/* on non-Win64 x86-64, these are already in the right registers */
+ if(x86_target(p->func) != X86_64_STD_ABI)
+ {
+ x86_push(p->func, p->machine_EDI);
+ x86_push(p->func, p->idx_ESI);
+
+ x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
+ x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
+ }
+
+ x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
+
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
+ else
+ x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
/* Load instance ID.
*/
@@ -588,25 +1304,25 @@ static boolean build_vertex_emit( struct translate_sse *p,
p->tmp_EAX,
x86_fn_arg(p->func, 4));
x86_mov(p->func,
- x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
+ x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
p->tmp_EAX);
}
/* Get vertex count, compare to zero
*/
x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
- x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
+ x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
fixup = x86_jcc_forward(p->func, cc_E);
/* always load, needed or not:
*/
- init_inputs(p, linear);
+ init_inputs(p, index_size);
/* Note address for loop jump
*/
label = x86_get_label(p->func);
{
- struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
+ struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
int last_varient = -1;
struct x86_reg vb;
@@ -618,30 +1334,31 @@ static boolean build_vertex_emit( struct translate_sse *p,
*/
if (varient != last_varient) {
last_varient = varient;
- vb = get_buffer_ptr(p, linear, varient, elt);
+ vb = get_buffer_ptr(p, index_size, varient, elt);
}
if (!translate_attr( p, a,
x86_make_disp(vb, a->input_offset),
- x86_make_disp(p->outbuf_ECX, a->output_offset)))
+ x86_make_disp(p->outbuf_EBX, a->output_offset)))
return FALSE;
}
/* Next output vertex:
*/
+ x64_rexw(p->func);
x86_lea(p->func,
- p->outbuf_ECX,
- x86_make_disp(p->outbuf_ECX,
+ p->outbuf_EBX,
+ x86_make_disp(p->outbuf_EBX,
p->translate.key.output_stride));
/* Incr index
*/
- incr_inputs( p, linear );
+ incr_inputs( p, index_size );
}
/* decr count, loop if not zero
*/
- x86_dec(p->func, p->count_ESI);
+ x86_dec(p->func, p->count_EBP);
x86_jcc(p->func, cc_NZ, label);
/* Exit mmx state?
@@ -656,8 +1373,20 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* Pop regs and return
*/
- x86_pop(p->func, p->count_ESI);
- x86_pop(p->func, p->idx_EBX);
+ if(x86_target(p->func) != X86_64_STD_ABI)
+ {
+ x86_pop(p->func, p->idx_ESI);
+ x86_pop(p->func, p->machine_EDI);
+ }
+
+ x86_pop(p->func, p->count_EBP);
+ x86_pop(p->func, p->outbuf_EBX);
+
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
+ }
x86_ret(p->func);
return TRUE;
@@ -697,37 +1426,7 @@ static void translate_sse_release( struct translate *translate )
x86_release_func( &p->linear_func );
x86_release_func( &p->elt_func );
- FREE(p);
-}
-
-static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run_elts( translate,
- elts,
- count,
- instance_id,
- output_buffer);
-}
-
-static void PIPE_CDECL translate_sse_run( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run( translate,
- start,
- count,
- instance_id,
- output_buffer);
+ os_free_aligned(p);
}
@@ -736,18 +1435,19 @@ struct translate *translate_sse2_create( const struct translate_key *key )
struct translate_sse *p = NULL;
unsigned i;
- if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
+ /* this is misnamed, it actually refers to whether rtasm is enabled or not */
+ if (!rtasm_cpu_has_sse())
goto fail;
- p = CALLOC_STRUCT( translate_sse );
+ p = os_malloc_aligned(sizeof(struct translate_sse), 16);
if (p == NULL)
goto fail;
+ memset(p, 0, sizeof(*p));
+ memcpy(p->consts, consts, sizeof(consts));
p->translate.key = *key;
p->translate.release = translate_sse_release;
p->translate.set_buffer = translate_sse_set_buffer;
- p->translate.run_elts = translate_sse_run_elts;
- p->translate.run = translate_sse_run;
for (i = 0; i < key->nr_elements; i++) {
if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
@@ -783,18 +1483,32 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
- if (!build_vertex_emit(p, &p->linear_func, TRUE))
+ if (!build_vertex_emit(p, &p->linear_func, 0))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt_func, 4))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt16_func, 2))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt8_func, 1))
+ goto fail;
+
+ p->translate.run = (void*)x86_get_func(&p->linear_func);
+ if (p->translate.run == NULL)
goto fail;
- if (!build_vertex_emit(p, &p->elt_func, FALSE))
+ p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
+ if (p->translate.run_elts == NULL)
goto fail;
- p->gen_run = (run_func)x86_get_func(&p->linear_func);
- if (p->gen_run == NULL)
+ p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
+ if (p->translate.run_elts16 == NULL)
goto fail;
- p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
- if (p->gen_run_elts == NULL)
+ p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
+ if (p->translate.run_elts8 == NULL)
goto fail;
return &p->translate;
diff --git a/src/gallium/auxiliary/util/u_bitmask.h b/src/gallium/auxiliary/util/u_bitmask.h
index 87f1110296..98b85ddecd 100644
--- a/src/gallium/auxiliary/util/u_bitmask.h
+++ b/src/gallium/auxiliary/util/u_bitmask.h
@@ -36,6 +36,9 @@
#define U_HANDLE_BITMASK_H_
+#include "pipe/p_compiler.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 97fa99ec65..dfb142b9e1 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -42,6 +42,7 @@
#include "util/u_blit.h"
#include "util/u_draw_quad.h"
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_sampler.h"
@@ -56,15 +57,18 @@ struct blit_state
struct cso_context *cso;
struct pipe_blend_state blend;
- struct pipe_depth_stencil_alpha_state depthstencil;
+ struct pipe_depth_stencil_alpha_state depthstencil_keep;
+ struct pipe_depth_stencil_alpha_state depthstencil_write;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
struct pipe_vertex_element velem[2];
+ enum pipe_texture_target internal_target;
void *vs;
void *fs[TGSI_WRITEMASK_XYZW + 1];
+ void *fs_depth;
struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
@@ -95,7 +99,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->blend.rt[0].colormask = PIPE_MASK_RGBA;
/* no-op depth/stencil/alpha */
- memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
+ memset(&ctx->depthstencil_keep, 0, sizeof(ctx->depthstencil_keep));
+ memset(&ctx->depthstencil_write, 0, sizeof(ctx->depthstencil_write));
+ ctx->depthstencil_write.depth.enabled = 1;
+ ctx->depthstencil_write.depth.writemask = 1;
+ ctx->depthstencil_write.depth.func = PIPE_FUNC_ALWAYS;
/* rasterizer */
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
@@ -110,7 +118,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
ctx->sampler.min_img_filter = 0; /* set later */
ctx->sampler.mag_img_filter = 0; /* set later */
- ctx->sampler.normalized_coords = 1;
/* vertex elements state */
memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
@@ -145,6 +152,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->vertices[i][1][3] = 1.0f; /* q */
}
+ if(pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
+ ctx->internal_target = PIPE_TEXTURE_2D;
+ else
+ ctx->internal_target = PIPE_TEXTURE_RECT;
+
return ctx;
}
@@ -164,6 +176,9 @@ util_destroy_blit(struct blit_state *ctx)
if (ctx->fs[i])
pipe->delete_fs_state(pipe, ctx->fs[i]);
+ if (ctx->fs_depth)
+ pipe->delete_fs_state(pipe, ctx->fs_depth);
+
pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
@@ -271,7 +286,7 @@ regions_overlap(int srcX0, int srcY0,
* \param writemask controls which channels in the dest surface are sourced
* from the src surface. Disabled channels are sourced
* from (0,0,0,1).
- * XXX need some control over blitting Z and/or stencil.
+ * XXX need some control over blitting stencil.
*/
void
util_blit_pixels_writemask(struct blit_state *ctx,
@@ -294,8 +309,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
const int srcW = abs(srcX1 - srcX0);
const int srcH = abs(srcY1 - srcY0);
unsigned offset;
- boolean overlap;
+ boolean overlap, dst_is_depth;
float s0, t0, s1, t1;
+ boolean normalized;
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
@@ -335,7 +351,6 @@ util_blit_pixels_writemask(struct blit_state *ctx,
return;
}
-
/* Create a temporary texture when src and dest alias or when src
* is anything other than a 2d texture.
* XXX should just use appropriate shader to access 1d / 3d slice / cube face,
@@ -347,7 +362,8 @@ util_blit_pixels_writemask(struct blit_state *ctx,
dst->face == srcsub.face &&
dst->level == srcsub.level &&
dst->zslice == srcZ0) ||
- src_tex->target != PIPE_TEXTURE_2D)
+ (src_tex->target != PIPE_TEXTURE_2D &&
+ src_tex->target != PIPE_TEXTURE_RECT))
{
struct pipe_resource texTemp;
struct pipe_resource *tex;
@@ -372,7 +388,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* create temp texture */
memset(&texTemp, 0, sizeof(texTemp));
- texTemp.target = PIPE_TEXTURE_2D;
+ texTemp.target = ctx->internal_target;
texTemp.format = src_tex->format;
texTemp.last_level = 0;
texTemp.width0 = srcW;
@@ -392,10 +408,19 @@ util_blit_pixels_writemask(struct blit_state *ctx,
src_tex, srcsub, srcLeft, srcTop, srcZ0, /* src */
srcW, srcH); /* size */
- s0 = 0.0f;
- s1 = 1.0f;
- t0 = 0.0f;
- t1 = 1.0f;
+ normalized = tex->target != PIPE_TEXTURE_RECT;
+ if(normalized) {
+ s0 = 0.0f;
+ s1 = 1.0f;
+ t0 = 0.0f;
+ t1 = 1.0f;
+ }
+ else {
+ s0 = 0;
+ s1 = srcW;
+ t0 = 0;
+ t1 = srcH;
+ }
u_sampler_view_default_template(&sv_templ, tex, tex->format);
sampler_view = pipe->create_sampler_view(pipe, tex, &sv_templ);
@@ -415,20 +440,29 @@ util_blit_pixels_writemask(struct blit_state *ctx,
return;
}
- s0 = srcX0 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
- s1 = srcX1 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
- t0 = srcY0 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
- t1 = srcY1 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ s0 = srcX0;
+ s1 = srcX1;
+ t0 = srcY0;
+ t1 = srcY1;
+ normalized = sampler_view->texture->target != PIPE_TEXTURE_RECT;
+ if(normalized)
+ {
+ s0 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+ s1 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+ t0 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ t1 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ }
}
+ dst_is_depth = util_format_is_depth_or_stencil(dst->format);
- assert(screen->is_format_supported(screen, sampler_view->format, PIPE_TEXTURE_2D,
+ assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target,
sampler_view->texture->nr_samples,
PIPE_BIND_SAMPLER_VIEW, 0));
- assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
+ assert(screen->is_format_supported(screen, dst->format, ctx->internal_target,
dst->texture->nr_samples,
- PIPE_BIND_RENDER_TARGET, 0));
-
+ dst_is_depth ? PIPE_BIND_DEPTH_STENCIL :
+ PIPE_BIND_RENDER_TARGET, 0));
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
@@ -444,12 +478,15 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
- cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+ cso_set_depth_stencil_alpha(ctx->cso,
+ dst_is_depth ? &ctx->depthstencil_write :
+ &ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
+ ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
/* we've limited this already with the sampler view but you never know... */
@@ -472,22 +509,35 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* texture */
cso_set_fragment_sampler_views(ctx->cso, 1, &sampler_view);
- if (ctx->fs[writemask] == NULL)
- ctx->fs[writemask] =
- util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
- TGSI_INTERPOLATE_LINEAR,
- writemask);
-
/* shaders */
- cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+ if (dst_is_depth) {
+ if (ctx->fs_depth == NULL)
+ ctx->fs_depth =
+ util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth);
+ } else {
+ if (ctx->fs[writemask] == NULL)
+ ctx->fs[writemask] =
+ util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR,
+ writemask);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+ }
cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
- fb.nr_cbufs = 1;
- fb.cbufs[0] = dst;
+ if (dst_is_depth) {
+ fb.zsbuf = dst;
+ } else {
+ fb.nr_cbufs = 1;
+ fb.cbufs[0] = dst;
+ }
cso_set_framebuffer(ctx->cso, &fb);
/* draw quad */
@@ -574,6 +624,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
int dstX1, int dstY1,
float z, uint filter)
{
+ boolean normalized = src_sampler_view->texture->target != PIPE_TEXTURE_RECT;
struct pipe_framebuffer_state fb;
float s0, t0, s1, t1;
unsigned offset;
@@ -586,10 +637,18 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(tex->width0 != 0);
assert(tex->height0 != 0);
- s0 = srcX0 / (float)tex->width0;
- s1 = srcX1 / (float)tex->width0;
- t0 = srcY0 / (float)tex->height0;
- t1 = srcY1 / (float)tex->height0;
+ s0 = srcX0;
+ s1 = srcX1;
+ t0 = srcY0;
+ t1 = srcY1;
+
+ if(normalized)
+ {
+ s0 /= (float)tex->width0;
+ s1 /= (float)tex->width0;
+ t0 /= (float)tex->height0;
+ t1 /= (float)tex->height0;
+ }
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
@@ -611,12 +670,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
- cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+ cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
+ ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h
index ef95134f32..b8a0dfce13 100644
--- a/src/gallium/auxiliary/util/u_blit.h
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -30,18 +30,20 @@
#define U_BLIT_H
+#include "pipe/p_compiler.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
+struct cso_context;
struct pipe_context;
-struct pipe_surface;
struct pipe_resource;
-struct cso_context;
-
-
-struct blit_state;
+struct pipe_sampler_view;
+struct pipe_subresource;
+struct pipe_surface;
extern struct blit_state *
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index b5b86b7214..f93ef26ae7 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -92,7 +92,7 @@ struct blitter_context_priv
void *velem_state;
/* Sampler state for clamping to a miplevel. */
- void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
+ void *sampler_state[PIPE_MAX_TEXTURE_LEVELS * 2];
/* Rasterizer state. */
void *rs_state;
@@ -254,6 +254,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
ctx->dsa_write_depth_keep_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
+ pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
pipe->delete_rasterizer_state(pipe, ctx->rs_state);
pipe->delete_vs_state(pipe, ctx->vs_col);
@@ -271,7 +272,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
if (ctx->fs_col[i])
pipe->delete_fs_state(pipe, ctx->fs_col[i]);
- for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+ for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS * 2; i++)
if (ctx->sampler_state[i])
pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
@@ -319,7 +320,7 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
*/
if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
- util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL);
+ util_unreference_framebuffer_state(&ctx->base.saved_fb_state);
ctx->base.saved_fb_state.nr_cbufs = ~0;
}
@@ -417,16 +418,26 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx,
}
}
-static void get_normalized_texcoords(struct pipe_resource *src,
+static void get_texcoords(struct pipe_resource *src,
struct pipe_subresource subsrc,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2,
- float out[4])
+ boolean normalized, float out[4])
{
- out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
- out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
- out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
- out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+ if(normalized)
+ {
+ out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
+ out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
+ out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
+ out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+ }
+ else
+ {
+ out[0] = x1;
+ out[1] = y1;
+ out[2] = x2;
+ out[3] = y2;
+ }
}
static void set_texcoords_in_vertices(const float coord[4],
@@ -454,7 +465,7 @@ static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
unsigned i;
float coord[4];
- get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
for (i = 0; i < 4; i++) {
@@ -489,7 +500,7 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
float coord[4];
float st[4][2];
- get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
set_texcoords_in_vertices(coord, &st[0][0], 2);
util_map_texcoords2d_onto_cubemap(subsrc.face,
@@ -523,7 +534,7 @@ static void blitter_draw_quad(struct blitter_context_priv *ctx)
static INLINE
void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
- int miplevel)
+ int miplevel, boolean normalized)
{
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
@@ -531,18 +542,19 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
/* Create the sampler state on-demand. */
- if (!ctx->sampler_state[miplevel]) {
+ if (!ctx->sampler_state[miplevel * 2 + normalized]) {
sampler_state->lod_bias = miplevel;
sampler_state->min_lod = miplevel;
sampler_state->max_lod = miplevel;
+ sampler_state->normalized_coords = normalized;
- ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
+ ctx->sampler_state[miplevel * 2 + normalized] = pipe->create_sampler_state(pipe,
sampler_state);
}
/* Return void** so that it can be passed to bind_fragment_sampler_states
* directly. */
- return &ctx->sampler_state[miplevel];
+ return &ctx->sampler_state[miplevel * 2 + normalized];
}
static INLINE
@@ -568,6 +580,8 @@ pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
return TGSI_TEXTURE_1D;
case PIPE_TEXTURE_2D:
return TGSI_TEXTURE_2D;
+ case PIPE_TEXTURE_RECT:
+ return TGSI_TEXTURE_RECT;
case PIPE_TEXTURE_3D:
return TGSI_TEXTURE_3D;
case PIPE_TEXTURE_CUBE:
@@ -716,6 +730,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
struct pipe_sampler_view viewTempl, *view;
unsigned bind;
boolean is_stencil, is_depth;
+ boolean normalized;
/* Give up if textures are not set. */
assert(dst && src);
@@ -787,6 +802,8 @@ void util_blitter_copy_region(struct blitter_context *blitter,
fb_state.zsbuf = 0;
}
+ normalized = src->target != PIPE_TEXTURE_RECT;
+
/* Initialize sampler view. */
u_sampler_view_default_template(&viewTempl, src, src->format);
view = pipe->create_sampler_view(pipe, src, &viewTempl);
@@ -795,7 +812,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_vs_state(pipe, ctx->vs_tex);
pipe->bind_fragment_sampler_states(pipe, 1,
- blitter_get_sampler_state(ctx, subsrc.level));
+ blitter_get_sampler_state(ctx, subsrc.level, normalized));
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->set_fragment_sampler_views(pipe, 1, &view);
pipe->set_framebuffer_state(pipe, &fb_state);
@@ -806,11 +823,12 @@ void util_blitter_copy_region(struct blitter_context *blitter,
/* Draw the quad with the draw_rectangle callback. */
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
{
/* Set texture coordinates. */
float coord[4];
- get_normalized_texcoords(src, subsrc, srcx, srcy,
- srcx+width, srcy+height, coord);
+ get_texcoords(src, subsrc, srcx, srcy,
+ srcx+width, srcy+height, normalized, coord);
/* Draw. */
blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index f316587dea..e33d2e283f 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -27,6 +27,7 @@
#ifndef U_BLITTER_H
#define U_BLITTER_H
+#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
@@ -258,45 +259,12 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter,
blitter->saved_vs = vs;
}
-/* XXX This should probably be moved elsewhere. */
-static INLINE
-void util_assign_framebuffer_state(struct pipe_framebuffer_state *dst,
- const struct pipe_framebuffer_state *src)
-{
- unsigned i;
-
- if (src) {
- /* Reference all surfaces. */
- for (i = 0; i < src->nr_cbufs; i++) {
- pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
- }
- for (; i < dst->nr_cbufs; i++) {
- pipe_surface_reference(&dst->cbufs[i], NULL);
- }
-
- pipe_surface_reference(&dst->zsbuf, src->zsbuf);
-
- dst->nr_cbufs = src->nr_cbufs;
- dst->width = src->width;
- dst->height = src->height;
- } else {
- /* Set all surfaces to NULL. */
- for (i = 0; i < dst->nr_cbufs; i++) {
- pipe_surface_reference(&dst->cbufs[i], NULL);
- }
-
- pipe_surface_reference(&dst->zsbuf, NULL);
-
- dst->nr_cbufs = 0;
- }
-}
-
static INLINE
void util_blitter_save_framebuffer(struct blitter_context *blitter,
const struct pipe_framebuffer_state *state)
{
blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
- util_assign_framebuffer_state(&blitter->saved_fb_state, state);
+ util_copy_framebuffer_state(&blitter->saved_fb_state, state);
}
static INLINE
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 5056351307..32519b148b 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -73,7 +73,9 @@
#endif
+#ifdef DEBUG
DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
+#endif
struct util_cpu_caps util_cpu_caps;
@@ -83,61 +85,6 @@ static int has_cpuid(void);
#endif
-#if defined(PIPE_ARCH_X86)
-
-/* The sigill handlers */
-#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/
-static void
-sigill_handler_sse(int signal, struct sigcontext sc)
-{
- /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
- * instructions are 3 bytes long. We must increment the instruction
- * pointer manually to avoid repeated execution of the offending
- * instruction.
- *
- * If the SIGILL is caused by a divide-by-zero when unmasked
- * exceptions aren't supported, the SIMD FPU status and control
- * word will be restored at the end of the test, so we don't need
- * to worry about doing it here. Besides, we may not be able to...
- */
- sc.eip += 3;
-
- util_cpu_caps.has_sse=0;
-}
-
-static void
-sigfpe_handler_sse(int signal, struct sigcontext sc)
-{
- if (sc.fpstate->magic != 0xffff) {
- /* Our signal context has the extended FPU state, so reset the
- * divide-by-zero exception mask and clear the divide-by-zero
- * exception bit.
- */
- sc.fpstate->mxcsr |= 0x00000200;
- sc.fpstate->mxcsr &= 0xfffffffb;
- } else {
- /* If we ever get here, we're completely hosed.
- */
- }
-}
-#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
-
-#if defined(PIPE_OS_WINDOWS)
-static LONG CALLBACK
-win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
-{
- if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
- ep->ContextRecord->Eip +=3;
- util_cpu_caps.has_sse=0;
- return EXCEPTION_CONTINUE_EXECUTION;
- }
- return EXCEPTION_CONTINUE_SEARCH;
-}
-#endif /* PIPE_OS_WINDOWS */
-
-#endif /* PIPE_ARCH_X86 */
-
-
#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
static jmp_buf __lv_powerpc_jmpbuf;
static volatile sig_atomic_t __lv_powerpc_canjump = 0;
@@ -194,123 +141,8 @@ check_os_altivec_support(void)
}
#endif /* PIPE_ARCH_PPC */
-/* If we're running on a processor that can do SSE, let's see if we
- * are allowed to or not. This will catch 2.4.0 or later kernels that
- * haven't been configured for a Pentium III but are running on one,
- * and RedHat patched 2.2 kernels that have broken exception handling
- * support for user space apps that do SSE.
- */
-#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
-static void
-check_os_katmai_support(void)
-{
-#if defined(PIPE_ARCH_X86)
-#if defined(PIPE_OS_FREEBSD)
- int has_sse=0, ret;
- int len = sizeof (has_sse);
-
- ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
- if (ret || !has_sse)
- util_cpu_caps.has_sse=0;
-
-#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
- int has_sse, has_sse2, ret, mib[2];
- int varlen;
-
- mib[0] = CTL_MACHDEP;
- mib[1] = CPU_SSE;
- varlen = sizeof (has_sse);
-
- ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
- if (ret < 0 || !has_sse) {
- util_cpu_caps.has_sse = 0;
- } else {
- util_cpu_caps.has_sse = 1;
- }
-
- mib[1] = CPU_SSE2;
- varlen = sizeof (has_sse2);
- ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
- if (ret < 0 || !has_sse2) {
- util_cpu_caps.has_sse2 = 0;
- } else {
- util_cpu_caps.has_sse2 = 1;
- }
- util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
-
-#elif defined(PIPE_OS_WINDOWS)
- LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
- if (util_cpu_caps.has_sse) {
- exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
-#if defined(PIPE_CC_GCC)
- __asm __volatile ("xorps %xmm0, %xmm0");
-#elif defined(PIPE_CC_MSVC)
- __asm {
- xorps xmm0, xmm0 /* executing SSE instruction */
- }
-#else
-#error Unsupported compiler
-#endif
- SetUnhandledExceptionFilter(exc_fil);
- }
-#elif defined(PIPE_OS_LINUX)
- struct sigaction saved_sigill;
- struct sigaction saved_sigfpe;
-
- /* Save the original signal handlers.
- */
- sigaction(SIGILL, NULL, &saved_sigill);
- sigaction(SIGFPE, NULL, &saved_sigfpe);
-
- signal(SIGILL, (void (*)(int))sigill_handler_sse);
- signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
-
- /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
- * supports the extended FPU save and restore required for SSE. If
- * we execute an SSE instruction on a PIII and get a SIGILL, the OS
- * doesn't support Streaming SIMD Exceptions, even if the processor
- * does.
- */
- if (util_cpu_caps.has_sse) {
- __asm __volatile ("xorps %xmm1, %xmm0");
- }
-
- /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
- * it supports unmasked SIMD FPU exceptions. If we unmask the
- * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
- * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
- * as expected, we're okay but we need to clean up after it.
- *
- * Are we being too stringent in our requirement that the OS support
- * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
- * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
- * doesn't even support them. We at least know the user-space SSE
- * support is good in kernels that do support unmasked exceptions,
- * and therefore to be safe I'm going to leave this test in here.
- */
- if (util_cpu_caps.has_sse) {
- /* test_os_katmai_exception_support(); */
- }
-
- /* Restore the original signal handlers.
- */
- sigaction(SIGILL, &saved_sigill, NULL);
- sigaction(SIGFPE, &saved_sigfpe, NULL);
-
-#else
- /* We can't use POSIX signal handling to test the availability of
- * SSE, so we disable it by default.
- */
- util_cpu_caps.has_sse = 0;
-#endif /* __linux__ */
-#endif
-
-#if defined(PIPE_ARCH_X86_64)
- util_cpu_caps.has_sse = 1;
-#endif
-}
-
+#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
static int has_cpuid(void)
{
#if defined(PIPE_ARCH_X86)
@@ -469,9 +301,6 @@ util_cpu_detect(void)
util_cpu_caps.cacheline = regs2[2] & 0xFF;
}
- if (util_cpu_caps.has_sse)
- check_os_katmai_support();
-
if (!util_cpu_caps.has_sse) {
util_cpu_caps.has_sse2 = 0;
util_cpu_caps.has_sse3 = 0;
diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
new file mode 100644
index 0000000000..1c90ff3106
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -0,0 +1,81 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <pipe/p_state.h>
+#include <util/u_format.h>
+#include <util/u_debug_describe.h>
+#include <util/u_string.h>
+
+void
+debug_describe_reference(char* buf, const struct pipe_reference*ptr)
+{
+ strcpy(buf, "pipe_object");
+}
+
+void
+debug_describe_resource(char* buf, const struct pipe_resource *ptr)
+{
+ switch(ptr->target)
+ {
+ case PIPE_BUFFER:
+ util_sprintf(buf, "pipe_buffer<%u>", (unsigned)util_format_get_stride(ptr->format, ptr->width0));
+ break;
+ case PIPE_TEXTURE_1D:
+ util_sprintf(buf, "pipe_texture1d<%u,%s,%u>", ptr->width0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_2D:
+ util_sprintf(buf, "pipe_texture2d<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_RECT:
+ util_sprintf(buf, "pipe_texture_rect<%u,%u,%s>", ptr->width0, ptr->height0, util_format_short_name(ptr->format));
+ break;
+ case PIPE_TEXTURE_CUBE:
+ util_sprintf(buf, "pipe_texture_cube<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_3D:
+ util_sprintf(buf, "pipe_texture3d<%u,%u,%u,%s,%u>", ptr->width0, ptr->height0, ptr->depth0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ default:
+ util_sprintf(buf, "pipe_martian_resource<%u>", ptr->target);
+ break;
+ }
+}
+
+void
+debug_describe_surface(char* buf, const struct pipe_surface *ptr)
+{
+ char res[128];
+ debug_describe_resource(res, ptr->texture);
+ util_sprintf(buf, "pipe_surface<%s,%u,%u,%u>", res, ptr->face, ptr->level, ptr->zslice);
+}
+
+void
+debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr)
+{
+ char res[128];
+ debug_describe_resource(res, ptr->texture);
+ util_sprintf(buf, "pipe_sampler_view<%s,%s>", res, util_format_short_name(ptr->format));
+}
diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
new file mode 100644
index 0000000000..26d1f803bf
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DEBUG_DESCRIBE_H_
+#define U_DEBUG_DESCRIBE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_reference;
+struct pipe_resource;
+struct pipe_surface;
+struct pipe_sampler_view;
+
+/* a 256-byte buffer is necessary and sufficient */
+void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
+void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
+void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
+void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_DESCRIBE_H_ */
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c
new file mode 100644
index 0000000000..40a26c9c69
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.c
@@ -0,0 +1,181 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
+/* see http://www.mozilla.org/performance/refcnt-balancer.html for what do with the output
+ * on Linux, use tools/addr2line.sh to postprocess it before anything else
+ **/
+#include <util/u_debug.h>
+#include <util/u_debug_refcnt.h>
+#include <util/u_debug_stack.h>
+#include <util/u_debug_symbol.h>
+#include <util/u_string.h>
+#include <util/u_hash_table.h>
+#include <os/os_thread.h>
+#include <os/os_stream.h>
+
+int debug_refcnt_state;
+
+struct os_stream* stream;
+
+/* TODO: maybe move this serial machinery to a stand-alone module and expose it? */
+static pipe_mutex serials_mutex;
+static struct util_hash_table* serials_hash;
+static unsigned serials_last;
+
+static unsigned hash_ptr(void* p)
+{
+ return (unsigned)(uintptr_t)p;
+}
+
+static int compare_ptr(void* a, void* b)
+{
+ if(a == b)
+ return 0;
+ else if(a < b)
+ return -1;
+ else
+ return 1;
+}
+
+static boolean debug_serial(void* p, unsigned* pserial)
+{
+ unsigned serial;
+ boolean found = TRUE;
+ pipe_mutex_lock(serials_mutex);
+ if(!serials_hash)
+ serials_hash = util_hash_table_create(hash_ptr, compare_ptr);
+ serial = (unsigned)(uintptr_t)util_hash_table_get(serials_hash, p);
+ if(!serial)
+ {
+ /* time to stop logging... (you'll have a 100 GB logfile at least at this point)
+ * TODO: avoid this
+ */
+ serial = ++serials_last;
+ if(!serial)
+ {
+ debug_error("More than 2^32 objects detected, aborting.\n");
+ os_abort();
+ }
+
+ util_hash_table_set(serials_hash, p, (void*)(uintptr_t)serial);
+ found = FALSE;
+ }
+ pipe_mutex_unlock(serials_mutex);
+ *pserial = serial;
+ return found;
+}
+
+static void debug_serial_delete(void* p)
+{
+ pipe_mutex_lock(serials_mutex);
+ util_hash_table_remove(serials_hash, p);
+ pipe_mutex_unlock(serials_mutex);
+}
+
+#define STACK_LEN 64
+
+static void dump_stack(const char* symbols[STACK_LEN])
+{
+ unsigned i;
+ for(i = 0; i < STACK_LEN; ++i)
+ {
+ if(symbols[i])
+ os_stream_printf(stream, "%s\n", symbols[i]);
+ }
+ os_stream_write(stream, "\n", 1);
+}
+
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+ if(debug_refcnt_state < 0)
+ return;
+
+ if(!debug_refcnt_state)
+ {
+ const char* filename = debug_get_option("GALLIUM_REFCNT_LOG", NULL);
+ if(filename && filename[0])
+ stream = os_file_stream_create(filename);
+
+ if(stream)
+ debug_refcnt_state = 1;
+ else
+ debug_refcnt_state = -1;
+ }
+
+ if(debug_refcnt_state > 0)
+ {
+ struct debug_stack_frame frames[STACK_LEN];
+ const char* symbols[STACK_LEN];
+ char buf[1024];
+
+ unsigned i;
+ unsigned refcnt = p->count;
+ unsigned serial;
+ boolean existing = debug_serial((void*)p, &serial);
+
+ debug_backtrace_capture(frames, 1, STACK_LEN);
+ for(i = 0; i < STACK_LEN; ++i)
+ {
+ if(frames[i].function)
+ symbols[i] = debug_symbol_name_cached(frames[i].function);
+ else
+ symbols[i] = 0;
+ }
+
+ get_desc(buf, p);
+
+ if(!existing)
+ {
+ os_stream_printf(stream, "<%s> %p %u Create\n", buf, p, serial);
+ dump_stack(symbols);
+
+ /* this is there to provide a gradual change even if we don't see the initialization */
+ for(i = 1; i <= refcnt - change; ++i)
+ {
+ os_stream_printf(stream, "<%s> %p %u AddRef %u\n", buf, p, serial, i);
+ dump_stack(symbols);
+ }
+ }
+
+ if(change)
+ {
+ os_stream_printf(stream, "<%s> %p %u %s %u\n", buf, p, serial, change > 0 ? "AddRef" : "Release", refcnt);
+ dump_stack(symbols);
+ }
+
+ if(!refcnt)
+ {
+ debug_serial_delete((void*)p);
+ os_stream_printf(stream, "<%s> %p %u Destroy\n", buf, p, serial);
+ dump_stack(symbols);
+ }
+
+ os_stream_flush(stream);
+ }
+}
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
new file mode 100644
index 0000000000..bea2d1c478
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DEBUG_REFCNT_H_
+#define U_DEBUG_REFCNT_H_
+
+#include <pipe/p_config.h>
+#include <pipe/p_state.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*debug_reference_descriptor)(char*, const struct pipe_reference*);
+
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
+extern int debug_refcnt_state;
+
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
+
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+ if (debug_refcnt_state >= 0)
+ debug_reference_slowpath(p, get_desc, change);
+}
+
+#else
+
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_REFCNT_H_ */
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 6e250575d6..332952af88 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -33,9 +33,12 @@
*/
#include "pipe/p_compiler.h"
+#include "os/os_thread.h"
+#include "u_string.h"
#include "u_debug.h"
#include "u_debug_symbol.h"
+#include "u_hash_table.h"
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
@@ -113,8 +116,8 @@ BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacem
}
-static INLINE boolean
-debug_symbol_print_imagehlp(const void *addr)
+static INLINE void
+debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
{
HANDLE hProcess;
BYTE symbolBuffer[1024];
@@ -131,25 +134,95 @@ debug_symbol_print_imagehlp(const void *addr)
if(j_SymInitialize(hProcess, NULL, TRUE))
bSymInitialized = TRUE;
}
-
+
if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol))
- return FALSE;
+ buf[0] = 0;
+ else
+ {
+ strncpy(buf, pSymbol->Name, size);
+ buf[size - 1] = 0;
+ }
+}
+#endif
- debug_printf("\t%s\n", pSymbol->Name);
+#ifdef __GLIBC__
+#include <execinfo.h>
- return TRUE;
-
+/* This can only provide dynamic symbols, or binary offsets into a file.
+ *
+ * To fix this, post-process the output with tools/addr2line.sh
+ */
+static INLINE void
+debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
+{
+ char** syms = backtrace_symbols((void**)&addr, 1);
+ strncpy(buf, syms[0], size);
+ buf[size - 1] = 0;
+ free(syms);
}
#endif
-
void
-debug_symbol_print(const void *addr)
+debug_symbol_name(const void *addr, char* buf, unsigned size)
{
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
- if(debug_symbol_print_imagehlp(addr))
+ debug_symbol_name_imagehlp(addr, buf, size);
+ if(buf[0])
return;
#endif
-
- debug_printf("\t%p\n", addr);
+
+#ifdef __GLIBC__
+ debug_symbol_name_glibc(addr, buf, size);
+ if(buf[0])
+ return;
+#endif
+
+ util_snprintf(buf, size, "%p", addr);
+ buf[size - 1] = 0;
+}
+
+void
+debug_symbol_print(const void *addr)
+{
+ char buf[1024];
+ debug_symbol_name(addr, buf, sizeof(buf));
+ debug_printf("\t%s\n", buf);
+}
+
+struct util_hash_table* symbols_hash;
+pipe_mutex symbols_mutex;
+
+static unsigned hash_ptr(void* p)
+{
+ return (unsigned)(uintptr_t)p;
+}
+
+static int compare_ptr(void* a, void* b)
+{
+ if(a == b)
+ return 0;
+ else if(a < b)
+ return -1;
+ else
+ return 1;
+}
+
+const char*
+debug_symbol_name_cached(const void *addr)
+{
+ const char* name;
+ pipe_mutex_lock(symbols_mutex);
+ if(!symbols_hash)
+ symbols_hash = util_hash_table_create(hash_ptr, compare_ptr);
+ name = util_hash_table_get(symbols_hash, (void*)addr);
+ if(!name)
+ {
+ char buf[1024];
+ debug_symbol_name(addr, buf, sizeof(buf));
+ name = strdup(buf);
+
+ util_hash_table_set(symbols_hash, (void*)addr, (void*)name);
+ }
+ pipe_mutex_unlock(symbols_mutex);
+ return name;
}
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.h b/src/gallium/auxiliary/util/u_debug_symbol.h
index 021586987b..b247706c2a 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.h
+++ b/src/gallium/auxiliary/util/u_debug_symbol.h
@@ -43,8 +43,13 @@ extern "C" {
void
-debug_symbol_print(const void *addr);
+debug_symbol_name(const void *addr, char* buf, unsigned size);
+
+const char*
+debug_symbol_name_cached(const void *addr);
+void
+debug_symbol_print(const void *addr);
#ifdef __cplusplus
}
diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
index 99f260bf96..fd1bbe5ffd 100644
--- a/src/gallium/auxiliary/util/u_dirty_surfaces.h
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -1,9 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef U_DIRTY_SURFACES_H_
#define U_DIRTY_SURFACES_H_
+#include "pipe/p_state.h"
+
#include "util/u_double_list.h"
#include "util/u_math.h"
+struct pipe_context;
+
typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *);
struct util_dirty_surfaces
diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h
index 2a91ea0f9a..f06d09ef91 100644
--- a/src/gallium/auxiliary/util/u_draw.h
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -31,6 +31,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
static INLINE void
diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h
index 9d1c1713a7..980cadf22d 100644
--- a/src/gallium/auxiliary/util/u_dynarray.h
+++ b/src/gallium/auxiliary/util/u_dynarray.h
@@ -106,6 +106,9 @@ util_dynarray_trim(struct util_dynarray *buf)
#define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type)))
#define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
#define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
+#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
+#define util_dynarray_begin(buf) ((buf)->data)
+#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size))
#endif /* U_DYNARRAY_H */
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index b7fe2d3003..6a931a9581 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1255,6 +1255,7 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx,
make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 540305c146..78473bf35a 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -33,6 +33,8 @@
#include "pipe/p_state.h"
#include "pipe/p_screen.h"
#include "util/u_debug.h"
+#include "util/u_debug_describe.h"
+#include "util/u_debug_refcnt.h"
#include "util/u_atomic.h"
#include "util/u_box.h"
#include "util/u_math.h"
@@ -67,7 +69,9 @@ pipe_is_referenced(struct pipe_reference *reference)
* \return TRUE if the object's refcount hits zero and should be destroyed.
*/
static INLINE boolean
-pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+pipe_reference_described(struct pipe_reference *ptr,
+ struct pipe_reference *reference,
+ debug_reference_descriptor get_desc)
{
boolean destroy = FALSE;
@@ -76,6 +80,7 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (reference) {
assert(pipe_is_referenced(reference));
p_atomic_inc(&reference->count);
+ debug_reference(reference, get_desc, 1);
}
if (ptr) {
@@ -83,41 +88,49 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (p_atomic_dec_zero(&ptr->count)) {
destroy = TRUE;
}
+ debug_reference(ptr, get_desc, -1);
}
}
return destroy;
}
+static INLINE boolean
+pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+{
+ return pipe_reference_described(ptr, reference,
+ (debug_reference_descriptor)debug_describe_reference);
+}
static INLINE void
pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
{
struct pipe_surface *old_surf = *ptr;
- if (pipe_reference(&(*ptr)->reference, &surf->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &surf->reference,
+ (debug_reference_descriptor)debug_describe_surface))
old_surf->texture->screen->tex_surface_destroy(old_surf);
*ptr = surf;
}
-
static INLINE void
pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
{
struct pipe_resource *old_tex = *ptr;
- if (pipe_reference(&(*ptr)->reference, &tex->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &tex->reference,
+ (debug_reference_descriptor)debug_describe_resource))
old_tex->screen->resource_destroy(old_tex->screen, old_tex);
*ptr = tex;
}
-
static INLINE void
pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
{
struct pipe_sampler_view *old_view = *ptr;
- if (pipe_reference(&(*ptr)->reference, &view->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &view->reference,
+ (debug_reference_descriptor)debug_describe_sampler_view))
old_view->context->sampler_view_destroy(old_view->context, old_view);
*ptr = view;
}
diff --git a/src/gallium/auxiliary/util/u_linkage.c b/src/gallium/auxiliary/util/u_linkage.c
new file mode 100644
index 0000000000..2f6f41ba84
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.c
@@ -0,0 +1,149 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_linkage.h"
+
+/* we must only record the registers that are actually used, not just declared */
+static INLINE boolean
+util_semantic_set_test_and_set(struct util_semantic_set *set, unsigned value)
+{
+ unsigned mask = 1 << (value % (sizeof(long) * 8));
+ unsigned long *p = &set->masks[value / (sizeof(long) * 8)];
+ unsigned long v = *p & mask;
+ *p |= mask;
+ return !!v;
+}
+
+unsigned
+util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file)
+{
+ struct tgsi_shader_info info;
+ struct tgsi_parse_context parse;
+ unsigned count = 0;
+ ubyte *semantic_name;
+ ubyte *semantic_index;
+
+ tgsi_scan_shader(tokens, &info);
+
+ if(file == TGSI_FILE_INPUT)
+ {
+ semantic_name = info.input_semantic_name;
+ semantic_index = info.input_semantic_index;
+ }
+ else if(file == TGSI_FILE_OUTPUT)
+ {
+ semantic_name = info.output_semantic_name;
+ semantic_index = info.output_semantic_index;
+ }
+ else
+ {
+ assert(0);
+ semantic_name = NULL;
+ semantic_index = NULL;
+ }
+
+ tgsi_parse_init(&parse, tokens);
+
+ memset(set->masks, 0, sizeof(set->masks));
+ while(!tgsi_parse_end_of_tokens(&parse))
+ {
+ tgsi_parse_token(&parse);
+
+ if(parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION)
+ {
+ const struct tgsi_full_instruction *finst = &parse.FullToken.FullInstruction;
+ unsigned i;
+ for(i = 0; i < finst->Instruction.NumDstRegs; ++i)
+ {
+ if(finst->Dst[i].Register.File == file)
+ {
+ unsigned idx = finst->Dst[i].Register.Index;
+ if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
+ {
+ if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
+ ++count;
+ }
+ }
+ }
+
+ for(i = 0; i < finst->Instruction.NumSrcRegs; ++i)
+ {
+ if(finst->Src[i].Register.File == file)
+ {
+ unsigned idx = finst->Src[i].Register.Index;
+ if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
+ {
+ if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
+ ++count;
+ }
+ }
+ }
+ }
+ }
+ tgsi_parse_free(&parse);
+
+ return count;
+}
+
+#define UTIL_SEMANTIC_SET_FOR_EACH(i, set) for(i = 0; i < 256; ++i) if(set->masks[i / (sizeof(long) * 8)] & (1 << (i % (sizeof(long) * 8))))
+
+void
+util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots)
+{
+ int first = -1;
+ int last = -1;
+ unsigned i;
+
+ memset(layout, 0xff, num_slots);
+
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ {
+ if(first < 0)
+ first = i;
+ last = i;
+ }
+
+ if(last < efficient_slots)
+ {
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ layout[i] = i;
+ }
+ else if((last - first) < efficient_slots)
+ {
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ layout[i - first] = i;
+ }
+ else
+ {
+ unsigned idx = 0;
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ layout[idx++] = i;
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h
new file mode 100644
index 0000000000..4720e0ee60
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.h
@@ -0,0 +1,66 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_LINKAGE_H_
+#define U_LINKAGE_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+
+struct util_semantic_set
+{
+ unsigned long masks[256 / 8 / sizeof(unsigned long)];
+};
+
+static INLINE bool
+util_semantic_set_contains(struct util_semantic_set *set, unsigned char value)
+{
+ return !!(set->masks[value / (sizeof(long) * 8)] & (1 << (value / (sizeof(long) * 8))));
+}
+
+unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file);
+
+/* efficient_slots is the number of slots such that hardware performance is
+ * the same for using that amount, with holes, or less slots but with less
+ * holes.
+ *
+ * num_slots is the size of the layout array and hardware limit instead.
+ *
+ * efficient_slots == 0 or efficient_solts == num_slots are typical settings.
+ */
+void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots);
+
+static INLINE void
+util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
+{
+ int i;
+ memset(table, 0xff, sizeof(table));
+
+ for(i = 0; i < num_slots; ++i)
+ table[layout[i]] = first_slot_value + i;
+}
+
+#endif /* U_LINKAGE_H_ */
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index fe19466436..69a7681494 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -361,16 +361,6 @@ util_is_inf_or_nan(float x)
/**
- * Test whether x is a power of two.
- */
-static INLINE boolean
-util_is_pot(unsigned x)
-{
- return (x & (x - 1)) == 0;
-}
-
-
-/**
* Find first bit set in word. Least significant bit is 1.
* Return 0 if no bits set.
*/
@@ -566,6 +556,9 @@ util_bswap16(uint16_t n)
#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C )
#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
+#define MIN4( A, B, C, D ) MIN2( MIN2( A, B ), MIN2(C, D) )
+#define MAX4( A, B, C, D ) MAX2( MAX2( A, B ), MAX2(C, D) )
+
/**
* Align a value, only works pot alignemnts.
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 5f113f742b..aae8b8bdf1 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -42,12 +42,18 @@
#include "util/u_math.h"
-
+/**
+ * Helper union for packing pixel values.
+ * Will often contain values in formats which are too complex to be described
+ * in simple terms, hence might just effectively contain a number of bytes.
+ * Must be big enough to hold data for all formats (currently 256 bits).
+ */
union util_color {
ubyte ub;
ushort us;
uint ui;
float f[4];
+ double d[4];
};
/**
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 9bbcf1c8c4..56fcfac069 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -32,6 +32,7 @@
#include "util/u_format.h"
#include "util/u_rect.h"
+#include "util/u_pack_color.h"
/**
@@ -94,7 +95,7 @@ util_fill_rect(ubyte * dst,
unsigned dst_y,
unsigned width,
unsigned height,
- uint32_t value)
+ union util_color *uc)
{
unsigned i, j;
unsigned width_size;
@@ -110,40 +111,54 @@ util_fill_rect(ubyte * dst,
dst_y /= blockheight;
width = (width + blockwidth - 1)/blockwidth;
height = (height + blockheight - 1)/blockheight;
-
+
dst += dst_x * blocksize;
dst += dst_y * dst_stride;
width_size = width * blocksize;
-
+
switch (blocksize) {
case 1:
if(dst_stride == width_size)
- memset(dst, (ubyte) value, height * width_size);
+ memset(dst, uc->ub, height * width_size);
else {
- for (i = 0; i < height; i++) {
- memset(dst, (ubyte) value, width_size);
- dst += dst_stride;
- }
+ for (i = 0; i < height; i++) {
+ memset(dst, uc->ub, width_size);
+ dst += dst_stride;
+ }
}
break;
case 2:
for (i = 0; i < height; i++) {
- uint16_t *row = (uint16_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = (uint16_t) value;
- dst += dst_stride;
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = uc->us;
+ dst += dst_stride;
}
break;
case 4:
for (i = 0; i < height; i++) {
- uint32_t *row = (uint32_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = value;
- dst += dst_stride;
+ uint32_t *row = (uint32_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = uc->ui;
+ dst += dst_stride;
+ }
+ break;
+ case 8:
+ case 12:
+ case 16:
+ case 24:
+ case 32:
+ for (i = 0; i < height; i++) {
+ ubyte *row = dst;
+ for (j = 0; j < width; j++) {
+ memcpy(row, uc, blocksize);
+ row += blocksize;
+ }
+ dst += dst_stride;
}
break;
default:
- assert(0);
- break;
+ assert(0);
+ break;
}
}
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index 40d57e662d..4cb90d3c31 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -26,17 +26,67 @@
**************************************************************************/
-/**
- * Pipe copy/fill rect helpers.
+#ifndef U_RECT_H
+#define U_RECT_H
+
+#include "pipe/p_compiler.h"
+
+struct u_rect {
+ int x0, x1;
+ int y0, y1;
+};
+
+/* Do two rectangles intersect?
*/
+static INLINE boolean
+u_rect_test_intersection(const struct u_rect *a,
+ const struct u_rect *b)
+{
+ return (!(a->x1 < b->x0 ||
+ b->x1 < a->x0 ||
+ a->y1 < b->y0 ||
+ b->y1 < a->y0));
+}
+/* Find the intersection of two rectangles known to intersect.
+ */
+static INLINE void
+u_rect_find_intersection(const struct u_rect *a,
+ struct u_rect *b)
+{
+ /* Caller should verify intersection exists before calling.
+ */
+ if (b->x0 < a->x0) b->x0 = a->x0;
+ if (b->x1 > a->x1) b->x1 = a->x1;
+ if (b->y0 < a->y0) b->y0 = a->y0;
+ if (b->y1 > a->y1) b->y1 = a->y1;
+}
-#ifndef U_RECT_H
-#define U_RECT_H
+static INLINE void
+u_rect_possible_intersection(const struct u_rect *a,
+ struct u_rect *b)
+{
+ if (u_rect_test_intersection(a,b)) {
+ u_rect_find_intersection(a,b);
+ }
+ else {
+ b->x0 = b->x1 = b->y0 = b->y1 = 0;
+ }
+}
#include "pipe/p_format.h"
+#include "util/u_pack_color.h"
+
+
+
+/**********************************************************************
+ * Pipe copy/fill rect helpers.
+ */
+/* These really should move to a different file:
+ */
+#include "pipe/p_format.h"
extern void
util_copy_rect(ubyte * dst, enum pipe_format format,
@@ -47,7 +97,7 @@ util_copy_rect(ubyte * dst, enum pipe_format format,
extern void
util_fill_rect(ubyte * dst, enum pipe_format format,
unsigned dst_stride, unsigned dst_x, unsigned dst_y,
- unsigned width, unsigned height, uint32_t value);
+ unsigned width, unsigned height, union util_color *uc);
#endif /* U_RECT_H */
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 5b682f496c..58ef68377f 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -37,6 +37,7 @@
#include "pipe/p_context.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
#include "util/u_simple_shaders.h"
#include "util/u_debug.h"
#include "tgsi/tgsi_ureg.h"
diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
index 206e1ec311..7f80fc1270 100644
--- a/src/gallium/auxiliary/util/u_split_prim.h
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -1,5 +1,12 @@
/* Originally written by Ben Skeggs for the nv50 driver*/
-#include <pipe/p_defines.h>
+
+#ifndef U_SPLIT_PRIM_H
+#define U_SPLIT_PRIM_H
+
+#include "pipe/p_defines.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_debug.h"
struct util_split_prim {
void *priv;
@@ -48,7 +55,7 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
}
}
- if (s->p_start + s->close_first + max_verts >= s->p_end) {
+ if ((s->p_end - s->p_start) + s->close_first <= max_verts) {
s->emit(s->priv, s->p_start, s->p_end - s->p_start);
if (s->close_first)
s->emit(s->priv, s->start, 1);
@@ -103,3 +110,5 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
s->p_start += (max_verts - repeat);
return FALSE;
}
+
+#endif /* U_SPLIT_PRIM_H */
diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
index 607c31f5ee..c5d68f8df8 100644
--- a/src/gallium/auxiliary/util/u_staging.c
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "util/u_staging.h"
#include "pipe/p_context.h"
#include "util/u_memory.h"
@@ -8,7 +34,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
{
memset(template, 0, sizeof(struct pipe_resource));
if(pt->target != PIPE_BUFFER && depth <= 1)
- template->target = PIPE_TEXTURE_2D;
+ template->target = PIPE_TEXTURE_RECT;
else
template->target = pt->target;
template->format = pt->format;
@@ -23,20 +49,16 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
}
struct util_staging_transfer *
-util_staging_transfer_new(struct pipe_context *pipe,
+util_staging_transfer_init(struct pipe_context *pipe,
struct pipe_resource *pt,
struct pipe_subresource sr,
unsigned usage,
const struct pipe_box *box,
- bool direct)
+ bool direct, struct util_staging_transfer *tx)
{
struct pipe_screen *pscreen = pipe->screen;
- struct util_staging_transfer *tx;
- struct pipe_resource staging_resource_template;
- tx = CALLOC_STRUCT(util_staging_transfer);
- if (!tx)
- return NULL;
+ struct pipe_resource staging_resource_template;
pipe_resource_reference(&tx->base.resource, pt);
tx->base.sr = sr;
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
index 602faa2971..1aab78cc88 100644
--- a/src/gallium/auxiliary/util/u_staging.h
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
/* Direct3D 10/11 has no concept of transfers. Applications instead
* create resources with a STAGING or DYNAMIC usage, copy between them
* and the real resource and use Map to map the STAGING/DYNAMIC resource.
@@ -21,15 +47,15 @@ struct util_staging_transfer {
};
/* user must be stride, slice_stride and offset */
-/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */
-/* staging resource is currently created with PIPE_USAGE_DYNAMIC */
+/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */
+/* staging resource is currently created with PIPE_USAGE_STAGING */
struct util_staging_transfer *
-util_staging_transfer_new(struct pipe_context *pipe,
+util_staging_transfer_init(struct pipe_context *pipe,
struct pipe_resource *pt,
struct pipe_subresource sr,
unsigned usage,
const struct pipe_box *box,
- bool direct);
+ bool direct, struct util_staging_transfer *tx);
void
util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index cab7691c70..af99163b2e 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -216,7 +216,7 @@ util_clear_render_target(struct pipe_context *pipe,
assert(dst->texture);
if (!dst->texture)
return;
- util_pack_color(rgba, dst->texture->format, &uc);
+
dst_trans = pipe_get_transfer(pipe,
dst->texture,
dst->face,
@@ -232,46 +232,10 @@ util_clear_render_target(struct pipe_context *pipe,
if (dst_map) {
assert(dst_trans->stride > 0);
- switch (util_format_get_blocksize(dst->texture->format)) {
- case 1:
- case 2:
- case 4:
- util_pack_color(rgba, dst->texture->format, &uc);
- util_fill_rect(dst_map, dst->texture->format,
- dst_trans->stride,
- 0, 0, width, height, uc.ui);
- break;
- case 8:
- {
- /* expand the 4-byte clear value to an 8-byte value */
- /* should probably not convert back from ubyte but not
- sure what this code really achieved since it doesn't even
- check for format type... */
- ushort *row = (ushort *) dst_map;
- ushort val0 = UBYTE_TO_USHORT((uc.ui >> 0) & 0xff);
- ushort val1 = UBYTE_TO_USHORT((uc.ui >> 8) & 0xff);
- ushort val2 = UBYTE_TO_USHORT((uc.ui >> 16) & 0xff);
- ushort val3 = UBYTE_TO_USHORT((uc.ui >> 24) & 0xff);
- unsigned i, j;
- val0 = (val0 << 8) | val0;
- val1 = (val1 << 8) | val1;
- val2 = (val2 << 8) | val2;
- val3 = (val3 << 8) | val3;
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- row[j*4+0] = val0;
- row[j*4+1] = val1;
- row[j*4+2] = val2;
- row[j*4+3] = val3;
- }
- row += dst_trans->stride/2;
- }
- }
- break;
- default:
- assert(0);
- break;
- }
+ util_pack_color(rgba, dst->texture->format, &uc);
+ util_fill_rect(dst_map, dst->texture->format,
+ dst_trans->stride,
+ 0, 0, width, height, &uc);
}
pipe->transfer_unmap(pipe, dst_trans);
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
index 7733ad24d0..404e121995 100644
--- a/src/gallium/auxiliary/util/u_surfaces.c
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "u_surfaces.h"
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index af978c7057..17d8a5d3a5 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef U_SURFACES_H_
#define U_SURFACES_H_
@@ -22,7 +48,7 @@ struct pipe_surface *util_surfaces_do_get(struct util_surfaces *us, unsigned sur
static INLINE struct pipe_surface *
util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
{
- if(likely(pt->target == PIPE_TEXTURE_2D && us->u.array))
+ if(likely((pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT) && us->u.array))
{
struct pipe_surface *ps = us->u.array[level];
if(ps)
@@ -52,7 +78,7 @@ void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
static INLINE void
util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
{
- if(likely(ps->texture->target == PIPE_TEXTURE_2D))
+ if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT))
{
us->u.array[ps->level] = 0;
return;
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index 986eee0743..558351d0ce 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -29,7 +29,10 @@
#define P_TILE_H
#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+struct pipe_context;
struct pipe_transfer;
/**
diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h
index eb07945d15..e3a38730f2 100644
--- a/src/gallium/auxiliary/util/u_transfer.h
+++ b/src/gallium/auxiliary/util/u_transfer.h
@@ -8,6 +8,7 @@
#include "pipe/p_state.h"
struct pipe_context;
+struct winsys_handle;
boolean u_default_resource_get_handle(struct pipe_screen *screen,
struct pipe_resource *resource,
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index a124924fc8..de016df02e 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,11 +32,8 @@
#ifndef U_UPLOAD_MGR_H
#define U_UPLOAD_MGR_H
-#include "pipe/p_defines.h"
-
-struct pipe_screen;
+struct pipe_context;
struct pipe_resource;
-struct u_upload_mgr;
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index f241411a00..8250c30f2a 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -63,7 +63,9 @@ objects. They all follow simple, one-method binding calls, e.g.
* ``set_scissor_state`` sets the bounds for the scissor test, which culls
pixels before blending to render targets. If the :ref:`Rasterizer` does
not have the scissor test enabled, then the scissor bounds never need to
- be set since they will not be used.
+ be set since they will not be used. Note that scissor xmin and ymin are
+ inclusive, but xmax and ymax are exclusive. The inclusive ranges in x
+ and y would be [xmin..xmax-1] and [ymin..ymax-1].
* ``set_viewport_state``
diff --git a/src/gallium/docs/source/debugging.rst b/src/gallium/docs/source/debugging.rst
index 42bda5aee9..e081cbf74e 100644
--- a/src/gallium/docs/source/debugging.rst
+++ b/src/gallium/docs/source/debugging.rst
@@ -21,6 +21,10 @@ This option controls if the debug variables should be printed to stderr. This
is probably the most useful variable, since it allows you to find which
variables a driver uses.
+.. envvar:: GALLIUM_GALAHAD <bool> (false)
+
+Controls if the :ref:`galahad` sanity checker module should be used.
+
.. envvar:: GALLIUM_RBUG <bool> (false)
Controls if the :ref:`rbug` should be used.
diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst
index 70d75b51e6..08c8eab890 100644
--- a/src/gallium/docs/source/distro.rst
+++ b/src/gallium/docs/source/distro.rst
@@ -79,6 +79,15 @@ Rbug
Wrapper driver. :ref:`rbug` driver used with stand alone rbug-gui.
+.. _galahad:
+
+Galahad
+^^^^^^^
+
+Wrapper driver. Sanity checker for the internal gallium state. Normally
+a driver should n't have to sanity check the input it gets from a state
+tracker. Any wrong state received should be perceived as a state tracker bug.
+
State Trackers
--------------
diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst
index 6c19842dac..2a73e3ab59 100644
--- a/src/gallium/docs/source/index.rst
+++ b/src/gallium/docs/source/index.rst
@@ -15,6 +15,7 @@ Contents:
debugging
tgsi
screen
+ resources
context
cso
distro
diff --git a/src/gallium/docs/source/resources.rst b/src/gallium/docs/source/resources.rst
new file mode 100644
index 0000000000..c8a5766821
--- /dev/null
+++ b/src/gallium/docs/source/resources.rst
@@ -0,0 +1,195 @@
+Resources and derived objects
+=============================
+
+Resources represent objects that hold data: textures and buffers.
+
+They are mostly modelled after the resources in Direct3D 10/11, but with a
+different transfer/update mechanism, and more features for OpenGL support.
+
+Resources can be used in several ways, and it is required to specify all planned uses through an appropriate set of bind flags.
+
+TODO: write much more on resources
+
+Transfers
+---------
+
+Transfers are the mechanism used to access resources with the CPU.
+
+OpenGL: OpenGL supports mapping buffers and has inline transfer functions for both buffers and textures
+
+D3D11: D3D11 lacks transfers, but has special resource types that are mappable to the CPU address space
+
+TODO: write much more on transfers
+
+Resource targets
+----------------
+
+Resource targets determine the type of a resource.
+
+Note that drivers may not actually have the restrictions listed regarding
+coordinate normalization and wrap modes, and in fact efficient OpenCL
+support will probably require drivers that don't have any of them, which
+will probably be advertised with an appropriate cap.
+
+TODO: document all targets. Note that both 3D and cube have restrictions
+that depend on the hardware generation.
+
+TODO: can buffers have a non-R8 format?
+
+PIPE_BUFFER
+^^^^^^^^^^^
+
+Buffer resource: can be used as a vertex, index, constant buffer (appropriate bind flags must be requested).
+
+They can be bound to stream output if supported.
+TODO: what about the restrictions lifted by the several later GL transform feedback extensions? How does one advertise that in Gallium?
+
+They can be also be bound to a shader stage as usual.
+TODO: are all drivers supposed to support this? how does this work exactly? are there size limits?
+
+They can be also be bound to the framebuffer as usual.
+TODO: are all drivers supposed to support this? how does this work exactly? are there size limits?
+TODO: is there any chance of supporting GL pixel buffer object acceleration with this?
+
+- depth0 must be 1
+- last_level must be 0
+- TODO: what about normalization?
+- TODO: wrap modes/other sampling state?
+- TODO: are arbitrary formats supported? in which cases?
+
+OpenGL: vertex buffers in GL 1.5 or GL_ARB_vertex_buffer_object
+
+- Binding to stream out requires GL 3.0 or GL_NV_transform_feedback
+- Binding as constant buffers requires GL 3.1 or GL_ARB_uniform_buffer_object
+- Binding to a sampling stage requires GL 3.1 or GL_ARB_texture_buffer_object
+- TODO: can they be bound to an FBO?
+
+D3D11: buffer resources
+- Binding to a render target requires D3D_FEATURE_LEVEL_10_0
+
+PIPE_TEXTURE_1D
+^^^^^^^^^^^^^^^
+1D surface accessed with normalized coordinates.
+
+UNIMPLEMENTED: 1D texture arrays not supported
+
+- If PIPE_CAP_NPOT_TEXTURES is not supported,
+ width must be a power of two
+- height0 must be 1
+- depth0 must be 1
+- Mipmaps can be used
+- Must use normalized coordinates
+
+OpenGL: GL_TEXTURE_1D in GL 1.0
+
+- PIPE_CAP_NPOT_TEXTURES is equivalent to GL 2.0 or GL_ARB_texture_non_power_of_two
+
+D3D11: 1D textures in D3D_FEATURE_LEVEL_10_0
+
+PIPE_TEXTURE_RECT
+^^^^^^^^^^^^^^^^^
+2D surface with OpenGL GL_TEXTURE_RECTANGLE semantics.
+
+- depth0 must be 1
+- last_level must be 0
+- Must use unnormalized coordinates
+- Must use a clamp wrap mode
+
+OpenGL: GL_TEXTURE_RECTANGLE in GL 3.1 or GL_ARB_texture_rectangle or GL_NV_texture_rectangle
+
+OpenCL: can create OpenCL images based on this, that can then be sampled arbitrarily
+
+D3D11: not supported (only PIPE_TEXTURE_2D with normalized coordinates is supported)
+
+PIPE_TEXTURE_2D
+^^^^^^^^^^^^^^^
+2D surface accessed with normalized coordinates.
+
+UNIMPLEMENTED: 2D texture arrays not supported
+
+- If PIPE_CAP_NPOT_TEXTURES is not supported,
+ width and height must be powers of two
+- depth0 must be 1
+- Mipmaps can be used
+- Must use normalized coordinates
+- No special restrictions on wrap modes
+
+OpenGL: GL_TEXTURE_2D in GL 1.0
+
+- PIPE_CAP_NPOT_TEXTURES is equivalent to GL 2.0 or GL_ARB_texture_non_power_of_two
+
+OpenCL: can create OpenCL images based on this, that can then be sampled arbitrarily
+
+D3D11: 2D textures
+
+- PIPE_CAP_NPOT_TEXTURES is equivalent to D3D_FEATURE_LEVEL_9_3
+
+PIPE_TEXTURE_3D
+^^^^^^^^^^^^^^^
+
+3-dimensional array of texels.
+Mipmap dimensions are reduced in all 3 coordinates.
+
+- If PIPE_CAP_NPOT_TEXTURES is not supported,
+ width, height and depth must be powers of two
+- Must use normalized coordinates
+
+OpenGL: GL_TEXTURE_3D in GL 1.2 or GL_EXT_texture3D
+
+- PIPE_CAP_NPOT_TEXTURES is equivalent to GL 2.0 or GL_ARB_texture_non_power_of_two
+
+D3D11: 3D textures
+
+- PIPE_CAP_NPOT_TEXTURES is equivalent to D3D_FEATURE_LEVEL_10_0
+
+PIPE_TEXTURE_CUBE
+^^^^^^^^^^^^^^^^^
+
+Cube maps consist of 6 2D faces.
+The 6 surfaces form an imaginary cube, and sampling happens by mapping an
+input 3-vector to the point of the cube surface in that direction.
+
+Sampling may be optionally seamless, resulting in filtering taking samples
+from multiple surfaces near to the edge.
+UNIMPLEMENTED: seamless cube map sampling not supported
+
+UNIMPLEMENTED: cube map arrays not supported
+
+- Width and height must be equal
+- If PIPE_CAP_NPOT_TEXTURES is not supported,
+ width and height must be powers of two
+- Must use normalized coordinates
+
+OpenGL: GL_TEXTURE_CUBE_MAP in GL 1.3 or EXT_texture_cube_map
+
+- PIPE_CAP_NPOT_TEXTURES is equivalent to GL 2.0 or GL_ARB_texture_non_power_of_two
+- Seamless cube maps require GL 3.2 or GL_ARB_seamless_cube_map or GL_AMD_seamless_cubemap_per_texture
+- Cube map arrays require GL 4.0 or GL_ARB_texture_cube_map_array
+
+D3D11: 2D array textures with the D3D11_RESOURCE_MISC_TEXTURECUBE flag
+
+- PIPE_CAP_NPOT_TEXTURES is equivalent to D3D_FEATURE_LEVEL_10_0
+- Cube map arrays require D3D_FEATURE_LEVEL_10_1
+- TODO: are (non)seamless cube maps supported in D3D11? how?
+
+Surfaces
+--------
+
+Surfaces are views of a resource that can be bound as a framebuffer to serve as the render target or depth buffer.
+
+TODO: write much more on surfaces
+
+OpenGL: FBOs are collections of surfaces in GL 3.0 or GL_ARB_framebuffer_object
+
+D3D11: render target views and depth/stencil views
+
+Sampler views
+-------------
+
+Sampler views are views of a resource that can be bound to a pipeline stage to be sampled from shaders.
+
+TODO: write much more on sampler views
+
+OpenGL: texture objects are actually sampler view and resource in a single unit
+
+D3D11: shader resource views
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 4adef5b8c0..a367fa3fe1 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -78,20 +78,13 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
draw_set_mapped_vertex_buffer(draw, i, buf);
}
/* Map index buffer, if present */
- if (info->indexed && cell->index_buffer.buffer) {
+ if (info->indexed && cell->index_buffer.buffer)
mapped_indices = cell_resource(cell->index_buffer.buffer)->data;
- mapped_indices += cell->index_buffer.offset;
- }
- draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
- lp->index_buffer.index_size : 0,
- info->index_bias,
- info->min_index,
- info->max_index,
- mapped_indices);
+ draw_set_mapped_index_buffer(draw, mapped_indices);
/* draw! */
- draw_arrays(draw, info->mode, info->start, info->count);
+ draw_vbo(draw, info);
/*
* unmap vertex/index buffers - will cause draw module to flush
@@ -100,7 +93,7 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
if (mapped_indices) {
- draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+ draw_set_mapped_index_buffer(draw, NULL);
}
/*
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index 4e3701cd0a..a065d68b5a 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -102,7 +102,7 @@ cell_set_index_buffer(struct pipe_context *pipe,
else
memset(&cell->index_buffer, 0, sizeof(cell->index_buffer));
- /* TODO make this more like a state */
+ draw_set_index_buffer(cell->draw, ib);
}
diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c
index fe14a287ef..383c448926 100644
--- a/src/gallium/drivers/galahad/glhd_context.c
+++ b/src/gallium/drivers/galahad/glhd_context.c
@@ -185,6 +185,12 @@ galahad_bind_fragment_sampler_states(struct pipe_context *_pipe,
struct galahad_context *glhd_pipe = galahad_context(_pipe);
struct pipe_context *pipe = glhd_pipe->pipe;
+ if (num_samplers > PIPE_MAX_SAMPLERS) {
+ glhd_error("%u fragment samplers requested, "
+ "but only %u are permitted by API",
+ num_samplers, PIPE_MAX_SAMPLERS);
+ }
+
pipe->bind_fragment_sampler_states(pipe,
num_samplers,
samplers);
@@ -198,6 +204,12 @@ galahad_bind_vertex_sampler_states(struct pipe_context *_pipe,
struct galahad_context *glhd_pipe = galahad_context(_pipe);
struct pipe_context *pipe = glhd_pipe->pipe;
+ if (num_samplers > PIPE_MAX_VERTEX_SAMPLERS) {
+ glhd_error("%u vertex samplers requested, "
+ "but only %u are permitted by API",
+ num_samplers, PIPE_MAX_VERTEX_SAMPLERS);
+ }
+
pipe->bind_vertex_sampler_states(pipe,
num_samplers,
samplers);
@@ -447,6 +459,19 @@ galahad_set_constant_buffer(struct pipe_context *_pipe,
struct pipe_resource *unwrapped_resource;
struct pipe_resource *resource = NULL;
+ if (shader >= PIPE_SHADER_TYPES) {
+ glhd_error("Unknown shader type %u", shader);
+ }
+
+ if (index &&
+ index >=
+ pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_CONST_BUFFERS)) {
+ glhd_error("Access to constant buffer %u requested, "
+ "but only %d are supported",
+ index,
+ pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_CONST_BUFFERS));
+ }
+
/* XXX hmm? unwrap the input state */
if (_resource) {
unwrapped_resource = galahad_resource_unwrap(_resource);
@@ -972,5 +997,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
glhd_pipe->pipe = pipe;
+ glhd_warn("Created context %p", glhd_pipe);
+
return &glhd_pipe->base;
}
diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c
index 4117485702..75e4c2d82e 100644
--- a/src/gallium/drivers/galahad/glhd_screen.c
+++ b/src/gallium/drivers/galahad/glhd_screen.c
@@ -30,6 +30,7 @@
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "glhd_public.h"
#include "glhd_screen.h"
@@ -134,6 +135,33 @@ galahad_screen_resource_create(struct pipe_screen *_screen,
struct pipe_screen *screen = glhd_screen->screen;
struct pipe_resource *result;
+ if (templat->target >= PIPE_MAX_TEXTURE_TYPES)
+ glhd_warn("Received bogus resource target %d", templat->target);
+
+ if(templat->target != PIPE_TEXTURE_RECT && templat->target != PIPE_BUFFER && !screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES))
+ {
+ if(!util_is_power_of_two(templat->width0) || !util_is_power_of_two(templat->height0))
+ glhd_warn("Requested NPOT (%ux%u) non-rectangle texture without NPOT support", templat->width0, templat->height0);
+ }
+
+ if(templat->target == PIPE_TEXTURE_RECT && templat->last_level)
+ glhd_warn("Rectangle textures cannot have mipmaps, but last_level = %u", templat->last_level);
+
+ if(templat->target == PIPE_BUFFER && templat->last_level)
+ glhd_warn("Buffers cannot have mipmaps, but last_level = %u", templat->last_level);
+
+ if(templat->target != PIPE_TEXTURE_3D && templat->depth0 != 1)
+ glhd_warn("Only 3D textures can have depth != 1, but received target %u and depth %u", templat->target, templat->depth0);
+
+ if(templat->target == PIPE_TEXTURE_1D && templat->height0 != 1)
+ glhd_warn("1D textures must have height 1 but got asked for height %u", templat->height0);
+
+ if(templat->target == PIPE_BUFFER && templat->height0 != 1)
+ glhd_warn("Buffers must have height 1 but got asked for height %u", templat->height0);
+
+ if(templat->target == PIPE_TEXTURE_CUBE && templat->width0 != templat->height0)
+ glhd_warn("Cube maps must be square, but got asked for %ux%u", templat->width0, templat->height0);
+
result = screen->resource_create(screen,
templat);
@@ -330,5 +358,7 @@ galahad_screen_create(struct pipe_screen *screen)
glhd_screen->screen = screen;
+ glhd_warn("Created screen %p", glhd_screen);
+
return &glhd_screen->base;
}
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 2beb9e3091..847dd6dd47 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -66,18 +66,9 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
/*
* Map index buffer, if present
*/
- if (info->indexed && i915->index_buffer.buffer) {
- char *indices = (char *) i915_buffer(i915->index_buffer.buffer)->data;
- mapped_indices = (void *) (indices + i915->index_buffer.offset);
- }
-
- draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
- i915->index_buffer.index_size : 0,
- info->index_bias,
- info->min_index,
- info->max_index,
- mapped_indices);
-
+ if (info->indexed && i915->index_buffer.buffer)
+ mapped_indices = i915_buffer(i915->index_buffer.buffer)->data;
+ draw_set_mapped_index_buffer(draw, mapped_indices);
draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
i915->current.constants[PIPE_SHADER_VERTEX],
@@ -87,7 +78,7 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
/*
* Do the drawing
*/
- draw_arrays(i915->draw, info->mode, info->start, info->count);
+ draw_vbo(i915->draw, info);
/*
* unmap vertex/index buffers
@@ -96,9 +87,8 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
- if (mapped_indices) {
- draw_set_mapped_element_buffer(draw, 0, 0, NULL);
- }
+ if (mapped_indices)
+ draw_set_mapped_index_buffer(draw, NULL);
}
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index 752ddaae7b..c5c6179b16 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -360,6 +360,7 @@ i915_texture_layout(struct i915_texture * tex)
switch (pt->target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
if (!i9x5_special_layout(tex))
i915_texture_layout_2d(tex);
break;
@@ -605,6 +606,7 @@ i945_texture_layout(struct i915_texture * tex)
switch (pt->target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
if (!i9x5_special_layout(tex))
i945_texture_layout_2d(tex);
break;
@@ -829,7 +831,8 @@ i915_texture_from_handle(struct pipe_screen * screen,
buffer = iws->buffer_from_handle(iws, whandle, &stride);
/* Only supports one type */
- if (template->target != PIPE_TEXTURE_2D ||
+ if ((template->target != PIPE_TEXTURE_2D &&
+ template->target != PIPE_TEXTURE_RECT) ||
template->last_level != 0 ||
template->depth0 != 1) {
return NULL;
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 385c3b2d2d..bbfcff6bc4 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -294,8 +294,6 @@ static void i915_bind_sampler_states(struct pipe_context *pipe,
struct i915_context *i915 = i915_context(pipe);
unsigned i;
- assert(num <= PIPE_MAX_SAMPLERS);
-
/* Check for no-op */
if (num == i915->num_samplers &&
!memcmp(i915->sampler, sampler, num * sizeof(void *)))
@@ -529,9 +527,6 @@ static void i915_set_constant_buffer(struct pipe_context *pipe,
struct i915_context *i915 = i915_context(pipe);
draw_flush(i915->draw);
- assert(shader < PIPE_SHADER_TYPES);
- assert(index == 0);
-
/* Make a copy of shader constants.
* During fragment program translation we may add additional
* constants to the array.
@@ -822,7 +817,8 @@ static void i915_set_index_buffer(struct pipe_context *pipe,
else
memset(&i915->index_buffer, 0, sizeof(i915->index_buffer));
- /* TODO make this more like a state */
+ /* pass-through to draw module */
+ draw_set_index_buffer(i915->draw, ib);
}
static void
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 8b3f46f2c1..e80067f3b1 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -162,7 +162,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
if (batch->ptr - batch->map > batch->buf->size) {
debug_printf("bad relocation ptr %p map %p offset %li size %i\n",
- batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+ batch->ptr, batch->map, (long) (batch->ptr - batch->map), batch->buf->size);
return PIPE_ERROR_OUT_OF_MEMORY;
}
diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c
index ffd0f38672..3860d18a7a 100644
--- a/src/gallium/drivers/i965/brw_resource_texture.c
+++ b/src/gallium/drivers/i965/brw_resource_texture.c
@@ -66,6 +66,7 @@ static GLuint translate_tex_target( unsigned target )
return BRW_SURFACE_1D;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
return BRW_SURFACE_2D;
case PIPE_TEXTURE_3D:
@@ -498,7 +499,8 @@ brw_texture_from_handle(struct pipe_screen *screen,
unsigned pitch;
GLuint format;
- if (template->target != PIPE_TEXTURE_2D ||
+ if ((template->target != PIPE_TEXTURE_2D
+ && template->target != PIPE_TEXTURE_RECT) ||
template->last_level != 0 ||
template->depth0 != 1)
return NULL;
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
index e2767264e7..1b2aa93bef 100644
--- a/src/gallium/drivers/i965/brw_wm_debug.c
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -101,16 +101,16 @@ void brw_wm_print_value( struct brw_wm_compile *c,
debug_printf("undef");
else if( value - c->vreg >= 0 &&
value - c->vreg < BRW_WM_MAX_VREG)
- debug_printf("r%d", value - c->vreg);
+ debug_printf("r%ld", (long) (value - c->vreg));
else if (value - c->creg >= 0 &&
value - c->creg < BRW_WM_MAX_PARAM)
- debug_printf("c%d", value - c->creg);
+ debug_printf("c%ld", (long) (value - c->creg));
else if (value - c->payload.input_interp >= 0 &&
value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS)
- debug_printf("i%d", value - c->payload.input_interp);
+ debug_printf("i%ld", (long) (value - c->payload.input_interp));
else if (value - c->payload.depth >= 0 &&
value - c->payload.depth < PIPE_MAX_SHADER_INPUTS)
- debug_printf("d%d", value - c->payload.depth);
+ debug_printf("d%ld", (long) (value - c->payload.depth));
else
debug_printf("?");
}
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 2892b62920..dec874623e 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -27,6 +27,8 @@ C_SOURCES = \
lp_scene_queue.c \
lp_screen.c \
lp_setup.c \
+ lp_setup_coef.c \
+ lp_setup_coef_intrin.c \
lp_setup_line.c \
lp_setup_point.c \
lp_setup_tri.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 5583fca38e..8d57db72cf 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -63,6 +63,8 @@ llvmpipe = env.ConvenienceLibrary(
'lp_setup_line.c',
'lp_setup_point.c',
'lp_setup_tri.c',
+ 'lp_setup_coef.c',
+ 'lp_setup_coef_intrin.c',
'lp_setup_vbuf.c',
'lp_state_blend.c',
'lp_state_clip.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 7543bd7b2b..39f2c6085e 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -85,6 +85,14 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
align_free( llvmpipe );
}
+static void
+do_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ llvmpipe_flush(pipe, flags, fence, __FUNCTION__);
+}
+
struct pipe_context *
llvmpipe_create_context( struct pipe_screen *screen, void *priv )
@@ -109,7 +117,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
llvmpipe->pipe.destroy = llvmpipe_destroy;
llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
llvmpipe->pipe.clear = llvmpipe_clear;
- llvmpipe->pipe.flush = llvmpipe_flush;
+ llvmpipe->pipe.flush = do_flush;
llvmpipe_init_blend_funcs(llvmpipe);
llvmpipe_init_clip_funcs(llvmpipe);
@@ -147,9 +155,13 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe);
draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe);
- /* convert points and lines into triangles: */
- draw_wide_point_threshold(llvmpipe->draw, 0.0);
- draw_wide_line_threshold(llvmpipe->draw, 0.0);
+ /* convert points and lines into triangles:
+ * (otherwise, draw points and lines natively)
+ */
+ draw_wide_point_sprites(llvmpipe->draw, FALSE);
+ draw_enable_point_sprites(llvmpipe->draw, FALSE);
+ draw_wide_point_threshold(llvmpipe->draw, 10000.0);
+ draw_wide_line_threshold(llvmpipe->draw, 10000.0);
#if USE_DRAW_STAGE_PSTIPPLE
/* Do polygon stipple w/ texture map + frag prog? */
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 50f9091c3c..34fa20e204 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -101,6 +101,9 @@ struct llvmpipe_context {
/** Vertex format */
struct vertex_info vertex_info;
+
+ /** Which vertex shader output slot contains point size */
+ int psize_slot;
/** Fragment shader input interpolation info */
unsigned num_inputs;
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
index 92fb2b3ee5..a928ee38be 100644
--- a/src/gallium/drivers/llvmpipe/lp_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -46,6 +46,8 @@ st_print_current(void);
#define DEBUG_SHOW_TILES 0x200
#define DEBUG_SHOW_SUBTILES 0x400
#define DEBUG_COUNTERS 0x800
+#define DEBUG_SCENE 0x1000
+#define DEBUG_FENCE 0x2000
#ifdef DEBUG
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index e73b431cb4..3af5c8d5c5 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -68,25 +68,17 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
/* Map index buffer, if present */
- if (info->indexed && lp->index_buffer.buffer) {
- char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer);
- mapped_indices = (void *) (indices + lp->index_buffer.offset);
- }
+ if (info->indexed && lp->index_buffer.buffer)
+ mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer);
- draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
- lp->index_buffer.index_size : 0,
- info->index_bias,
- info->min_index,
- info->max_index,
- mapped_indices);
+ draw_set_mapped_index_buffer(draw, mapped_indices);
llvmpipe_prepare_vertex_sampling(lp,
lp->num_vertex_sampler_views,
lp->vertex_sampler_views);
/* draw! */
- draw_arrays_instanced(draw, info->mode, info->start, info->count,
- info->start_instance, info->instance_count);
+ draw_vbo(draw, info);
/*
* unmap vertex/index buffers
@@ -95,7 +87,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
if (mapped_indices) {
- draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+ draw_set_mapped_index_buffer(draw, NULL);
}
llvmpipe_cleanup_vertex_sampling(lp);
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index f9805e5d68..3a55e76bc3 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -44,6 +44,7 @@
struct lp_fence *
lp_fence_create(unsigned rank)
{
+ static int fence_id;
struct lp_fence *fence = CALLOC_STRUCT(lp_fence);
pipe_reference_init(&fence->reference, 1);
@@ -51,8 +52,12 @@ lp_fence_create(unsigned rank)
pipe_mutex_init(fence->mutex);
pipe_condvar_init(fence->signalled);
+ fence->id = fence_id++;
fence->rank = rank;
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
return fence;
}
@@ -61,6 +66,9 @@ lp_fence_create(unsigned rank)
void
lp_fence_destroy(struct lp_fence *fence)
{
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
pipe_mutex_destroy(fence->mutex);
pipe_condvar_destroy(fence->signalled);
FREE(fence);
@@ -68,82 +76,49 @@ lp_fence_destroy(struct lp_fence *fence)
/**
- * For reference counting.
- * This is a Gallium API function.
- */
-static void
-llvmpipe_fence_reference(struct pipe_screen *screen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *fence)
-{
- struct lp_fence **old = (struct lp_fence **) ptr;
- struct lp_fence *f = (struct lp_fence *) fence;
-
- lp_fence_reference(old, f);
-}
-
-
-/**
- * Has the fence been executed/finished?
- * This is a Gallium API function.
- */
-static int
-llvmpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flag)
-{
- struct lp_fence *f = (struct lp_fence *) fence;
-
- return f->count == f->rank;
-}
-
-
-/**
- * Wait for the fence to finish.
- * This is a Gallium API function.
- */
-static int
-llvmpipe_fence_finish(struct pipe_screen *screen,
- struct pipe_fence_handle *fence_handle,
- unsigned flag)
-{
- struct lp_fence *fence = (struct lp_fence *) fence_handle;
-
- pipe_mutex_lock(fence->mutex);
- while (fence->count < fence->rank) {
- pipe_condvar_wait(fence->signalled, fence->mutex);
- }
- pipe_mutex_unlock(fence->mutex);
-
- return 0;
-}
-
-
-/**
* Called by the rendering threads to increment the fence counter.
* When the counter == the rank, the fence is finished.
*/
void
lp_fence_signal(struct lp_fence *fence)
{
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
pipe_mutex_lock(fence->mutex);
fence->count++;
assert(fence->count <= fence->rank);
- LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
- fence->count, fence->rank);
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s count=%u rank=%u\n", __FUNCTION__,
+ fence->count, fence->rank);
- pipe_condvar_signal(fence->signalled);
+ /* Wakeup all threads waiting on the mutex:
+ */
+ pipe_condvar_broadcast(fence->signalled);
pipe_mutex_unlock(fence->mutex);
}
+boolean
+lp_fence_signalled(struct lp_fence *f)
+{
+ return f->count == f->rank;
+}
void
-llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen)
+lp_fence_wait(struct lp_fence *f)
{
- screen->fence_reference = llvmpipe_fence_reference;
- screen->fence_signalled = llvmpipe_fence_signalled;
- screen->fence_finish = llvmpipe_fence_finish;
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, f->id);
+
+ pipe_mutex_lock(f->mutex);
+ assert(f->issued);
+ while (f->count < f->rank) {
+ pipe_condvar_wait(f->signalled, f->mutex);
+ }
+ pipe_mutex_unlock(f->mutex);
}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
index 13358fb99f..3c59118780 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -41,10 +41,12 @@ struct pipe_screen;
struct lp_fence
{
struct pipe_reference reference;
+ unsigned id;
pipe_mutex mutex;
pipe_condvar signalled;
+ boolean issued;
unsigned rank;
unsigned count;
};
@@ -57,6 +59,11 @@ lp_fence_create(unsigned rank);
void
lp_fence_signal(struct lp_fence *fence);
+boolean
+lp_fence_signalled(struct lp_fence *fence);
+
+void
+lp_fence_wait(struct lp_fence *fence);
void
llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
@@ -78,5 +85,11 @@ lp_fence_reference(struct lp_fence **ptr,
*ptr = f;
}
+static INLINE boolean
+lp_fence_issued(const struct lp_fence *fence)
+{
+ return fence->issued;
+}
+
#endif /* LP_FENCE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 845292f4ab..e2c723b7a8 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -31,6 +31,7 @@
#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
#include "util/u_string.h"
#include "draw/draw_context.h"
#include "lp_flush.h"
@@ -45,14 +46,15 @@
void
llvmpipe_flush( struct pipe_context *pipe,
unsigned flags,
- struct pipe_fence_handle **fence )
+ struct pipe_fence_handle **fence,
+ const char *reason)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
draw_flush(llvmpipe->draw);
/* ask the setup module to flush */
- lp_setup_flush(llvmpipe->setup, flags, fence);
+ lp_setup_flush(llvmpipe->setup, flags, fence, reason);
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
@@ -76,6 +78,17 @@ llvmpipe_flush( struct pipe_context *pipe,
}
}
+void
+llvmpipe_finish( struct pipe_context *pipe,
+ const char *reason )
+{
+ struct pipe_fence_handle *fence = NULL;
+ llvmpipe_flush(pipe, 0, &fence, reason);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+}
/**
* Flush context if necessary.
@@ -93,7 +106,8 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
- boolean do_not_block)
+ boolean do_not_block,
+ const char *reason)
{
unsigned referenced;
@@ -106,31 +120,16 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
/*
* Flush and wait.
*/
-
- struct pipe_fence_handle *fence = NULL;
-
if (do_not_block)
return FALSE;
- /*
- * Do the unswizzling in parallel.
- *
- * XXX: Don't abuse the PIPE_FLUSH_FRAME flag for this.
- */
- flush_flags |= PIPE_FLUSH_FRAME;
-
- llvmpipe_flush(pipe, flush_flags, &fence);
-
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, reason);
} else {
/*
* Just flush.
*/
- llvmpipe_flush(pipe, flush_flags, NULL);
+ llvmpipe_flush(pipe, flush_flags, NULL, reason);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h
index 7b605681a9..bb538b2bd8 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.h
+++ b/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -34,8 +34,14 @@ struct pipe_context;
struct pipe_fence_handle;
void
-llvmpipe_flush(struct pipe_context *pipe, unsigned flags,
- struct pipe_fence_handle **fence);
+llvmpipe_flush(struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence,
+ const char *reason);
+
+void
+llvmpipe_finish( struct pipe_context *pipe,
+ const char *reason );
boolean
llvmpipe_flush_resource(struct pipe_context *pipe,
@@ -45,6 +51,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
- boolean do_not_block);
+ boolean do_not_block,
+ const char *reason);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c
index 083e7e30a5..e22532f25c 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.c
+++ b/src/gallium/drivers/llvmpipe/lp_perf.c
@@ -46,7 +46,7 @@ lp_print_counters(void)
{
if (LP_DEBUG & DEBUG_COUNTERS) {
unsigned total_64, total_16, total_4;
- float p1, p2, p3, p4;
+ float p1, p2, p3, p5, p6;
debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
@@ -58,11 +58,15 @@ lp_print_counters(void)
p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
- p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
+ p5 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
+ p6 = 100.0 * (float) lp_count.nr_shade_64 / (float) total_64;
debug_printf("llvmpipe: nr_64x64: %9u\n", total_64);
debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
- debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64);
+ debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p5, total_64);
+ debug_printf("llvmpipe: nr_pure_shade_opaque: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_opaque_64, 0.0, lp_count.nr_shade_opaque_64);
+ debug_printf("llvmpipe: nr_shade_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_64, p6, total_64);
+ debug_printf("llvmpipe: nr_pure_shade: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_64, 0.0, lp_count.nr_shade_64);
debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
@@ -79,12 +83,17 @@ lp_print_counters(void)
debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
- total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4);
+ total_4 = (lp_count.nr_empty_4 +
+ lp_count.nr_fully_covered_4 +
+ lp_count.nr_partially_covered_4);
p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
- p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
+ p2 = 100.0 * (float) lp_count.nr_fully_covered_4 / (float) total_4;
+ p3 = 100.0 * (float) lp_count.nr_partially_covered_4 / (float) total_4;
- debug_printf("llvmpipe: nr_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_tri_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_fully_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_4, p2, total_4);
+ debug_printf("llvmpipe: nr_partially_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_4, p3, total_4);
debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index 4774f64550..c28652fc30 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -44,11 +44,16 @@ struct lp_counters
unsigned nr_empty_64;
unsigned nr_fully_covered_64;
unsigned nr_partially_covered_64;
+ unsigned nr_pure_shade_opaque_64;
+ unsigned nr_pure_shade_64;
+ unsigned nr_shade_64;
unsigned nr_shade_opaque_64;
unsigned nr_empty_16;
unsigned nr_fully_covered_16;
unsigned nr_partially_covered_16;
unsigned nr_empty_4;
+ unsigned nr_fully_covered_4;
+ unsigned nr_partially_covered_4;
unsigned nr_non_empty_4;
unsigned nr_llvm_compiles;
int64_t llvm_compile_time; /**< total, in microseconds */
@@ -66,9 +71,11 @@ extern struct lp_counters lp_count;
#ifdef DEBUG
#define LP_COUNT(counter) lp_count.counter++
#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr)
+#define LP_COUNT_GET(counter) (lp_count.counter)
#else
#define LP_COUNT(counter)
#define LP_COUNT_ADD(counter, incr) (void) incr
+#define LP_COUNT_GET(counter) 0
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index 02eeaf6487..67fd797af2 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -35,9 +35,8 @@
#include "util/u_memory.h"
#include "lp_context.h"
#include "lp_flush.h"
+#include "lp_fence.h"
#include "lp_query.h"
-#include "lp_rast.h"
-#include "lp_rast_priv.h"
#include "lp_state.h"
@@ -69,12 +68,7 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
struct llvmpipe_query *pq = llvmpipe_query(q);
/* query might still be in process if we never waited for the result */
if (!pq->done) {
- struct pipe_fence_handle *fence = NULL;
- llvmpipe_flush(pipe, 0, &fence);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
}
pipe_mutex_destroy(pq->mutex);
@@ -93,16 +87,11 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
if (!pq->done) {
if (wait) {
- struct pipe_fence_handle *fence = NULL;
- llvmpipe_flush(pipe, 0, &fence);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
}
/* this is a bit inconsequent but should be ok */
else {
- llvmpipe_flush(pipe, 0, NULL);
+ llvmpipe_flush(pipe, 0, NULL, __FUNCTION__);
}
}
@@ -125,12 +114,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
* frame of rendering.
*/
if (pq->binned) {
- struct pipe_fence_handle *fence;
- llvmpipe_flush(pipe, 0, &fence);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
}
lp_setup_begin_query(llvmpipe->setup, pq);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 3215d0f652..b1c306bbe9 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -316,43 +316,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
}
-/**
- * Load tile color from the framebuffer surface.
- * This is a bin command called during bin processing.
- */
-#if 0
-void
-lp_rast_load_color(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- struct lp_rasterizer *rast = task->rast;
- unsigned buf;
- enum lp_texture_usage usage;
-
- LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
-
- if (scene->has_color_clear)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
- /* Get pointers to color tile(s).
- * This will convert linear data to tiled if needed.
- */
- for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
- struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
- struct llvmpipe_texture *lpt;
- assert(cbuf);
- lpt = llvmpipe_texture(cbuf->texture);
- task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
- cbuf->face + cbuf->zslice,
- cbuf->level,
- usage,
- task->x, task->y);
- assert(task->color_tiles[buf]);
- }
-}
-#endif
/**
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 44319a0ad6..b4564ef33b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -120,7 +120,7 @@ struct lp_rast_triangle {
float v[3][2];
#endif
- struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */
+ struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */
};
@@ -236,6 +236,8 @@ void lp_rast_triangle_6( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_7( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void lp_rast_triangle_8( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
void lp_rast_shade_tile( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
@@ -256,5 +258,9 @@ void lp_rast_begin_query(struct lp_rasterizer_task *,
void lp_rast_end_query(struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg);
+
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 980c18c024..dbaa8e023a 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -67,7 +67,7 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
-
+#if !defined(PIPE_ARCH_SSE)
static INLINE unsigned
build_mask(int c, int dcdx, int dcdy)
{
@@ -98,6 +98,7 @@ build_mask(int c, int dcdx, int dcdy)
return mask;
}
+
static INLINE unsigned
build_mask_linear(int c, int dcdx, int dcdy)
{
@@ -129,6 +130,137 @@ build_mask_linear(int c, int dcdx, int dcdy)
}
+static INLINE void
+build_masks(int c,
+ int cdiff,
+ int dcdx,
+ int dcdy,
+ unsigned *outmask,
+ unsigned *partmask)
+{
+ *outmask |= build_mask_linear(c, dcdx, dcdy);
+ *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy);
+}
+
+#else
+#include <emmintrin.h>
+#include "util/u_sse.h"
+
+
+static INLINE void
+build_masks(int c,
+ int cdiff,
+ int dcdx,
+ int dcdy,
+ unsigned *outmask,
+ unsigned *partmask)
+{
+ __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ {
+ __m128i cstep01, cstep23, result;
+
+ cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ result = _mm_packs_epi16(cstep01, cstep23);
+
+ *outmask |= _mm_movemask_epi8(result);
+ }
+
+
+ {
+ __m128i cio4 = _mm_set1_epi32(cdiff);
+ __m128i cstep01, cstep23, result;
+
+ cstep0 = _mm_add_epi32(cstep0, cio4);
+ cstep1 = _mm_add_epi32(cstep1, cio4);
+ cstep2 = _mm_add_epi32(cstep2, cio4);
+ cstep3 = _mm_add_epi32(cstep3, cio4);
+
+ cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ result = _mm_packs_epi16(cstep01, cstep23);
+
+ *partmask |= _mm_movemask_epi8(result);
+ }
+}
+
+
+static INLINE unsigned
+build_mask_linear(int c, int dcdx, int dcdy)
+{
+ __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ /* pack pairs of results into epi16
+ */
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+
+ /* pack into epi8, preserving sign bits
+ */
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* extract sign bits to create mask
+ */
+ return _mm_movemask_epi8(result);
+}
+
+static INLINE unsigned
+build_mask(int c, int dcdx, int dcdy)
+{
+ __m128i step = _mm_setr_epi32(0, dcdx, dcdy, dcdx + dcdy);
+ __m128i c0 = _mm_set1_epi32(c);
+
+ /* Get values across the quad
+ */
+ __m128i cstep0 = _mm_add_epi32(c0, step);
+
+ /* Scale up step for moving between quads.
+ */
+ __m128i step4 = _mm_add_epi32(step, step);
+
+ /* Get values for the remaining quads:
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0,
+ _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1)));
+ __m128i cstep2 = _mm_add_epi32(cstep0,
+ _mm_shuffle_epi32(step4, _MM_SHUFFLE(2,2,2,2)));
+ __m128i cstep3 = _mm_add_epi32(cstep2,
+ _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1)));
+
+ /* pack pairs of results into epi16
+ */
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+
+ /* pack into epi8, preserving sign bits
+ */
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* extract sign bits to create mask
+ */
+ return _mm_movemask_epi8(result);
+}
+
+#endif
+
+
+
+
#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"
@@ -157,3 +289,92 @@ build_mask_linear(int c, int dcdx, int dcdy)
#define NR_PLANES 7
#include "lp_rast_tri_tmp.h"
+#define TAG(x) x##_8
+#define NR_PLANES 8
+#include "lp_rast_tri_tmp.h"
+
+
+/* Special case for 3 plane triangle which is contained entirely
+ * within a 16x16 block.
+ */
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = tri->plane;
+ unsigned mask = arg.triangle.plane_mask;
+ const int x = task->x + (mask & 0xf) * 16;
+ const int y = task->y + (mask >> 4) * 16;
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned j;
+ int c[3];
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < 3; j++) {
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+
+ {
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ const int cox = plane[j].eo * 4;
+ const int cio = plane[j].ei * 4 - 1;
+
+ build_masks(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ int cx[3];
+
+ partial_mask &= ~(1 << i);
+
+ for (j = 0; j < 3; j++)
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
+
+ do_block_4_3(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+
+ inmask &= ~(1 << i);
+
+ block_full_4(task, tri, px, py);
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 43f72d8ca8..99a0bae45d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -32,7 +32,7 @@
/**
- * Prototype for a 7 plane rasterizer function. Will codegenerate
+ * Prototype for a 8 plane rasterizer function. Will codegenerate
* several of these.
*
* XXX: Varients for more/fewer planes.
@@ -81,11 +81,14 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
for (j = 0; j < NR_PLANES; j++) {
const int dcdx = -plane[j].dcdx * 4;
const int dcdy = plane[j].dcdy * 4;
- const int cox = c[j] + plane[j].eo * 4;
- const int cio = c[j] + plane[j].ei * 4 - 1;
-
- outmask |= build_mask_linear(cox, dcdx, dcdy);
- partmask |= build_mask_linear(cio, dcdx, dcdy);
+ const int cox = plane[j].eo * 4;
+ const int cio = plane[j].ei * 4 - 1;
+
+ build_masks(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
}
if (outmask == 0xffff)
@@ -102,6 +105,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
assert((partial_mask & inmask) == 0);
+ LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask)));
+
/* Iterate over partials:
*/
while (partial_mask) {
@@ -114,6 +119,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
partial_mask &= ~(1 << i);
+ LP_COUNT(nr_partially_covered_4);
+
for (j = 0; j < NR_PLANES; j++)
cx[j] = (c[j]
- plane[j].dcdx * ix
@@ -133,6 +140,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
inmask &= ~(1 << i);
+ LP_COUNT(nr_fully_covered_4);
block_full_4(task, tri, px, py);
}
}
@@ -166,11 +174,14 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
{
const int dcdx = -plane[j].dcdx * 16;
const int dcdy = plane[j].dcdy * 16;
- const int cox = c[j] + plane[j].eo * 16;
- const int cio = c[j] + plane[j].ei * 16 - 1;
-
- outmask |= build_mask_linear(cox, dcdx, dcdy);
- partmask |= build_mask_linear(cio, dcdx, dcdy);
+ const int cox = plane[j].eo * 16;
+ const int cio = plane[j].ei * 16 - 1;
+
+ build_masks(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
}
j++;
@@ -190,6 +201,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
assert((partial_mask & inmask) == 0);
+ LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask)));
+
/* Iterate over partials:
*/
while (partial_mask) {
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index f88a759fe7..15a09b7100 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -163,12 +163,15 @@ lp_scene_reset(struct lp_scene *scene )
/* Free all but last binner command lists:
*/
- for (i = 0; i < TILES_X; i++) {
- for (j = 0; j < TILES_Y; j++) {
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
lp_scene_bin_reset(scene, i, j);
}
}
+ /* If there are any bins which weren't cleared by the loop above,
+ * they will be caught (on debug builds at least) by this assert:
+ */
assert(lp_scene_is_empty(scene));
/* Free all but last binned data block:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 167cb2ee2e..1e65a91fc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -61,6 +61,8 @@ static const struct debug_named_value lp_debug_flags[] = {
{ "show_tiles", DEBUG_SHOW_TILES, NULL },
{ "show_subtiles", DEBUG_SHOW_SUBTILES, NULL },
{ "counters", DEBUG_COUNTERS, NULL },
+ { "scene", DEBUG_SCENE, NULL },
+ { "fence", DEBUG_FENCE, NULL },
DEBUG_NAMED_VALUE_END
};
#endif
@@ -87,7 +89,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return PIPE_MAX_VERTEX_SAMPLERS;
+ /* At this time, the draw module and llvmpipe driver only
+ * support vertex shader texture lookups when LLVM is enabled in
+ * the draw module.
+ */
+ if (debug_get_bool_option("DRAW_USE_LLVM", TRUE))
+ return PIPE_MAX_VERTEX_SAMPLERS;
+ else
+ return 0;
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
@@ -230,6 +239,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
assert(target == PIPE_BUFFER ||
target == PIPE_TEXTURE_1D ||
target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_RECT ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
@@ -314,6 +324,51 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
+
+/**
+ * Fence reference counting.
+ */
+static void
+llvmpipe_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ struct lp_fence **old = (struct lp_fence **) ptr;
+ struct lp_fence *f = (struct lp_fence *) fence;
+
+ lp_fence_reference(old, f);
+}
+
+
+/**
+ * Has the fence been executed/finished?
+ */
+static int
+llvmpipe_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ struct lp_fence *f = (struct lp_fence *) fence;
+ return lp_fence_signalled(f);
+}
+
+
+/**
+ * Wait for the fence to finish.
+ */
+static int
+llvmpipe_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence_handle,
+ unsigned flag)
+{
+ struct lp_fence *f = (struct lp_fence *) fence_handle;
+
+ lp_fence_wait(f);
+ return 0;
+}
+
+
+
/**
* Create a new pipe_screen object
* Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
@@ -351,9 +406,11 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
screen->base.context_create = llvmpipe_create_context;
screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
+ screen->base.fence_reference = llvmpipe_fence_reference;
+ screen->base.fence_signalled = llvmpipe_fence_signalled;
+ screen->base.fence_finish = llvmpipe_fence_finish;
llvmpipe_init_screen_resource_funcs(&screen->base);
- llvmpipe_init_screen_fence_funcs(&screen->base);
lp_jit_screen_init(screen);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 556e571585..3da9097154 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -275,9 +275,10 @@ set_scene_state( struct lp_setup_context *setup,
void
lp_setup_flush( struct lp_setup_context *setup,
unsigned flags,
- struct pipe_fence_handle **fence)
+ struct pipe_fence_handle **fence,
+ const char *reason)
{
- LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+ LP_DBG(DEBUG_SETUP, "%s %s\n", __FUNCTION__, reason);
if (setup->scene) {
if (fence) {
@@ -287,6 +288,8 @@ lp_setup_flush( struct lp_setup_context *setup,
*fence = lp_setup_fence( setup );
}
+ if (setup->scene->fence)
+ setup->scene->fence->issued = TRUE;
}
set_scene_state( setup, SETUP_FLUSHED );
@@ -312,6 +315,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
* scene.
*/
util_copy_framebuffer_state(&setup->fb, fb);
+ setup->framebuffer.x0 = 0;
+ setup->framebuffer.y0 = 0;
+ setup->framebuffer.x1 = fb->width-1;
+ setup->framebuffer.y1 = fb->height-1;
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
}
@@ -469,11 +477,35 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup,
setup->ccw_is_frontface = ccw_is_frontface;
setup->cullmode = cull_mode;
setup->triangle = first_triangle;
- setup->scissor_test = scissor;
setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f;
+
+ if (setup->scissor_test != scissor) {
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
+ setup->scissor_test = scissor;
+ }
}
+void
+lp_setup_set_line_state( struct lp_setup_context *setup,
+ float line_width)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+ setup->line_width = line_width;
+}
+
+void
+lp_setup_set_point_state( struct lp_setup_context *setup,
+ float point_size,
+ boolean point_size_per_vertex,
+ uint sprite)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->point_size = point_size;
+ setup->sprite = sprite;
+ setup->point_size_per_vertex = point_size_per_vertex;
+}
void
lp_setup_set_fs_inputs( struct lp_setup_context *setup,
@@ -559,10 +591,11 @@ lp_setup_set_scissor( struct lp_setup_context *setup,
assert(scissor);
- if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) {
- setup->scissor.current = *scissor; /* struct copy */
- setup->dirty |= LP_SETUP_NEW_SCISSOR;
- }
+ setup->scissor.x0 = scissor->minx;
+ setup->scissor.x1 = scissor->maxx-1;
+ setup->scissor.y0 = scissor->miny;
+ setup->scissor.y1 = scissor->maxy-1;
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
}
@@ -713,6 +746,12 @@ lp_setup_update_state( struct lp_setup_context *setup )
*/
{
struct llvmpipe_context *lp = llvmpipe_context(scene->pipe);
+
+ /* Will probably need to move this somewhere else, just need
+ * to know about vertex shader point size attribute.
+ */
+ setup->psize = lp->psize_slot;
+
if (lp->dirty) {
llvmpipe_update_derived(lp);
}
@@ -806,6 +845,14 @@ lp_setup_update_state( struct lp_setup_context *setup )
}
}
+ if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
+ setup->draw_region = setup->framebuffer;
+ if (setup->scissor_test) {
+ u_rect_possible_intersection(&setup->scissor,
+ &setup->draw_region);
+ }
+ }
+
setup->dirty = 0;
assert(setup->fs.stored);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 73b1c85325..821ebb1087 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -85,7 +85,8 @@ lp_setup_fence( struct lp_setup_context *setup );
void
lp_setup_flush( struct lp_setup_context *setup,
unsigned flags,
- struct pipe_fence_handle **fence);
+ struct pipe_fence_handle **fence,
+ const char *reason);
void
@@ -99,6 +100,16 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup,
boolean scissor,
boolean gl_rasterization_rules );
+void
+lp_setup_set_line_state( struct lp_setup_context *setup,
+ float line_width);
+
+void
+lp_setup_set_point_state( struct lp_setup_context *setup,
+ float point_size,
+ boolean point_size_per_vertex,
+ uint sprite);
+
void
lp_setup_set_fs_inputs( struct lp_setup_context *setup,
const struct lp_shader_input *interp,
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
new file mode 100644
index 0000000000..95e3e8fffe
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
@@ -0,0 +1,258 @@
+/**************************************************************************
+ *
+ * Copyright 2010, VMware.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for triangles
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_setup_coef.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+
+#if !defined(PIPE_ARCH_SSE)
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void constant_coef( struct lp_rast_shader_inputs *inputs,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ inputs->a0[slot][i] = value;
+ inputs->dadx[slot][i] = 0.0f;
+ inputs->dady[slot][i] = 0.0f;
+}
+
+
+
+static void linear_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ float a0 = info->v0[vert_attr][i];
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
+
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
+ float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
+
+ inputs->dadx[slot][i] = dadx;
+ inputs->dady[slot][i] = dady;
+
+ /* calculate a0 as the value which would be sampled for the
+ * fragment at (0,0), taking into account that we want to sample at
+ * pixel centers, in other words (0.5, 0.5).
+ *
+ * this is neat but unfortunately not a good way to do things for
+ * triangles with very large values of dadx or dady as it will
+ * result in the subtraction and re-addition from a0 of a very
+ * large number, which means we'll end up loosing a lot of the
+ * fractional bits and precision from a0. the way to fix this is
+ * to define a0 as the sample at a pixel center somewhere near vmin
+ * instead - i'll switch to this later.
+ */
+ inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
+ dady * info->y0_center);
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void perspective_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a0 = info->v0[vert_attr][i] * info->v0[0][3];
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
+ float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
+
+ inputs->dadx[slot][i] = dadx;
+ inputs->dady[slot][i] = dady;
+ inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
+ dady * info->y0_center);
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial
+ * Z and W are copied from position_coef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ inputs->a0[slot][0] = 0.0;
+ inputs->dadx[slot][0] = 1.0;
+ inputs->dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ inputs->a0[slot][1] = 0.0;
+ inputs->dadx[slot][1] = 0.0;
+ inputs->dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ linear_coef(inputs, info, slot, 0, 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ linear_coef(inputs, info, slot, 0, 3);
+ }
+}
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ * \param frontface is the triangle front facing?
+ */
+static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
+ unsigned slot,
+ boolean frontface,
+ unsigned usage_mask)
+{
+ /* convert TRUE to 1.0 and FALSE to -1.0 */
+ if (usage_mask & TGSI_WRITEMASK_X)
+ constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
+
+ if (usage_mask & TGSI_WRITEMASK_Y)
+ constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
+
+ if (usage_mask & TGSI_WRITEMASK_Z)
+ constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
+
+ if (usage_mask & TGSI_WRITEMASK_W)
+ constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
+}
+
+
+/**
+ * Compute the tri->coef[] array dadx, dady, a0 values.
+ */
+void lp_setup_tri_coef( struct lp_setup_context *setup,
+ struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info)
+{
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+ unsigned i;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (setup->flatshade_first) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(inputs, slot+1, info->v0[vert_attr][i], i);
+ }
+ else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(inputs, slot+1, info->v2[vert_attr][i], i);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ linear_coef(inputs, info, slot+1, vert_attr, i);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(inputs, info, slot+1, vert_attr, i);
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so all need to ensure that the usage mask is covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_FACING:
+ setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ /* The internal position input is in slot zero:
+ */
+ setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask);
+}
+
+#else
+extern void lp_setup_coef_dummy(void);
+void lp_setup_coef_dummy(void)
+{
+}
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
new file mode 100644
index 0000000000..d68b39c603
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * The setup code is concerned with point/line/triangle setup and
+ * putting commands/data into the bins.
+ */
+
+
+#ifndef LP_SETUP_COEF_H
+#define LP_SETUP_COEF_H
+
+
+struct lp_tri_info {
+
+ float x0_center;
+ float y0_center;
+
+ /* turn these into an aligned float[4] */
+ float dy01_ooa;
+ float dy20_ooa;
+ float dx01_ooa;
+ float dx20_ooa;
+
+ const float (*v0)[4];
+ const float (*v1)[4];
+ const float (*v2)[4];
+
+ boolean frontfacing; /* remove eventually */
+};
+
+void lp_setup_tri_coef( struct lp_setup_context *setup,
+ struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info);
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
new file mode 100644
index 0000000000..73fb70599c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for triangles
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_setup_coef.h"
+#include "lp_rast.h"
+
+#if defined(PIPE_ARCH_SSE)
+#include <emmintrin.h>
+
+
+static void constant_coef4( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ const float *attr)
+{
+ *(__m128 *)inputs->a0[slot] = *(__m128 *)attr;
+ *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
+ *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
+}
+
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ * \param frontface is the triangle front facing?
+ */
+static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot )
+{
+ /* XXX: just pass frontface directly to the shader, don't bother
+ * treating it as an input.
+ */
+ __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0,
+ 0, 0, 0);
+
+ *(__m128 *)inputs->a0[slot] = a0;
+ *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
+ *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
+}
+
+
+
+static void calc_coef4( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ __m128 a0,
+ __m128 a1,
+ __m128 a2)
+{
+ __m128 da01 = _mm_sub_ps(a0, a1);
+ __m128 da20 = _mm_sub_ps(a2, a0);
+
+ __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa));
+ __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa));
+ __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa);
+
+ __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa));
+ __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa));
+ __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa);
+
+ __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center));
+ __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center));
+ __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0);
+ __m128 attr_0 = _mm_sub_ps(a0, attr_v0);
+
+ *(__m128 *)inputs->a0[slot] = attr_0;
+ *(__m128 *)inputs->dadx[slot] = dadx;
+ *(__m128 *)inputs->dady[slot] = dady;
+}
+
+
+static void linear_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr)
+{
+ __m128 a0 = *(const __m128 *)info->v0[vert_attr];
+ __m128 a1 = *(const __m128 *)info->v1[vert_attr];
+ __m128 a2 = *(const __m128 *)info->v2[vert_attr];
+
+ calc_coef4(inputs, info, slot, a0, a1, a2);
+}
+
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void perspective_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ __m128 a0 = *(const __m128 *)info->v0[vert_attr];
+ __m128 a1 = *(const __m128 *)info->v1[vert_attr];
+ __m128 a2 = *(const __m128 *)info->v2[vert_attr];
+
+ __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3]));
+ __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3]));
+ __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3]));
+
+ calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow);
+}
+
+
+
+
+
+/**
+ * Compute the inputs-> dadx, dady, a0 values.
+ */
+void lp_setup_tri_coef( struct lp_setup_context *setup,
+ struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info)
+{
+ unsigned slot;
+
+ /* The internal position input is in slot zero:
+ */
+ linear_coef(inputs, info, 0, 0);
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (setup->flatshade_first) {
+ constant_coef4(inputs, info, slot+1, info->v0[vert_attr]);
+ }
+ else {
+ constant_coef4(inputs, info, slot+1, info->v2[vert_attr]);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ linear_coef(inputs, info, slot+1, vert_attr);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ perspective_coef(inputs, info, slot+1, vert_attr);
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0.
+ */
+ break;
+
+ case LP_INTERP_FACING:
+ setup_facing_coef(inputs, info, slot+1);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+}
+
+#else
+extern void lp_setup_coef_dummy(void);
+void lp_setup_coef_dummy(void)
+{
+}
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index a0606f5034..877a492c6d 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -41,6 +41,7 @@
#include "lp_scene.h"
#include "draw/draw_vbuf.h"
+#include "util/u_rect.h"
#define LP_SETUP_NEW_FS 0x01
#define LP_SETUP_NEW_CONSTANTS 0x02
@@ -73,6 +74,7 @@ struct lp_setup_context
uint prim;
uint vertex_size;
uint nr_vertices;
+ uint sprite;
uint vertex_buffer_size;
void *vertex_buffer;
@@ -88,10 +90,17 @@ struct lp_setup_context
boolean flatshade_first;
boolean ccw_is_frontface;
boolean scissor_test;
+ boolean point_size_per_vertex;
unsigned cullmode;
float pixel_offset;
+ float line_width;
+ float point_size;
+ float psize;
struct pipe_framebuffer_state fb;
+ struct u_rect framebuffer;
+ struct u_rect scissor;
+ struct u_rect draw_region; /* intersection of fb & scissor */
struct {
unsigned flags;
@@ -127,9 +136,6 @@ struct lp_setup_context
uint8_t *stored;
} blend_color;
- struct {
- struct pipe_scissor_state current;
- } scissor;
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
@@ -158,4 +164,29 @@ void lp_setup_update_state( struct lp_setup_context *setup );
void lp_setup_destroy( struct lp_setup_context *setup );
+void
+lp_setup_print_triangle(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4]);
+
+void
+lp_setup_print_vertex(struct lp_setup_context *setup,
+ const char *name,
+ const float (*v)[4]);
+
+
+struct lp_rast_triangle *
+lp_setup_alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size);
+
+void
+lp_setup_bin_triangle( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ const struct u_rect *bbox,
+ int nr_planes );
+
#endif
+
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index be41c44e6f..ce2da55cf4 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -29,19 +29,671 @@
* Binning code for lines
*/
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
-static void line_nop( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4] )
+#define NUM_CHANNELS 4
+
+struct lp_line_info {
+
+ float dx;
+ float dy;
+ float oneoverarea;
+
+ const float (*v1)[4];
+ const float (*v2)[4];
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void constant_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ tri->inputs.a0[slot][i] = value;
+ tri->inputs.dadx[slot][i] = 0.0f;
+ tri->inputs.dady[slot][i] = 0.0f;
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a triangle.
+ */
+static void linear_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
+
+ float da21 = a1 - a2;
+ float dadx = da21 * info->dx * info->oneoverarea;
+ float dady = da21 * info->dy * info->oneoverarea;
+
+ tri->inputs.dadx[slot][i] = dadx;
+ tri->inputs.dady[slot][i] = dady;
+
+ tri->inputs.a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void perspective_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+
+ float da21 = a1 - a2;
+ float dadx = da21 * info->dx * info->oneoverarea;
+ float dady = da21 * info->dy * info->oneoverarea;
+
+ tri->inputs.dadx[slot][i] = dadx;
+ tri->inputs.dady[slot][i] = dady;
+
+ tri->inputs.a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+static void
+setup_fragcoord_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ tri->inputs.a0[slot][0] = 0.0;
+ tri->inputs.dadx[slot][0] = 1.0;
+ tri->inputs.dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ tri->inputs.a0[slot][1] = 0.0;
+ tri->inputs.dadx[slot][1] = 0.0;
+ tri->inputs.dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ linear_coef(setup, tri, info, slot, 0, 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ linear_coef(setup, tri, info, slot, 0, 3);
+ }
+}
+
+/**
+ * Compute the tri->coef[] array dadx, dady, a0 values.
+ */
+static void setup_line_coefficients( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info)
+{
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ unsigned i;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (setup->flatshade_first) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i);
+ }
+ else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ linear_coef(setup, tri, info, slot+1, vert_attr, i);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(setup, tri, info, slot+1, vert_attr, i);
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so all need to ensure that the usage mask is covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ /* The internal position input is in slot zero:
+ */
+ setup_fragcoord_coef(setup, tri, info, 0,
+ fragcoord_usage_mask);
+}
+
+
+
+static INLINE int subpixel_snap( float a )
+{
+ return util_iround(FIXED_ONE * a);
+}
+
+
+/**
+ * Print line vertex attribs (for debug).
+ */
+static void
+print_line(struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ uint i;
+
+ debug_printf("llvmpipe line\n");
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ debug_printf(" v1[%d]: %f %f %f %f\n", i,
+ v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
+ }
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ debug_printf(" v2[%d]: %f %f %f %f\n", i,
+ v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
+ }
+}
+
+
+static INLINE boolean sign(float x){
+ return x >= 0;
+}
+
+
+/* Used on positive floats only:
+ */
+static INLINE float fracf(float f)
{
+ return f - floorf(f);
}
-void
-lp_setup_choose_line( struct lp_setup_context *setup )
+
+static void
+lp_setup_line( struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- setup->line = line_nop;
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_rast_triangle *line;
+ struct lp_line_info info;
+ float width = MAX2(1.0, setup->line_width);
+ struct u_rect bbox;
+ unsigned tri_bytes;
+ int x[4];
+ int y[4];
+ int i;
+ int nr_planes = 4;
+
+ /* linewidth should be interpreted as integer */
+ int fixed_width = util_iround(width) * FIXED_ONE;
+
+ float x_offset=0;
+ float y_offset=0;
+ float x_offset_end=0;
+ float y_offset_end=0;
+
+ float x1diff;
+ float y1diff;
+ float x2diff;
+ float y2diff;
+ float dx, dy;
+
+ boolean draw_start;
+ boolean draw_end;
+ boolean will_draw_start;
+ boolean will_draw_end;
+
+ if (0)
+ print_line(setup, v1, v2);
+
+ if (setup->scissor_test) {
+ nr_planes = 8;
+ }
+ else {
+ nr_planes = 4;
+ }
+
+
+ dx = v1[0][0] - v2[0][0];
+ dy = v1[0][1] - v2[0][1];
+
+ /* X-MAJOR LINE */
+ if (fabsf(dx) >= fabsf(dy)) {
+ float dydx = dy / dx;
+
+ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+ y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+ x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+ y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+ if (y2diff==-0.5 && dy<0){
+ y2diff = 0.5;
+ }
+
+ /*
+ * Diamond exit rule test for starting point
+ */
+ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+ draw_start = TRUE;
+ }
+ else if (sign(x1diff) == sign(-dx)) {
+ draw_start = FALSE;
+ }
+ else if (sign(-y1diff) != sign(dy)) {
+ draw_start = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float yintersect = fracf(v1[0][1]) + x1diff * dydx;
+ draw_start = (yintersect < 1.0 && yintersect > 0.0);
+ }
+
+
+ /*
+ * Diamond exit rule test for ending point
+ */
+ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+ draw_end = FALSE;
+ }
+ else if (sign(x2diff) != sign(-dx)) {
+ draw_end = FALSE;
+ }
+ else if (sign(-y2diff) == sign(dy)) {
+ draw_end = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float yintersect = fracf(v2[0][1]) + x2diff * dydx;
+ draw_end = (yintersect < 1.0 && yintersect > 0.0);
+ }
+
+ /* Are we already drawing start/end?
+ */
+ will_draw_start = sign(-x1diff) != sign(dx);
+ will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0;
+
+ if (dx < 0) {
+ /* if v2 is to the right of v1, swap pointers */
+ const float (*temp)[4] = v1;
+ v1 = v2;
+ v2 = temp;
+ dx = -dx;
+ dy = -dy;
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ x_offset_end = - x1diff - 0.5;
+ y_offset_end = x_offset_end * dydx;
+
+ }
+ if (will_draw_end != draw_end) {
+ x_offset = - x2diff - 0.5;
+ y_offset = x_offset * dydx;
+ }
+
+ }
+ else{
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ x_offset = - x1diff + 0.5;
+ y_offset = x_offset * dydx;
+ }
+ if (will_draw_end != draw_end) {
+ x_offset_end = - x2diff + 0.5;
+ y_offset_end = x_offset_end * dydx;
+ }
+ }
+
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset);
+ x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+ x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+ x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset);
+
+ y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2;
+ y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2;
+ y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2;
+ y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2;
+
+ }
+ else {
+ const float dxdy = dx / dy;
+
+ /* Y-MAJOR LINE */
+ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+ y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+ x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+ y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+ if (x2diff==-0.5 && dx<0) {
+ x2diff = 0.5;
+ }
+
+ /*
+ * Diamond exit rule test for starting point
+ */
+ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+ draw_start = TRUE;
+ }
+ else if (sign(-y1diff) == sign(dy)) {
+ draw_start = FALSE;
+ }
+ else if (sign(x1diff) != sign(-dx)) {
+ draw_start = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float xintersect = fracf(v1[0][0]) + y1diff * dxdy;
+ draw_start = (xintersect < 1.0 && xintersect > 0.0);
+ }
+
+ /*
+ * Diamond exit rule test for ending point
+ */
+ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+ draw_end = FALSE;
+ }
+ else if (sign(-y2diff) != sign(dy) ) {
+ draw_end = FALSE;
+ }
+ else if (sign(x2diff) == sign(-dx) ) {
+ draw_end = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
+ draw_end = (xintersect < 1.0 && xintersect > 0.0);
+ }
+
+ /* Are we already drawing start/end?
+ */
+ will_draw_start = sign(y1diff) == sign(dy);
+ will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0;
+
+ if (dy > 0) {
+ /* if v2 is on top of v1, swap pointers */
+ const float (*temp)[4] = v1;
+ v1 = v2;
+ v2 = temp;
+ dx = -dx;
+ dy = -dy;
+
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ y_offset_end = - y1diff + 0.5;
+ x_offset_end = y_offset_end * dxdy;
+ }
+ if (will_draw_end != draw_end) {
+ y_offset = - y2diff + 0.5;
+ x_offset = y_offset * dxdy;
+ }
+ }
+ else {
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ y_offset = - y1diff - 0.5;
+ x_offset = y_offset * dxdy;
+
+ }
+ if (will_draw_end != draw_end) {
+ y_offset_end = - y2diff - 0.5;
+ x_offset_end = y_offset_end * dxdy;
+ }
+ }
+
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2;
+ x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
+ x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2;
+ x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2;
+
+ y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset);
+ y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+ y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+ y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset);
+ }
+
+
+
+ LP_COUNT(nr_tris);
+
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ }
+
+ if (bbox.x1 < bbox.x0 ||
+ bbox.y1 < bbox.y0) {
+ if (0) debug_printf("empty bounding box\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ u_rect_find_intersection(&setup->draw_region, &bbox);
+
+ line = lp_setup_alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &tri_bytes);
+ if (!line)
+ return;
+
+#ifdef DEBUG
+ line->v[0][0] = v1[0][0];
+ line->v[1][0] = v2[0][0];
+ line->v[0][1] = v1[0][1];
+ line->v[1][1] = v2[0][1];
+#endif
+
+ /* calculate the deltas */
+ line->plane[0].dcdy = x[0] - x[1];
+ line->plane[1].dcdy = x[1] - x[2];
+ line->plane[2].dcdy = x[2] - x[3];
+ line->plane[3].dcdy = x[3] - x[0];
+
+ line->plane[0].dcdx = y[0] - y[1];
+ line->plane[1].dcdx = y[1] - y[2];
+ line->plane[2].dcdx = y[2] - y[3];
+ line->plane[3].dcdx = y[3] - y[0];
+
+
+ info.oneoverarea = 1.0f / (dx * dx + dy * dy);
+ info.dx = dx;
+ info.dy = dy;
+ info.v1 = v1;
+ info.v2 = v2;
+
+ /* Setup parameter interpolants:
+ */
+ setup_line_coefficients( setup, line, &info);
+
+ line->inputs.facing = 1.0F;
+ line->inputs.state = setup->fs.stored;
+
+ for (i = 0; i < 4; i++) {
+ struct lp_rast_plane *plane = &line->plane[i];
+
+ /* half-edge constants, will be interated over the whole render
+ * target.
+ */
+ plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+
+
+ /* correct for top-left vs. bottom-left fill convention.
+ *
+ * note that we're overloading gl_rasterization_rules to mean
+ * both (0.5,0.5) pixel centers *and* bottom-left filling
+ * convention.
+ *
+ * GL actually has a top-left filling convention, but GL's
+ * notion of "top" differs from gallium's...
+ *
+ * Also, sometimes (in FBO cases) GL will render upside down
+ * to its usual method, in which case it will probably want
+ * to use the opposite, top-left convention.
+ */
+ if (plane->dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane->c++;
+ }
+ else if (plane->dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane->dcdy > 0) plane->c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane->dcdy < 0) plane->c++;
+ }
+ }
+
+ plane->dcdx *= FIXED_ONE;
+ plane->dcdy *= FIXED_ONE;
+
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane->eo = 0;
+ if (plane->dcdx < 0) plane->eo -= plane->dcdx;
+ if (plane->dcdy > 0) plane->eo += plane->dcdy;
+
+ /* Calculate trivial accept offsets from the above.
+ */
+ plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ }
+
+
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangles edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ */
+ if (nr_planes == 8) {
+ line->plane[4].dcdx = -1;
+ line->plane[4].dcdy = 0;
+ line->plane[4].c = 1-bbox.x0;
+ line->plane[4].ei = 0;
+ line->plane[4].eo = 1;
+
+ line->plane[5].dcdx = 1;
+ line->plane[5].dcdy = 0;
+ line->plane[5].c = bbox.x1+1;
+ line->plane[5].ei = -1;
+ line->plane[5].eo = 0;
+
+ line->plane[6].dcdx = 0;
+ line->plane[6].dcdy = 1;
+ line->plane[6].c = 1-bbox.y0;
+ line->plane[6].ei = 0;
+ line->plane[6].eo = 1;
+
+ line->plane[7].dcdx = 0;
+ line->plane[7].dcdy = -1;
+ line->plane[7].c = bbox.y1+1;
+ line->plane[7].ei = -1;
+ line->plane[7].eo = 0;
+ }
+
+ lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
+}
+
+
+void lp_setup_choose_line( struct lp_setup_context *setup )
+{
+ setup->line = lp_setup_line;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 9f69e6c5ce..6ae318d328 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2010, VMware Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -30,17 +30,299 @@
*/
#include "lp_setup_context.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+#include "tgsi/tgsi_scan.h"
+
+#define NUM_CHANNELS 4
+
+struct point_info {
+ /* x,y deltas */
+ int dy01, dy12;
+ int dx01, dx12;
+
+ const float (*v0)[4];
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void constant_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ point->inputs.a0[slot][i] = value;
+ point->inputs.dadx[slot][i] = 0.0f;
+ point->inputs.dady[slot][i] = 0.0f;
+}
+
+static void perspective_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ if (i == 0) {
+ float dadx = FIXED_ONE / (float)info->dx12;
+ float dady = 0.0f;
+ point->inputs.dadx[slot][i] = dadx;
+ point->inputs.dady[slot][i] = dady;
+ point->inputs.a0[slot][i] = (0.5 -
+ (dadx * ((float)info->v0[0][0] - setup->pixel_offset) +
+ dady * ((float)info->v0[0][1] - setup->pixel_offset)));
+ }
+
+ else if (i == 1) {
+ float dadx = 0.0f;
+ float dady = FIXED_ONE / (float)info->dx12;
+
+ point->inputs.dadx[slot][i] = dadx;
+ point->inputs.dady[slot][i] = dady;
+ point->inputs.a0[slot][i] = (0.5 -
+ (dadx * ((float)info->v0[0][0] - setup->pixel_offset) +
+ dady * ((float)info->v0[0][1] - setup->pixel_offset)));
+ }
+
+ else if (i == 2) {
+ point->inputs.a0[slot][i] = 0.0f;
+ point->inputs.dadx[slot][i] = 0.0f;
+ point->inputs.dady[slot][i] = 0.0f;
+ }
+
+ else if (i == 3) {
+ point->inputs.a0[slot][i] = 1.0f;
+ point->inputs.dadx[slot][i] = 0.0f;
+ point->inputs.dady[slot][i] = 0.0f;
+ }
+
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial
+ * Z and W are copied from position_coef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_point_fragcoord_coef(struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ point->inputs.a0[slot][0] = 0.0;
+ point->inputs.dadx[slot][0] = 1.0;
+ point->inputs.dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ point->inputs.a0[slot][1] = 0.0;
+ point->inputs.dadx[slot][1] = 0.0;
+ point->inputs.dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ constant_coef(setup, point, slot, info->v0[0][2], 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ constant_coef(setup, point, slot, info->v0[0][3], 3);
+ }
+}
+
+/**
+ * Compute the point->coef[] array dadx, dady, a0 values.
+ */
+static void
+setup_point_coefficients( struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ const struct point_info *info)
+{
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ unsigned i;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so all need to ensure that the usage mask is covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ /* For point sprite textures */
+ if (setup->fs.current.variant->shader->info.input_semantic_name[slot]
+ == TGSI_SEMANTIC_GENERIC)
+ {
+ int index = setup->fs.current.variant->shader->info.input_semantic_index[slot];
+
+ if (setup->sprite & (1 << index)) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(setup, point, info, slot+1, vert_attr, i);
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ break;
+ }
+ }
+
+ /* Otherwise fallthrough */
+ default:
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i))
+ constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i);
+ }
+ }
+ }
-static void point_nop( struct lp_setup_context *setup,
- const float (*v0)[4] )
+ /* The internal position input is in slot zero:
+ */
+ setup_point_fragcoord_coef(setup, point, info, 0,
+ fragcoord_usage_mask);
+}
+
+static INLINE int
+subpixel_snap(float a)
{
+ return util_iround(FIXED_ONE * a);
+}
+
+
+static void lp_setup_point( struct lp_setup_context *setup,
+ const float (*v0)[4] )
+{
+ /* x/y positions in fixed point */
+ const int sizeAttr = setup->psize;
+ const float size
+ = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
+ : setup->point_size;
+
+ /* Point size as fixed point integer, remove rounding errors
+ * and gives minimum width for very small points
+ */
+ int fixed_width = MAX2(FIXED_ONE,
+ (subpixel_snap(size) + FIXED_ONE/2 - 1) & ~(FIXED_ONE-1));
+
+ const int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2;
+ const int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2;
+
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_rast_triangle *point;
+ unsigned bytes;
+ struct u_rect bbox;
+ unsigned nr_planes = 4;
+ struct point_info info;
+
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ bbox.x0 = (x0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y0 = (y0 + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ u_rect_find_intersection(&setup->draw_region, &bbox);
+
+ point = lp_setup_alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &bytes);
+ if (!point)
+ return;
+
+#ifdef DEBUG
+ point->v[0][0] = v0[0][0];
+ point->v[0][1] = v0[0][1];
+#endif
+
+ info.v0 = v0;
+ info.dx01 = 0;
+ info.dx12 = fixed_width;
+ info.dy01 = fixed_width;
+ info.dy12 = 0;
+
+ /* Setup parameter interpolants:
+ */
+ setup_point_coefficients(setup, point, &info);
+
+ point->inputs.facing = 1.0F;
+ point->inputs.state = setup->fs.stored;
+
+ {
+ point->plane[0].dcdx = -1;
+ point->plane[0].dcdy = 0;
+ point->plane[0].c = 1-bbox.x0;
+ point->plane[0].ei = 0;
+ point->plane[0].eo = 1;
+
+ point->plane[1].dcdx = 1;
+ point->plane[1].dcdy = 0;
+ point->plane[1].c = bbox.x1+1;
+ point->plane[1].ei = -1;
+ point->plane[1].eo = 0;
+
+ point->plane[2].dcdx = 0;
+ point->plane[2].dcdy = 1;
+ point->plane[2].c = 1-bbox.y0;
+ point->plane[2].ei = 0;
+ point->plane[2].eo = 1;
+
+ point->plane[3].dcdx = 0;
+ point->plane[3].dcdy = -1;
+ point->plane[3].c = bbox.y1+1;
+ point->plane[3].ei = -1;
+ point->plane[3].eo = 0;
+ }
+
+ lp_setup_bin_triangle(setup, point, &bbox, nr_planes);
}
void
lp_setup_choose_point( struct lp_setup_context *setup )
{
- setup->point = point_nop;
+ setup->point = lp_setup_point;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 393533ebee..0180d95090 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -31,35 +31,15 @@
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_rect.h"
#include "lp_perf.h"
#include "lp_setup_context.h"
+#include "lp_setup_coef.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
#define NUM_CHANNELS 4
-struct tri_info {
-
- float pixel_offset;
-
- /* fixed point vertex coordinates */
- int x[3];
- int y[3];
-
- /* float x,y deltas - all from the original coordinates
- */
- float dy01, dy20;
- float dx01, dx20;
- float oneoverarea;
-
- const float (*v0)[4];
- const float (*v1)[4];
- const float (*v2)[4];
-
- boolean frontfacing;
-};
-
-
static INLINE int
@@ -76,247 +56,6 @@ fixed_to_float(int a)
-/**
- * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- */
-static void constant_coef( struct lp_rast_triangle *tri,
- unsigned slot,
- const float value,
- unsigned i )
-{
- tri->inputs.a0[slot][i] = value;
- tri->inputs.dadx[slot][i] = 0.0f;
- tri->inputs.dady[slot][i] = 0.0f;
-}
-
-
-
-static void linear_coef( struct lp_rast_triangle *tri,
- const struct tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- float a0 = info->v0[vert_attr][i];
- float a1 = info->v1[vert_attr][i];
- float a2 = info->v2[vert_attr][i];
-
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
- float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
-
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
-
- /* calculate a0 as the value which would be sampled for the
- * fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
- *
- * this is neat but unfortunately not a good way to do things for
- * triangles with very large values of dadx or dady as it will
- * result in the subtraction and re-addition from a0 of a very
- * large number, which means we'll end up loosing a lot of the
- * fractional bits and precision from a0. the way to fix this is
- * to define a0 as the sample at a pixel center somewhere near vmin
- * instead - i'll switch to this later.
- */
- tri->inputs.a0[slot][i] = (a0 -
- (dadx * (info->v0[0][0] - info->pixel_offset) +
- dady * (info->v0[0][1] - info->pixel_offset)));
-}
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void perspective_coef( struct lp_rast_triangle *tri,
- const struct tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- /* premultiply by 1/w (v[0][3] is always 1/w):
- */
- float a0 = info->v0[vert_attr][i] * info->v0[0][3];
- float a1 = info->v1[vert_attr][i] * info->v1[0][3];
- float a2 = info->v2[vert_attr][i] * info->v2[0][3];
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
- float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
-
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a0 -
- (dadx * (info->v0[0][0] - info->pixel_offset) +
- dady * (info->v0[0][1] - info->pixel_offset)));
-}
-
-
-/**
- * Special coefficient setup for gl_FragCoord.
- * X and Y are trivial
- * Z and W are copied from position_coef which should have already been computed.
- * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
- */
-static void
-setup_fragcoord_coef(struct lp_rast_triangle *tri,
- const struct tri_info *info,
- unsigned slot,
- unsigned usage_mask)
-{
- /*X*/
- if (usage_mask & TGSI_WRITEMASK_X) {
- tri->inputs.a0[slot][0] = 0.0;
- tri->inputs.dadx[slot][0] = 1.0;
- tri->inputs.dady[slot][0] = 0.0;
- }
-
- /*Y*/
- if (usage_mask & TGSI_WRITEMASK_Y) {
- tri->inputs.a0[slot][1] = 0.0;
- tri->inputs.dadx[slot][1] = 0.0;
- tri->inputs.dady[slot][1] = 1.0;
- }
-
- /*Z*/
- if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(tri, info, slot, 0, 2);
- }
-
- /*W*/
- if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(tri, info, slot, 0, 3);
- }
-}
-
-
-/**
- * Setup the fragment input attribute with the front-facing value.
- * \param frontface is the triangle front facing?
- */
-static void setup_facing_coef( struct lp_rast_triangle *tri,
- unsigned slot,
- boolean frontface,
- unsigned usage_mask)
-{
- /* convert TRUE to 1.0 and FALSE to -1.0 */
- if (usage_mask & TGSI_WRITEMASK_X)
- constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 );
-
- if (usage_mask & TGSI_WRITEMASK_Y)
- constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_Z)
- constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_W)
- constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
-}
-
-
-/**
- * Compute the tri->coef[] array dadx, dady, a0 values.
- */
-static void setup_tri_coefficients( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- const struct tri_info *info)
-{
- unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
- unsigned slot;
- unsigned i;
-
- /* setup interpolation for all the remaining attributes:
- */
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
-
- switch (setup->fs.input[slot].interp) {
- case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(tri, slot+1, info->v0[vert_attr][i], i);
- }
- else {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(tri, slot+1, info->v2[vert_attr][i], i);
- }
- break;
-
- case LP_INTERP_LINEAR:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- linear_coef(tri, info, slot+1, vert_attr, i);
- break;
-
- case LP_INTERP_PERSPECTIVE:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- perspective_coef(tri, info, slot+1, vert_attr, i);
- fragcoord_usage_mask |= TGSI_WRITEMASK_W;
- break;
-
- case LP_INTERP_POSITION:
- /*
- * The generated pixel interpolators will pick up the coeffs from
- * slot 0, so all need to ensure that the usage mask is covers all
- * usages.
- */
- fragcoord_usage_mask |= usage_mask;
- break;
-
- case LP_INTERP_FACING:
- setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask);
- break;
-
- default:
- assert(0);
- }
- }
-
- /* The internal position input is in slot zero:
- */
- setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
-
- if (0) {
- for (i = 0; i < NUM_CHANNELS; i++) {
- float a0 = tri->inputs.a0 [0][i];
- float dadx = tri->inputs.dadx[0][i];
- float dady = tri->inputs.dady[0][i];
-
- debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
- "xyzw"[i],
- a0, dadx, dady);
- }
-
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
- for (i = 0; i < NUM_CHANNELS; i++) {
- if (usage_mask & (1 << i)) {
- float a0 = tri->inputs.a0 [1 + slot][i];
- float dadx = tri->inputs.dadx[1 + slot][i];
- float dady = tri->inputs.dady[1 + slot][i];
-
- debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
- slot,
- "xyzw"[i],
- a0, dadx, dady);
- }
- }
- }
- }
-}
-
-
@@ -329,11 +68,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
* \param nr_inputs number of fragment shader inputs
* \return pointer to triangle space
*/
-static INLINE struct lp_rast_triangle *
-alloc_triangle(struct lp_scene *scene,
- unsigned nr_inputs,
- unsigned nr_planes,
- unsigned *tri_size)
+struct lp_rast_triangle *
+lp_setup_alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
struct lp_rast_triangle *tri;
@@ -357,35 +96,71 @@ alloc_triangle(struct lp_scene *scene,
return tri;
}
+void
+lp_setup_print_vertex(struct lp_setup_context *setup,
+ const char *name,
+ const float (*v)[4])
+{
+ int i, j;
+
+ debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
+ name,
+ v[0][0], v[0][1], v[0][2], v[0][3]);
+
+ for (i = 0; i < setup->fs.nr_inputs; i++) {
+ const float *in = v[setup->fs.input[i].src_index];
+
+ debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
+ i,
+ name, setup->fs.input[i].src_index,
+ (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ",
+ (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ",
+ (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ",
+ (setup->fs.input[i].usage_mask & 0x8) ? "w" : " ");
+
+ for (j = 0; j < 4; j++)
+ if (setup->fs.input[i].usage_mask & (1<<j))
+ debug_printf("%.5f ", in[j]);
+
+ debug_printf("\n");
+ }
+}
+
/**
* Print triangle vertex attribs (for debug).
*/
-static void
-print_triangle(struct lp_setup_context *setup,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4])
+void
+lp_setup_print_triangle(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- uint i;
+ debug_printf("triangle\n");
- debug_printf("llvmpipe triangle\n");
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
- debug_printf(" v1[%d]: %f %f %f %f\n", i,
- v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
- }
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
- debug_printf(" v2[%d]: %f %f %f %f\n", i,
- v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
- }
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
- debug_printf(" v3[%d]: %f %f %f %f\n", i,
- v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
+ {
+ const float ex = v0[0][0] - v2[0][0];
+ const float ey = v0[0][1] - v2[0][1];
+ const float fx = v1[0][0] - v2[0][0];
+ const float fy = v1[0][1] - v2[0][1];
+
+ /* det = cross(e,f).z */
+ const float det = ex * fy - ey * fx;
+ if (det < 0.0f)
+ debug_printf(" - ccw\n");
+ else if (det > 0.0f)
+ debug_printf(" - cw\n");
+ else
+ debug_printf(" - zero area\n");
}
+
+ lp_setup_print_vertex(setup, "v0", v0);
+ lp_setup_print_vertex(setup, "v1", v1);
+ lp_setup_print_vertex(setup, "v2", v2);
}
-lp_rast_cmd lp_rast_tri_tab[8] = {
+lp_rast_cmd lp_rast_tri_tab[9] = {
NULL, /* should be impossible */
lp_rast_triangle_1,
lp_rast_triangle_2,
@@ -393,7 +168,8 @@ lp_rast_cmd lp_rast_tri_tab[8] = {
lp_rast_triangle_4,
lp_rast_triangle_5,
lp_rast_triangle_6,
- lp_rast_triangle_7
+ lp_rast_triangle_7,
+ lp_rast_triangle_8
};
/**
@@ -403,25 +179,27 @@ lp_rast_cmd lp_rast_tri_tab[8] = {
*/
static void
do_triangle_ccw(struct lp_setup_context *setup,
+ const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4],
- const float (*v3)[4],
boolean frontfacing )
{
-
struct lp_scene *scene = lp_setup_get_current_scene(setup);
- struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
struct lp_rast_triangle *tri;
- struct tri_info info;
+ int x[3];
+ int y[3];
+ float dy01, dy20;
+ float dx01, dx20;
+ float oneoverarea;
+ struct lp_tri_info info;
int area;
- int minx, maxx, miny, maxy;
- int ix0, ix1, iy0, iy1;
+ struct u_rect bbox;
unsigned tri_bytes;
int i;
int nr_planes = 3;
if (0)
- print_triangle(setup, v1, v2, v3);
+ lp_setup_print_triangle(setup, v0, v1, v2);
if (setup->scissor_test) {
nr_planes = 7;
@@ -430,38 +208,73 @@ do_triangle_ccw(struct lp_setup_context *setup,
nr_planes = 3;
}
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
+ x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
+ x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
+ y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
+ y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
+
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ }
+
+ if (bbox.x1 < bbox.x0 ||
+ bbox.y1 < bbox.y0) {
+ if (0) debug_printf("empty bounding box\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ u_rect_find_intersection(&setup->draw_region, &bbox);
- tri = alloc_triangle(scene,
- setup->fs.nr_inputs,
- nr_planes,
- &tri_bytes);
+ tri = lp_setup_alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &tri_bytes);
if (!tri)
return;
#ifdef DEBUG
- tri->v[0][0] = v1[0][0];
- tri->v[1][0] = v2[0][0];
- tri->v[2][0] = v3[0][0];
- tri->v[0][1] = v1[0][1];
- tri->v[1][1] = v2[0][1];
- tri->v[2][1] = v3[0][1];
+ tri->v[0][0] = v0[0][0];
+ tri->v[1][0] = v1[0][0];
+ tri->v[2][0] = v2[0][0];
+ tri->v[0][1] = v0[0][1];
+ tri->v[1][1] = v1[0][1];
+ tri->v[2][1] = v2[0][1];
#endif
- /* x/y positions in fixed point */
- info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
- info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
- info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset);
- info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
- info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
- info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset);
-
- tri->plane[0].dcdy = info.x[0] - info.x[1];
- tri->plane[1].dcdy = info.x[1] - info.x[2];
- tri->plane[2].dcdy = info.x[2] - info.x[0];
+ tri->plane[0].dcdy = x[0] - x[1];
+ tri->plane[1].dcdy = x[1] - x[2];
+ tri->plane[2].dcdy = x[2] - x[0];
- tri->plane[0].dcdx = info.y[0] - info.y[1];
- tri->plane[1].dcdx = info.y[1] - info.y[2];
- tri->plane[2].dcdx = info.y[2] - info.y[0];
+ tri->plane[0].dcdx = y[0] - y[1];
+ tri->plane[1].dcdx = y[1] - y[2];
+ tri->plane[2].dcdx = y[2] - y[0];
area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
tri->plane[2].dcdy * tri->plane[0].dcdx);
@@ -478,57 +291,29 @@ do_triangle_ccw(struct lp_setup_context *setup,
return;
}
- /* Bounding rectangle (in pixels) */
- {
- /* Yes this is necessary to accurately calculate bounding boxes
- * with the two fill-conventions we support. GL (normally) ends
- * up needing a bottom-left fill convention, which requires
- * slightly different rounding.
- */
- int adj = (setup->pixel_offset != 0) ? 1 : 0;
-
- minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
- miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
- maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
- }
-
- if (setup->scissor_test) {
- minx = MAX2(minx, setup->scissor.current.minx);
- maxx = MIN2(maxx, setup->scissor.current.maxx);
- miny = MAX2(miny, setup->scissor.current.miny);
- maxy = MIN2(maxy, setup->scissor.current.maxy);
- }
- else {
- minx = MAX2(minx, 0);
- miny = MAX2(miny, 0);
- maxx = MIN2(maxx, scene->fb.width);
- maxy = MIN2(maxy, scene->fb.height);
- }
-
-
- if (miny >= maxy || minx >= maxx) {
- lp_scene_putback_data( scene, tri_bytes );
- LP_COUNT(nr_culled_tris);
- return;
- }
/*
*/
- info.pixel_offset = setup->pixel_offset;
- info.v0 = v1;
- info.v1 = v2;
- info.v2 = v3;
- info.dx01 = info.v0[0][0] - info.v1[0][0];
- info.dx20 = info.v2[0][0] - info.v0[0][0];
- info.dy01 = info.v0[0][1] - info.v1[0][1];
- info.dy20 = info.v2[0][1] - info.v0[0][1];
- info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
+ dx01 = v0[0][0] - v1[0][0];
+ dy01 = v0[0][1] - v1[0][1];
+ dx20 = v2[0][0] - v0[0][0];
+ dy20 = v2[0][1] - v0[0][1];
+ oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
+
+ info.v0 = v0;
+ info.v1 = v1;
+ info.v2 = v2;
info.frontfacing = frontfacing;
+ info.x0_center = v0[0][0] - setup->pixel_offset;
+ info.y0_center = v0[0][1] - setup->pixel_offset;
+ info.dx01_ooa = dx01 * oneoverarea;
+ info.dx20_ooa = dx20 * oneoverarea;
+ info.dy01_ooa = dy01 * oneoverarea;
+ info.dy20_ooa = dy20 * oneoverarea;
/* Setup parameter interpolants:
*/
- setup_tri_coefficients( setup, tri, &info );
+ lp_setup_tri_coef( setup, &tri->inputs, &info );
tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
tri->inputs.state = setup->fs.stored;
@@ -541,7 +326,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* half-edge constants, will be interated over the whole render
* target.
*/
- plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i];
+ plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
/* correct for top-left vs. bottom-left fill convention.
*
@@ -612,29 +397,43 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (nr_planes == 7) {
tri->plane[3].dcdx = -1;
tri->plane[3].dcdy = 0;
- tri->plane[3].c = 1-minx;
+ tri->plane[3].c = 1-bbox.x0;
tri->plane[3].ei = 0;
tri->plane[3].eo = 1;
tri->plane[4].dcdx = 1;
tri->plane[4].dcdy = 0;
- tri->plane[4].c = maxx;
+ tri->plane[4].c = bbox.x1+1;
tri->plane[4].ei = -1;
tri->plane[4].eo = 0;
tri->plane[5].dcdx = 0;
tri->plane[5].dcdy = 1;
- tri->plane[5].c = 1-miny;
+ tri->plane[5].c = 1-bbox.y0;
tri->plane[5].ei = 0;
tri->plane[5].eo = 1;
tri->plane[6].dcdx = 0;
tri->plane[6].dcdy = -1;
- tri->plane[6].c = maxy;
+ tri->plane[6].c = bbox.y1+1;
tri->plane[6].ei = -1;
tri->plane[6].eo = 0;
}
+ lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
+}
+
+
+void
+lp_setup_bin_triangle( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ const struct u_rect *bbox,
+ int nr_planes )
+{
+ struct lp_scene *scene = setup->scene;
+ struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
+ int ix0, ix1, iy0, iy1;
+ int i;
/*
* All fields of 'tri' are now set. The remaining code here is
@@ -643,10 +442,30 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Convert to tile coordinates, and inclusive ranges:
*/
- ix0 = minx / TILE_SIZE;
- iy0 = miny / TILE_SIZE;
- ix1 = (maxx-1) / TILE_SIZE;
- iy1 = (maxy-1) / TILE_SIZE;
+ if (nr_planes == 3) {
+ int ix0 = bbox->x0 / 16;
+ int iy0 = bbox->y0 / 16;
+ int ix1 = bbox->x1 / 16;
+ int iy1 = bbox->y1 / 16;
+
+ if (iy0 == iy1 && ix0 == ix1)
+ {
+
+ /* Triangle is contained in a single 16x16 block:
+ */
+ int mask = (ix0 & 3) | ((iy0 & 3) << 4);
+
+ lp_scene_bin_command( scene, ix0/4, iy0/4,
+ lp_rast_triangle_3_16,
+ lp_rast_arg_triangle(tri, mask) );
+ return;
+ }
+ }
+
+ ix0 = bbox->x0 / TILE_SIZE;
+ iy0 = bbox->y0 / TILE_SIZE;
+ ix1 = bbox->x1 / TILE_SIZE;
+ iy1 = bbox->y1 / TILE_SIZE;
/*
* Clamp to framebuffer size
@@ -799,9 +618,10 @@ static void triangle_both( struct lp_setup_context *setup,
const float fy = v1[0][1] - v2[0][1];
/* det = cross(e,f).z */
- if (ex * fy - ey * fx < 0.0f)
+ const float det = ex * fy - ey * fx;
+ if (det < 0.0f)
triangle_ccw( setup, v0, v1, v2 );
- else
+ else if (det > 0.0f)
triangle_cw( setup, v0, v1, v2 );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 77bec4640b..edd723f65f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -74,6 +74,15 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
vs_index = draw_find_shader_output(llvmpipe->draw,
lpfs->info.input_semantic_name[i],
lpfs->info.input_semantic_index[i]);
+ if (vs_index < 0) {
+ /*
+ * This can happen with sprite coordinates - the vertex
+ * shader doesn't need to provide an output as we generate
+ * them internally. However, lets keep pretending that there
+ * is something there to not confuse other code.
+ */
+ vs_index = 0;
+ }
/* This can be pre-computed, except for flatshade:
*/
@@ -125,6 +134,17 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
inputs[i].src_index = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
}
+
+ /* Figure out if we need pointsize as well.
+ */
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_PSIZE, 0);
+
+ if (vs_index > 0) {
+ llvmpipe->psize_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ }
+
llvmpipe->num_inputs = lpfs->info.num_inputs;
draw_compute_vertex_size(vinfo);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index dbca49a2ef..33c1a49efe 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -808,7 +808,7 @@ generate_variant(struct llvmpipe_context *lp,
variant->list_item_local.base = variant;
variant->no = shader->variants_created++;
- memcpy(&variant->key, key, sizeof *key);
+ memcpy(&variant->key, key, shader->variant_key_size);
if (gallivm_debug & GALLIVM_DEBUG_IR) {
debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n",
@@ -840,6 +840,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
struct lp_fragment_shader *shader;
+ int nr_samplers;
shader = CALLOC_STRUCT(lp_fragment_shader);
if (!shader)
@@ -854,6 +855,11 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
/* we need to keep a local copy of the tokens */
shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+ nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
+ sampler[nr_samplers]);
+
if (LP_DEBUG & DEBUG_TGSI) {
unsigned attrib;
debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader);
@@ -921,7 +927,6 @@ static void
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- struct pipe_fence_handle *fence = NULL;
struct lp_fragment_shader *shader = fs;
struct lp_fs_variant_list_item *li;
@@ -934,12 +939,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
* Flushing alone might not sufficient we need to wait on it too.
*/
- llvmpipe_flush(pipe, 0, &fence);
-
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
@@ -1027,7 +1027,7 @@ make_variant_key(struct llvmpipe_context *lp,
{
unsigned i;
- memset(key, 0, sizeof *key);
+ memset(key, 0, shader->variant_key_size);
if (lp->framebuffer.zsbuf) {
if (lp->depth_stencil->depth.enabled) {
@@ -1097,9 +1097,17 @@ make_variant_key(struct llvmpipe_context *lp,
}
}
- for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
- if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
- lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]);
+ /* This value will be the same for all the variants of a given shader:
+ */
+ key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ for(i = 0; i < key->nr_samplers; ++i) {
+ if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ lp_sampler_static_state(&key->sampler[i],
+ lp->fragment_sampler_views[i],
+ lp->sampler[i]);
+ }
+ }
}
/**
@@ -1118,7 +1126,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
- if(memcmp(&li->base->key, &key, sizeof key) == 0) {
+ if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
variant = li->base;
break;
}
@@ -1134,19 +1142,14 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
unsigned i;
if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) {
struct pipe_context *pipe = &lp->pipe;
- struct pipe_fence_handle *fence = NULL;
/*
* XXX: we need to flush the context until we have some sort of reference
* counting in fragment shaders as they may still be binned
* Flushing alone might not be sufficient we need to wait on it too.
*/
- llvmpipe_flush(pipe, 0, &fence);
+ llvmpipe_finish(pipe, __FUNCTION__);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) {
struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list);
remove_shader_variant(lp, item->base);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 37900fc544..33c480010d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -53,13 +53,10 @@ struct lp_fragment_shader_variant_key
struct pipe_blend_state blend;
enum pipe_format zsbuf_format;
unsigned nr_cbufs:8;
+ unsigned nr_samplers:8; /* actually derivable from just the shader */
unsigned flatshade:1;
unsigned occlusion_count:1;
- struct {
- ubyte colormask;
- } cbuf_blend[PIPE_MAX_COLOR_BUFS];
-
struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
};
@@ -97,6 +94,7 @@ struct lp_fragment_shader
struct lp_fs_variant_list_item variants;
/* For debugging/profiling purposes */
+ unsigned variant_key_size;
unsigned no;
unsigned variants_created;
unsigned variants_cached;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
index afd3e0b21c..0bad7320f3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -73,7 +73,13 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
llvmpipe->rasterizer->gl_rasterization_rules);
lp_setup_set_flatshade_first( llvmpipe->setup,
llvmpipe->rasterizer->flatshade_first);
- }
+ lp_setup_set_line_state( llvmpipe->setup,
+ llvmpipe->rasterizer->line_width);
+ lp_setup_set_point_state( llvmpipe->setup,
+ llvmpipe->rasterizer->point_size,
+ llvmpipe->rasterizer->point_size_per_vertex,
+ llvmpipe->rasterizer->sprite_coord_enable);
+ }
llvmpipe->dirty |= LP_NEW_RASTERIZER;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index d86e66b4fb..fb29423dd3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -100,7 +100,7 @@ llvmpipe_set_index_buffer(struct pipe_context *pipe,
else
memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer));
- /* TODO make this more like a state */
+ draw_set_index_buffer(llvmpipe->draw, ib);
}
void
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index f761e82850..63ddc669c2 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -68,14 +68,16 @@ lp_resource_copy(struct pipe_context *pipe,
0, /* flush_flags */
FALSE, /* read_only */
TRUE, /* cpu_access */
- FALSE); /* do_not_block */
+ FALSE,
+ "blit dst"); /* do_not_block */
llvmpipe_flush_resource(pipe,
src, subsrc.face, subsrc.level,
0, /* flush_flags */
TRUE, /* read_only */
TRUE, /* cpu_access */
- FALSE); /* do_not_block */
+ FALSE,
+ "blit src"); /* do_not_block */
/*
printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n",
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 25112c10a6..5832ea2744 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -67,6 +67,7 @@ resource_is_texture(const struct pipe_resource *resource)
return FALSE;
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
return TRUE;
@@ -583,7 +584,8 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
0, /* flush_flags */
read_only,
TRUE, /* cpu_access */
- do_not_block)) {
+ do_not_block,
+ "transfer dest")) {
/*
* It would have blocked, but state tracker requested no to.
*/
diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h
index f44979e562..d9f35b4c4b 100644
--- a/src/gallium/drivers/nouveau/nouveau_class.h
+++ b/src/gallium/drivers/nouveau/nouveau_class.h
@@ -6189,6 +6189,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000
#define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0
#define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff
+#define NV34TCL_FLATSHADE_FIRST 0x00001454
+#define NV34TCL_EDGEFLAG_ENABLE 0x0000145c
#define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478
#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1)
#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5)
@@ -6222,10 +6224,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV34TCL_VTXFMT__SIZE 0x00000010
#define NV34TCL_VTXFMT_TYPE_SHIFT 0
#define NV34TCL_VTXFMT_TYPE_MASK 0x0000000f
-#define NV34TCL_VTXFMT_TYPE_FLOAT 0x00000002
-#define NV34TCL_VTXFMT_TYPE_HALF 0x00000003
-#define NV34TCL_VTXFMT_TYPE_UBYTE 0x00000004
-#define NV34TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV34TCL_VTXFMT_TYPE_16_SNORM 0x00000001
+#define NV34TCL_VTXFMT_TYPE_32_FLOAT 0x00000002
+#define NV34TCL_VTXFMT_TYPE_16_FLOAT 0x00000003
+#define NV34TCL_VTXFMT_TYPE_8_UNORM 0x00000004
+#define NV34TCL_VTXFMT_TYPE_16_SSCALED 0x00000005
+#define NV34TCL_VTXFMT_TYPE_11_11_10_SNORM 0x00000006
+#define NV34TCL_VTXFMT_TYPE_8_USCALED 0x00000007
#define NV34TCL_VTXFMT_SIZE_SHIFT 4
#define NV34TCL_VTXFMT_SIZE_MASK 0x000000f0
#define NV34TCL_VTXFMT_STRIDE_SHIFT 8
@@ -6368,6 +6373,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00
#define NV34TCL_TX_FORMAT_FORMAT_A8L8_RECT 0x00002000
#define NV34TCL_TX_FORMAT_FORMAT_DSDT8 0x00002800
+#define NV34TCL_TX_FORMAT_FORMAT_Z24 0x2a00
+#define NV34TCL_TX_FORMAT_FORMAT_Z24_RECT 0x2b00 /* XXX: guess! */
+#define NV34TCL_TX_FORMAT_FORMAT_Z16 0x2c00
+#define NV34TCL_TX_FORMAT_FORMAT_Z16_RECT 0x2d00 /* XXX: guess! */
#define NV34TCL_TX_FORMAT_FORMAT_HILO16 0x00003300
#define NV34TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600
#define NV34TCL_TX_FORMAT_FORMAT_HILO8 0x00004400
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 513e5e02bc..ebb21a6e5a 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -258,6 +258,7 @@ nouveau_screen_fini(struct nouveau_screen *screen)
{
struct pipe_winsys *ws = screen->base.winsys;
nouveau_channel_free(&screen->channel);
- ws->destroy(ws);
+ if (ws)
+ ws->destroy(ws);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
deleted file mode 100644
index b165f7a611..0000000000
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef __NOUVEAU_UTIL_H__
-#define __NOUVEAU_UTIL_H__
-
-/* Determine how many vertices can be pushed into the command stream.
- * Where the remaining space isn't large enough to represent all verices,
- * split the buffer at primitive boundaries.
- *
- * Returns a count of vertices that can be rendered, and an index to
- * restart drawing at after a flush.
- */
-static INLINE unsigned
-nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
- unsigned mode, unsigned start, unsigned count,
- unsigned *restart)
-{
- int max, adj = 0;
-
- max = remaining - overhead;
- if (max < 0)
- return 0;
-
- max *= vpp;
- if (max >= count)
- return count;
-
- switch (mode) {
- case PIPE_PRIM_POINTS:
- break;
- case PIPE_PRIM_LINES:
- max = max & 1;
- break;
- case PIPE_PRIM_TRIANGLES:
- max = max - (max % 3);
- break;
- case PIPE_PRIM_QUADS:
- max = max & ~3;
- break;
- case PIPE_PRIM_LINE_LOOP:
- case PIPE_PRIM_LINE_STRIP:
- if (max < 2)
- max = 0;
- adj = 1;
- break;
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLE_FAN:
- if (max < 3)
- max = 0;
- adj = 2;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- if (max < 4)
- max = 0;
- adj = 3;
- break;
- default:
- assert(0);
- }
-
- *restart = start + max - adj;
- return max;
-}
-
-/* Integer base-2 logarithm, rounded towards zero. */
-static INLINE unsigned log2i(unsigned i)
-{
- unsigned r = 0;
-
- if (i & 0xffff0000) {
- i >>= 16;
- r += 16;
- }
- if (i & 0x0000ff00) {
- i >>= 8;
- r += 8;
- }
- if (i & 0x000000f0) {
- i >>= 4;
- r += 4;
- }
- if (i & 0x0000000c) {
- i >>= 2;
- r += 2;
- }
- if (i & 0x00000002) {
- r += 1;
- }
- return r;
-}
-
-#endif
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 12b5ad106c..dd0e8fd41b 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -238,7 +238,8 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
unsigned stride;
/* Only supports 2D, non-mipmapped textures for the moment */
- if (template->target != PIPE_TEXTURE_2D ||
+ if ((template->target != PIPE_TEXTURE_2D &&
+ template->target != PIPE_TEXTURE_RECT) ||
template->last_level != 0 ||
template->depth0 != 1)
return NULL;
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
index 0091927a98..380f69406a 100644
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -108,8 +108,9 @@ emit_vertex(struct push_context *ctx, unsigned n)
int i;
if (ctx->edgeflag_attr < 16) {
- float *edgeflag = (uint8_t *)ctx->attr[ctx->edgeflag_attr].map +
- ctx->attr[ctx->edgeflag_attr].stride * n;
+ float *edgeflag = (float *)
+ ((uint8_t *)ctx->attr[ctx->edgeflag_attr].map +
+ ctx->attr[ctx->edgeflag_attr].stride * n);
if (*edgeflag != ctx->edgeflag) {
BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 5535818370..658324ec5b 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -83,6 +83,9 @@ nv50_tex_construct(struct nv50_sampler_view *view)
case PIPE_TEXTURE_2D:
tic[2] |= NV50TIC_0_2_TARGET_2D;
break;
+ case PIPE_TEXTURE_RECT:
+ tic[2] |= NV50TIC_0_2_TARGET_RECT;
+ break;
case PIPE_TEXTURE_3D:
tic[2] |= NV50TIC_0_2_TARGET_3D;
break;
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index c1d57ca396..6cbbad699e 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -4,7 +4,7 @@ include $(TOP)/configs/current
LIBNAME = nvfx
C_SOURCES = \
- nv04_surface_2d.c \
+ nv04_2d.c \
nvfx_buffer.c \
nvfx_context.c \
nvfx_clear.c \
@@ -14,6 +14,7 @@ C_SOURCES = \
nv30_fragtex.c \
nv40_fragtex.c \
nvfx_miptree.c \
+ nvfx_push.c \
nvfx_query.c \
nvfx_resource.c \
nvfx_screen.c \
diff --git a/src/gallium/drivers/nvfx/SConscript b/src/gallium/drivers/nvfx/SConscript
index 02d931b10e..80e3ef2257 100644
--- a/src/gallium/drivers/nvfx/SConscript
+++ b/src/gallium/drivers/nvfx/SConscript
@@ -9,7 +9,7 @@ env.PrependUnique(delete_existing=1, CPPPATH = [
nvfx = env.ConvenienceLibrary(
target = 'nvfx',
source = [
- 'nv04_surface_2d.c',
+ 'nv04_2d.c',
'nvfx_buffer.c',
'nvfx_context.c',
'nvfx_clear.c',
@@ -19,6 +19,7 @@ nvfx = env.ConvenienceLibrary(
'nv30_fragtex.c',
'nv40_fragtex.c',
'nvfx_miptree.c',
+ 'nvfx_push.c',
'nvfx_query.c',
'nvfx_resource.c',
'nvfx_screen.c',
diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c
new file mode 100644
index 0000000000..c05312219b
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv04_2d.c
@@ -0,0 +1,1341 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Ben Skeggs
+ * Copyright 2009 Younes Manton
+ * Copyright 2010 Luca Barbieri
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/* this code has no Mesa or Gallium dependency and can be reused in the classic Mesa driver or DDX */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <nouveau/nouveau_class.h>
+#include <nouveau/nouveau_device.h>
+#include <nouveau/nouveau_pushbuf.h>
+#include <nouveau/nouveau_channel.h>
+#include <nouveau/nouveau_bo.h>
+#include <nouveau/nouveau_notifier.h>
+#include <nouveau/nouveau_grobj.h>
+#include "nv04_2d.h"
+
+/* avoid depending on Mesa/Gallium */
+#ifdef __GNUC__
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) !!(x)
+#define unlikely(x) !!(x)
+#endif
+
+#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
+#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )
+
+struct nv04_2d_context
+{
+ struct nouveau_notifier *ntfy;
+ struct nouveau_grobj *surf2d;
+ struct nouveau_grobj *swzsurf;
+ struct nouveau_grobj *m2mf;
+ struct nouveau_grobj *rect;
+ struct nouveau_grobj *sifm;
+ struct nouveau_grobj *blit;
+};
+
+static inline int
+align(int value, int alignment)
+{
+ return (value + alignment - 1) & ~(alignment - 1);
+}
+
+static inline int
+util_is_pot(unsigned x)
+{
+ return (x & (x - 1)) == 0;
+}
+
+/* Integer base-2 logarithm, rounded towards zero. */
+static inline unsigned log2i(unsigned i)
+{
+ unsigned r = 0;
+
+ if (i & 0xffff0000) {
+ i >>= 16;
+ r += 16;
+ }
+ if (i & 0x0000ff00) {
+ i >>= 8;
+ r += 8;
+ }
+ if (i & 0x000000f0) {
+ i >>= 4;
+ r += 4;
+ }
+ if (i & 0x0000000c) {
+ i >>= 2;
+ r += 2;
+ }
+ if (i & 0x00000002) {
+ r += 1;
+ }
+ return r;
+}
+
+//#define NV04_REGION_DEBUG
+
+// Yes, we really want to inline everything, since all the functions are used only once
+#if defined(__GNUC__) && defined(DEBUG)
+#define inline __attribute__((always_inline)) inline
+#endif
+
+static inline unsigned
+nv04_swizzle_bits_square(unsigned x, unsigned y)
+{
+ unsigned u = (x & 0x001) << 0 |
+ (x & 0x002) << 1 |
+ (x & 0x004) << 2 |
+ (x & 0x008) << 3 |
+ (x & 0x010) << 4 |
+ (x & 0x020) << 5 |
+ (x & 0x040) << 6 |
+ (x & 0x080) << 7 |
+ (x & 0x100) << 8 |
+ (x & 0x200) << 9 |
+ (x & 0x400) << 10 |
+ (x & 0x800) << 11;
+
+ unsigned v = (y & 0x001) << 1 |
+ (y & 0x002) << 2 |
+ (y & 0x004) << 3 |
+ (y & 0x008) << 4 |
+ (y & 0x010) << 5 |
+ (y & 0x020) << 6 |
+ (y & 0x040) << 7 |
+ (y & 0x080) << 8 |
+ (y & 0x100) << 9 |
+ (y & 0x200) << 10 |
+ (y & 0x400) << 11 |
+ (y & 0x800) << 12;
+ return v | u;
+}
+
+/* rectangular swizzled textures are linear concatenations of swizzled square tiles */
+static inline unsigned
+nv04_swizzle_bits_2d(unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ if(h <= 1)
+ return x;
+ else
+ {
+ unsigned s = MIN2(w, h);
+ unsigned m = s - 1;
+ return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
+ }
+}
+
+// general 3D texture case
+static inline unsigned
+nv04_swizzle_bits(unsigned x, unsigned y, unsigned z, unsigned w, unsigned h, unsigned d)
+{
+ if(d <= 1)
+ return nv04_swizzle_bits_2d(x, y, w, h);
+ else
+ {
+ // TODO: autogenerate code for all possible texture sizes (13 * 13 * 13 with dims <= 4096) and do a single indirect call
+ unsigned v = 0;
+ w >>= 1;
+ h >>= 1;
+ d >>= 1;
+ for(int i = 0;;)
+ {
+ int oldi = i;
+ if(likely(w))
+ {
+ v |= (x & 1) << i;
+ x >>= 1;
+ w >>= 1;
+ ++i;
+ }
+
+ if(likely(h))
+ {
+ v |= (y & 1) << i;
+ y >>= 1;
+ h >>= 1;
+ ++i;
+ }
+
+ if(likely(d))
+ {
+ v |= (z & 1) << i;
+ z >>= 1;
+ d >>= 1;
+ ++i;
+ }
+
+ if(i == oldi)
+ break;
+ }
+ return v;
+ }
+}
+
+unsigned
+nv04_region_begin(struct nv04_region* rgn, unsigned w, unsigned h)
+{
+ if(rgn->pitch)
+ return rgn->pitch * rgn->y + (rgn->x << rgn->bpps);
+ else
+ return nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d) << rgn->bpps;
+}
+
+unsigned
+nv04_region_end(struct nv04_region* rgn, unsigned w, unsigned h)
+{
+ if(rgn->pitch)
+ return rgn->pitch * (rgn->y + h - 1) + ((rgn->x + w) << rgn->bpps);
+ else
+ return (nv04_swizzle_bits(rgn->x + w - 1, rgn->y + h - 1, rgn->z, rgn->w, rgn->h, rgn->d) + 1) << rgn->bpps;
+}
+
+// *pitch = -1 -> use 3D swizzling for (x, y), *pitch = 0 -> use 2D swizzling, other *pitch -> use linear calculations
+// returns 2 if pixel order is 3D-swizzled and 1 if subrect is 2D-swizzled
+/* *pitch == -1 ret = 0 -> 3D swizzled subrect
+ * *pitch == 0 ret = 0 -> 2D swizzled subrect
+ * *pitch > 0 ret = 0 -> linear subrect
+ * *pitch > 0 ret = 1 -> linear subrect, but with swizzled 3D data inside
+ */
+
+static inline void
+nv04_region_print(struct nv04_region* rgn)
+{
+ fprintf(stderr, "<%i[%i]> ", rgn->bo->handle, rgn->offset);
+ if(rgn->pitch)
+ fprintf(stderr, "lin %i", rgn->pitch);
+ else
+ fprintf(stderr, "swz %ix%ix%i", rgn->w, rgn->h, rgn->d);
+ fprintf(stderr, " (%i, %i, %i)", rgn->x, rgn->y, rgn->z);
+}
+
+static inline void
+nv04_region_assert(struct nv04_region* rgn, unsigned w, unsigned h)
+{
+ unsigned end = rgn->offset + nv04_region_end(rgn, w, h);
+
+ assert(rgn->offset <= (int)rgn->bo->size);
+ assert(end <= rgn->bo->size);
+ (void) end;
+ if(!rgn->pitch) {
+ assert(util_is_pot(rgn->w));
+ assert(util_is_pot(rgn->h));
+ }
+}
+
+/* determine if region can be linearized or fake-linearized */
+static inline int
+nv04_region_is_contiguous(struct nv04_region* rgn, int w, int h)
+{
+ int surf_min;
+ int rect_min;
+
+ if(rgn->pitch)
+ return rgn->pitch == w << rgn->bpps;
+
+ // redundant, but this is the fast path for the common case
+ if(w == rgn->w && h == rgn->h && rgn->d <= 1)
+ return 1;
+
+ // must be POT
+ if((w & (w - 1)) || (h & (h - 1)))
+ return 0;
+
+ // must be aligned
+ if((rgn->x & (w - 1)) || (rgn->y & (h - 1)))
+ return 0;
+
+ if(rgn->d > 1)
+ return 0;
+
+ surf_min = MIN2(rgn->w, rgn->h);
+ rect_min = MIN2(w, h);
+
+ if((rect_min == surf_min) || (w == h) || (w == 2 * h))
+ return 1;
+
+ return 0;
+}
+
+// double the pitch until it is larger than the alignment, or the height becomes odd or 1
+static inline void
+nv04_region_contiguous_shape(struct nv04_region* rgn, int* w, int* h, int align)
+{
+ while(!(*h & 1) && (*w << rgn->bpps) < (1 << align))
+ {
+ *w <<= 1;
+ *h >>= 1;
+ }
+
+ while((*w << rgn->bpps) > 16384 && !(*w & 1))
+ {
+ *w >>= 1;
+ *h <<= 1;
+ }
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tCONTIGUOUS %ix%i\n", *w, *h);
+#endif
+}
+
+static inline void
+nv04_region_linearize_contiguous(struct nv04_region* rgn, unsigned w, unsigned h)
+{
+ int pos;
+ if(rgn->pitch)
+ {
+ rgn->offset += rgn->y * rgn->pitch + (rgn->x << rgn->bpps);
+ rgn->x = 0;
+ rgn->y = 0;
+ }
+ else
+ {
+ rgn->offset += (rgn->w * rgn->h * rgn->z) << rgn->bpps;
+ pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d);
+ rgn->x = pos & (w - 1);
+ rgn->y = pos / w;
+ }
+ rgn->pitch = w << rgn->bpps;
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tLINEARIZE ");
+ nv04_region_print(rgn);
+ fprintf(stderr, "\n");
+#endif
+}
+
+ /* preserve the offset! */
+ /*
+ rgn->pitch = util_format_get_stride(rgn->format, w);
+ int pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d);
+ rgn->x = pos & (w - 1);
+ rgn->y = pos & ~(w - 1);
+ */
+
+ /*
+ rgn->offset +=
+ rgn->pitch = util_format_get_stride(rgn->format, w);
+ rgn->x = 0;
+ rgn->y = 0;
+ */
+
+/* This code will get used for, and always succeed on:
+ * - 4x2 1bpp swizzled texture mipmap levels
+ * - linear regions created by linearization
+ *
+ * This code will get used for, and MAY work for:
+ * - misaligned texture blanket
+ * - linear surfaces created without wide_pitch (in this case, it will only work if we are lucky)
+ *
+ * The general case requires splitting the region in 2.
+ */
+static inline int
+nv04_region_do_align_offset(struct nv04_region* rgn, unsigned w, unsigned h, int shift)
+{
+ if(rgn->pitch > 0)
+ {
+ int delta;
+
+ assert(!(rgn->offset & ((1 << rgn->bpps) - 1))); // fatal!
+ delta = rgn->offset & ((1 << shift) - 1);
+
+ if(h <= 1)
+ {
+ rgn->x += delta >> rgn->bpps;
+ rgn->offset -= delta;
+ rgn->pitch = align((rgn->x + w) << rgn->bpps, 1 << shift);
+ }
+ else
+ {
+ int newxo = (rgn->x << rgn->bpps) + delta;
+ int dy = newxo / rgn->pitch;
+ newxo -= dy * rgn->pitch;
+ if((newxo + (w << rgn->bpps)) > rgn->pitch)
+ {
+ // TODO: split the region into two rectangles (!) if *really* necessary, unless the hardware actually supports "wrapping" rectangles
+ // this does not happen if the surface is pitch-aligned, which it should always be
+ assert(0);
+ return -1;
+ }
+ rgn->x = newxo >> rgn->bpps;
+ rgn->y += dy;
+ }
+ }
+ else
+ {
+ int size;
+ int min;
+ int v;
+
+ // we don't care about the alignment of 3D surfaces since the 2D engine can't use them
+ if(rgn->d < 0)
+ return -1;
+
+ min = MIN2(rgn->w, rgn->h);
+ size = min * min << rgn->bpps;
+
+ // this is unfixable, and should not be happening
+ if(rgn->offset & (size - 1))
+ return -1;
+
+ v = (rgn->offset & ((1 << shift) - 1)) / size;
+ rgn->offset -= v * size;
+
+ if(rgn->h == min)
+ {
+ unsigned w;
+ rgn->x += rgn->h * v;
+ w = rgn->w + rgn->h * v;
+
+ while(rgn->w < w)
+ rgn->w += rgn->w;
+ }
+ else
+ {
+ unsigned h;
+ rgn->y += rgn->w * v;
+ h = rgn->h + rgn->w * v;
+
+ while(rgn->h < h)
+ rgn->h += rgn->h;
+ }
+ }
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tALIGNED ");
+ nv04_region_print(rgn);
+ fprintf(stderr, "\n");
+#endif
+ return 0;
+}
+
+// both pitch and shift
+// will leave the region unchanged if it fails
+static inline int
+nv04_region_align(struct nv04_region* rgn, unsigned w, unsigned h, int shift)
+{
+ if(rgn->pitch & ((1 << shift) - 1))
+ {
+ if(h == 1)
+ goto do_align; /* this will fix pitch too in this case */
+ else
+ return -1;
+ }
+
+ if(rgn->offset & ((1 << shift) - 1))
+ {
+ do_align:
+ if(nv04_region_do_align_offset(rgn, w, h, shift))
+ return -1;
+ }
+ return 0;
+}
+
+/* this contains 22 different copy loops after preprocessing. unfortunately, it's necessary */
+void
+nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h)
+{
+ uint8_t* mdst;
+ uint8_t* msrc;
+ int size;
+
+ if(dst->bo != src->bo)
+ {
+ nouveau_bo_map(dst->bo, NOUVEAU_BO_WR);
+ nouveau_bo_map(src->bo, NOUVEAU_BO_RD);
+ }
+ else
+ nouveau_bo_map(dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_RD);
+
+ mdst = (uint8_t*)dst->bo->map + dst->offset;
+ msrc = (uint8_t*)src->bo->map + src->offset;
+
+ size = w << dst->bpps;
+
+ nv04_region_assert(dst, w, h);
+ nv04_region_assert(src, w, h);
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tRGN_COPY_CPU [%i, %i: %i] ", w, h, dst->bpps);
+ for(int i = 0; i < 2; ++i)
+ {
+ nv04_region_print(i ? src : dst);
+ fprintf(stderr, i ? "\n" : " <- ");
+ }
+
+// for(int i = 0; i < 16; ++i)
+// fprintf(stderr, "%02x ", msrc[i]);
+// fprintf(stderr, "\n");
+#endif
+
+ // TODO: support overlapping copies!
+ if(src->pitch && dst->pitch)
+ {
+ mdst += dst->y * dst->pitch + (dst->x << dst->bpps);
+ msrc += src->y * src->pitch + (src->x << src->bpps);
+ if(dst->bo != src->bo)
+ goto simple;
+ else if(mdst < msrc)
+ {
+ if(mdst + size <= msrc)
+ {
+simple:
+ for(int iy = 0; iy < h; ++iy)
+ {
+ assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
+ assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
+ memcpy(mdst, msrc, size);
+ msrc += src->pitch; mdst += dst->pitch;
+ }
+ }
+ else
+ {
+ for(int iy = 0; iy < h; ++iy)
+ {
+ assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
+ assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
+ memmove(mdst, msrc, size);
+ msrc += src->pitch; mdst += dst->pitch;
+ }
+ }
+ }
+ else
+ {
+ /* copy backwards so we don't destroy data we have to read yet */
+ if(msrc + size <= mdst)
+ {
+ for(int iy = h - 1; iy >= 0; --iy)
+ {
+ assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
+ assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
+ memcpy(mdst, msrc, size);
+ msrc += src->pitch; mdst += dst->pitch;
+ }
+ }
+ else
+ {
+ for(int iy = h - 1; iy >= 0; --iy)
+ {
+ assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
+ assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
+ memmove(mdst, msrc, size);
+ msrc += src->pitch; mdst += dst->pitch;
+ }
+ }
+ }
+ }
+ else
+ {
+ int* dswx = NULL;
+ int* dswy = NULL;
+ int* sswx = NULL;
+ int* sswy = NULL;
+ int dir;
+
+ if(!dst->pitch)
+ {
+ dswx = alloca(w * sizeof(int));
+ for(int ix = 0; ix < w; ++ix) // we are adding, so z cannot be contributed by both
+ dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, 0, dst->w, dst->h, dst->d);
+ dswy = alloca(h * sizeof(int));
+ for(int iy = 0; iy < h; ++iy)
+ dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d);
+ }
+
+ if(!src->pitch)
+ {
+ sswx = alloca(w * sizeof(int));
+ for(int ix = 0; ix < w; ++ix)
+ sswx[ix] = nv04_swizzle_bits(src->x + ix, 0, 0, src->w, src->h, src->d);
+ sswy = alloca(h * sizeof(int));
+ for(int iy = 0; iy < h; ++iy)
+ sswy[iy] = nv04_swizzle_bits(0, src->y + iy, src->z, src->w, src->h, src->d);
+ }
+
+ dir = 1;
+ /* do backwards copies for overlapping swizzled surfaces */
+ if(dst->pitch == src->pitch && dst->offset == src->offset)
+ {
+ if(dst->y > src->y || (dst->y == src->y && dst->x > src->x))
+ dir = -1;
+ }
+
+#define SWIZZLED_COPY_LOOPS
+ if(dir == 1)
+ {
+ int dir = 1;
+#define LOOP_Y for(int iy = 0; iy < h; ++iy)
+#define LOOP_X for(int ix = 0; ix < w; ++ix)
+#include "nv04_2d_loops.h"
+#undef LOOP_X
+#undef LOOP_Y
+ }
+ else
+ {
+ int dir = -1;
+#define LOOP_Y for(int iy = h - 1; iy >= 0; --iy)
+#define LOOP_X for(int ix = w - 1; ix >= 0; --ix)
+#include "nv04_2d_loops.h"
+#undef LOOP_X
+#undef LOOP_Y
+ }
+#undef SWIZZLED_COPY_LOOP
+ }
+
+ if(src->bo != dst->bo)
+ nouveau_bo_unmap(src->bo);
+ nouveau_bo_unmap(dst->bo);
+}
+
+/* TODO: if the destination is swizzled, we are doing random writes, which causes write combining to fail
+ * the alternative is to read, modify and copy back, which may or may not be faster
+ * loading 3D textures is a common case that hits this and could probably benefit from the temporary
+ */
+void
+nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value)
+{
+ uint8_t* mdst = (nouveau_bo_map(dst->bo, NOUVEAU_BO_WR), (uint8_t*)dst->bo->map + dst->offset);
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tRGN_FILL_CPU ");
+ nv04_region_print(dst);
+ fprintf(stderr, "\n");
+#endif
+
+ nv04_region_assert(dst, w, h);
+
+ if(dst->pitch)
+ {
+ unsigned size = w << dst->bpps;
+
+#define FILL(T) do { \
+ for(int iy = 0; iy < h; ++iy) \
+ { \
+ assert((char*)((T*)mdst + w) <= (char*)dst->bo->map + dst->bo->size); \
+ for(int ix = 0; ix < w; ++ix) \
+ ((T*)mdst)[ix] = (T)value; \
+ mdst += dst->pitch; \
+ } \
+ } while(0)
+
+ mdst += dst->y * dst->pitch + (dst->x << dst->bpps);
+
+ if(dst->bpps == 0)
+ {
+ms:
+ assert(mdst + size * h <= (uint8_t*)dst->bo->map + dst->bo->size);
+ if(size == dst->pitch)
+ memset(mdst, (uint8_t)value, size * h);
+ else
+ {
+ for(int iy = 0; iy < h; ++iy)
+ {
+ assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
+ memset(mdst, (uint8_t)value, size);
+ mdst += dst->pitch;
+ }
+ }
+ }
+ else if(dst->bpps == 1)
+ {
+ if(!((uint8_t)value ^ (uint8_t)(value >> 8)))
+ goto ms;
+
+ FILL(uint16_t);
+ }
+ else if(dst->bpps == 2)
+ {
+ if(value == (uint8_t)value * 0x1010101)
+ goto ms;
+ FILL(uint32_t);
+ }
+ else
+ assert(0);
+#undef FILL
+ }
+ else
+ {
+ int* dswx;
+ int* dswy;
+
+ dswx = alloca(w * sizeof(int));
+ for(int ix = 0; ix < w; ++ix)
+ dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, dst->z, dst->w, dst->h, dst->d);
+ dswy = alloca(h * sizeof(int));
+ for(int iy = 0; iy < h; ++iy)
+ dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d);
+
+#define FILL(T) do { \
+ T tvalue = (T)value; \
+ for(int iy = 0; iy < h; ++iy) \
+ { \
+ T* pdst = (T*)mdst + dswy[iy]; \
+ for(int ix = 0; ix < w; ++ix) \
+ { \
+ assert((uint8_t*)&pdst[dswx[ix] + 1] <= (uint8_t*)dst->bo->map + dst->bo->size); \
+ pdst[dswx[ix]] = tvalue; \
+ } \
+ } \
+ } while(0)
+
+ if(dst->bpps == 0)
+ FILL(uint8_t);
+ else if(dst->bpps == 1)
+ FILL(uint16_t);
+ else if(dst->bpps == 2)
+ FILL(uint32_t);
+ else
+ assert(0 && "unhandled bpp");
+#undef FILL
+ }
+
+ nouveau_bo_unmap(dst->bo);
+}
+
+static void
+nv04_region_copy_swizzle(struct nv04_2d_context *ctx,
+ struct nv04_region* dst,
+ struct nv04_region* src,
+ int w, int h, int cs2d_format, int sifm_format)
+{
+ struct nouveau_channel *chan = ctx->swzsurf->channel;
+ struct nouveau_grobj *swzsurf = ctx->swzsurf;
+ struct nouveau_grobj *sifm = ctx->sifm;
+ /* Max width & height may not be the same on all HW, but must be POT */
+ unsigned max_shift = 10;
+ unsigned cw = 1 << max_shift;
+ unsigned ch = 1 << max_shift;
+ unsigned sx = dst->x >> max_shift;
+ unsigned sy = dst->y >> max_shift;
+ unsigned ex = (dst->x + w - 1) >> max_shift;
+ unsigned ey = (dst->y + h - 1) >> max_shift;
+ unsigned chunks = (ex - sx + 1) * (ey - sy + 1);
+ unsigned chunk_size;
+ if(dst->w < cw)
+ cw = dst->w;
+ if(dst->h < ch)
+ ch = dst->h;
+ chunk_size = cw * ch << dst->bpps;
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tRGN_COPY_SWIZZLE [%i, %i: %i] ", w, h, dst->bpps);
+ for(int i = 0; i < 2; ++i)
+ {
+ nv04_region_print(i ? src : dst);
+ fprintf(stderr, i ? "\n" : " <- ");
+ }
+#endif
+
+ nv04_region_assert(dst, w, h);
+ nv04_region_assert(src, w, h);
+
+ MARK_RING (chan, 8 + chunks * 17, 2 + chunks * 2);
+
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
+ OUT_RELOCo(chan, dst->bo,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
+ OUT_RING (chan, cs2d_format |
+ log2i(cw) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
+ log2i(ch) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
+
+ BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
+ OUT_RELOCo(chan, src->bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
+ OUT_RING (chan, swzsurf->handle);
+
+ assert(!(dst->offset & 63));
+
+ for (int cy = sy; cy <= ey; ++cy) {
+ int ry = MAX2(0, (int)(dst->y - ch * cy));
+ int rh = MIN2((int)ch, (int)(dst->y - ch * cy + h)) - ry;
+ for (int cx = sx; cx <= ex; ++cx) {
+ int rx = MAX2(0, (int)(dst->x - cw * cx));
+ int rw = MIN2((int)cw, (int)(dst->x - cw * cx + w)) - rx;
+ unsigned dst_offset;
+ unsigned src_offset;
+
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
+
+ dst_offset = dst->offset + (nv04_swizzle_bits_2d(cx * cw, cy * ch, dst->w, dst->h) << dst->bpps);
+ assert(dst_offset <= dst->bo->size);
+ assert(dst_offset + chunk_size <= dst->bo->size);
+ OUT_RELOCl(chan, dst->bo, dst_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
+ OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
+ OUT_RING (chan, sifm_format);
+ OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
+ OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
+ OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | rw);
+ OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
+ OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | rw);
+ OUT_RING (chan, 1 << 20);
+ OUT_RING (chan, 1 << 20);
+
+ BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
+ OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | align(rw, 8));
+ OUT_RING (chan, src->pitch |
+ NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
+ NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
+ src_offset = src->offset + (cy * ch + ry + src->y - dst->y) * src->pitch + ((cx * cw + rx + src->x - dst->x) << src->bpps);
+ assert(src_offset <= src->bo->size);
+ assert(src_offset + (src->pitch * (rh - 1)) + (rw << src->bpps) <= src->bo->size);
+ OUT_RELOCl(chan, src->bo, src_offset,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, 0);
+ }
+ }
+}
+
+static inline void
+nv04_copy_m2mf_begin(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, struct nouveau_bo* srcbo, unsigned commands)
+{
+ struct nouveau_channel *chan = ctx->m2mf->channel;
+ struct nouveau_grobj *m2mf = ctx->m2mf;
+ MARK_RING (chan, 3 + commands * 9, 2 + commands * 2);
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
+ OUT_RELOCo(chan, srcbo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dstbo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+}
+
+static inline void
+nv04_copy_m2mf_body(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int* pdstoff, unsigned dstpitch, struct nouveau_bo* srcbo, int* psrcoff, unsigned srcpitch, unsigned size, unsigned lines)
+{
+ struct nouveau_channel *chan = ctx->m2mf->channel;
+ struct nouveau_grobj *m2mf = ctx->m2mf;
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\t\t\tCOPY_M2MF_BODY [%i, %i] <%i[%u]> lin %u <- <%i[%u]> lin %u\n", size, lines, dstbo->handle, *pdstoff, dstpitch, srcbo->handle, *psrcoff, srcpitch);
+#endif
+
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ OUT_RELOCl(chan, srcbo, *psrcoff,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dstbo, *pdstoff,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, srcpitch);
+ OUT_RING (chan, dstpitch);
+ OUT_RING (chan, size);
+ OUT_RING (chan, lines);
+ OUT_RING (chan, 0x0101);
+ OUT_RING (chan, 0);
+
+ *psrcoff += srcpitch * lines;
+ *pdstoff += dstpitch * lines;
+}
+
+static void
+nv04_copy_m2mf(struct nv04_2d_context *ctx,
+ struct nouveau_bo* dstbo, int dstoff, unsigned dstpitch,
+ struct nouveau_bo* srcbo, int srcoff, unsigned srcpitch,
+ unsigned size, unsigned h)
+{
+ unsigned max_pitch = 32767;
+ unsigned max_lines = 2047;
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\t\tCOPY_M2MF [%i, %i] <%i[%i]> lin %u <- <%i[%i]> lin %u\n", size, h, dstbo->handle, dstoff, dstpitch, srcbo->handle, srcoff, srcpitch);
+#endif
+
+ if(srcpitch <= max_pitch && dstpitch <= max_pitch)
+ {
+ unsigned full_pages = h / max_lines;
+ unsigned leftover_lines = h - full_pages * max_lines;
+
+ nv04_copy_m2mf_begin(ctx, dstbo, srcbo, full_pages + !!leftover_lines);
+
+ for(unsigned i = 0; i < full_pages; ++i)
+ nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, max_lines);
+
+ if(leftover_lines)
+ nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, leftover_lines);
+ }
+ else
+ {
+ unsigned lines = size / max_pitch;
+ unsigned leftover = size - lines * max_pitch;
+ unsigned full_pages = lines / max_lines;
+ unsigned leftover_lines = lines - full_pages * max_lines;
+ unsigned srcgap = srcpitch - size;
+ unsigned dstgap = dstpitch - size;
+
+ nv04_copy_m2mf_begin(ctx, dstbo, srcbo, h * (full_pages + !!leftover_lines + !!leftover));
+
+ for(unsigned i = 0; i < h; ++i)
+ {
+ for(unsigned j = 0; j < full_pages; ++j)
+ nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, max_lines);
+
+ if(leftover_lines)
+ nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, leftover_lines);
+
+ if(leftover)
+ nv04_copy_m2mf_body(ctx, dstbo, &dstoff, leftover, srcbo, &srcoff, leftover, leftover, 1);
+
+ srcoff += srcgap;
+ dstoff += dstgap;
+ }
+ }
+}
+
+void
+nv04_memcpy(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int dstoff, struct nouveau_bo* srcbo, int srcoff, unsigned size)
+{
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tMEMCPY [%i] <%i[%i]> <- <%i[%i]>\n", size, dstbo->handle, dstoff, srcbo->handle, srcoff);
+#endif
+
+ nv04_copy_m2mf(ctx, dstbo, dstoff, size, srcbo, srcoff, size, size, 1);
+}
+
+static void
+nv04_region_copy_m2mf(struct nv04_2d_context *ctx, struct nv04_region *dst, struct nv04_region *src, int w, int h)
+{
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tRGN_COPY_M2MF [%i, %i: %i] ", w, h, dst->bpps);
+ for(int i = 0; i < 2; ++i)
+ {
+ nv04_region_print(i ? src : dst);
+ fprintf(stderr, i ? "\n" : " <- ");
+ }
+#endif
+
+ nv04_region_assert(dst, w, h);
+ nv04_region_assert(src, w, h);
+ assert(src->pitch);
+ assert(dst->pitch);
+
+ nv04_copy_m2mf(ctx,
+ dst->bo, dst->offset + dst->y * dst->pitch + (dst->x << dst->bpps), dst->pitch,
+ src->bo, src->offset + src->y * src->pitch + (src->x << src->bpps), src->pitch,
+ w << src->bpps, h);
+}
+
+static inline void
+nv04_region_copy_blit(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src, int w, int h, int format)
+{
+ struct nouveau_channel *chan = ctx->surf2d->channel;
+ struct nouveau_grobj *surf2d = ctx->surf2d;
+ struct nouveau_grobj *blit = ctx->blit;
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tRGN_COPY_BLIT [%i, %i: %i] ", w, h, dst->bpps);
+ for(int i = 0; i < 2; ++i)
+ {
+ nv04_region_print(i ? src : dst);
+ fprintf(stderr, i ? "\n" : " <- ");
+ }
+#endif
+
+ assert(!(src->pitch & 63) && src->pitch);
+ assert(!(dst->pitch & 63) && dst->pitch);
+ nv04_region_assert(dst, w, h);
+ nv04_region_assert(src, w, h);
+
+ MARK_RING (chan, 12, 4);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RELOCo(chan, src->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+ OUT_RING (chan, format);
+ OUT_RING (chan, (dst->pitch << 16) | src->pitch);
+ OUT_RELOCl(chan, src->bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, blit, 0x0300, 3);
+ OUT_RING (chan, (src->y << 16) | src->x);
+ OUT_RING (chan, (dst->y << 16) | dst->x);
+ OUT_RING (chan, ( h << 16) | w);
+}
+
+/* THEOREM: a non-linearizable swizzled destination is always 64 byte aligned, except for 4x2 mipmap levels of swizzled 1bpp surfaces
+ * HYPOTESIS:
+ * 1. The first mipmap level is 64-byte-aligned
+ * PROOF:
+ * 1. Thus, all mipmaps level with a parent which is 64-byte or more in size are.
+ * 2. At 1bpp, the smallest levels with a <= 32-byte parent are either Nx1 or 1xN or size <=8, thus 4x2, 2x2 or 2x4
+ * 3. Nx1, 1xN, 2x4, 2x2 have all subrects linearizable. 4x2 does not.
+ * 4. At 2/4bpp or more, the smallest levels with a 32-byte parent are 1xN, Nx1 or 2x2
+ *
+ * However, nv04_region_align handles that.
+ */
+
+// 0 -> done, 1 -> do with 3D engine or CPU, -1 -> do with CPU
+// dst and src may be modified, and the possibly modified version should be passed to nv04_region_cpu if necessary
+int
+nv04_region_copy_2d(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src,
+ int w, int h, int cs2d_format, int sifm_format, int dst_to_gpu, int src_on_gpu)
+{
+ assert(src->bpps == dst->bpps);
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "RGN_COPY%s [%i, %i: %i] ", (cs2d_format >= 0) ? "_2D" : "_NO2D", w, h, dst->bpps);
+ for(int i = 0; i < 2; ++i)
+ {
+ int gpu = i ? src_on_gpu : dst_to_gpu;
+ nv04_region_print(i ? src : dst);
+ fprintf(stderr, " %s", gpu ? "gpu" : "cpu");
+ fprintf(stderr, i ? "\n" : " <- ");
+ }
+#endif
+
+ // if they are contiguous and either both swizzled or both linear, reshape
+ if(!dst->pitch == !src->pitch
+ && nv04_region_is_contiguous(dst, w, h)
+ && nv04_region_is_contiguous(src, w, h))
+ {
+ nv04_region_contiguous_shape(dst, &w, &h, 6);
+ nv04_region_linearize_contiguous(dst, w, h);
+ nv04_region_linearize_contiguous(src, w, h);
+ }
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tOPT ");
+ for(int i = 0; i < 2; ++i)
+ {
+ nv04_region_print(i ? src : dst);
+ fprintf(stderr, i ? "\n" : " <- ");
+ }
+#endif
+
+ /* if the destination is not for GPU _and_ source is on CPU, use CPU */
+ /* if the destination is not for GPU _or_ source is on CPU, use CPU only if we think it's faster than the GPU */
+ /* TODO: benchmark to find out in which cases exactly we should prefer the CPU */
+ if((!dst_to_gpu && !src_on_gpu)
+ || (!dst->pitch && dst->d > 1)
+ /* 3D swizzled destination are unwritable by the GPU, and 2D swizzled ones are readable only by the 3D engine */
+ )
+ return -1;
+ /* there is no known way to read 2D/3D-swizzled surfaces with the 2D engine
+ * ask the caller to use the 3D engine
+ * If a format cannot be sampled from the 3D engine there is no point in making it swizzled, so we must not do so
+ */
+ else if(!src->pitch)
+ {
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tCOPY_ENG3D\n");
+#endif
+ return 1;
+ }
+ /* Setup transfer to swizzle the texture to vram if needed */
+ else
+ {
+ if (!dst->pitch)
+ {
+ if(cs2d_format < 0 || sifm_format < 0 || !dst_to_gpu)
+ {
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tCOPY_ENG3D\n");
+#endif
+ return 1;
+ }
+ else
+ {
+ assert(!nv04_region_align(dst, w, h, 6));
+
+ nv04_region_copy_swizzle(ctx, dst, src, w, h, cs2d_format, sifm_format);
+ return 0;
+ }
+ }
+ else
+ {
+ /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback
+ * to NV_MEMORY_TO_MEMORY_FORMAT in this case.
+ * TODO: is this also true for the source? possibly not
+ */
+
+ if ((cs2d_format < 0)
+ || !dst_to_gpu
+ || nv04_region_align(src, w, h, 6)
+ || nv04_region_align(dst, w, h, 6)
+ )
+ nv04_region_copy_m2mf(ctx, dst, src, w, h);
+ else
+ nv04_region_copy_blit(ctx, dst, src, w, h, cs2d_format);
+
+ return 0;
+ }
+ }
+}
+
+static inline void
+nv04_region_fill_gdirect(struct nv04_2d_context *ctx, struct nv04_region* dst, int w, int h, unsigned value)
+{
+ struct nouveau_channel *chan = ctx->surf2d->channel;
+ struct nouveau_grobj *surf2d = ctx->surf2d;
+ struct nouveau_grobj *rect = ctx->rect;
+ int cs2d_format, gdirect_format;
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tFILL_GDIRECT\n");
+#endif
+
+ assert(!(dst->pitch & 63) && dst->pitch);
+ nv04_region_assert(dst, w, h);
+
+ if(dst->bpps == 0)
+ {
+ gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+ cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
+ }
+ else if(dst->bpps == 1)
+ {
+ gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
+ cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y16;
+ }
+ else if(dst->bpps == 2)
+ {
+ gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+ cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
+ }
+ else
+ {
+ assert(0);
+ gdirect_format = 0;
+ cs2d_format = 0;
+ }
+
+ MARK_RING (chan, 15, 4);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+ OUT_RING (chan, cs2d_format);
+ OUT_RING (chan, (dst->pitch << 16) | dst->pitch);
+ OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
+ OUT_RING (chan, gdirect_format);
+ BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
+ OUT_RING (chan, value);
+ BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
+ OUT_RING (chan, (dst->x << 16) | dst->y);
+ OUT_RING (chan, ( w << 16) | h);
+}
+
+int
+nv04_region_fill_2d(struct nv04_2d_context *ctx, struct nv04_region *dst,
+ int w, int h, unsigned value)
+{
+ if(!w || !h)
+ return 0;
+
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "FILL [%i, %i: %i] ", w, h, dst->bpps);
+ nv04_region_print(dst);
+ fprintf(stderr, " <- 0x%x\n", value);
+#endif
+
+ if(nv04_region_is_contiguous(dst, w, h))
+ {
+ nv04_region_contiguous_shape(dst, &w, &h, 6);
+ nv04_region_linearize_contiguous(dst, w, h);
+ }
+
+ // TODO: maybe do intermediate copies for some cases instead of using the 3D engine/CPU
+ /* GdiRect doesn't work together with swzsurf, so the 3D engine, or an intermediate copy, is the only option here */
+ if(!dst->pitch)
+ {
+#ifdef NV04_REGION_DEBUG
+ fprintf(stderr, "\tFILL_ENG3D\n");
+#endif
+ return 1;
+ }
+ else if(!nv04_region_align(dst, w, h, 6))
+ {
+ nv04_region_fill_gdirect(ctx, dst, w, h, value);
+ return 0;
+ }
+ else
+ return -1;
+}
+
+
+void
+nv04_2d_context_takedown(struct nv04_2d_context *ctx)
+{
+ nouveau_notifier_free(&ctx->ntfy);
+ nouveau_grobj_free(&ctx->m2mf);
+ nouveau_grobj_free(&ctx->surf2d);
+ nouveau_grobj_free(&ctx->swzsurf);
+ nouveau_grobj_free(&ctx->rect);
+ nouveau_grobj_free(&ctx->blit);
+ nouveau_grobj_free(&ctx->sifm);
+
+ free(ctx);
+}
+
+struct nv04_2d_context *
+nv04_2d_context_init(struct nouveau_channel* chan)
+{
+ struct nv04_2d_context *ctx = calloc(1, sizeof(struct nv04_2d_context));
+ unsigned handle = 0x88000000, class;
+ int ret;
+
+ if (!ctx)
+ return NULL;
+
+ ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
+ if (ret) {
+ nv04_2d_context_takedown(ctx);
+ return NULL;
+ }
+
+ ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
+ if (ret) {
+ nv04_2d_context_takedown(ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
+ OUT_RING (chan, ctx->ntfy->handle);
+
+ if (chan->device->chipset < 0x10)
+ class = NV04_CONTEXT_SURFACES_2D;
+ else
+ class = NV10_CONTEXT_SURFACES_2D;
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
+ if (ret) {
+ nv04_2d_context_takedown(ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->surf2d,
+ NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
+
+ if (chan->device->chipset < 0x10)
+ class = NV04_IMAGE_BLIT;
+ else
+ class = NV12_IMAGE_BLIT;
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
+ if (ret) {
+ nv04_2d_context_takedown(ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1);
+ OUT_RING (chan, ctx->ntfy->handle);
+ BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1);
+ OUT_RING (chan, ctx->surf2d->handle);
+ BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1);
+ OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY);
+
+ ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
+ &ctx->rect);
+ if (ret) {
+ nv04_2d_context_takedown(ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
+ OUT_RING (chan, ctx->ntfy->handle);
+ BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
+ OUT_RING (chan, ctx->surf2d->handle);
+ BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
+ OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
+ BEGIN_RING(chan, ctx->rect,
+ NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
+ OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
+
+ switch (chan->device->chipset & 0xf0) {
+ case 0x00:
+ case 0x10:
+ class = NV04_SWIZZLED_SURFACE;
+ break;
+ case 0x20:
+ class = NV20_SWIZZLED_SURFACE;
+ break;
+ case 0x30:
+ class = NV30_SWIZZLED_SURFACE;
+ break;
+ case 0x40:
+ case 0x60:
+ class = NV40_SWIZZLED_SURFACE;
+ break;
+ default:
+ /* Famous last words: this really can't happen.. */
+ assert(0);
+ break;
+ }
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
+ if (ret) {
+ nv04_2d_context_takedown(ctx);
+ return NULL;
+ }
+
+ /* all the Gallium MARK_RING calculations assume no autobinding, so do that now */
+ if(ctx->swzsurf->bound == NOUVEAU_GROBJ_UNBOUND)
+ nouveau_grobj_autobind(ctx->swzsurf);
+
+ switch (chan->device->chipset & 0xf0) {
+ case 0x10:
+ case 0x20:
+ class = NV10_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ case 0x30:
+ class = NV30_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ case 0x40:
+ case 0x60:
+ class = NV40_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ default:
+ class = NV04_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ }
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
+ if (ret) {
+ nv04_2d_context_takedown(ctx);
+ return NULL;
+ }
+
+ /* all the Gallium MARK_RING calculations assume no autobinding, so do that now */
+ if(ctx->sifm->bound == NOUVEAU_GROBJ_UNBOUND)
+ nouveau_grobj_autobind(ctx->sifm);
+
+ return ctx;
+}
diff --git a/src/gallium/drivers/nvfx/nv04_2d.h b/src/gallium/drivers/nvfx/nv04_2d.h
new file mode 100644
index 0000000000..e638b8c874
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv04_2d.h
@@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Ben Skeggs
+ * Copyright 2009 Younes Manton
+ * Copyright 2010 Luca Barbieri
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/* this code has no Mesa or Gallium dependency and can be reused in the classic Mesa driver or DDX */
+
+#ifndef __NV04_2D_H__
+#define __NV04_2D_H__
+
+struct nv04_2d_context;
+struct nouveau_channel;
+struct nouveau_bo;
+
+// NOTE: all functions taking this as a parameter will CLOBBER it (except for ->bo)
+struct nv04_region {
+ struct nouveau_bo* bo;
+ int offset;
+ unsigned pitch; // 0 -> swizzled
+ unsigned bpps; // bpp shift (0, 1, 2; 3, 4 for fp/compressed)
+ unsigned x, y, z;
+ unsigned w, h, d;
+};
+
+void
+nv04_memcpy(struct nv04_2d_context *ctx,
+ struct nouveau_bo* dstbo, int dstoff,
+ struct nouveau_bo* srcbo, int srcoff,
+ unsigned size);
+
+unsigned
+nv04_region_begin(struct nv04_region* rgn, unsigned w, unsigned h);
+
+unsigned
+nv04_region_end(struct nv04_region* rgn, unsigned w, unsigned h);
+
+void
+nv04_2d_context_takedown(struct nv04_2d_context *pctx);
+
+struct nv04_2d_context *
+nv04_2d_context_init(struct nouveau_channel* chan);
+
+void
+nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h);
+
+void
+nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value);
+
+int
+nv04_region_copy_2d(struct nv04_2d_context *ctx,
+ struct nv04_region* dst, struct nv04_region* src,
+ int w, int h,
+ int cs2d_format, int sifm_format,
+ int dst_to_gpu, int src_on_gpu);
+
+int
+nv04_region_fill_2d(struct nv04_2d_context *ctx,
+ struct nv04_region *dst,
+ int w, int h,
+ unsigned value);
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nv04_2d_loops.h b/src/gallium/drivers/nvfx/nv04_2d_loops.h
new file mode 100644
index 0000000000..3a6787c071
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv04_2d_loops.h
@@ -0,0 +1,70 @@
+#ifndef T
+{
+ if(dst->bpps == 0)
+#define T uint8_t
+#include "nv04_2d_loops.h"
+#undef T
+ else if(dst->bpps == 1)
+#define T uint16_t
+#include "nv04_2d_loops.h"
+#undef T
+ else if(dst->bpps == 2)
+#define T uint32_t
+#include "nv04_2d_loops.h"
+#undef T
+ else
+ assert(0);
+}
+#else
+#ifdef SWIZZLED_COPY_LOOPS
+{
+ if(!dst->pitch)
+ {
+ if(!src->pitch)
+ {
+ LOOP_Y
+ {
+ T* pdst = (T*)mdst + dswy[iy];
+ T* psrc = (T*)msrc + sswy[iy];
+ LOOP_X
+ {
+ assert((char*)&psrc[sswx[ix] + 1] <= ((char*)src->bo->map + src->bo->size));
+ assert((char*)&pdst[dswx[ix] + 1] <= ((char*)dst->bo->map + dst->bo->size));
+ pdst[dswx[ix]] = psrc[sswx[ix]];
+ }
+ }
+ }
+ else
+ {
+ T* psrc = (T*)(msrc + ((dir > 0) ? src->y : (src->y + h - 1)) * src->pitch) + src->x;
+ LOOP_Y
+ {
+ T* pdst = (T*)mdst + dswy[iy];
+ LOOP_X
+ {
+ assert((char*)&psrc[ix + 1] <= ((char*)src->bo->map + src->bo->size));
+ assert((char*)&pdst[dswx[ix] + 1] <= ((char*)dst->bo->map + dst->bo->size));
+ pdst[dswx[ix]] = psrc[ix];
+ }
+ psrc = (T*)((char*)psrc + dir * src->pitch);
+ }
+ }
+ }
+ else
+ {
+ T* pdst = (T*)(mdst + ((dir > 0) ? dst->y : (dst->y + h - 1)) * dst->pitch) + dst->x;
+ LOOP_Y
+ {
+ T* psrc = (T*)msrc + sswy[iy];
+ LOOP_X
+ {
+ assert((char*)&psrc[sswx[ix] + 1] <= ((char*)src->bo->map + src->bo->size));
+ assert((char*)&pdst[ix + 1] <= ((char*)dst->bo->map + dst->bo->size));
+ pdst[ix] = psrc[sswx[ix]];
+ }
+ pdst = (T*)((char*)pdst + dir * dst->pitch);
+ }
+ }
+}
+#endif
+#endif
diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.c b/src/gallium/drivers/nvfx/nv04_surface_2d.c
deleted file mode 100644
index 7acbb505df..0000000000
--- a/src/gallium/drivers/nvfx/nv04_surface_2d.c
+++ /dev/null
@@ -1,532 +0,0 @@
-#include "pipe/p_context.h"
-#include "pipe/p_format.h"
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "nouveau/nouveau_winsys.h"
-#include "nouveau/nouveau_util.h"
-#include "nouveau/nouveau_screen.h"
-#include "nv04_surface_2d.h"
-
-static INLINE int
-nv04_surface_format(enum pipe_format format)
-{
- switch (format) {
- case PIPE_FORMAT_A8_UNORM:
- case PIPE_FORMAT_L8_UNORM:
- case PIPE_FORMAT_I8_UNORM:
- return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
- case PIPE_FORMAT_R16_SNORM:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
- case PIPE_FORMAT_L8A8_UNORM:
- return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
- default:
- return -1;
- }
-}
-
-static INLINE int
-nv04_rect_format(enum pipe_format format)
-{
- switch (format) {
- case PIPE_FORMAT_A8_UNORM:
- return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_L8A8_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
- return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
- default:
- return -1;
- }
-}
-
-static INLINE int
-nv04_scaled_image_format(enum pipe_format format)
-{
- switch (format) {
- case PIPE_FORMAT_A8_UNORM:
- case PIPE_FORMAT_L8_UNORM:
- case PIPE_FORMAT_I8_UNORM:
- return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_R16_SNORM:
- case PIPE_FORMAT_L8A8_UNORM:
- return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
- default:
- return -1;
- }
-}
-
-static INLINE unsigned
-nv04_swizzle_bits_square(unsigned x, unsigned y)
-{
- unsigned u = (x & 0x001) << 0 |
- (x & 0x002) << 1 |
- (x & 0x004) << 2 |
- (x & 0x008) << 3 |
- (x & 0x010) << 4 |
- (x & 0x020) << 5 |
- (x & 0x040) << 6 |
- (x & 0x080) << 7 |
- (x & 0x100) << 8 |
- (x & 0x200) << 9 |
- (x & 0x400) << 10 |
- (x & 0x800) << 11;
-
- unsigned v = (y & 0x001) << 1 |
- (y & 0x002) << 2 |
- (y & 0x004) << 3 |
- (y & 0x008) << 4 |
- (y & 0x010) << 5 |
- (y & 0x020) << 6 |
- (y & 0x040) << 7 |
- (y & 0x080) << 8 |
- (y & 0x100) << 9 |
- (y & 0x200) << 10 |
- (y & 0x400) << 11 |
- (y & 0x800) << 12;
- return v | u;
-}
-
-/* rectangular swizzled textures are linear concatenations of swizzled square tiles */
-static INLINE unsigned
-nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h)
-{
- unsigned s = MIN2(w, h);
- unsigned m = s - 1;
- return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
-}
-
-static int
-nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
- struct pipe_surface *dst, int dx, int dy,
- struct pipe_surface *src, int sx, int sy,
- int w, int h)
-{
- struct nouveau_channel *chan = ctx->swzsurf->channel;
- struct nouveau_grobj *swzsurf = ctx->swzsurf;
- struct nouveau_grobj *sifm = ctx->sifm;
- struct nouveau_bo *src_bo = ctx->buf(src);
- struct nouveau_bo *dst_bo = ctx->buf(dst);
- const unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
- /* Max width & height may not be the same on all HW, but must be POT */
- const unsigned max_w = 1024;
- const unsigned max_h = 1024;
- unsigned sub_w = w > max_w ? max_w : w;
- unsigned sub_h = h > max_h ? max_h : h;
- unsigned x;
- unsigned y;
-
- /* Swizzled surfaces must be POT */
- assert(util_is_pot(dst->width) && util_is_pot(dst->height));
-
- /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */
- assert(sub_w == w || util_is_pot(sub_w));
- assert(sub_h == h || util_is_pot(sub_h));
-
- MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
- ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
-
- BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
- OUT_RELOCo(chan, dst_bo,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-
- BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
- OUT_RING (chan, nv04_surface_format(dst->format) |
- log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
- log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
-
- BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
- OUT_RELOCo(chan, src_bo,
- NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
- OUT_RING (chan, swzsurf->handle);
-
- for (y = 0; y < h; y += sub_h) {
- sub_h = MIN2(sub_h, h - y);
-
- for (x = 0; x < w; x += sub_w) {
- sub_w = MIN2(sub_w, w - x);
-
- assert(!(dst->offset & 63));
-
- BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- OUT_RELOCl(chan, dst_bo, dst->offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-
- BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
- OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
- OUT_RING (chan, nv04_scaled_image_format(src->format));
- OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
- OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
- OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
- OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
- OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
- OUT_RING (chan, 1 << 20);
- OUT_RING (chan, 1 << 20);
-
- BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
- OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w);
- OUT_RING (chan, src_pitch |
- NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
- NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
- OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format),
- NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RING (chan, 0);
- }
- }
-
- return 0;
-}
-
-static int
-nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
- struct pipe_surface *dst, int dx, int dy,
- struct pipe_surface *src, int sx, int sy, int w, int h)
-{
- struct nouveau_channel *chan = ctx->m2mf->channel;
- struct nouveau_grobj *m2mf = ctx->m2mf;
- struct nouveau_bo *src_bo = ctx->buf(src);
- struct nouveau_bo *dst_bo = ctx->buf(dst);
- unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
- unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
- unsigned dst_offset = dst->offset + dy * dst_pitch +
- dx * util_format_get_blocksize(dst->texture->format);
- unsigned src_offset = src->offset + sy * src_pitch +
- sx * util_format_get_blocksize(src->texture->format);
-
- MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
- BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
- OUT_RELOCo(chan, src_bo,
- NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RELOCo(chan, dst_bo,
- NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-
- while (h) {
- int count = (h > 2047) ? 2047 : h;
-
- BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
- OUT_RELOCl(chan, src_bo, src_offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCl(chan, dst_bo, dst_offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RING (chan, src_pitch);
- OUT_RING (chan, dst_pitch);
- OUT_RING (chan, w * util_format_get_blocksize(src->texture->format));
- OUT_RING (chan, count);
- OUT_RING (chan, 0x0101);
- OUT_RING (chan, 0);
-
- h -= count;
- src_offset += src_pitch * count;
- dst_offset += dst_pitch * count;
- }
-
- return 0;
-}
-
-static int
-nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
- int dx, int dy, struct pipe_surface *src, int sx, int sy,
- int w, int h)
-{
- struct nouveau_channel *chan = ctx->surf2d->channel;
- struct nouveau_grobj *surf2d = ctx->surf2d;
- struct nouveau_grobj *blit = ctx->blit;
- struct nouveau_bo *src_bo = ctx->buf(src);
- struct nouveau_bo *dst_bo = ctx->buf(dst);
- unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
- unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
- int format;
-
- format = nv04_surface_format(dst->format);
- if (format < 0)
- return 1;
-
- MARK_RING (chan, 12, 4);
- BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
- OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
- OUT_RING (chan, format);
- OUT_RING (chan, (dst_pitch << 16) | src_pitch);
- OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-
- BEGIN_RING(chan, blit, 0x0300, 3);
- OUT_RING (chan, (sy << 16) | sx);
- OUT_RING (chan, (dy << 16) | dx);
- OUT_RING (chan, ( h << 16) | w);
-
- return 0;
-}
-
-static void
-nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
- int dx, int dy, struct pipe_surface *src, int sx, int sy,
- int w, int h)
-{
- int src_linear = src->texture->flags & NVFX_RESOURCE_FLAG_LINEAR;
- int dst_linear = dst->texture->flags & NVFX_RESOURCE_FLAG_LINEAR;
-
- assert(src->format == dst->format);
-
- /* Setup transfer to swizzle the texture to vram if needed */
- if (src_linear && !dst_linear && w > 1 && h > 1) {
- nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
- return;
- }
-
- /* Use M2MF instead of the blitter since it always works
- * Any possible performance drop is likely to be not very significant
- * and dwarfed anyway by the current buffer management problems
- */
- nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h);
-}
-
-static void
-nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
- int dx, int dy, int w, int h, unsigned value)
-{
- struct nouveau_channel *chan = ctx->surf2d->channel;
- struct nouveau_grobj *surf2d = ctx->surf2d;
- struct nouveau_grobj *rect = ctx->rect;
- struct nouveau_bo *dst_bo = ctx->buf(dst);
- unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
- int cs2d_format, gdirect_format;
-
- cs2d_format = nv04_surface_format(dst->format);
- assert(cs2d_format >= 0);
-
- gdirect_format = nv04_rect_format(dst->format);
- assert(gdirect_format >= 0);
-
- MARK_RING (chan, 16, 4);
- BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
- OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
- OUT_RING (chan, cs2d_format);
- OUT_RING (chan, (dst_pitch << 16) | dst_pitch);
- OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-
- BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
- OUT_RING (chan, gdirect_format);
- BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
- OUT_RING (chan, value);
- BEGIN_RING(chan, rect,
- NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
- OUT_RING (chan, (dx << 16) | dy);
- OUT_RING (chan, ( w << 16) | h);
-}
-
-void
-nv04_surface_2d_takedown(struct nv04_surface_2d **pctx)
-{
- struct nv04_surface_2d *ctx;
-
- if (!pctx || !*pctx)
- return;
- ctx = *pctx;
- *pctx = NULL;
-
- nouveau_notifier_free(&ctx->ntfy);
- nouveau_grobj_free(&ctx->m2mf);
- nouveau_grobj_free(&ctx->surf2d);
- nouveau_grobj_free(&ctx->swzsurf);
- nouveau_grobj_free(&ctx->rect);
- nouveau_grobj_free(&ctx->blit);
- nouveau_grobj_free(&ctx->sifm);
-
- FREE(ctx);
-}
-
-struct nv04_surface_2d *
-nv04_surface_2d_init(struct nouveau_screen *screen)
-{
- struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d);
- struct nouveau_channel *chan = screen->channel;
- unsigned handle = 0x88000000, class;
- int ret;
-
- if (!ctx)
- return NULL;
-
- ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
- if (ret) {
- nv04_surface_2d_takedown(&ctx);
- return NULL;
- }
-
- ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
- if (ret) {
- nv04_surface_2d_takedown(&ctx);
- return NULL;
- }
-
- BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
- OUT_RING (chan, ctx->ntfy->handle);
-
- if (chan->device->chipset < 0x10)
- class = NV04_CONTEXT_SURFACES_2D;
- else
- class = NV10_CONTEXT_SURFACES_2D;
-
- ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
- if (ret) {
- nv04_surface_2d_takedown(&ctx);
- return NULL;
- }
-
- BEGIN_RING(chan, ctx->surf2d,
- NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
- OUT_RING (chan, chan->vram->handle);
- OUT_RING (chan, chan->vram->handle);
-
- if (chan->device->chipset < 0x10)
- class = NV04_IMAGE_BLIT;
- else
- class = NV12_IMAGE_BLIT;
-
- ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
- if (ret) {
- nv04_surface_2d_takedown(&ctx);
- return NULL;
- }
-
- BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1);
- OUT_RING (chan, ctx->ntfy->handle);
- BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1);
- OUT_RING (chan, ctx->surf2d->handle);
- BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1);
- OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY);
-
- ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
- &ctx->rect);
- if (ret) {
- nv04_surface_2d_takedown(&ctx);
- return NULL;
- }
-
- BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
- OUT_RING (chan, ctx->ntfy->handle);
- BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
- OUT_RING (chan, ctx->surf2d->handle);
- BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
- OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
- BEGIN_RING(chan, ctx->rect,
- NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
- OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
-
- switch (chan->device->chipset & 0xf0) {
- case 0x00:
- case 0x10:
- class = NV04_SWIZZLED_SURFACE;
- break;
- case 0x20:
- class = NV20_SWIZZLED_SURFACE;
- break;
- case 0x30:
- class = NV30_SWIZZLED_SURFACE;
- break;
- case 0x40:
- case 0x60:
- class = NV40_SWIZZLED_SURFACE;
- break;
- default:
- /* Famous last words: this really can't happen.. */
- assert(0);
- break;
- }
-
- ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
- if (ret) {
- nv04_surface_2d_takedown(&ctx);
- return NULL;
- }
-
- switch (chan->device->chipset & 0xf0) {
- case 0x10:
- case 0x20:
- class = NV10_SCALED_IMAGE_FROM_MEMORY;
- break;
- case 0x30:
- class = NV30_SCALED_IMAGE_FROM_MEMORY;
- break;
- case 0x40:
- case 0x60:
- class = NV40_SCALED_IMAGE_FROM_MEMORY;
- break;
- default:
- class = NV04_SCALED_IMAGE_FROM_MEMORY;
- break;
- }
-
- ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
- if (ret) {
- nv04_surface_2d_takedown(&ctx);
- return NULL;
- }
-
- ctx->copy = nv04_surface_copy;
- ctx->fill = nv04_surface_fill;
- return ctx;
-}
-
-struct nv04_surface*
-nv04_surface_wrap_for_render(struct pipe_screen *pscreen,
- struct nv04_surface_2d* eng2d, struct nv04_surface* ns)
-{
- struct pipe_resource templ;
- struct pipe_resource* temp_tex;
- struct nv04_surface* temp_ns;
- int temp_flags;
-
- temp_flags = ns->base.usage;
-
- ns->base.usage = 0;
-
- memset(&templ, 0, sizeof(templ));
- templ.format = ns->base.texture->format;
- templ.target = PIPE_TEXTURE_2D;
- templ.width0 = ns->base.width;
- templ.height0 = ns->base.height;
- templ.depth0 = 1;
- templ.last_level = 0;
-
- // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented
- templ.nr_samples = ns->base.texture->nr_samples;
-
- templ.bind = ns->base.texture->bind | PIPE_BIND_RENDER_TARGET;
-
- temp_tex = pscreen->resource_create(pscreen, &templ);
- temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags);
- temp_ns->backing = ns;
-
- if(1) /* hmm */
- eng2d->copy(eng2d, &temp_ns->backing->base,
- 0, 0, &ns->base,
- 0, 0, ns->base.width, ns->base.height);
-
- return temp_ns;
-}
diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h
deleted file mode 100644
index 2123c3ed08..0000000000
--- a/src/gallium/drivers/nvfx/nv04_surface_2d.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef __NV04_SURFACE_2D_H__
-#define __NV04_SURFACE_2D_H__
-
-#include "pipe/p_state.h"
-
-struct nouveau_screen;
-
-struct nv04_surface {
- struct pipe_surface base;
- unsigned pitch;
- struct nv04_surface* backing;
-};
-
-struct nv04_surface_2d {
- struct nouveau_notifier *ntfy;
- struct nouveau_grobj *surf2d;
- struct nouveau_grobj *swzsurf;
- struct nouveau_grobj *m2mf;
- struct nouveau_grobj *rect;
- struct nouveau_grobj *blit;
- struct nouveau_grobj *sifm;
-
- struct nouveau_bo *(*buf)(struct pipe_surface *);
-
- void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst,
- int dx, int dy, struct pipe_surface *src, int sx, int sy,
- int w, int h);
- void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst,
- int dx, int dy, int w, int h, unsigned value);
-};
-
-struct nv04_surface_2d *
-nv04_surface_2d_init(struct nouveau_screen *screen);
-
-void
-nv04_surface_2d_takedown(struct nv04_surface_2d **);
-
-struct nv04_surface*
-nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns);
-
-#define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
-
-#endif
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
index dec073ac90..0c3d43fd57 100644
--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -1,7 +1,6 @@
#include "util/u_format.h"
#include "nvfx_context.h"
-#include "nouveau/nouveau_util.h"
#include "nvfx_tex.h"
#include "nvfx_resource.h"
@@ -10,138 +9,109 @@ nv30_sampler_state_init(struct pipe_context *pipe,
struct nvfx_sampler_state *ps,
const struct pipe_sampler_state *cso)
{
- if (cso->max_anisotropy >= 8) {
- ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
- } else
- if (cso->max_anisotropy >= 4) {
- ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
- } else
- if (cso->max_anisotropy >= 2) {
- ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
- }
+ float limit;
+ if (cso->max_anisotropy >= 2)
{
- float limit;
+ if (cso->max_anisotropy >= 8)
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+ else if (cso->max_anisotropy >= 4)
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+ else if (cso->max_anisotropy >= 2)
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+ }
- limit = CLAMP(cso->lod_bias, -16.0, 15.0);
- ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0 + (255.0 / 256.0));
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
- limit = CLAMP(cso->max_lod, 0.0, 15.0);
- ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;
+ ps->max_lod = (int)CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->min_lod = (int)CLAMP(cso->min_lod, 0.0, 15.0);
- limit = CLAMP(cso->min_lod, 0.0, 15.0);
- ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
- }
+ ps->en |= NV34TCL_TX_ENABLE_ENABLE;
}
-#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
-{ \
- TRUE, \
- PIPE_FORMAT_##m, \
- NV34TCL_TX_FORMAT_FORMAT_##tf, \
- (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
- NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
- NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
- NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \
-}
-
-struct nv30_texture_format {
- boolean defined;
- uint pipe;
- int format;
- int swizzle;
-};
-
-static struct nv30_texture_format
-nv30_texture_formats[] = {
- _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W),
- _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W),
- _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W),
- _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W),
- _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W),
- _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X),
- _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X),
- _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X),
- _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y),
- _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X),
- _(S8_USCALED_Z24_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X),
- _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W),
- _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W),
- _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W),
- _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W),
- {},
-};
-
-static struct nv30_texture_format *
-nv30_fragtex_format(uint pipe_format)
+void
+nv30_sampler_view_init(struct pipe_context *pipe,
+ struct nvfx_sampler_view *sv)
{
- struct nv30_texture_format *tf = nv30_texture_formats;
-
- while (tf->defined) {
- if (tf->pipe == pipe_format)
- return tf;
- tf++;
- }
-
- NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
- return NULL;
+ struct pipe_resource* pt = sv->base.texture;
+ struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format];
+ unsigned txf;
+ unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level;
+
+ assert(tf->fmt[0] >= 0);
+
+ txf = sv->u.init_fmt;
+ txf |= (level != sv->base.last_level ? NV34TCL_TX_FORMAT_MIPMAP : 0);
+ txf |= util_logbase2(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+ txf |= util_logbase2(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+ txf |= util_logbase2(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+ txf |= 0x10000;
+
+ sv->u.nv30.fmt[0] = tf->fmt[0] | txf;
+ sv->u.nv30.fmt[1] = tf->fmt[1] | txf;
+ sv->u.nv30.fmt[2] = tf->fmt[2] | txf;
+ sv->u.nv30.fmt[3] = tf->fmt[3] | txf;
+
+ sv->swizzle |= (nvfx_subresource_pitch(pt, 0) << NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT);
+
+ if(pt->height0 <= 1 || util_format_is_compressed(sv->base.format))
+ sv->u.nv30.rect = -1;
+ else
+ sv->u.nv30.rect = !!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR);
+
+ sv->lod_offset = sv->base.first_level - level;
+ sv->max_lod_limit = sv->base.last_level - level;
}
-
void
nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
{
struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
- struct nvfx_miptree *nv30mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
- struct pipe_resource *pt = &nv30mt->base.base;
- struct nouveau_bo *bo = nv30mt->base.bo;
- struct nv30_texture_format *tf;
+ struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
+ struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
struct nouveau_channel* chan = nvfx->screen->base.channel;
- uint32_t txf, txs;
+ unsigned txf;
unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ unsigned use_rect;
+ unsigned max_lod = MIN2(ps->max_lod + sv->lod_offset, sv->max_lod_limit);
+ unsigned min_lod = MIN2(ps->min_lod + sv->lod_offset, max_lod) ;
- tf = nv30_fragtex_format(pt->format);
- if (!tf)
- return;
-
- txf = tf->format;
- txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
- txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
- txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
- txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
- txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
-
- switch (pt->target) {
- case PIPE_TEXTURE_CUBE:
- txf |= NV34TCL_TX_FORMAT_CUBIC;
- /* fall-through */
- case PIPE_TEXTURE_2D:
- txf |= NV34TCL_TX_FORMAT_DIMS_2D;
- break;
- case PIPE_TEXTURE_3D:
- txf |= NV34TCL_TX_FORMAT_DIMS_3D;
- break;
- case PIPE_TEXTURE_1D:
- txf |= NV34TCL_TX_FORMAT_DIMS_1D;
- break;
- default:
- NOUVEAU_ERR("Unknown target %d\n", pt->target);
- return;
+ if(sv->u.nv30.rect < 0)
+ {
+ /* in the case of compressed or 1D textures, we can get away with this,
+ * since the layout is the same
+ */
+ use_rect = ps->fmt;
+ }
+ else
+ {
+ static boolean warned = FALSE;
+ if( !!ps->fmt != sv->u.nv30.rect && !warned) {
+ warned = TRUE;
+ fprintf(stderr,
+ "Unimplemented: coordinate normalization mismatch. Possible reasons:\n"
+ "1. ARB_texture_non_power_of_two is being used despite the fact it isn't supported\n"
+ "2. The state tracker is not using the appropriate coordinate normalization\n"
+ "3. The state tracker is not supported\n");
+ }
+
+ use_rect = sv->u.nv30.rect;
}
- txs = tf->swizzle;
+ txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 2 : 0)];
MARK_RING(chan, 9, 2);
OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8));
- OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
- OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
- NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
- OUT_RING(chan, ps->wrap);
- OUT_RING(chan, NV34TCL_TX_ENABLE_ENABLE | ps->en);
- OUT_RING(chan, txs);
- OUT_RING(chan, ps->filt | 0x2000 /*voodoo*/);
- OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
- pt->height0);
+ OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, txf,
+ tex_flags | NOUVEAU_BO_OR,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ OUT_RING(chan, (ps->wrap & sv->wrap_mask) | sv->wrap);
+ OUT_RING(chan, ps->en | (min_lod << NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT) | (max_lod << NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT));
+ OUT_RING(chan, sv->swizzle);
+ OUT_RING(chan, ps->filt | sv->filt);
+ OUT_RING(chan, sv->npot_size);
OUT_RING(chan, ps->bcol);
nvfx->hw_txf[unit] = txf;
diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h
index ec0444c07f..9a68f5c1fb 100644
--- a/src/gallium/drivers/nvfx/nv30_vertprog.h
+++ b/src/gallium/drivers/nvfx/nv30_vertprog.h
@@ -68,7 +68,7 @@
#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
-#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 16)
#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
#define NV30_VP_INST_COND_SHIFT 11
#define NV30_VP_INST_COND_MASK (0x07 << 11)
@@ -111,7 +111,7 @@
#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
#define NV30_VP_INST_IADDR_SHIFT 2
-#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */
+#define NV30_VP_INST_IADDR_MASK (0x1FF << 2) /* NV30_VP_SRC2_LOW_MASK << 28 */
/* DWORD 3 */
#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
@@ -125,7 +125,7 @@
#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
#define NV30_VP_INST_DEST_SHIFT 2
-#define NV30_VP_INST_DEST_MASK (0x0F << 2)
+#define NV30_VP_INST_DEST_MASK (0x1F << 2)
# define NV30_VP_INST_DEST_POS 0
# define NV30_VP_INST_DEST_BFC0 1
# define NV30_VP_INST_DEST_BFC1 2
@@ -133,7 +133,8 @@
# define NV30_VP_INST_DEST_COL1 4
# define NV30_VP_INST_DEST_FOGC 5
# define NV30_VP_INST_DEST_PSZ 6
-# define NV30_VP_INST_DEST_TC(n) (8+n)
+# define NV30_VP_INST_DEST_TC(n) (8+(n))
+# define NV30_VP_INST_DEST_CLP(n) (17 + (n))
/* Useful to split the source selection regs into their pieces */
#define NV30_VP_SRC0_HIGH_SHIFT 6
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
index 0068b1ba54..106ce71a07 100644
--- a/src/gallium/drivers/nvfx/nv40_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -8,168 +8,97 @@ nv40_sampler_state_init(struct pipe_context *pipe,
struct nvfx_sampler_state *ps,
const struct pipe_sampler_state *cso)
{
+ float limit;
if (cso->max_anisotropy >= 2) {
/* no idea, binary driver sets it, works without it.. meh.. */
ps->wrap |= (1 << 5);
- if (cso->max_anisotropy >= 16) {
+ if (cso->max_anisotropy >= 16)
ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
- } else
- if (cso->max_anisotropy >= 12) {
+ else if (cso->max_anisotropy >= 12)
ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
- } else
- if (cso->max_anisotropy >= 10) {
+ else if (cso->max_anisotropy >= 10)
ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
- } else
- if (cso->max_anisotropy >= 8) {
+ else if (cso->max_anisotropy >= 8)
ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
- } else
- if (cso->max_anisotropy >= 6) {
+ else if (cso->max_anisotropy >= 6)
ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
- } else
- if (cso->max_anisotropy >= 4) {
+ else if (cso->max_anisotropy >= 4)
ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
- } else {
+ else
ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
- }
}
- {
- float limit;
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0 + (255.0 / 256.0));
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
- limit = CLAMP(cso->lod_bias, -16.0, 15.0);
- ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+ ps->max_lod = (int)(CLAMP(cso->max_lod, 0.0, 15.0 + (255.0 / 256.0)) * 256.0);
+ ps->min_lod = (int)(CLAMP(cso->min_lod, 0.0, 15.0 + (255.0 / 256.0)) * 256.0);
- limit = CLAMP(cso->max_lod, 0.0, 15.0);
- ps->en |= (int)(limit * 256.0) << 7;
-
- limit = CLAMP(cso->min_lod, 0.0, 15.0);
- ps->en |= (int)(limit * 256.0) << 19;
- }
-}
-
-#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \
-{ \
- TRUE, \
- PIPE_FORMAT_##m, \
- NV40TCL_TEX_FORMAT_FORMAT_##tf, \
- (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
- NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
- NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
- NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \
- ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \
- (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \
+ ps->en |= NV40TCL_TEX_ENABLE_ENABLE;
}
-struct nv40_texture_format {
- boolean defined;
- uint pipe;
- int format;
- int swizzle;
- int sign;
-};
-
-static struct nv40_texture_format
-nv40_texture_formats[] = {
- _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
- _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
- _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
- _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
- _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
- _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
- _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0),
- _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1),
- _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0),
- _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0),
- _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
- _(S8_USCALED_Z24_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
- _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
- _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
- _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
- _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
- {},
-};
-
-static struct nv40_texture_format *
-nv40_fragtex_format(uint pipe_format)
+void
+nv40_sampler_view_init(struct pipe_context *pipe,
+ struct nvfx_sampler_view *sv)
{
- struct nv40_texture_format *tf = nv40_texture_formats;
-
- while (tf->defined) {
- if (tf->pipe == pipe_format)
- return tf;
- tf++;
+ struct pipe_resource* pt = sv->base.texture;
+ struct nvfx_miptree* mt = (struct nvfx_miptree*)pt;
+ struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format];
+ unsigned txf;
+ unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level;
+ assert(tf->fmt[4] >= 0);
+
+ txf = sv->u.init_fmt;
+ txf |= 0x8000;
+ if(pt->target == PIPE_TEXTURE_CUBE)
+ txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+ else
+ txf |= (((sv->base.last_level - sv->base.first_level) + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+
+ if (!mt->linear_pitch)
+ sv->u.nv40.npot_size2 = 0;
+ else {
+ sv->u.nv40.npot_size2 = mt->linear_pitch;
+ txf |= NV40TCL_TEX_FORMAT_LINEAR;
}
- NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
- return NULL;
-}
+ sv->u.nv40.fmt[0] = tf->fmt[4] | txf;
+ sv->u.nv40.fmt[1] = tf->fmt[5] | txf;
+ sv->u.nv40.npot_size2 |= (u_minify(pt->depth0, level) << NV40TCL_TEX_SIZE1_DEPTH_SHIFT);
+
+ sv->lod_offset = (sv->base.first_level - level) * 256;
+ sv->max_lod_limit = (sv->base.last_level - level) * 256;
+}
void
nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
- struct nvfx_miptree *nv40mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
- struct nouveau_bo *bo = nv40mt->base.bo;
- struct pipe_resource *pt = &nv40mt->base.base;
- struct nv40_texture_format *tf;
-
- uint32_t txf, txs, txp;
+ struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
+ struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ unsigned txf;
+ unsigned max_lod = MIN2(ps->max_lod + sv->lod_offset, sv->max_lod_limit);
+ unsigned min_lod = MIN2(ps->min_lod + sv->lod_offset, max_lod);
- tf = nv40_fragtex_format(pt->format);
- if (!tf)
- assert(0);
-
- txf = ps->fmt;
- txf |= tf->format | 0x8000;
- txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
-
- if (1) /* XXX */
- txf |= NV34TCL_TX_FORMAT_NO_BORDER;
-
- switch (pt->target) {
- case PIPE_TEXTURE_CUBE:
- txf |= NV34TCL_TX_FORMAT_CUBIC;
- /* fall-through */
- case PIPE_TEXTURE_2D:
- txf |= NV34TCL_TX_FORMAT_DIMS_2D;
- break;
- case PIPE_TEXTURE_3D:
- txf |= NV34TCL_TX_FORMAT_DIMS_3D;
- break;
- case PIPE_TEXTURE_1D:
- txf |= NV34TCL_TX_FORMAT_DIMS_1D;
- break;
- default:
- NOUVEAU_ERR("Unknown target %d\n", pt->target);
- return;
- }
-
- if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
- txp = 0;
- } else {
- txp = nv40mt->level[0].pitch;
- txf |= NV40TCL_TEX_FORMAT_LINEAR;
- }
-
- txs = tf->swizzle;
+ txf = sv->u.nv40.fmt[ps->compare] | ps->fmt;
- MARK_RING(chan, 11 + 2 * !unit, 2);
+ MARK_RING(chan, 11, 2);
OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8));
- OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
- OUT_RING(chan, ps->wrap);
- OUT_RING(chan, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
- OUT_RING(chan, txs);
- OUT_RING(chan, ps->filt | tf->sign | 0x2000 /*voodoo*/);
- OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0);
+ OUT_RING(chan, (ps->wrap & sv->wrap_mask) | sv->wrap);
+ OUT_RING(chan, ps->en | (min_lod << 19) | (max_lod << 7));
+ OUT_RING(chan, sv->swizzle);
+ OUT_RING(chan, ps->filt | sv->filt);
+ OUT_RING(chan, sv->npot_size);
OUT_RING(chan, ps->bcol);
OUT_RING(chan, RING_3D(NV40TCL_TEX_SIZE1(unit), 1));
- OUT_RING(chan, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+ OUT_RING(chan, sv->u.nv40.npot_size2);
nvfx->hw_txf[unit] = txf;
nvfx->hw_samplers |= (1 << unit);
diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h
index 7337293bab..3d0a1fe3d1 100644
--- a/src/gallium/drivers/nvfx/nv40_vertprog.h
+++ b/src/gallium/drivers/nvfx/nv40_vertprog.h
@@ -44,7 +44,7 @@
#define NV40_VP_INST_SRC1_ABS (1 << 22)
#define NV40_VP_INST_SRC0_ABS (1 << 21)
#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
-#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x3F << 15)
#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
#define NV40_VP_INST_COND_SHIFT 10
#define NV40_VP_INST_COND_MASK (0x7 << 10)
@@ -100,7 +100,7 @@
#define NV40_VP_INST_SRC2H_SHIFT 0
#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
#define NV40_VP_INST_IADDRH_SHIFT 0
-#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+#define NV40_VP_INST_IADDRH_MASK (0x3F << 0)
/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
#define NV40_VP_INST_IADDRL_SHIFT 29
diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c
index 05b824b8f7..041099e0e5 100644
--- a/src/gallium/drivers/nvfx/nvfx_buffer.c
+++ b/src/gallium/drivers/nvfx/nvfx_buffer.c
@@ -6,115 +6,39 @@
#include "nouveau/nouveau_screen.h"
#include "nouveau/nouveau_winsys.h"
#include "nvfx_resource.h"
+#include "nvfx_screen.h"
-
-/* Currently using separate implementations for buffers and textures,
- * even though gallium has a unified abstraction of these objects.
- * Eventually these should be combined, and mechanisms like transfers
- * be adapted to work for both buffer and texture uploads.
- */
-static void nvfx_buffer_destroy(struct pipe_screen *pscreen,
+void nvfx_buffer_destroy(struct pipe_screen *pscreen,
struct pipe_resource *presource)
{
- struct nvfx_resource *buffer = nvfx_resource(presource);
+ struct nvfx_buffer *buffer = nvfx_buffer(presource);
- nouveau_screen_bo_release(pscreen, buffer->bo);
+ if(!(buffer->base.base.flags & NVFX_RESOURCE_FLAG_USER))
+ align_free(buffer->data);
+ nouveau_screen_bo_release(pscreen, buffer->base.bo);
FREE(buffer);
}
-
-
-
-/* Utility functions for transfer create/destroy are hooked in and
- * just record the arguments to those functions.
- */
-static void *
-nvfx_buffer_transfer_map( struct pipe_context *pipe,
- struct pipe_transfer *transfer )
-{
- struct nvfx_resource *buffer = nvfx_resource(transfer->resource);
- uint8_t *map;
-
- map = nouveau_screen_bo_map_range( pipe->screen,
- buffer->bo,
- transfer->box.x,
- transfer->box.width,
- nouveau_screen_transfer_flags(transfer->usage) );
- if (map == NULL)
- return NULL;
-
- return map + transfer->box.x;
-}
-
-
-
-static void nvfx_buffer_transfer_flush_region( struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
-{
- struct nvfx_resource *buffer = nvfx_resource(transfer->resource);
-
- nouveau_screen_bo_map_flush_range(pipe->screen,
- buffer->bo,
- transfer->box.x + box->x,
- box->width);
-}
-
-static void nvfx_buffer_transfer_unmap( struct pipe_context *pipe,
- struct pipe_transfer *transfer )
-{
- struct nvfx_resource *buffer = nvfx_resource(transfer->resource);
-
- nouveau_screen_bo_unmap(pipe->screen, buffer->bo);
-}
-
-
-
-
-struct u_resource_vtbl nvfx_buffer_vtbl =
-{
- u_default_resource_get_handle, /* get_handle */
- nvfx_buffer_destroy, /* resource_destroy */
- NULL, /* is_resource_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
- nvfx_buffer_transfer_map, /* transfer_map */
- nvfx_buffer_transfer_flush_region, /* transfer_flush_region */
- nvfx_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
-
-
-
struct pipe_resource *
nvfx_buffer_create(struct pipe_screen *pscreen,
const struct pipe_resource *template)
{
- struct nvfx_resource *buffer;
+ struct nvfx_screen* screen = nvfx_screen(pscreen);
+ struct nvfx_buffer* buffer;
- buffer = CALLOC_STRUCT(nvfx_resource);
+ buffer = CALLOC_STRUCT(nvfx_buffer);
if (!buffer)
return NULL;
- buffer->base = *template;
- buffer->vtbl = &nvfx_buffer_vtbl;
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->base.screen = pscreen;
-
- buffer->bo = nouveau_screen_bo_new(pscreen,
- 16,
- buffer->base.usage,
- buffer->base.bind,
- buffer->base.width0);
-
- if (buffer->bo == NULL)
- goto fail;
-
- return &buffer->base;
+ buffer->base.base = *template;
+ buffer->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ pipe_reference_init(&buffer->base.base.reference, 1);
+ buffer->base.base.screen = pscreen;
+ buffer->size = util_format_get_stride(template->format, template->width0);
+ buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+ buffer->data = align_malloc(buffer->size, 16);
-fail:
- FREE(buffer);
- return NULL;
+ return &buffer->base.base;
}
@@ -124,30 +48,49 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen,
unsigned bytes,
unsigned usage)
{
- struct nvfx_resource *buffer;
+ struct nvfx_screen* screen = nvfx_screen(pscreen);
+ struct nvfx_buffer* buffer;
- buffer = CALLOC_STRUCT(nvfx_resource);
+ buffer = CALLOC_STRUCT(nvfx_buffer);
if (!buffer)
return NULL;
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->vtbl = &nvfx_buffer_vtbl;
- buffer->base.screen = pscreen;
- buffer->base.format = PIPE_FORMAT_R8_UNORM;
- buffer->base.usage = PIPE_USAGE_IMMUTABLE;
- buffer->base.bind = usage;
- buffer->base.width0 = bytes;
- buffer->base.height0 = 1;
- buffer->base.depth0 = 1;
-
- buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
- if (!buffer->bo)
- goto fail;
-
- return &buffer->base;
-
-fail:
- FREE(buffer);
- return NULL;
+ pipe_reference_init(&buffer->base.base.reference, 1);
+ buffer->base.base.flags = NVFX_RESOURCE_FLAG_LINEAR | NVFX_RESOURCE_FLAG_USER;
+ buffer->base.base.screen = pscreen;
+ buffer->base.base.format = PIPE_FORMAT_R8_UNORM;
+ buffer->base.base.usage = PIPE_USAGE_IMMUTABLE;
+ buffer->base.base.bind = usage;
+ buffer->base.base.width0 = bytes;
+ buffer->base.base.height0 = 1;
+ buffer->base.base.depth0 = 1;
+ buffer->data = ptr;
+ buffer->size = bytes;
+ buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold;
+ buffer->dirty_end = bytes;
+
+ return &buffer->base.base;
}
+void nvfx_buffer_upload(struct nvfx_buffer* buffer)
+{
+ unsigned dirty = buffer->dirty_end - buffer->dirty_begin;
+ if(!buffer->base.bo)
+ {
+ buffer->base.bo = nouveau_screen_bo_new(buffer->base.base.screen,
+ 16,
+ buffer->base.base.usage,
+ buffer->base.base.bind,
+ buffer->base.base.width0);
+ }
+
+ if(dirty)
+ {
+ // TODO: may want to use a temporary in some cases
+ nouveau_bo_map(buffer->base.bo, NOUVEAU_BO_WR
+ | (buffer->dirty_unsynchronized ? NOUVEAU_BO_NOSYNC : 0));
+ memcpy((uint8_t*)buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty);
+ nouveau_bo_unmap(buffer->base.bo);
+ buffer->dirty_begin = buffer->dirty_end = 0;
+ }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 7218abff22..5a2fa14c88 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -1,5 +1,6 @@
#include "draw/draw_context.h"
#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
#include "nvfx_context.h"
#include "nvfx_screen.h"
@@ -14,6 +15,7 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags,
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *eng3d = screen->eng3d;
+ /* XXX: we need to actually be intelligent here */
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING (chan, 2);
@@ -31,8 +33,22 @@ nvfx_destroy(struct pipe_context *pipe)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
+ if(nvfx->dummy_fs)
+ pipe->delete_fs_state(pipe, nvfx->dummy_fs);
+
+ for(unsigned i = 0; i < nvfx->vtxbuf_nr; ++i)
+ pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
+ pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
+ util_unreference_framebuffer_state(&nvfx->framebuffer);
+ for(unsigned i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+ pipe_sampler_view_reference(&nvfx->fragment_sampler_views[i], 0);
+
if (nvfx->draw)
draw_destroy(nvfx->draw);
+
+ if(nvfx->screen->cur_ctx == nvfx)
+ nvfx->screen->cur_ctx = NULL;
+
FREE(nvfx);
}
@@ -59,14 +75,21 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
nvfx->pipe.clear = nvfx_clear;
nvfx->pipe.flush = nvfx_flush;
- screen->base.channel->user_private = nvfx;
-
nvfx->is_nv4x = screen->is_nv4x;
+ /* TODO: it seems that nv30 might have fixed function clipping usable with vertex programs
+ * However, my code for that doesn't work, so use vp clipping for all cards, which works.
+ */
+ nvfx->use_vp_clipping = TRUE;
nvfx_init_query_functions(nvfx);
nvfx_init_surface_functions(nvfx);
nvfx_init_state_functions(nvfx);
+ nvfx_init_sampling_functions(nvfx);
+ nvfx_init_vbo_functions(nvfx);
+ nvfx_init_fragprog_functions(nvfx);
+ nvfx_init_vertprog_functions(nvfx);
nvfx_init_resource_functions(&nvfx->pipe);
+ nvfx_init_transfer_functions(&nvfx->pipe);
/* Create, configure, and install fallback swtnl path */
nvfx->draw = draw_create(&nvfx->pipe);
@@ -78,6 +101,12 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
/* set these to that we init them on first validation */
nvfx->state.scissor_enabled = ~0;
- nvfx->state.stipple_enabled = ~0;
+ nvfx->hw_pointsprite_control = -1;
+ nvfx->hw_vp_output = -1;
+ nvfx->use_vertex_buffers = -1;
+ nvfx->relocs_needed = NVFX_RELOCATE_ALL;
+
+ LIST_INITHEAD(&nvfx->render_cache);
+
return &nvfx->pipe;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
index 89f94c10bd..4c654bfa8b 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -11,8 +11,10 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_inlines.h"
+#include "util/u_double_list.h"
#include "draw/draw_vertex.h"
+#include "util/u_blitter.h"
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
@@ -42,17 +44,26 @@
#define NVFX_NEW_SR (1 << 13)
#define NVFX_NEW_VERTCONST (1 << 14)
#define NVFX_NEW_FRAGCONST (1 << 15)
+#define NVFX_NEW_INDEX (1 << 16)
+#define NVFX_NEW_SPRITE (1 << 17)
+
+#define NVFX_RELOCATE_FRAMEBUFFER (1 << 0)
+#define NVFX_RELOCATE_FRAGTEX (1 << 1)
+#define NVFX_RELOCATE_FRAGPROG (1 << 2)
+#define NVFX_RELOCATE_VTXBUF (1 << 3)
+#define NVFX_RELOCATE_IDXBUF (1 << 4)
+#define NVFX_RELOCATE_ALL 0x1f
struct nvfx_rasterizer_state {
struct pipe_rasterizer_state pipe;
unsigned sb_len;
- uint32_t sb[32];
+ uint32_t sb[34];
};
struct nvfx_zsa_state {
struct pipe_depth_stencil_alpha_state pipe;
unsigned sb_len;
- uint32_t sb[26];
+ uint32_t sb[24];
};
struct nvfx_blend_state {
@@ -64,13 +75,57 @@ struct nvfx_blend_state {
struct nvfx_state {
unsigned scissor_enabled;
- unsigned stipple_enabled;
unsigned fp_samplers;
+ unsigned render_temps;
+};
+
+struct nvfx_per_vertex_element {
+ unsigned idx;
+ unsigned vertex_buffer_index;
+ unsigned src_offset;
+};
+
+struct nvfx_low_frequency_element {
+ unsigned idx;
+ unsigned vertex_buffer_index;
+ unsigned src_offset;
+ void (*fetch_rgba_float)(float *dst, const uint8_t *src, unsigned i, unsigned j);
+ unsigned ncomp;
+};
+
+struct nvfx_per_instance_element {
+ struct nvfx_low_frequency_element base;
+ unsigned instance_divisor;
+};
+
+struct nvfx_per_vertex_buffer_info
+{
+ unsigned vertex_buffer_index;
+ unsigned per_vertex_size;
};
struct nvfx_vtxelt_state {
struct pipe_vertex_element pipe[16];
unsigned num_elements;
+ unsigned vtxfmt[16];
+
+ unsigned num_per_vertex_buffer_infos;
+ struct nvfx_per_vertex_buffer_info per_vertex_buffer_info[16];
+
+ unsigned num_per_vertex;
+ struct nvfx_per_vertex_element per_vertex[16];
+
+ unsigned num_per_instance;
+ struct nvfx_per_instance_element per_instance[16];
+
+ unsigned num_constant;
+ struct nvfx_low_frequency_element constant[16];
+
+ boolean needs_translate;
+ struct translate* translate;
+
+ unsigned vertex_length;
+ unsigned max_vertices_per_packet;
};
struct nvfx_render_target {
@@ -86,8 +141,11 @@ struct nvfx_context {
struct nvfx_screen *screen;
unsigned is_nv4x; /* either 0 or ~0 */
+ boolean use_vp_clipping;
struct draw_context *draw;
+ struct blitter_context* blitter;
+ struct list_head render_cache;
/* HW state derived from pipe states */
struct nvfx_state state;
@@ -111,7 +169,7 @@ struct nvfx_context {
unsigned stipple[32];
struct pipe_clip_state clip;
struct nvfx_vertex_program *vertprog;
- struct nvfx_fragment_program *fragprog;
+ struct nvfx_pipe_fragment_program *fragprog;
struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
unsigned constbuf_nr[PIPE_SHADER_TYPES];
struct nvfx_rasterizer_state *rasterizer;
@@ -122,23 +180,34 @@ struct nvfx_context {
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state framebuffer;
struct pipe_index_buffer idxbuf;
- struct pipe_resource *idxbuf_buffer;
- unsigned idxbuf_format;
struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
+ struct nvfx_pipe_fragment_program* dummy_fs;
+
unsigned nr_samplers;
unsigned nr_textures;
unsigned dirty_samplers;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
struct nvfx_vtxelt_state *vtxelt;
+ int base_vertex;
+ boolean use_index_buffer;
+ /* -1 = hardware input setup is outdated
+ * 0 = hardware input setup is for inline vertices
+ * 1 = hardware input setup is for hardware vertices
+ */
+ int use_vertex_buffers;
- unsigned vbo_bo;
unsigned hw_vtxelt_nr;
uint8_t hw_samplers;
uint32_t hw_txf[8];
struct nvfx_render_target hw_rt[4];
struct nvfx_render_target hw_zeta;
+ int hw_pointsprite_control;
+ int hw_vp_output;
+ struct nvfx_fragment_program* hw_fragprog;
+
+ unsigned relocs_needed;
};
static INLINE struct nvfx_context *
@@ -175,15 +244,12 @@ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers,
/* nvfx_draw.c */
extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx);
-extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
- struct pipe_resource *idxbuf,
- unsigned ib_size, int ib_bias,
- unsigned mode,
- unsigned start, unsigned count);
+extern void nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info);
extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);
/* nvfx_fb.c */
-extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx);
+extern int nvfx_framebuffer_prepare(struct nvfx_context *nvfx);
+extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result);
void
nvfx_framebuffer_relocate(struct nvfx_context *nvfx);
@@ -191,19 +257,24 @@ nvfx_framebuffer_relocate(struct nvfx_context *nvfx);
extern void nvfx_fragprog_destroy(struct nvfx_context *,
struct nvfx_fragment_program *);
extern void nvfx_fragprog_validate(struct nvfx_context *nvfx);
-extern void
-nvfx_fragprog_relocate(struct nvfx_context *nvfx);
+extern void nvfx_fragprog_relocate(struct nvfx_context *nvfx);
+extern void nvfx_init_fragprog_functions(struct nvfx_context *nvfx);
/* nvfx_fragtex.c */
+extern void nvfx_init_sampling_functions(struct nvfx_context *nvfx);
extern void nvfx_fragtex_validate(struct nvfx_context *nvfx);
-extern void
-nvfx_fragtex_relocate(struct nvfx_context *nvfx);
+extern void nvfx_fragtex_relocate(struct nvfx_context *nvfx);
+
+struct nvfx_sampler_view;
/* nv30_fragtex.c */
extern void
nv30_sampler_state_init(struct pipe_context *pipe,
struct nvfx_sampler_state *ps,
const struct pipe_sampler_state *cso);
+extern void
+nv30_sampler_view_init(struct pipe_context *pipe,
+ struct nvfx_sampler_view *sv);
extern void nv30_fragtex_set(struct nvfx_context *nvfx, int unit);
/* nv40_fragtex.c */
@@ -211,6 +282,9 @@ extern void
nv40_sampler_state_init(struct pipe_context *pipe,
struct nvfx_sampler_state *ps,
const struct pipe_sampler_state *cso);
+extern void
+nv40_sampler_view_init(struct pipe_context *pipe,
+ struct nvfx_sampler_view *sv);
extern void nv40_fragtex_set(struct nvfx_context *nvfx, int unit);
/* nvfx_state.c */
@@ -225,23 +299,75 @@ extern void nvfx_state_sr_validate(struct nvfx_context *nvfx);
extern void nvfx_state_zsa_validate(struct nvfx_context *nvfx);
/* nvfx_state_emit.c */
-extern void nvfx_state_relocate(struct nvfx_context *nvfx);
+extern void nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs);
extern boolean nvfx_state_validate(struct nvfx_context *nvfx);
extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
-extern void nvfx_state_emit(struct nvfx_context *nvfx);
+
+static inline void
+nvfx_state_emit(struct nvfx_context *nvfx)
+{
+ unsigned relocs = NVFX_RELOCATE_FRAMEBUFFER | NVFX_RELOCATE_FRAGTEX | NVFX_RELOCATE_FRAGPROG;
+ if (nvfx->render_mode == HW)
+ {
+ relocs |= NVFX_RELOCATE_VTXBUF;
+ if(nvfx->use_index_buffer)
+ relocs |= NVFX_RELOCATE_IDXBUF;
+ }
+
+ relocs &= nvfx->relocs_needed;
+ if(relocs)
+ nvfx_state_relocate(nvfx, relocs);
+}
/* nvfx_transfer.c */
-extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx);
+extern void nvfx_init_transfer_functions(struct pipe_context *pipe);
/* nvfx_vbo.c */
extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx);
extern void nvfx_vbo_relocate(struct nvfx_context *nvfx);
+extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx);
+extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx);
extern void nvfx_draw_vbo(struct pipe_context *pipe,
const struct pipe_draw_info *info);
+extern void nvfx_init_vbo_functions(struct nvfx_context *nvfx);
+extern unsigned nvfx_vertex_formats[];
/* nvfx_vertprog.c */
extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx);
extern void nvfx_vertprog_destroy(struct nvfx_context *,
struct nvfx_vertex_program *);
+extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx);
+
+/* nvfx_push.c */
+extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
+
+/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */
+static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp)
+{
+ switch (ncomp) {
+ case 4:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ OUT_RING(chan, fui(v[2]));
+ OUT_RING(chan, fui(v[3]));
+ break;
+ case 3:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ OUT_RING(chan, fui(v[2]));
+ break;
+ case 2:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ break;
+ case 1:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
+ OUT_RING(chan, fui(v[0]));
+ break;
+ }
+}
#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 22cff370b7..2601d5b8e2 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -9,6 +9,7 @@
#include "draw/draw_pipe.h"
#include "nvfx_context.h"
+#include "nvfx_resource.h"
/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
* often at all. Uses "quadro style" vertex submission + a fixed vertex
@@ -39,30 +40,21 @@ nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v)
unsigned idx = nvfx->swtnl.draw[i];
unsigned hw = nvfx->swtnl.hw[i];
+ WAIT_RING(chan, 5);
switch (nvfx->swtnl.emit[i]) {
case EMIT_OMIT:
break;
case EMIT_1F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1);
- OUT_RING (chan, fui(v->data[idx][0]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1);
break;
case EMIT_2F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2);
- OUT_RING (chan, fui(v->data[idx][0]));
- OUT_RING (chan, fui(v->data[idx][1]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2);
break;
case EMIT_3F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3);
- OUT_RING (chan, fui(v->data[idx][0]));
- OUT_RING (chan, fui(v->data[idx][1]));
- OUT_RING (chan, fui(v->data[idx][2]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3);
break;
case EMIT_4F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
- OUT_RING (chan, fui(v->data[idx][0]));
- OUT_RING (chan, fui(v->data[idx][1]));
- OUT_RING (chan, fui(v->data[idx][2]));
- OUT_RING (chan, fui(v->data[idx][3]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4);
break;
case 0xff:
BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
@@ -231,15 +223,9 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx)
}
void
-nvfx_draw_elements_swtnl(struct pipe_context *pipe,
- struct pipe_resource *idxbuf,
- unsigned idxbuf_size, int idxbuf_bias,
- unsigned mode, unsigned start, unsigned count)
+nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
- struct pipe_transfer *ib_transfer = NULL;
- struct pipe_transfer *cb_transfer = NULL;
unsigned i;
void *map;
@@ -247,47 +233,28 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe,
return;
nvfx_state_emit(nvfx);
+ /* these must be passed without adding the offsets */
for (i = 0; i < nvfx->vtxbuf_nr; i++) {
- map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer,
- PIPE_TRANSFER_READ,
- &vb_transfer[i]);
+ map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data;
draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
}
- if (idxbuf) {
- map = pipe_buffer_map(pipe, idxbuf,
- PIPE_TRANSFER_READ,
- &ib_transfer);
- draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, idxbuf_bias, map);
- } else {
- draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL);
- }
+ map = NULL;
+ if (info->indexed && nvfx->idxbuf.buffer)
+ map = nvfx_buffer(nvfx->idxbuf.buffer)->data;
+ draw_set_mapped_index_buffer(nvfx->draw, map);
if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];
- map = pipe_buffer_map(pipe,
- nvfx->constbuf[PIPE_SHADER_VERTEX],
- PIPE_TRANSFER_READ,
- &cb_transfer);
+ map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data;
draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0,
map, nr);
}
- draw_arrays(nvfx->draw, mode, start, count);
-
- for (i = 0; i < nvfx->vtxbuf_nr; i++)
- pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]);
-
- if (idxbuf)
- pipe_buffer_unmap(pipe, idxbuf, ib_transfer);
-
- if (nvfx->constbuf[PIPE_SHADER_VERTEX])
- pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX],
- cb_transfer);
+ draw_vbo(nvfx->draw, info);
draw_flush(nvfx->draw);
- pipe->flush(pipe, 0, NULL);
}
static INLINE void
@@ -305,19 +272,19 @@ emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit,
void
nvfx_vtxfmt_validate(struct nvfx_context *nvfx)
{
- struct nvfx_fragment_program *fp = nvfx->fragprog;
+ struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
unsigned colour = 0, texcoords = 0, fog = 0, i;
/* Determine needed fragprog inputs */
- for (i = 0; i < fp->info.num_inputs; i++) {
- switch (fp->info.input_semantic_name[i]) {
+ for (i = 0; i < pfp->info.num_inputs; i++) {
+ switch (pfp->info.input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
break;
case TGSI_SEMANTIC_COLOR:
- colour |= (1 << fp->info.input_semantic_index[i]);
+ colour |= (1 << pfp->info.input_semantic_index[i]);
break;
case TGSI_SEMANTIC_GENERIC:
- texcoords |= (1 << fp->info.input_semantic_index[i]);
+ texcoords |= (1 << pfp->info.input_semantic_index[i]);
break;
case TGSI_SEMANTIC_FOG:
fog = 1;
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index ee41f03b9b..275672a31f 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -2,25 +2,31 @@
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_ureg.h"
#include "nvfx_context.h"
#include "nvfx_shader.h"
+#include "nvfx_resource.h"
#define MAX_CONSTS 128
#define MAX_IMM 32
+
struct nvfx_fpc {
+ struct nvfx_pipe_fragment_program* pfp;
struct nvfx_fragment_program *fp;
- uint attrib_map[PIPE_MAX_SHADER_INPUTS];
-
- unsigned r_temps;
- unsigned r_temps_discard;
- struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
- struct nvfx_sreg *r_temp;
+ unsigned max_temps;
+ unsigned long long r_temps;
+ unsigned long long r_temps_discard;
+ struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nvfx_reg *r_temp;
+ unsigned sprite_coord_temp;
int num_regs;
@@ -33,34 +39,40 @@ struct nvfx_fpc {
} consts[MAX_CONSTS];
int nr_consts;
- struct nvfx_sreg imm[MAX_IMM];
+ struct nvfx_reg imm[MAX_IMM];
unsigned nr_imm;
+
+ unsigned char generic_to_slot[256]; /* semantic idx for each input semantic */
+
+ struct util_dynarray if_stack;
+ //struct util_dynarray loop_stack;
+ struct util_dynarray label_relocs;
};
-static INLINE struct nvfx_sreg
+static INLINE struct nvfx_reg
temp(struct nvfx_fpc *fpc)
{
- int idx = ffs(~fpc->r_temps) - 1;
+ int idx = __builtin_ctzll(~fpc->r_temps);
- if (idx < 0) {
+ if (idx >= fpc->max_temps) {
NOUVEAU_ERR("out of temps!!\n");
assert(0);
- return nvfx_sr(NVFXSR_TEMP, 0);
+ return nvfx_reg(NVFXSR_TEMP, 0);
}
- fpc->r_temps |= (1 << idx);
- fpc->r_temps_discard |= (1 << idx);
- return nvfx_sr(NVFXSR_TEMP, idx);
+ fpc->r_temps |= (1ULL << idx);
+ fpc->r_temps_discard |= (1ULL << idx);
+ return nvfx_reg(NVFXSR_TEMP, idx);
}
static INLINE void
release_temps(struct nvfx_fpc *fpc)
{
fpc->r_temps &= ~fpc->r_temps_discard;
- fpc->r_temps_discard = 0;
+ fpc->r_temps_discard = 0ULL;
}
-static INLINE struct nvfx_sreg
+static INLINE struct nvfx_reg
constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
{
int idx;
@@ -72,16 +84,9 @@ constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
fpc->consts[idx].pipe = pipe;
if (pipe == -1)
memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
- return nvfx_sr(NVFXSR_CONST, idx);
+ return nvfx_reg(NVFXSR_CONST, idx);
}
-#define arith(cc,s,o,d,m,s0,s1,s2) \
- nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
- (d), (m), (s0), (s1), (s2))
-#define tex(cc,s,o,u,d,m,s0,s1,s2) \
- nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
- (d), (m), (s0), none, none)
-
static void
grow_insns(struct nvfx_fpc *fpc, int size)
{
@@ -92,23 +97,29 @@ grow_insns(struct nvfx_fpc *fpc, int size)
}
static void
-emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
+emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
{
struct nvfx_fragment_program *fp = fpc->fp;
uint32_t *hw = &fp->insn[fpc->inst_offset];
uint32_t sr = 0;
- switch (src.type) {
+ switch (src.reg.type) {
case NVFXSR_INPUT:
sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
- hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
+ hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
break;
case NVFXSR_OUTPUT:
sr |= NVFX_FP_REG_SRC_HALF;
/* fall-through */
case NVFXSR_TEMP:
sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
- sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
+ sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
+ break;
+ case NVFXSR_RELOCATED:
+ sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
+ sr |= (fpc->sprite_coord_temp << NVFX_FP_REG_SRC_SHIFT);
+ //printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index);
+ util_dynarray_append(&fpc->fp->slot_relocations[src.reg.index], unsigned, fpc->inst_offset + pos + 1);
break;
case NVFXSR_CONST:
if (!fpc->have_const) {
@@ -117,18 +128,18 @@ emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
}
hw = &fp->insn[fpc->inst_offset];
- if (fpc->consts[src.index].pipe >= 0) {
+ if (fpc->consts[src.reg.index].pipe >= 0) {
struct nvfx_fragment_program_data *fpd;
fp->consts = realloc(fp->consts, ++fp->nr_consts *
sizeof(*fpd));
fpd = &fp->consts[fp->nr_consts - 1];
fpd->offset = fpc->inst_offset + 4;
- fpd->index = fpc->consts[src.index].pipe;
+ fpd->index = fpc->consts[src.reg.index].pipe;
memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
} else {
memcpy(&fp->insn[fpc->inst_offset + 4],
- fpc->consts[src.index].vals,
+ fpc->consts[src.reg.index].vals,
sizeof(uint32_t) * 4);
}
@@ -156,7 +167,7 @@ emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
}
static void
-emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst)
+emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
{
struct nvfx_fragment_program *fp = fpc->fp;
uint32_t *hw = &fp->insn[fpc->inst_offset];
@@ -184,9 +195,7 @@ emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst)
}
static void
-nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op,
- struct nvfx_sreg dst, int mask,
- struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
+nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
{
struct nvfx_fragment_program *fp = fpc->fp;
uint32_t *hw;
@@ -197,68 +206,225 @@ nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op,
hw = &fp->insn[fpc->inst_offset];
memset(hw, 0, sizeof(uint32_t) * 4);
- if (op == NVFX_FP_OP_OPCODE_KIL)
+ if (insn.op == NVFX_FP_OP_OPCODE_KIL)
fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
- hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
- hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
- hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);
+ hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
+ hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
+ hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);
- if (sat)
+ if (insn.sat)
hw[0] |= NVFX_FP_OP_OUT_SAT;
- if (dst.cc_update)
+ if (insn.cc_update)
hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
- hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
- hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
- (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
- (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
- (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
-
- emit_dst(fpc, dst);
- emit_src(fpc, 0, s0);
- emit_src(fpc, 1, s1);
- emit_src(fpc, 2, s2);
+ hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
+ hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+ (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
+ (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
+ (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
+
+ if(insn.unit >= 0)
+ {
+ hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
+ fp->samplers |= (1 << insn.unit);
+ }
+
+ emit_dst(fpc, insn.dst);
+ emit_src(fpc, 0, insn.src[0]);
+ emit_src(fpc, 1, insn.src[1]);
+ emit_src(fpc, 2, insn.src[2]);
}
+#define arith(s,o,d,m,s0,s1,s2) \
+ nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \
+ (d), (m), (s0), (s1), (s2))
+
+#define tex(s,o,u,d,m,s0,s1,s2) \
+ nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+/* IF src.x != 0, as TGSI specifies */
static void
-nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit,
- struct nvfx_sreg dst, int mask,
- struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
+nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
{
- struct nvfx_fragment_program *fp = fpc->fp;
+ const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
+ struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
+ uint32_t *hw;
+ insn.cc_update = 1;
+ nvfx_fp_emit(fpc, insn);
- nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+ fpc->inst_offset = fpc->fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fpc->fp->insn[fpc->inst_offset];
+ /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
+ hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
+ NV40_FP_OP_OUT_NONE |
+ (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
+ /* Use .xxxx swizzle so that we check only src[0].x*/
+ hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+ (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
+ (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
+ (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) |
+ (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT);
+ hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */
+ hw[3] = 0; /* | endif_offset */
+ util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
+}
+
+/* IF src.x != 0, as TGSI specifies */
+static void
+nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
+{
+ struct nvfx_relocation reloc;
+ uint32_t *hw;
+ fpc->inst_offset = fpc->fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fpc->fp->insn[fpc->inst_offset];
+ /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
+ hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
+ /* Use .xxxx swizzle so that we check only src[0].x*/
+ hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
+ (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
+ hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
+ hw[3] = 0;
+ reloc.target = target;
+ reloc.location = fpc->inst_offset + 2;
+ util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
+}
+
+static void
+nv40_fp_ret(struct nvfx_fpc *fpc)
+{
+ uint32_t *hw;
+ fpc->inst_offset = fpc->fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fpc->fp->insn[fpc->inst_offset];
+ /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
+ hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
+ /* Use .xxxx swizzle so that we check only src[0].x*/
+ hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
+ (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
+ hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
+ hw[3] = 0;
+}
- fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
- fp->samplers |= (1 << unit);
+static void
+nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
+{
+ struct nvfx_relocation reloc;
+ uint32_t *hw;
+ fpc->inst_offset = fpc->fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fpc->fp->insn[fpc->inst_offset];
+ /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
+ hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
+ NV40_FP_OP_OUT_NONE |
+ (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
+ /* Use .xxxx swizzle so that we check only src[0].x*/
+ hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
+ (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
+ hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH |
+ (count << NV40_FP_OP_REP_COUNT1_SHIFT) |
+ (count << NV40_FP_OP_REP_COUNT2_SHIFT) |
+ (count << NV40_FP_OP_REP_COUNT3_SHIFT);
+ hw[3] = 0; /* | end_offset */
+ reloc.target = target;
+ reloc.location = fpc->inst_offset + 3;
+ util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
+ //util_dynarray_append(&fpc->loop_stack, unsigned, target);
}
-static INLINE struct nvfx_sreg
+/* warning: this only works forward, and probably only if not inside any IF */
+static void
+nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
+{
+ struct nvfx_relocation reloc;
+ uint32_t *hw;
+ fpc->inst_offset = fpc->fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fpc->fp->insn[fpc->inst_offset];
+ /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
+ hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
+ NV40_FP_OP_OUT_NONE |
+ (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
+ /* Use .xxxx swizzle so that we check only src[0].x*/
+ hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+ (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT);
+ hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */
+ hw[3] = 0; /* | endif_offset */
+ reloc.target = target;
+ reloc.location = fpc->inst_offset + 2;
+ util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
+ reloc.target = target;
+ reloc.location = fpc->inst_offset + 3;
+ util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
+}
+
+static void
+nv40_fp_brk(struct nvfx_fpc *fpc)
+{
+ uint32_t *hw;
+ fpc->inst_offset = fpc->fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fpc->fp->insn[fpc->inst_offset];
+ /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
+ hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
+ NV40_FP_OP_OUT_NONE;
+ /* Use .xxxx swizzle so that we check only src[0].x*/
+ hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+ (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
+ hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
+ hw[3] = 0;
+}
+
+static INLINE struct nvfx_src
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
- struct nvfx_sreg src = { 0 };
+ struct nvfx_src src;
switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- src = nvfx_sr(NVFXSR_INPUT,
- fpc->attrib_map[fsrc->Register.Index]);
+ if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) {
+ assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0);
+ src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION);
+ } else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) {
+ if(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0)
+ src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0);
+ else if(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 1)
+ src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1);
+ else
+ assert(0);
+ } else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) {
+ assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0);
+ src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC);
+ } else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) {
+ /* TODO: check this has the correct values */
+ /* XXX: what do we do for nv30 here (assuming it lacks facing)?! */
+ assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0);
+ src.reg = nvfx_reg(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING);
+ } else {
+ assert(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC);
+ src.reg = nvfx_reg(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->pfp->info.input_semantic_index[fsrc->Register.Index]]);
+ }
break;
case TGSI_FILE_CONSTANT:
- src = constant(fpc, fsrc->Register.Index, NULL);
+ src.reg = constant(fpc, fsrc->Register.Index, NULL);
break;
case TGSI_FILE_IMMEDIATE:
assert(fsrc->Register.Index < fpc->nr_imm);
- src = fpc->imm[fsrc->Register.Index];
+ src.reg = fpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- src = fpc->r_temp[fsrc->Register.Index];
+ src.reg = fpc->r_temp[fsrc->Register.Index];
break;
/* NV40 fragprog result regs are just temps, so this is simple */
case TGSI_FILE_OUTPUT:
- src = fpc->r_result[fsrc->Register.Index];
+ src.reg = fpc->r_result[fsrc->Register.Index];
break;
default:
NOUVEAU_ERR("bad src file\n");
+ src.reg.index = 0;
+ src.reg.type = 0;
break;
}
@@ -271,7 +437,7 @@ tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
return src;
}
-static INLINE struct nvfx_sreg
+static INLINE struct nvfx_reg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
@@ -279,10 +445,10 @@ tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
case TGSI_FILE_TEMPORARY:
return fpc->r_temp[fdst->Register.Index];
case TGSI_FILE_NULL:
- return nvfx_sr(NVFXSR_NONE, 0);
+ return nvfx_reg(NVFXSR_NONE, 0);
default:
NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
- return nvfx_sr(NVFXSR_NONE, 0);
+ return nvfx_reg(NVFXSR_NONE, 0);
}
}
@@ -302,8 +468,10 @@ static boolean
nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
const struct tgsi_full_instruction *finst)
{
- const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
- struct nvfx_sreg src[3], dst, tmp;
+ const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
+ struct nvfx_insn insn;
+ struct nvfx_src src[3], tmp, tmp2;
+ struct nvfx_reg dst;
int mask, sat, unit = 0;
int ai = -1, ci = -1, ii = -1;
int i;
@@ -331,9 +499,8 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
ai = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
- src[i] = temp(fpc);
- arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
- tgsi_src(fpc, fsrc), none, none);
+ src[i] = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
}
break;
case TGSI_FILE_CONSTANT:
@@ -342,9 +509,8 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
ci = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
- src[i] = temp(fpc);
- arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
- tgsi_src(fpc, fsrc), none, none);
+ src[i] = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
}
break;
case TGSI_FILE_IMMEDIATE:
@@ -353,9 +519,8 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
ii = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
- src[i] = temp(fpc);
- arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
- tgsi_src(fpc, fsrc), none, none);
+ src[i] = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
}
break;
case TGSI_FILE_TEMPORARY:
@@ -378,277 +543,345 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
- arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+ nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none));
break;
case TGSI_OPCODE_ADD:
- arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_CMP:
- tmp = nvfx_sr(NVFXSR_NONE, 0);
- tmp.cc_update = 1;
- arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
- dst.cc_test = NVFX_COND_GE;
- arith(fpc, sat, MOV, dst, mask, src[2], none, none);
- dst.cc_test = NVFX_COND_LT;
- arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+ insn = arith(0, MOV, none.reg, mask, src[0], none, none);
+ insn.cc_update = 1;
+ nvfx_fp_emit(fpc, insn);
+
+ insn = arith(sat, MOV, dst, mask, src[2], none, none);
+ insn.cc_test = NVFX_COND_GE;
+ nvfx_fp_emit(fpc, insn);
+
+ insn = arith(sat, MOV, dst, mask, src[1], none, none);
+ insn.cc_test = NVFX_COND_LT;
+ nvfx_fp_emit(fpc, insn);
break;
case TGSI_OPCODE_COS:
- arith(fpc, sat, COS, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_DDX:
if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
- tmp = temp(fpc);
- arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
- swz(src[0], Z, W, Z, W), none, none);
- arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
- swz(tmp, X, Y, X, Y), none, none);
- arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
- none, none);
- arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
+ nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
+ nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
+ nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
} else {
- arith(fpc, sat, DDX, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
}
break;
case TGSI_OPCODE_DDY:
if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
- tmp = temp(fpc);
- arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
- swz(src[0], Z, W, Z, W), none, none);
- arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
- swz(tmp, X, Y, X, Y), none, none);
- arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
- none, none);
- arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
+ nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
+ nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
+ nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
} else {
- arith(fpc, sat, DDY, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
}
break;
+ case TGSI_OPCODE_DP2:
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
+ nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
+ break;
case TGSI_OPCODE_DP3:
- arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_DP4:
- arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_DPH:
- tmp = temp(fpc);
- arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none);
- arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
- swz(src[1], W, W, W, W), none);
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
+ nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
break;
case TGSI_OPCODE_DST:
- arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_EX2:
- arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_FLR:
- arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_FRC:
- arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_KILP:
- arith(fpc, 0, KIL, none, 0, none, none, none);
+ nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
break;
case TGSI_OPCODE_KIL:
- dst = nvfx_sr(NVFXSR_NONE, 0);
- dst.cc_update = 1;
- arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none);
- dst.cc_update = 0; dst.cc_test = NVFX_COND_LT;
- arith(fpc, 0, KIL, dst, 0, none, none, none);
+ insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
+ insn.cc_update = 1;
+ nvfx_fp_emit(fpc, insn);
+
+ insn = arith(0, KIL, none.reg, 0, none, none, none);
+ insn.cc_test = NVFX_COND_LT;
+ nvfx_fp_emit(fpc, insn);
break;
case TGSI_OPCODE_LG2:
- arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
break;
// case TGSI_OPCODE_LIT:
case TGSI_OPCODE_LRP:
if(!nvfx->is_nv4x)
- arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
+ nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
else {
- tmp = temp(fpc);
- arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
- arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
+ nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
}
break;
case TGSI_OPCODE_MAD:
- arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+ nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
break;
case TGSI_OPCODE_MAX:
- arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_MIN:
- arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_MOV:
- arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_MUL:
- arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
+ break;
+ case TGSI_OPCODE_NOP:
break;
case TGSI_OPCODE_POW:
if(!nvfx->is_nv4x)
- arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
else {
- tmp = temp(fpc);
- arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X,
- swz(src[0], X, X, X, X), none, none);
- arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X),
- swz(src[1], X, X, X, X), none);
- arith(fpc, sat, EX2, dst, mask,
- swz(tmp, X, X, X, X), none, none);
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
+ nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
+ nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
}
break;
case TGSI_OPCODE_RCP:
- arith(fpc, sat, RCP, dst, mask, src[0], none, none);
- break;
- case TGSI_OPCODE_RET:
- assert(0);
+ nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_RFL:
if(!nvfx->is_nv4x)
- arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none));
else {
- tmp = temp(fpc);
- arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none);
- arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none);
- arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z,
- swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
- arith(fpc, sat, MAD, dst, mask,
- swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none));
+ nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none));
+ insn = arith(0, DIV, tmp.reg, NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
+ insn.scale = NVFX_FP_OP_DST_SCALE_2X;
+ nvfx_fp_emit(fpc, insn);
+ nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])));
}
break;
case TGSI_OPCODE_RSQ:
if(!nvfx->is_nv4x)
- arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
+ nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
else {
- tmp = temp(fpc);
- arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X,
- abs(swz(src[0], X, X, X, X)), none, none);
- arith(fpc, sat, EX2, dst, mask,
- neg(swz(tmp, X, X, X, X)), none, none);
+ tmp = nvfx_src(temp(fpc));
+ insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
+ insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
+ nvfx_fp_emit(fpc, insn);
+ nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
}
break;
case TGSI_OPCODE_SCS:
/* avoid overwriting the source */
if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
{
- if (mask & NVFX_FP_MASK_X) {
- arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
- swz(src[0], X, X, X, X), none, none);
- }
- if (mask & NVFX_FP_MASK_Y) {
- arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
- swz(src[0], X, X, X, X), none, none);
- }
+ if (mask & NVFX_FP_MASK_X)
+ nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
+ if (mask & NVFX_FP_MASK_Y)
+ nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
}
else
{
- if (mask & NVFX_FP_MASK_Y) {
- arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
- swz(src[0], X, X, X, X), none, none);
- }
- if (mask & NVFX_FP_MASK_X) {
- arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
- swz(src[0], X, X, X, X), none, none);
- }
+ if (mask & NVFX_FP_MASK_Y)
+ nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
+ if (mask & NVFX_FP_MASK_X)
+ nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
}
break;
case TGSI_OPCODE_SEQ:
- arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SFL:
- arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SGE:
- arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SGT:
- arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SIN:
- arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_SLE:
- arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SLT:
- arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SNE:
- arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
+ break;
+ case TGSI_OPCODE_SSG:
+ tmp = nvfx_src(temp(fpc));
+ tmp2 = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, SGT, tmp.reg, mask, src[0], nvfx_src(nvfx_reg(NVFXSR_CONST, 0)), none));
+ nvfx_fp_emit(fpc, arith(0, SLT, tmp.reg, mask, src[0], nvfx_src(nvfx_reg(NVFXSR_CONST, 0)), none));
+ nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, tmp, neg(tmp2), none));
break;
case TGSI_OPCODE_STR:
- arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
+ nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SUB:
- arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+ nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none));
break;
case TGSI_OPCODE_TEX:
- tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+ nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
break;
- case TGSI_OPCODE_TXB:
- tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+ case TGSI_OPCODE_TRUNC:
+ tmp = nvfx_src(temp(fpc));
+ insn = arith(0, MOV, none.reg, mask, src[0], none, none);
+ insn.cc_update = 1;
+ nvfx_fp_emit(fpc, insn);
+
+ nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
+ nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));
+
+ insn = arith(sat, MOV, dst, mask, neg(tmp), none, none);
+ insn.cc_test = NVFX_COND_LT;
+ nvfx_fp_emit(fpc, insn);
+ break;
+ case TGSI_OPCODE_TXB:
+ nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
+ break;
+ case TGSI_OPCODE_TXL:
+ if(nvfx->is_nv4x)
+ nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
+ else /* unsupported on nv30, use TEX and hope they like it */
+ nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
+ break;
+ case TGSI_OPCODE_TXP:
+ nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = nvfx_src(temp(fpc));
+ nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
+ nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
break;
- case TGSI_OPCODE_TXP:
- tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+
+ case TGSI_OPCODE_IF:
+ // MOVRC0 R31 (TR0.xyzw), R<src>:
+ // IF (NE.xxxx) ELSE <else> END <end>
+ if(!nvfx->is_nv4x)
+ goto nv3x_cflow;
+ nv40_fp_if(fpc, src[0]);
break;
- case TGSI_OPCODE_XPD:
- tmp = temp(fpc);
- arith(fpc, 0, MUL, tmp, mask,
- swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
- arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W),
- swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
- neg(tmp));
+
+ case TGSI_OPCODE_ELSE:
+ {
+ uint32_t *hw;
+ if(!nvfx->is_nv4x)
+ goto nv3x_cflow;
+ assert(util_dynarray_contains(&fpc->if_stack, unsigned));
+ hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
+ hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
break;
- default:
- NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
- return FALSE;
}
- release_temps(fpc);
- return TRUE;
-}
+ case TGSI_OPCODE_ENDIF:
+ {
+ uint32_t *hw;
+ if(!nvfx->is_nv4x)
+ goto nv3x_cflow;
+ assert(util_dynarray_contains(&fpc->if_stack, unsigned));
+ hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
+ if(!hw[2])
+ hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
+ hw[3] = fpc->fp->insn_len;
+ break;
+ }
-static boolean
-nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
- const struct tgsi_full_declaration *fdec)
-{
- int hw;
+ case TGSI_OPCODE_BRA:
+ /* This can in limited cases be implemented with an IF with the else and endif labels pointing to the target */
+ /* no state tracker uses this, so don't implement this for now */
+ assert(0);
+ nv40_fp_bra(fpc, finst->Label.Label);
+ break;
- switch (fdec->Semantic.Name) {
- case TGSI_SEMANTIC_POSITION:
- hw = NVFX_FP_OP_INPUT_SRC_POSITION;
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ /* nothing to do here */
break;
- case TGSI_SEMANTIC_COLOR:
- if (fdec->Semantic.Index == 0) {
- hw = NVFX_FP_OP_INPUT_SRC_COL0;
- } else
- if (fdec->Semantic.Index == 1) {
- hw = NVFX_FP_OP_INPUT_SRC_COL1;
- } else {
- NOUVEAU_ERR("bad colour semantic index\n");
- return FALSE;
- }
+
+ case TGSI_OPCODE_CAL:
+ if(!nvfx->is_nv4x)
+ goto nv3x_cflow;
+ nv40_fp_cal(fpc, finst->Label.Label);
break;
- case TGSI_SEMANTIC_FOG:
- hw = NVFX_FP_OP_INPUT_SRC_FOGC;
+
+ case TGSI_OPCODE_RET:
+ if(!nvfx->is_nv4x)
+ goto nv3x_cflow;
+ nv40_fp_ret(fpc);
break;
- case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.Index <= 7) {
- hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
- Index);
- } else {
- NOUVEAU_ERR("bad generic semantic index\n");
- return FALSE;
+
+ case TGSI_OPCODE_BGNLOOP:
+ if(!nvfx->is_nv4x)
+ goto nv3x_cflow;
+ /* TODO: we should support using two nested REPs to allow a > 255 iteration count */
+ nv40_fp_rep(fpc, 255, finst->Label.Label);
+ break;
+
+ case TGSI_OPCODE_ENDLOOP:
+ break;
+
+ case TGSI_OPCODE_BRK:
+ if(!nvfx->is_nv4x)
+ goto nv3x_cflow;
+ nv40_fp_brk(fpc);
+ break;
+
+ case TGSI_OPCODE_CONT:
+ {
+ static int warned = 0;
+ if(!warned) {
+ NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n");
+ warned = 1;
}
break;
- default:
- NOUVEAU_ERR("bad input semantic\n");
+ }
+
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
return FALSE;
}
- fpc->attrib_map[fdec->Range.First] = hw;
+out:
+ release_temps(fpc);
return TRUE;
+nv3x_cflow:
+ {
+ static int warned = 0;
+ if(!warned) {
+ NOUVEAU_ERR(
+ "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n"
+ "If rendering is incorrect, try to disable GLSL support in the application.\n");
+ warned = 1;
+ }
+ }
+ goto out;
}
static boolean
@@ -680,8 +913,8 @@ nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
return FALSE;
}
- fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
- fpc->r_temps |= (1 << hw);
+ fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
+ fpc->r_temps |= (1ULL << hw);
return TRUE;
}
@@ -690,8 +923,22 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
{
struct tgsi_parse_context p;
int high_temp = -1, i;
+ struct util_semantic_set set;
+ float const0v[4] = {0, 0, 0, 0};
+ struct nvfx_reg const0;
+
+ fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->pfp->pipe.tokens, TGSI_FILE_INPUT);
+ if(fpc->fp->num_slots > 8)
+ return FALSE;
+ util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, 8);
+ util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, 8);
- tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+ memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input));
+
+ const0 = constant(fpc, -1, const0v);
+ assert(const0.index == 0);
+
+ tgsi_parse_init(&p, fpc->pfp->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
const union tgsi_full_token *tok = &p.FullToken;
@@ -702,10 +949,6 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
const struct tgsi_full_declaration *fdec;
fdec = &p.FullToken.FullDeclaration;
switch (fdec->Declaration.File) {
- case TGSI_FILE_INPUT:
- if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec))
- goto out_err;
- break;
case TGSI_FILE_OUTPUT:
if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
goto out_err;
@@ -744,40 +987,66 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
tgsi_parse_free(&p);
if (++high_temp) {
- fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
+ fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
for (i = 0; i < high_temp; i++)
fpc->r_temp[i] = temp(fpc);
- fpc->r_temps_discard = 0;
+ fpc->r_temps_discard = 0ULL;
}
return TRUE;
out_err:
- if (fpc->r_temp)
+ if (fpc->r_temp) {
FREE(fpc->r_temp);
+ fpc->r_temp = NULL;
+ }
tgsi_parse_free(&p);
return FALSE;
}
-static void
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
+
+static struct nvfx_fragment_program*
nvfx_fragprog_translate(struct nvfx_context *nvfx,
- struct nvfx_fragment_program *fp)
+ struct nvfx_pipe_fragment_program *pfp,
+ boolean emulate_sprite_flipping)
{
struct tgsi_parse_context parse;
struct nvfx_fpc *fpc = NULL;
+ struct util_dynarray insns;
+ struct nvfx_fragment_program* fp = NULL;
+ const int min_size = 4096;
- fpc = CALLOC(1, sizeof(struct nvfx_fpc));
+ fp = CALLOC_STRUCT(nvfx_fragment_program);
+ if(!fp)
+ goto out_err;
+
+ fpc = CALLOC_STRUCT(nvfx_fpc);
if (!fpc)
- return;
+ goto out_err;
+
+ fpc->max_temps = nvfx->is_nv4x ? 48 : 32;
+ fpc->pfp = pfp;
fpc->fp = fp;
fpc->num_regs = 2;
- if (!nvfx_fragprog_prepare(nvfx, fpc)) {
- FREE(fpc);
- return;
- }
+ if (!nvfx_fragprog_prepare(nvfx, fpc))
+ goto out_err;
- tgsi_parse_init(&parse, fp->pipe.tokens);
+ tgsi_parse_init(&parse, pfp->pipe.tokens);
+ util_dynarray_init(&insns);
+
+ if(emulate_sprite_flipping)
+ {
+ struct nvfx_reg reg = temp(fpc);
+ struct nvfx_src sprite_input = nvfx_src(nvfx_reg(NVFXSR_RELOCATED, fp->num_slots));
+ float v[4] = {1, -1, 0, 0};
+ struct nvfx_src imm = nvfx_src(constant(fpc, -1, v));
+
+ fpc->sprite_coord_temp = reg.index;
+ fpc->r_temps_discard = 0ULL;
+ nvfx_fp_emit(fpc, arith(0, MAD, reg, NVFX_FP_MASK_ALL, sprite_input, swz(imm, X, Y, X, X), swz(imm, Z, X, Z, Z)));
+ }
while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
@@ -787,6 +1056,7 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx,
{
const struct tgsi_full_instruction *finst;
+ util_dynarray_append(&insns, unsigned, fp->insn_len);
finst = &parse.FullToken.FullInstruction;
if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
goto out_err;
@@ -796,6 +1066,14 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx,
break;
}
}
+ util_dynarray_append(&insns, unsigned, fp->insn_len);
+
+ for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
+ {
+ struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
+ fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target];
+ }
+ util_dynarray_fini(&insns);
if(!nvfx->is_nv4x)
fp->fp_control |= (fpc->num_regs-1)/2;
@@ -804,9 +1082,9 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx,
/* Terminate final instruction */
if(fp->insn)
- fp->insn[fpc->inst_offset] |= 0x00000001;
+ fp->insn[fpc->inst_offset] |= 0x00000001;
- /* Append NOP + END instruction, may or may not be necessary. */
+ /* Append NOP + END instruction for branches to the end of the program */
fpc->inst_offset = fp->insn_len;
grow_insns(fpc, 4);
fp->insn[fpc->inst_offset + 0] = 0x00000001;
@@ -814,12 +1092,48 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx,
fp->insn[fpc->inst_offset + 2] = 0x00000000;
fp->insn[fpc->inst_offset + 3] = 0x00000000;
- fp->translated = TRUE;
-out_err:
+ if(debug_get_option_nvfx_dump_fp())
+ {
+ debug_printf("\n");
+ tgsi_dump(pfp->pipe.tokens, 0);
+
+ debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
+ for (unsigned i = 0; i < fp->insn_len; i += 4)
+ debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
+ debug_printf("\n");
+ }
+
+ fp->prog_size = (fp->insn_len * 4 + 63) & ~63;
+
+ if(fp->prog_size >= min_size)
+ fp->progs_per_bo = 1;
+ else
+ fp->progs_per_bo = min_size / fp->prog_size;
+ fp->bo_prog_idx = fp->progs_per_bo - 1;
+
+out:
tgsi_parse_free(&parse);
- if (fpc->r_temp)
- FREE(fpc->r_temp);
- FREE(fpc);
+ if(fpc)
+ {
+ if (fpc->r_temp)
+ FREE(fpc->r_temp);
+ util_dynarray_fini(&fpc->if_stack);
+ util_dynarray_fini(&fpc->label_relocs);
+ //util_dynarray_fini(&fpc->loop_stack);
+ FREE(fpc);
+ }
+ return fp;
+
+out_err:
+ _debug_printf("Error: failed to compile this fragment program:\n");
+ tgsi_dump(pfp->pipe.tokens, 0);
+
+ if(fp)
+ {
+ FREE(fp);
+ fp = NULL;
+ }
+ goto out;
}
static inline void
@@ -836,53 +1150,189 @@ nvfx_fp_memcpy(void* dst, const void* src, size_t len)
#endif
}
+/* The hardware only supports immediate constants inside the fragment program,
+ * and at least on nv30 doesn't support an indirect linkage table.
+ *
+ * Hence, we need to patch the fragment program itself both to update constants
+ * and update linkage.
+ *
+ * Using a single fragment program would entail unacceptable stalls if the GPU is
+ * already rendering with that fragment program.
+ * Thus, we instead use a "rotating queue" of buffer objects, each of which is
+ * packed with multiple versions of the same program.
+ *
+ * Whenever we need to patch something, we move to the next program and
+ * patch it. If all buffer objects are in use by the GPU, we allocate another one,
+ * expanding the queue.
+ *
+ * As an additional optimization, we record when all the programs have the
+ * current input slot configuration, and at that point we stop patching inputs.
+ * This happens, for instance, if a given fragment program is always used with
+ * the same vertex program (i.e. always with GLSL), or if the layouts match
+ * enough (non-GLSL).
+ *
+ * Note that instead of using multiple programs, we could push commands
+ * on the FIFO to patch a single program: it's not fully clear which option is
+ * faster, but my guess is that the current way is faster.
+ *
+ * We also track the previous slot assignments for each version and don't
+ * patch if they are the same (this could perhaps be removed).
+ */
+
void
nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- struct nvfx_fragment_program *fp = nvfx->fragprog;
- int update = 0;
-
- if (!fp->translated)
+ struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
+ struct nvfx_vertex_program* vp;
+ /* Gallium always puts the point coord in GENERIC[0]
+ * TODO: this is wrong, Gallium needs to be fixed
+ */
+ unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * (nvfx->rasterizer->pipe.sprite_coord_enable | 1);
+
+ boolean emulate_sprite_flipping = sprite_coord_enable && nvfx->rasterizer->pipe.sprite_coord_mode;
+ unsigned key = emulate_sprite_flipping;
+ struct nvfx_fragment_program* fp;
+
+ fp = pfp->fps[key];
+ if (!fp)
{
- const int min_size = 4096;
+ fp = nvfx_fragprog_translate(nvfx, pfp, emulate_sprite_flipping);
- nvfx_fragprog_translate(nvfx, fp);
- if (!fp->translated) {
- static unsigned dummy[8] = {1, 0, 0, 0, 1, 0, 0, 0};
- static int warned = 0;
- if(!warned)
+ if(!fp)
+ {
+ if(!nvfx->dummy_fs)
{
- fprintf(stderr, "nvfx: failed to translate fragment program!\n");
- warned = 1;
+ struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
+ if (ureg)
+ {
+ ureg_END( ureg );
+ nvfx->dummy_fs = ureg_create_shader_and_destroy( ureg, &nvfx->pipe );
+ }
+
+ if(!nvfx->dummy_fs)
+ {
+ _debug_printf("Error: unable to create a dummy fragment shader: aborting.");
+ abort();
+ }
}
- /* use dummy program: we cannot fail here */
- fp->translated = TRUE;
- fp->insn = malloc(sizeof(dummy));
- memcpy(fp->insn, dummy, sizeof(dummy));
- fp->insn_len = sizeof(dummy) / sizeof(dummy[0]);
+ fp = nvfx_fragprog_translate(nvfx, nvfx->dummy_fs, FALSE);
+ emulate_sprite_flipping = FALSE;
+
+ if(!fp)
+ {
+ _debug_printf("Error: unable to compile even a dummy fragment shader: aborting.");
+ abort();
+ }
}
- update = TRUE;
- fp->prog_size = (fp->insn_len * 4 + 63) & ~63;
+ pfp->fps[key] = fp;
+ }
+
+ vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;
- if(fp->prog_size >= min_size)
- fp->progs_per_bo = 1;
+ if (fp->last_vp_id != vp->id || fp->last_sprite_coord_enable != sprite_coord_enable) {
+ int sprite_real_input = -1;
+ int sprite_reloc_input;
+ unsigned i;
+ fp->last_vp_id = vp->id;
+ fp->last_sprite_coord_enable = sprite_coord_enable;
+
+ if(sprite_coord_enable)
+ {
+ sprite_real_input = vp->sprite_fp_input;
+ if(sprite_real_input < 0)
+ {
+ unsigned used_texcoords = 0;
+ for(unsigned i = 0; i < fp->num_slots; ++i) {
+ unsigned generic = fp->slot_to_generic[i];
+ if(!((1 << generic) & sprite_coord_enable))
+ {
+ unsigned char slot_mask = vp->generic_to_fp_input[generic];
+ if(slot_mask >= 0xf0)
+ used_texcoords |= 1 << ((slot_mask & 0xf) - NVFX_FP_OP_INPUT_SRC_TC0);
+ }
+ }
+
+ sprite_real_input = NVFX_FP_OP_INPUT_SRC_TC(__builtin_ctz(~used_texcoords));
+ }
+
+ fp->point_sprite_control |= (1 << (sprite_real_input - NVFX_FP_OP_INPUT_SRC_TC0 + 8));
+ }
else
- fp->progs_per_bo = min_size / fp->prog_size;
- fp->bo_prog_idx = fp->progs_per_bo - 1;
- }
+ fp->point_sprite_control = 0;
- /* we must update constants even on "just" fragprog changes, because
- we don't check whether the current constant buffer matches the latest
- one bound to this fragment program */
- if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG))
- update = TRUE;
+ if(emulate_sprite_flipping)
+ sprite_reloc_input = 0;
+ else
+ sprite_reloc_input = sprite_real_input;
- if(update) {
+ for(i = 0; i < fp->num_slots; ++i) {
+ unsigned generic = fp->slot_to_generic[i];
+ if((1 << generic) & sprite_coord_enable)
+ {
+ if(fp->slot_to_fp_input[i] != sprite_reloc_input)
+ goto update_slots;
+ }
+ else
+ {
+ unsigned char slot_mask = vp->generic_to_fp_input[generic];
+ if((slot_mask >> 4) & (slot_mask ^ fp->slot_to_fp_input[i]))
+ goto update_slots;
+ }
+ }
+
+ if(emulate_sprite_flipping)
+ {
+ if(fp->slot_to_fp_input[fp->num_slots] != sprite_real_input)
+ goto update_slots;
+ }
+
+ if(0)
+ {
+update_slots:
+ /* optimization: we start updating from the slot we found the first difference in */
+ for(; i < fp->num_slots; ++i)
+ {
+ unsigned generic = fp->slot_to_generic[i];
+ if((1 << generic) & sprite_coord_enable)
+ fp->slot_to_fp_input[i] = sprite_reloc_input;
+ else
+ fp->slot_to_fp_input[i] = vp->generic_to_fp_input[generic] & 0xf;
+ }
+
+ fp->slot_to_fp_input[fp->num_slots] = sprite_real_input;
+
+ if(nvfx->is_nv4x)
+ {
+ fp->or = 0;
+ for(i = 0; i <= fp->num_slots; ++i) {
+ unsigned fp_input = fp->slot_to_fp_input[i];
+ if(fp_input == NVFX_FP_OP_INPUT_SRC_TC(8))
+ fp->or |= (1 << 12);
+ else if(fp_input == NVFX_FP_OP_INPUT_SRC_TC(9))
+ fp->or |= (1 << 13);
+ else if(fp_input >= NVFX_FP_OP_INPUT_SRC_TC(0) && fp_input <= NVFX_FP_OP_INPUT_SRC_TC(7))
+ fp->or |= (1 << (fp_input - NVFX_FP_OP_INPUT_SRC_TC0 + 14));
+ }
+ }
+
+ fp->progs_left_with_obsolete_slot_assignments = fp->progs;
+ goto update;
+ }
+ }
+
+ /* We must update constants even on "just" fragprog changes, because
+ * we don't check whether the current constant buffer matches the latest
+ * one bound to this fragment program.
+ * Doing such a check would likely be a pessimization.
+ */
+ if ((nvfx->hw_fragprog != fp) || (nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST))) {
int offset;
+ uint32_t* fpmap;
+update:
++fp->bo_prog_idx;
if(fp->bo_prog_idx >= fp->progs_per_bo)
{
@@ -892,10 +1342,12 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
}
else
{
- struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16);
- char *map, *buf;
- int i;
+ struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16);
+ uint8_t* map;
+ uint8_t* buf;
+ fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo];
+ memset(fpbo->slots, 0, 8 * fp->progs_per_bo);
if(fp->fpbo)
{
fpbo->next = fp->fpbo->next;
@@ -905,12 +1357,14 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
fpbo->next = fpbo;
fp->fpbo = fpbo;
fpbo->bo = 0;
+ fp->progs += fp->progs_per_bo;
+ fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo;
nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo);
nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC);
map = fpbo->bo->map;
- buf = fpbo->insn;
- for(i = 0; i < fp->progs_per_bo; ++i)
+ buf = (uint8_t*)fpbo->insn;
+ for(unsigned i = 0; i < fp->progs_per_bo; ++i)
{
memcpy(buf, fp->insn, fp->insn_len * 4);
nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4);
@@ -922,13 +1376,11 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
}
offset = fp->bo_prog_idx * fp->prog_size;
+ fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
- // TODO: avoid using transfers, just directly the buffer
- struct pipe_transfer* transfer;
- // TODO: does this check make any sense, or should we do this unconditionally?
- uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer);
+ uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data;
uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
int i;
@@ -942,12 +1394,61 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
}
}
- pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer);
}
- }
- if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) {
- int offset = fp->bo_prog_idx * fp->prog_size;
+ /* we only do this if we aren't sure that all program versions have the
+ * current slot assignments, otherwise we just update constants for speed
+ */
+ if(fp->progs_left_with_obsolete_slot_assignments) {
+ unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8];
+ /* also relocate sprite coord slot, if any */
+ for(unsigned i = 0; i <= fp->num_slots; ++i) {
+ unsigned value = fp->slot_to_fp_input[i];;
+ if(value != fpbo_slots[i]) {
+ unsigned* p;
+ unsigned* begin = (unsigned*)fp->slot_relocations[i].data;
+ unsigned* end = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size);
+ //printf("fp %p reloc slot %u/%u: %u -> %u\n", fp, i, fp->num_slots, fpbo_slots[i], value);
+ if(value == 0)
+ {
+ /* was relocated to an input, switch type to temporary */
+ for(p = begin; p != end; ++p) {
+ unsigned off = *p;
+ unsigned dw = fp->insn[off];
+ dw &=~ NVFX_FP_REG_TYPE_MASK;
+ //printf("reloc_tmp at %x\n", off);
+ nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw));
+ }
+ } else {
+ if(!fpbo_slots[i])
+ {
+ /* was relocated to a temporary, switch type to input */
+ for(p= begin; p != end; ++p) {
+ unsigned off = *p;
+ unsigned dw = fp->insn[off];
+ //printf("reloc_in at %x\n", off);
+ dw |= NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT;
+ nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw));
+ }
+ }
+
+ /* set the correct input index */
+ for(p = begin; p != end; ++p) {
+ unsigned off = *p & ~3;
+ unsigned dw = fp->insn[off];
+ //printf("reloc&~3 at %x\n", off);
+ dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT);
+ nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw));
+ }
+ }
+ fpbo_slots[i] = value;
+ }
+ }
+ --fp->progs_left_with_obsolete_slot_assignments;
+ }
+
+ nvfx->hw_fragprog = fp;
+
MARK_RING(chan, 8, 1);
OUT_RING(chan, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1));
OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
@@ -963,13 +1464,26 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
OUT_RING(chan, fp->samplers);
}
}
+
+ {
+ unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization;
+ if(pointsprite_control != nvfx->hw_pointsprite_control)
+ {
+ WAIT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_POINT_SPRITE, 1));
+ OUT_RING(chan, pointsprite_control);
+ nvfx->hw_pointsprite_control = pointsprite_control;
+ }
+ }
+
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
}
void
nvfx_fragprog_relocate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- struct nvfx_fragment_program *fp = nvfx->fragprog;
+ struct nvfx_fragment_program *fp = nvfx->hw_fragprog;
struct nouveau_bo* bo = fp->fpbo->bo;
int offset = fp->bo_prog_idx * fp->prog_size;
unsigned fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; // TODO: GART?
@@ -979,12 +1493,14 @@ nvfx_fragprog_relocate(struct nvfx_context *nvfx)
OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
}
void
nvfx_fragprog_destroy(struct nvfx_context *nvfx,
struct nvfx_fragment_program *fp)
{
+ unsigned i;
struct nvfx_fragment_program_bo* fpbo = fp->fpbo;
if(fpbo)
{
@@ -999,7 +1515,60 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx,
while(fpbo != fp->fpbo);
}
+ for(i = 0; i < Elements(fp->slot_relocations); ++i)
+ util_dynarray_fini(&fp->slot_relocations[i]);
+
if (fp->insn_len)
FREE(fp->insn);
}
+static void *
+nvfx_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nvfx_pipe_fragment_program *pfp;
+
+ pfp = CALLOC(1, sizeof(struct nvfx_pipe_fragment_program));
+ pfp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(pfp->pipe.tokens, &pfp->info);
+
+ return (void *)pfp;
+}
+
+static void
+nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->fragprog = hwcso;
+ nvfx->dirty |= NVFX_NEW_FRAGPROG;
+}
+
+static void
+nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_pipe_fragment_program *pfp = hwcso;
+ unsigned i;
+
+ for(i = 0; i < Elements(pfp->fps); ++i)
+ {
+ if(pfp->fps[i])
+ {
+ nvfx_fragprog_destroy(nvfx, pfp->fps[i]);
+ FREE(pfp->fps[i]);
+ }
+ }
+
+ FREE((void*)pfp->pipe.tokens);
+ FREE(pfp);
+}
+
+void
+nvfx_init_fragprog_functions(struct nvfx_context *nvfx)
+{
+ nvfx->pipe.create_fs_state = nvfx_fp_state_create;
+ nvfx->pipe.bind_fs_state = nvfx_fp_state_bind;
+ nvfx->pipe.delete_fs_state = nvfx_fp_state_delete;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
index 0b4a434fec..6503c7afcb 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragtex.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
@@ -1,5 +1,177 @@
#include "nvfx_context.h"
#include "nvfx_resource.h"
+#include "nvfx_tex.h"
+
+static void *
+nvfx_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_sampler_state *ps;
+
+ ps = MALLOC(sizeof(struct nvfx_sampler_state));
+
+ /* on nv30, we use this as an internal flag */
+ ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT;
+ ps->en = 0;
+ ps->filt = nvfx_tex_filter(cso) | 0x2000; /*voodoo*/
+ ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
+ (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
+ (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT);
+ ps->compare = FALSE;
+
+ if(cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+ {
+ ps->wrap |= nvfx_tex_wrap_compare_mode(cso->compare_func);
+ ps->compare = TRUE;
+ }
+ ps->bcol = nvfx_tex_border_color(cso->border_color);
+
+ if(nvfx->is_nv4x)
+ nv40_sampler_state_init(pipe, ps, cso);
+ else
+ nv30_sampler_state_init(pipe, ps, cso);
+
+ return (void *)ps;
+}
+
+static void
+nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nvfx->tex_sampler[unit] = sampler[unit];
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nvfx->nr_samplers; unit++) {
+ nvfx->tex_sampler[unit] = NULL;
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ nvfx->nr_samplers = nr;
+ nvfx->dirty |= NVFX_NEW_SAMPLER;
+}
+
+static struct pipe_sampler_view *
+nvfx_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_sampler_view *templ)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_sampler_view *sv = CALLOC_STRUCT(nvfx_sampler_view);
+ struct nvfx_texture_format *tf = &nvfx_texture_formats[templ->format];
+ unsigned txf;
+
+ if (!sv)
+ return NULL;
+
+ sv->base = *templ;
+ sv->base.reference.count = 1;
+ sv->base.texture = NULL;
+ pipe_resource_reference(&sv->base.texture, pt);
+ sv->base.context = pipe;
+
+ txf = NV34TCL_TX_FORMAT_NO_BORDER;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV34TCL_TX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+ break;
+ default:
+ assert(0);
+ }
+ sv->u.init_fmt = txf;
+
+ sv->swizzle = 0
+ | (tf->src[sv->base.swizzle_r] << NV34TCL_TX_SWIZZLE_S0_Z_SHIFT)
+ | (tf->src[sv->base.swizzle_g] << NV34TCL_TX_SWIZZLE_S0_Y_SHIFT)
+ | (tf->src[sv->base.swizzle_b] << NV34TCL_TX_SWIZZLE_S0_X_SHIFT)
+ | (tf->src[sv->base.swizzle_a] << NV34TCL_TX_SWIZZLE_S0_W_SHIFT)
+ | (tf->comp[sv->base.swizzle_r] << NV34TCL_TX_SWIZZLE_S1_Z_SHIFT)
+ | (tf->comp[sv->base.swizzle_g] << NV34TCL_TX_SWIZZLE_S1_Y_SHIFT)
+ | (tf->comp[sv->base.swizzle_b] << NV34TCL_TX_SWIZZLE_S1_X_SHIFT)
+ | (tf->comp[sv->base.swizzle_a] << NV34TCL_TX_SWIZZLE_S1_W_SHIFT);
+
+ sv->filt = tf->sign;
+ sv->wrap = tf->wrap;
+ sv->wrap_mask = ~0;
+
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ {
+ sv->offset = 0;
+ sv->npot_size = (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0;
+ }
+ else
+ {
+ sv->offset = nvfx_subresource_offset(pt, 0, sv->base.first_level, 0);
+ sv->npot_size = (u_minify(pt->width0, sv->base.first_level) << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | u_minify(pt->height0, sv->base.first_level);
+
+ /* apparently, we need to ignore the t coordinate for 1D textures to fix piglit tex1d-2dborder */
+ if(pt->target == PIPE_TEXTURE_1D)
+ {
+ sv->wrap_mask &=~ NV34TCL_TX_WRAP_T_MASK;
+ sv->wrap |= NV34TCL_TX_WRAP_T_REPEAT;
+ }
+ }
+
+ if(nvfx->is_nv4x)
+ nv40_sampler_view_init(pipe, sv);
+ else
+ nv30_sampler_view_init(pipe, sv);
+
+ return &sv->base;
+}
+
+static void
+nvfx_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+ FREE(view);
+}
+
+static void
+nvfx_set_fragment_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
+ views[unit]);
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nvfx->nr_textures; unit++) {
+ pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
+ NULL);
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ nvfx->nr_textures = nr;
+ nvfx->dirty |= NVFX_NEW_SAMPLER;
+}
void
nvfx_fragtex_validate(struct nvfx_context *nvfx)
@@ -16,6 +188,10 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
samplers &= ~(1 << unit);
if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) {
+ util_dirty_surfaces_use_for_sampling(&nvfx->pipe,
+ &((struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture)->dirty_surfaces,
+ nvfx_surface_flush);
+
if(!nvfx->is_nv4x)
nv30_fragtex_set(nvfx, unit);
else
@@ -29,6 +205,7 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
}
}
nvfx->dirty_samplers = 0;
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX;
}
void
@@ -55,4 +232,128 @@ nvfx_fragtex_relocate(struct nvfx_context *nvfx)
OUT_RELOC(chan, bo, nvfx->hw_txf[unit], tex_flags | NOUVEAU_BO_OR | NOUVEAU_BO_DUMMY,
NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
}
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX;
+}
+
+void
+nvfx_init_sampling_functions(struct nvfx_context *nvfx)
+{
+ nvfx->pipe.create_sampler_state = nvfx_sampler_state_create;
+ nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind;
+ nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete;
+ nvfx->pipe.set_fragment_sampler_views = nvfx_set_fragment_sampler_views;
+ nvfx->pipe.create_sampler_view = nvfx_create_sampler_view;
+ nvfx->pipe.sampler_view_destroy = nvfx_sampler_view_destroy;
+}
+
+#define NV34TCL_TX_FORMAT_FORMAT_DXT1_RECT NV34TCL_TX_FORMAT_FORMAT_DXT1
+#define NV34TCL_TX_FORMAT_FORMAT_DXT3_RECT NV34TCL_TX_FORMAT_FORMAT_DXT3
+#define NV34TCL_TX_FORMAT_FORMAT_DXT5_RECT NV34TCL_TX_FORMAT_FORMAT_DXT5
+
+#define NV40TCL_TEX_FORMAT_FORMAT_HILO16 NV40TCL_TEX_FORMAT_FORMAT_A16L16
+
+#define NV34TCL_TX_FORMAT_FORMAT_RGBA16F 0x00004a00
+#define NV34TCL_TX_FORMAT_FORMAT_RGBA16F_RECT NV34TCL_TX_FORMAT_FORMAT_RGBA16F
+#define NV34TCL_TX_FORMAT_FORMAT_RGBA32F 0x00004b00
+#define NV34TCL_TX_FORMAT_FORMAT_RGBA32F_RECT NV34TCL_TX_FORMAT_FORMAT_RGBA32F
+#define NV34TCL_TX_FORMAT_FORMAT_R32F 0x00004c00
+#define NV34TCL_TX_FORMAT_FORMAT_R32F_RECT NV34TCL_TX_FORMAT_FORMAT_R32F
+
+// TODO: guess!
+#define NV40TCL_TEX_FORMAT_FORMAT_R32F 0x00001c00
+
+#define SRGB 0x00700000
+
+#define __(m,tf,tfc,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sign,wrap) \
+[PIPE_FORMAT_##m] = { \
+ {NV34TCL_TX_FORMAT_FORMAT_##tf, \
+ NV34TCL_TX_FORMAT_FORMAT_##tfc, \
+ NV34TCL_TX_FORMAT_FORMAT_##tf##_RECT, \
+ NV34TCL_TX_FORMAT_FORMAT_##tfc##_RECT, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tf, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tfc}, \
+ sign, wrap, \
+ {ts0z, ts0y, ts0x, ts0w, 0, 1}, {ts1z, ts1y, ts1x, ts1w, 0, 0} \
}
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sign, wrap) \
+ __(m,tf,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sign, wrap)
+
+/* Depth formats works by reading the depth value most significant 8/16 bits.
+ * We are losing precision, but nVidia loses even more by using A8R8G8B8 instead of HILO16
+ * There is no 32-bit integer texture support, so other things are infeasible.
+ *
+ * TODO: is it possible to read 16 bits for Z16? A16 doesn't seem to work, either due to normalization or endianness issues
+ */
+
+#define T 2
+
+#define X 3
+#define Y 2
+#define Z 1
+#define W 0
+
+#define SNORM ((NV34TCL_TX_FILTER_SIGNED_RED) | (NV34TCL_TX_FILTER_SIGNED_GREEN) | (NV34TCL_TX_FILTER_SIGNED_BLUE) | (NV34TCL_TX_FILTER_SIGNED_ALPHA))
+#define UNORM 0
+
+struct nvfx_texture_format
+nvfx_texture_formats[PIPE_FORMAT_COUNT] = {
+ [0 ... PIPE_FORMAT_COUNT - 1] = {{-1, -1, -1, -1, -1, -1}},
+ _(B8G8R8X8_UNORM, A8R8G8B8, T, T, T, 1, X, Y, Z, W, UNORM, 0),
+ _(B8G8R8X8_SRGB, A8R8G8B8, T, T, T, 1, X, Y, Z, W, UNORM, SRGB),
+ _(B8G8R8A8_UNORM, A8R8G8B8, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(B8G8R8A8_SRGB, A8R8G8B8, T, T, T, T, X, Y, Z, W, UNORM, SRGB),
+
+ _(R8G8B8A8_UNORM, A8R8G8B8, T, T, T, T, Z, Y, X, W, UNORM, 0),
+ _(R8G8B8A8_SRGB, A8R8G8B8, T, T, T, T, Z, Y, X, W, UNORM, SRGB),
+ _(R8G8B8X8_UNORM, A8R8G8B8, T, T, T, 1, Z, Y, X, W, UNORM, 0),
+
+ _(A8R8G8B8_UNORM, A8R8G8B8, T, T, T, T, W, Z, Y, X, UNORM, 0),
+ _(A8R8G8B8_SRGB, A8R8G8B8, T, T, T, T, W, Z, Y, X, UNORM, SRGB),
+ _(A8B8G8R8_UNORM, A8R8G8B8, T, T, T, T, W, X, Y, Z, UNORM, 0),
+ _(A8B8G8R8_SRGB, A8R8G8B8, T, T, T, T, W, X, Y, Z, UNORM, SRGB),
+ _(X8R8G8B8_UNORM, A8R8G8B8, T, T, T, 1, W, Z, Y, X, UNORM, 0),
+ _(X8R8G8B8_SRGB, A8R8G8B8, T, T, T, 1, W, Z, Y, X, UNORM, SRGB),
+
+ _(B5G5R5A1_UNORM, A1R5G5B5, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(B5G5R5X1_UNORM, A1R5G5B5, T, T, T, 1, X, Y, Z, W, UNORM, 0),
+
+ _(B4G4R4A4_UNORM, A4R4G4B4, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(B4G4R4X4_UNORM, A4R4G4B4, T, T, T, 1, X, Y, Z, W, UNORM, 0),
+
+ _(B5G6R5_UNORM, R5G6B5, T, T, T, 1, X, Y, Z, W, UNORM, 0),
+
+ _(R8_UNORM, L8, T, 0, 0, 1, X, X, X, X, UNORM, 0),
+ _(R8_SNORM, L8, T, 0, 0, 1, X, X, X, X, SNORM, 0),
+ _(L8_UNORM, L8, T, T, T, 1, X, X, X, X, UNORM, 0),
+ _(L8_SRGB, L8, T, T, T, 1, X, X, X, X, UNORM, SRGB),
+ _(A8_UNORM, L8, 0, 0, 0, T, X, X, X, X, UNORM, 0),
+ _(I8_UNORM, L8, T, T, T, T, X, X, X, X, UNORM, 0),
+
+ _(R8G8_UNORM, A8L8, T, T, T, T, X, X, X, W, UNORM, 0),
+ _(R8G8_SNORM, A8L8, T, T, T, T, X, X, X, W, SNORM, 0),
+ _(L8A8_UNORM, A8L8, T, T, T, T, X, X, X, W, UNORM, 0),
+ _(L8A8_SRGB, A8L8, T, T, T, T, X, X, X, W, UNORM, SRGB),
+
+ _(DXT1_RGB, DXT1, T, T, T, 1, X, Y, Z, W, UNORM, 0),
+ _(DXT1_SRGB, DXT1, T, T, T, 1, X, Y, Z, W, UNORM, SRGB),
+ _(DXT1_RGBA, DXT1, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(DXT1_SRGBA, DXT1, T, T, T, T, X, Y, Z, W, UNORM, SRGB),
+ _(DXT3_RGBA, DXT3, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(DXT3_SRGBA, DXT3, T, T, T, T, X, Y, Z, W, UNORM, SRGB),
+ _(DXT5_RGBA, DXT5, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(DXT5_SRGBA, DXT5, T, T, T, T, X, Y, Z, W, UNORM, SRGB),
+
+ __(Z16_UNORM, A8L8, Z16, T, T, T, 1, W, W, W, W, UNORM, 0),
+ __(S8_USCALED_Z24_UNORM,HILO16,Z24, T, T, T, 1, W, W, W, W, UNORM, 0),
+ __(X8Z24_UNORM, HILO16,Z24, T, T, T, 1, W, W, W, W, UNORM, 0),
+
+ _(R16_UNORM, A16, T, 0, 0, 1, X, X, X, X, UNORM, 0),
+ _(R16_SNORM, A16, T, 0, 0, 1, X, X, X, X, SNORM, 0),
+ _(R16G16_UNORM, HILO16, T, T, 0, 1, X, Y, X, X, UNORM, 0),
+ _(R16G16_SNORM, HILO16, T, T, 0, 1, X, Y, X, X, SNORM, 0),
+
+ _(R16G16B16A16_FLOAT, RGBA16F, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(R32G32B32A32_FLOAT, RGBA32F, T, T, T, T, X, Y, Z, W, UNORM, 0),
+ _(R32_FLOAT, R32F, T, 0, 0, 1, X, X, X, X, UNORM, 0)
+};
diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c
index b5639bb464..0916aaa828 100644
--- a/src/gallium/drivers/nvfx/nvfx_miptree.c
+++ b/src/gallium/drivers/nvfx/nvfx_miptree.c
@@ -2,309 +2,220 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_memory.h"
#include "util/u_math.h"
-
-#include "nvfx_context.h"
+#include "util/u_staging.h"
+#include "state_tracker/drm_driver.h"
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_screen.h"
+#include "nvfx_screen.h"
#include "nvfx_resource.h"
-#include "nvfx_transfer.h"
-#include "nv04_surface_2d.h"
-
-/* Currently using separate implementations for buffers and textures,
- * even though gallium has a unified abstraction of these objects.
- * Eventually these should be combined, and mechanisms like transfers
- * be adapted to work for both buffer and texture uploads.
- */
static void
-nvfx_miptree_layout(struct nvfx_miptree *mt)
+nvfx_miptree_choose_format(struct nvfx_miptree *mt)
{
struct pipe_resource *pt = &mt->base.base;
- uint width = pt->width0;
- uint offset = 0;
- int nr_faces, l, f;
- uint wide_pitch = pt->bind & (PIPE_BIND_SAMPLER_VIEW |
- PIPE_BIND_DEPTH_STENCIL |
- PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT);
-
- if (pt->target == PIPE_TEXTURE_CUBE) {
- nr_faces = 6;
- } else
- if (pt->target == PIPE_TEXTURE_3D) {
- nr_faces = pt->depth0;
- } else {
- nr_faces = 1;
+ unsigned uniform_pitch = 0;
+ static int no_swizzle = -1;
+ if(no_swizzle < 0)
+ no_swizzle = debug_get_bool_option("NV40_NO_SWIZZLE", FALSE); /* this will break things on nv30 */
+
+ if (!util_is_power_of_two(pt->width0) ||
+ !util_is_power_of_two(pt->height0) ||
+ !util_is_power_of_two(pt->depth0) ||
+ (!nvfx_screen(pt->screen)->is_nv4x && pt->target == PIPE_TEXTURE_RECT)
+ )
+ uniform_pitch = 1;
+
+ if (
+ (pt->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET))
+ || (pt->usage & PIPE_USAGE_DYNAMIC) || (pt->usage & PIPE_USAGE_STAGING)
+ || util_format_is_compressed(pt->format)
+ || no_swizzle
+ )
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+
+ /* non compressed formats with uniform pitch must be linear, and vice versa */
+ if(!util_format_is_s3tc(pt->format)
+ && (uniform_pitch || mt->base.base.flags & NVFX_RESOURCE_FLAG_LINEAR))
+ {
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ uniform_pitch = 1;
}
- for (l = 0; l <= pt->last_level; l++) {
- if (wide_pitch && (pt->flags & NVFX_RESOURCE_FLAG_LINEAR))
- mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
- else
- mt->level[l].pitch = util_format_get_stride(pt->format, width);
+ if(uniform_pitch)
+ {
+ mt->linear_pitch = util_format_get_stride(pt->format, pt->width0);
- mt->level[l].image_offset =
- CALLOC(nr_faces, sizeof(unsigned));
+ // TODO: this is only a constraint for rendering and not sampling, apparently
+ // we may also want this unconditionally
+ if(pt->bind & (PIPE_BIND_SAMPLER_VIEW |
+ PIPE_BIND_DEPTH_STENCIL |
+ PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT))
+ mt->linear_pitch = align(mt->linear_pitch, 64);
+ }
+ else
+ mt->linear_pitch = 0;
+}
+
+static unsigned
+nvfx_miptree_layout(struct nvfx_miptree *mt)
+{
+ struct pipe_resource* pt = &mt->base.base;
+ uint offset = 0;
- width = u_minify(width, 1);
+ if(!nvfx_screen(pt->screen)->is_nv4x)
+ {
+ assert(pt->target == PIPE_TEXTURE_RECT
+ || (util_is_power_of_two(pt->width0) && util_is_power_of_two(pt->height0)));
}
- for (f = 0; f < nr_faces; f++) {
- for (l = 0; l < pt->last_level; l++) {
- mt->level[l].image_offset[f] = offset;
+ for (unsigned l = 0; l <= pt->last_level; l++)
+ {
+ unsigned size;
+ mt->level_offset[l] = offset;
- if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR) &&
- u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
- offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64);
- else
- offset += mt->level[l].pitch * u_minify(pt->height0, l);
- }
+ if(mt->linear_pitch)
+ size = mt->linear_pitch;
+ else
+ size = util_format_get_stride(pt->format, u_minify(pt->width0, l));
+ size = util_format_get_2d_size(pt->format, size, u_minify(pt->height0, l));
- mt->level[l].image_offset[f] = offset;
- offset += mt->level[l].pitch * u_minify(pt->height0, l);
+ if(pt->target == PIPE_TEXTURE_3D)
+ size *= u_minify(pt->depth0, l);
+
+ offset += size;
}
- mt->total_size = offset;
+ offset = align(offset, 128);
+ mt->face_size = offset;
+ if(mt->base.base.target == PIPE_TEXTURE_CUBE)
+ offset += 5 * mt->face_size;
+ return offset;
}
-static boolean
-nvfx_miptree_get_handle(struct pipe_screen *pscreen,
- struct pipe_resource *ptexture,
- struct winsys_handle *whandle)
+static void
+nvfx_miptree_surface_final_destroy(struct pipe_surface* ps)
{
- struct nvfx_miptree* mt = (struct nvfx_miptree*)ptexture;
-
- if (!mt || !mt->base.bo)
- return FALSE;
-
- return nouveau_screen_bo_get_handle(pscreen,
- mt->base.bo,
- mt->level[0].pitch,
- whandle);
+ struct nvfx_surface* ns = (struct nvfx_surface*)ps;
+ pipe_resource_reference(&ps->texture, 0);
+ pipe_resource_reference((struct pipe_resource**)&ns->temp, 0);
+ FREE(ps);
}
-
-static void
+void
nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt)
{
struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
- int l;
-
+ util_surfaces_destroy(&mt->surfaces, pt, nvfx_miptree_surface_final_destroy);
nouveau_screen_bo_release(screen, mt->base.bo);
-
- for (l = 0; l <= pt->last_level; l++) {
- if (mt->level[l].image_offset)
- FREE(mt->level[l].image_offset);
- }
-
FREE(mt);
}
-
-
-
-struct u_resource_vtbl nvfx_miptree_vtbl =
+static struct nvfx_miptree*
+nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_resource *pt)
{
- nvfx_miptree_get_handle, /* get_handle */
- nvfx_miptree_destroy, /* resource_destroy */
- NULL, /* is_resource_referenced */
- nvfx_miptree_transfer_new, /* get_transfer */
- nvfx_miptree_transfer_del, /* transfer_destroy */
- nvfx_miptree_transfer_map, /* transfer_map */
- u_default_transfer_flush_region, /* transfer_flush_region */
- nvfx_miptree_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
+ struct nvfx_miptree *mt;
+ if(pt->width0 > 4096 || pt->height0 > 4096)
+ return NULL;
+ mt = CALLOC_STRUCT(nvfx_miptree);
+ if (!mt)
+ return NULL;
-struct pipe_resource *
-nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt)
-{
- struct nvfx_miptree *mt;
- static int no_swizzle = -1;
- if(no_swizzle < 0)
- no_swizzle = debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE);
-
- mt = CALLOC_STRUCT(nvfx_miptree);
- if (!mt)
- return NULL;
-
- mt->base.base = *pt;
- mt->base.vtbl = &nvfx_miptree_vtbl;
- pipe_reference_init(&mt->base.base.reference, 1);
- mt->base.base.screen = pscreen;
+ mt->base.base = *pt;
+ util_dirty_surfaces_init(&mt->dirty_surfaces);
- /* Swizzled textures must be POT */
- if (pt->width0 & (pt->width0 - 1) ||
- pt->height0 & (pt->height0 - 1))
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
- else
- if (pt->bind & (PIPE_BIND_SCANOUT |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_DEPTH_STENCIL))
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
- else
- if (pt->usage == PIPE_USAGE_DYNAMIC)
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
- else {
- switch (pt->format) {
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_L8A8_UNORM:
- case PIPE_FORMAT_A8_UNORM:
- case PIPE_FORMAT_L8_UNORM:
- case PIPE_FORMAT_I8_UNORM:
- /* TODO: we can actually swizzle these formats on nv40, we
- are just preserving the pre-unification behavior.
- The whole 2D code is going to be rewritten anyway. */
- if(nvfx_screen(pscreen)->is_nv4x) {
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
- break;
- }
- /* TODO: Figure out which formats can be swizzled */
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_R16_SNORM:
- {
- if (no_swizzle)
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
- break;
- }
- default:
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
- }
- }
+ pipe_reference_init(&mt->base.base.reference, 1);
+ mt->base.base.screen = pscreen;
- /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
- * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
- * This also happens for small mipmaps of large textures. */
- if (pt->bind & PIPE_BIND_RENDER_TARGET &&
- util_format_get_stride(pt->format, pt->width0) < 64)
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ // set this to the actual capabilities, we use it to decide whether to use the 3D engine for copies
+ // TODO: is this the correct way to use Gallium?
+ mt->base.base.bind = pt->bind | PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DEPTH_STENCIL;
- nvfx_miptree_layout(mt);
+ // on our current driver (and the driver too), format support does not depend on geometry, so don't bother computing it
+ // TODO: may want to revisit this
+ if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET, 0))
+ mt->base.base.bind &=~ PIPE_BIND_RENDER_TARGET;
+ if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW, 0))
+ mt->base.base.bind &=~ PIPE_BIND_SAMPLER_VIEW;
+ if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL, 0))
+ mt->base.base.bind &=~ PIPE_BIND_DEPTH_STENCIL;
- mt->base.bo = nouveau_screen_bo_new(pscreen, 256,
- pt->usage, pt->bind, mt->total_size);
- if (!mt->base.bo) {
- FREE(mt);
- return NULL;
- }
- return &mt->base.base;
+ return mt;
}
-
-
struct pipe_resource *
-nvfx_miptree_from_handle(struct pipe_screen *pscreen,
- const struct pipe_resource *template,
- struct winsys_handle *whandle)
+nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt)
{
- struct nvfx_miptree *mt;
- unsigned stride;
+ struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, pt);
+ unsigned size;
+ nvfx_miptree_choose_format(mt);
- /* Only supports 2D, non-mipmapped textures for the moment */
- if (template->target != PIPE_TEXTURE_2D ||
- template->last_level != 0 ||
- template->depth0 != 1)
- return NULL;
+ size = nvfx_miptree_layout(mt);
- mt = CALLOC_STRUCT(nvfx_miptree);
- if (!mt)
- return NULL;
+ mt->base.bo = nouveau_screen_bo_new(pscreen, 256, pt->usage, pt->bind, size);
- mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
- if (mt->base.bo == NULL) {
+ if (!mt->base.bo) {
FREE(mt);
return NULL;
}
-
- mt->base.base = *template;
- mt->base.vtbl = &nvfx_miptree_vtbl;
- pipe_reference_init(&mt->base.base.reference, 1);
- mt->base.base.screen = pscreen;
- mt->level[0].pitch = stride;
- mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
-
- /* Assume whoever created this buffer expects it to be linear for now */
- mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-
- /* XXX: Need to adjust bo refcount??
- */
- /* nouveau_bo_ref(bo, &mt->base.bo); */
return &mt->base.base;
}
+// TODO: redo this, just calling miptree_layout
+struct pipe_resource *
+nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *template, struct winsys_handle *whandle)
+{
+ struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, template);
+ unsigned stride;
+ if(whandle->stride) {
+ mt->linear_pitch = whandle->stride;
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ } else
+ nvfx_miptree_choose_format(mt);
+ nvfx_miptree_layout(mt);
+ mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+ if (mt->base.bo == NULL) {
+ FREE(mt);
+ return NULL;
+ }
+ return &mt->base.base;
+}
-
-/* Surface helpers, not strictly required to implement the resource vtbl:
- */
struct pipe_surface *
nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
unsigned face, unsigned level, unsigned zslice,
unsigned flags)
{
- struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
- struct nv04_surface *ns;
-
- ns = CALLOC_STRUCT(nv04_surface);
- if (!ns)
- return NULL;
- pipe_resource_reference(&ns->base.texture, pt);
- ns->base.format = pt->format;
- ns->base.width = u_minify(pt->width0, level);
- ns->base.height = u_minify(pt->height0, level);
- ns->base.usage = flags;
- pipe_reference_init(&ns->base.reference, 1);
- ns->base.face = face;
- ns->base.level = level;
- ns->base.zslice = zslice;
- ns->pitch = mt->level[level].pitch;
-
- if (pt->target == PIPE_TEXTURE_CUBE) {
- ns->base.offset = mt->level[level].image_offset[face];
- } else
- if (pt->target == PIPE_TEXTURE_3D) {
- ns->base.offset = mt->level[level].image_offset[zslice];
- } else {
- ns->base.offset = mt->level[level].image_offset[0];
- }
-
- /* create a linear temporary that we can render into if
- * necessary.
- *
- * Note that ns->pitch is always a multiple of 64 for linear
- * surfaces and swizzled surfaces are POT, so ns->pitch & 63
- * is equivalent to (ns->pitch < 64 && swizzled)
- */
-
- if ((ns->pitch & 63) &&
- (ns->base.usage & PIPE_BIND_RENDER_TARGET))
- {
- struct nv04_surface_2d* eng2d =
- ((struct nvfx_screen*)pscreen)->eng2d;
-
- ns = nv04_surface_wrap_for_render(pscreen, eng2d, ns);
+ struct nvfx_miptree* mt = (struct nvfx_miptree*)pt;
+ struct nvfx_surface *ns;
+
+ ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags);
+ if(ns->base.base.offset == ~0) {
+ util_dirty_surface_init(&ns->base);
+ ns->pitch = nvfx_subresource_pitch(pt, level);
+ ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice);
}
- return &ns->base;
+ return &ns->base.base;
}
void
nvfx_miptree_surface_del(struct pipe_surface *ps)
{
- struct nv04_surface* ns = (struct nv04_surface*)ps;
- if(ns->backing)
+ struct nvfx_surface* ns = (struct nvfx_surface*)ps;
+
+ if(!ns->temp)
{
- struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen;
- if(1 /*ns->backing->base.usage & PIPE_BIND_BLIT_DESTINATION*/)
- screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
- nvfx_miptree_surface_del(&ns->backing->base);
+ util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps);
+ pipe_resource_reference(&ps->texture, 0);
+ FREE(ps);
}
-
- pipe_resource_reference(&ps->texture, NULL);
- FREE(ps);
}
diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
new file mode 100644
index 0000000000..ffe7e98357
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_push.c
@@ -0,0 +1,414 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_split_prim.h"
+#include "translate/translate.h"
+
+#include "nvfx_context.h"
+#include "nvfx_resource.h"
+
+struct push_context {
+ struct nouveau_channel* chan;
+
+ void *idxbuf;
+ int32_t idxbias;
+
+ float edgeflag;
+ int edgeflag_attr;
+
+ unsigned vertex_length;
+ unsigned max_vertices_per_packet;
+
+ struct translate* translate;
+};
+
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+
+ OUT_RING(chan, RING_3D(NV34TCL_EDGEFLAG_ENABLE, 1));
+ OUT_RING(chan, enabled ? 1 : 0);
+}
+
+static void
+emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+ uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ elts += push;
+ }
+}
+
+static void
+emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+ uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ elts += push;
+ }
+}
+
+static void
+emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+ uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ elts += push;
+ }
+}
+
+static void
+emit_vertices(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ start += push;
+ }
+}
+
+static void
+emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ unsigned nr = (vc & 0xff);
+ if (nr) {
+ OUT_RING(chan, RING_3D(reg, 1));
+ OUT_RING (chan, ((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ OUT_RING(chan, RING_3D_NI(reg, push));
+ while (push--) {
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+}
+
+static void
+emit_ib_ranges(void* priv, unsigned start, unsigned vc)
+{
+ emit_ranges(priv, start, vc, NV34TCL_VB_INDEX_BATCH);
+}
+
+static void
+emit_vb_ranges(void* priv, unsigned start, unsigned vc)
+{
+ emit_ranges(priv, start, vc, NV34TCL_VB_VERTEX_BATCH);
+}
+
+static INLINE void
+emit_elt8(void* priv, unsigned start, unsigned vc)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
+ int idxbias = ctx->idxbias;
+
+ if (vc & 1) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+ OUT_RING (chan, elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+ unsigned push = MIN2(vc, 2047 * 2);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+ for (i = 0; i < push; i+=2)
+ OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+ vc -= push;
+ elts += push;
+ }
+}
+
+static INLINE void
+emit_elt16(void* priv, unsigned start, unsigned vc)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
+ int idxbias = ctx->idxbias;
+
+ if (vc & 1) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+ OUT_RING (chan, elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+ unsigned push = MIN2(vc, 2047 * 2);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+ for (i = 0; i < push; i+=2)
+ OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+ vc -= push;
+ elts += push;
+ }
+}
+
+static INLINE void
+emit_elt32(void* priv, unsigned start, unsigned vc)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
+ int idxbias = ctx->idxbias;
+
+ while (vc) {
+ unsigned push = MIN2(vc, 2047);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
+ assert(AVAIL_RING(chan) >= push);
+ if(idxbias)
+ {
+ for(unsigned i = 0; i < push; ++i)
+ OUT_RING(chan, elts[i] + idxbias);
+ }
+ else
+ OUT_RINGp(chan, elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+}
+
+void
+nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct push_context ctx;
+ struct util_split_prim s;
+ unsigned instances_left = info->instance_count;
+ int vtx_value;
+ unsigned hw_mode = nvgl_primitive(info->mode);
+ int i;
+ struct
+ {
+ uint8_t* map;
+ unsigned step;
+ } per_instance[16];
+ unsigned p_overhead = 64 /* magic fix */
+ + 4 /* begin/end */
+ + 4; /* potential edgeflag enable/disable */
+
+ ctx.chan = nvfx->screen->base.channel;
+ ctx.translate = nvfx->vtxelt->translate;
+ ctx.idxbuf = NULL;
+ ctx.vertex_length = nvfx->vtxelt->vertex_length;
+ ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
+ ctx.edgeflag = 0.5f;
+ // TODO: figure out if we really want to handle this, and do so in that case
+ ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;
+
+ if(!nvfx->use_vertex_buffers)
+ {
+ for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
+ if(info->indexed)
+ data += info->index_bias * vb->stride;
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+ }
+
+ if(ctx.edgeflag_attr < 16)
+ vtx_value = -(ctx.vertex_length + 3); /* vertex data and edgeflag header and value */
+ else
+ {
+ p_overhead += 1; /* initial vertex_data header */
+ vtx_value = -ctx.vertex_length; /* vertex data and edgeflag header and value */
+ }
+
+ if (info->indexed) {
+ // XXX: this case and is broken and probably need a new VTX_ATTR push path
+ if (nvfx->idxbuf.index_size == 1)
+ s.emit = emit_vertices_lookup8;
+ else if (nvfx->idxbuf.index_size == 2)
+ s.emit = emit_vertices_lookup16;
+ else
+ s.emit = emit_vertices_lookup32;
+ } else
+ s.emit = emit_vertices;
+ }
+ else
+ {
+ if(!info->indexed || nvfx->use_index_buffer)
+ {
+ s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
+ p_overhead += 3;
+ vtx_value = 0;
+ }
+ else if (nvfx->idxbuf.index_size == 4)
+ {
+ s.emit = emit_elt32;
+ p_overhead += 1;
+ vtx_value = 8;
+ }
+ else
+ {
+ s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
+ p_overhead += 3;
+ vtx_value = 7;
+ }
+ }
+
+ ctx.idxbias = info->index_bias;
+ if(nvfx->use_vertex_buffers)
+ ctx.idxbias -= nvfx->base_vertex;
+
+ /* map index buffer, if present */
+ if (info->indexed && !nvfx->use_index_buffer)
+ ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
+
+ s.priv = &ctx;
+ s.edge = emit_edgeflag;
+
+ for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
+ {
+ struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+ float v[4];
+ per_instance[i].step = info->start_instance % ve->instance_divisor;
+ per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset;
+
+ nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+
+ WAIT_RING(chan, 5);
+ nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ }
+
+ /* per-instance loop */
+ while (instances_left--) {
+ int max_verts;
+ boolean done;
+
+ util_split_prim_init(&s, info->mode, info->start, info->count);
+ nvfx_state_emit(nvfx);
+ for(;;) {
+ max_verts = AVAIL_RING(chan);
+ max_verts -= p_overhead;
+
+ /* if vtx_value < 0, each vertex is -vtx_value words long
+ * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
+ */
+ if(vtx_value < 0)
+ {
+ max_verts /= -vtx_value;
+ max_verts -= (max_verts >> 10); /* vertex data headers */
+ }
+ else
+ {
+ if(max_verts >= (1 << 23)) /* avoid overflow here */
+ max_verts = (1 << 23);
+ max_verts = (max_verts * 255) >> vtx_value;
+ }
+
+ //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);
+
+ if(max_verts >= 16)
+ {
+ /* XXX: any command a lot of times seems to (mostly) fix corruption that would otherwise happen */
+ /* this seems to cause issues on nv3x, and also be unneeded there */
+ if(nvfx->is_nv4x)
+ {
+ int i;
+ for(i = 0; i < 32; ++i)
+ {
+ OUT_RING(chan, RING_3D(0x1dac, 1));
+ OUT_RING(chan, 0);
+ }
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING(chan, hw_mode);
+ done = util_split_prim_next(&s, max_verts);
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING(chan, 0);
+
+ if(done)
+ break;
+ }
+
+ FIRE_RING(chan);
+ nvfx_state_emit(nvfx);
+ }
+
+ /* set data for the next instance, if any changed */
+ for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
+ {
+ struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+
+ if(++per_instance[i].step == ve->instance_divisor)
+ {
+ float v[4];
+ per_instance[i].map += vb->stride;
+ per_instance[i].step = 0;
+
+ nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+ WAIT_RING(chan, 5);
+ nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c
index 10cdeed2a3..39ae893f1b 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.c
+++ b/src/gallium/drivers/nvfx/nvfx_resource.c
@@ -1,23 +1,15 @@
#include "pipe/p_context.h"
+#include "util/u_staging.h"
#include "nvfx_resource.h"
#include "nouveau/nouveau_screen.h"
-
-/* This doesn't look quite right - this query is supposed to ask
- * whether the particular context has references to the resource in
- * any unflushed rendering command buffer, and hence requires a
- * pipe->flush() for serializing some modification to that resource.
- *
- * This seems to be answering the question of whether the resource is
- * currently on hardware.
- */
static unsigned int
nvfx_resource_is_referenced(struct pipe_context *pipe,
- struct pipe_resource *resource,
+ struct pipe_resource *pr,
unsigned face, unsigned level)
{
- return nouveau_reference_flags(nvfx_resource(resource)->bo);
+ return !!nouveau_reference_flags(nvfx_resource(pr)->bo);
}
static struct pipe_resource *
@@ -30,6 +22,15 @@ nvfx_resource_create(struct pipe_screen *screen,
return nvfx_miptree_create(screen, template);
}
+static void
+nvfx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *pr)
+{
+ if (pr->target == PIPE_BUFFER)
+ return nvfx_buffer_destroy(screen, pr);
+ else
+ return nvfx_miptree_destroy(screen, pr);
+}
+
static struct pipe_resource *
nvfx_resource_from_handle(struct pipe_screen * screen,
const struct pipe_resource *template,
@@ -41,15 +42,22 @@ nvfx_resource_from_handle(struct pipe_screen * screen,
return nvfx_miptree_from_handle(screen, template, whandle);
}
+static boolean
+nvfx_resource_get_handle(struct pipe_screen *pscreen,
+ struct pipe_resource *pr,
+ struct winsys_handle *whandle)
+{
+ struct nvfx_resource* res = (struct nvfx_resource*)pr;
+
+ if (!res || !res->bo)
+ return FALSE;
+
+ return nouveau_screen_bo_get_handle(pscreen, res->bo, nvfx_subresource_pitch(pr, 0), whandle);
+}
+
void
nvfx_init_resource_functions(struct pipe_context *pipe)
{
- pipe->get_transfer = u_get_transfer_vtbl;
- pipe->transfer_map = u_transfer_map_vtbl;
- pipe->transfer_flush_region = u_transfer_flush_region_vtbl;
- pipe->transfer_unmap = u_transfer_unmap_vtbl;
- pipe->transfer_destroy = u_transfer_destroy_vtbl;
- pipe->transfer_inline_write = u_transfer_inline_write_vtbl;
pipe->is_resource_referenced = nvfx_resource_is_referenced;
}
@@ -58,10 +66,10 @@ nvfx_screen_init_resource_functions(struct pipe_screen *pscreen)
{
pscreen->resource_create = nvfx_resource_create;
pscreen->resource_from_handle = nvfx_resource_from_handle;
- pscreen->resource_get_handle = u_resource_get_handle_vtbl;
- pscreen->resource_destroy = u_resource_destroy_vtbl;
+ pscreen->resource_get_handle = nvfx_resource_get_handle;
+ pscreen->resource_destroy = nvfx_resource_destroy;
pscreen->user_buffer_create = nvfx_user_buffer_create;
-
+
pscreen->get_tex_surface = nvfx_miptree_surface_new;
pscreen->tex_surface_destroy = nvfx_miptree_surface_del;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h
index a68c14cf3f..583be4de2a 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.h
+++ b/src/gallium/drivers/nvfx/nvfx_resource.h
@@ -1,44 +1,82 @@
-
#ifndef NVFX_RESOURCE_H
#define NVFX_RESOURCE_H
#include "util/u_transfer.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_double_list.h"
+#include "util/u_surfaces.h"
+#include "util/u_dirty_surfaces.h"
+#include <nouveau/nouveau_bo.h>
struct pipe_resource;
-struct nouveau_bo;
-
+struct nv04_region;
-/* This gets further specialized into either buffer or texture
- * structures. In the future we'll want to remove much of that
- * distinction, but for now try to keep as close to the existing code
- * as possible and use the vtbl struct to choose between the two
- * underlying implementations.
- */
struct nvfx_resource {
struct pipe_resource base;
- struct u_resource_vtbl *vtbl;
struct nouveau_bo *bo;
};
+static INLINE
+struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
+{
+ return (struct nvfx_resource *)resource;
+}
+
+#define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
+#define NVFX_RESOURCE_FLAG_USER (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
+
+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
+static INLINE boolean
+nvfx_resource_mapped_by_gpu(struct pipe_resource *resource)
+{
+ return nvfx_resource(resource)->bo->handle;
+}
+
+/* is resource in VRAM? */
+static inline int
+nvfx_resource_on_gpu(struct pipe_resource* pr)
+{
+#if 0
+ // a compiler error here means you need to apply libdrm-nouveau-add-domain.patch to libdrm
+ // TODO: return FALSE if not VRAM and on a PCI-E system
+ return ((struct nvfx_resource*)pr)->bo->domain & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+#else
+ return TRUE;
+#endif
+}
+
#define NVFX_MAX_TEXTURE_LEVELS 16
+/* We have the following invariants for render temporaries
+ *
+ * 1. Render temporaries are always linear
+ * 2. Render temporaries are always up to date
+ * 3. Currently, render temporaries are destroyed when the resource is used for sampling, but kept for any other use
+ *
+ * Also, we do NOT flush temporaries on any pipe->flush().
+ * This is fine, as long as scanout targets and shared resources never need temps.
+ *
+ * TODO: we may want to also support swizzled temporaries to improve performance in some cases.
+ */
+
struct nvfx_miptree {
- struct nvfx_resource base;
- uint total_size;
+ struct nvfx_resource base;
- struct {
- uint pitch;
- uint *image_offset;
- } level[NVFX_MAX_TEXTURE_LEVELS];
+ unsigned linear_pitch; /* for linear textures, 0 for swizzled and compressed textures with level-dependent minimal pitch */
+ unsigned face_size; /* 128-byte aligned face/total size */
+ unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS];
- unsigned image_nr;
+ struct util_surfaces surfaces;
+ struct util_dirty_surfaces dirty_surfaces;
};
-static INLINE
-struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
-{
- return (struct nvfx_resource *)resource;
-}
+struct nvfx_surface {
+ struct util_dirty_surface base;
+ unsigned pitch;
+
+ struct nvfx_miptree* temp;
+};
static INLINE struct nouveau_bo *
nvfx_surface_buffer(struct pipe_surface *surf)
@@ -48,6 +86,12 @@ nvfx_surface_buffer(struct pipe_surface *surf)
return mt->bo;
}
+static INLINE struct util_dirty_surfaces*
+nvfx_surface_get_dirty_surfaces(struct pipe_surface* surf)
+{
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture;
+ return &mt->dirty_surfaces;
+}
void
nvfx_init_resource_functions(struct pipe_context *pipe);
@@ -62,30 +106,118 @@ nvfx_screen_init_resource_functions(struct pipe_screen *pscreen);
struct pipe_resource *
nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt);
+void
+nvfx_miptree_destroy(struct pipe_screen *pscreen,
+ struct pipe_resource *presource);
+
struct pipe_resource *
nvfx_miptree_from_handle(struct pipe_screen *pscreen,
const struct pipe_resource *template,
struct winsys_handle *whandle);
+void
+nvfx_miptree_surface_del(struct pipe_surface *ps);
+
+struct pipe_surface *
+nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags);
+
+/* only for miptrees, don't use for buffers */
+
+/* NOTE: for swizzled 3D textures, this just returns the offset of the mipmap level */
+static inline unsigned
+nvfx_subresource_offset(struct pipe_resource* pt, unsigned face, unsigned level, unsigned zslice)
+{
+ if(pt->target == PIPE_BUFFER)
+ return 0;
+ else
+ {
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+
+ unsigned offset = mt->level_offset[level];
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ offset += mt->face_size * face;
+ else if (pt->target == PIPE_TEXTURE_3D && mt->linear_pitch)
+ offset += zslice * util_format_get_2d_size(pt->format, (mt->linear_pitch ? mt->linear_pitch : util_format_get_stride(pt->format, u_minify(pt->width0, level))), u_minify(pt->height0, level));
+ return offset;
+ }
+}
+
+static inline unsigned
+nvfx_subresource_pitch(struct pipe_resource* pt, unsigned level)
+{
+ if(pt->target == PIPE_BUFFER)
+ return ((struct nvfx_resource*)pt)->bo->size;
+ else
+ {
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+
+ if(mt->linear_pitch)
+ return mt->linear_pitch;
+ else
+ return util_format_get_stride(pt->format, u_minify(pt->width0, level));
+ }
+}
+
+void
+nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf);
+
+void
+nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf);
+
+struct nvfx_buffer
+{
+ struct nvfx_resource base;
+ uint8_t* data;
+ unsigned size;
+
+ /* the range of data not yet uploaded to the GPU bo */
+ unsigned dirty_begin;
+ unsigned dirty_end;
+
+ /* whether all transfers were unsynchronized */
+ boolean dirty_unsynchronized;
+
+ /* whether it would have been profitable to upload
+ * the latest updated data to the GPU immediately */
+ boolean last_update_static;
+
+ /* how many bytes we need to draw before we deem
+ * the buffer to be static
+ */
+ long long bytes_to_draw_until_static;
+};
+
+static inline struct nvfx_buffer* nvfx_buffer(struct pipe_resource* pr)
+{
+ return (struct nvfx_buffer*)pr;
+}
+
+/* this is an heuristic to determine whether we are better off uploading the
+ * buffer to the GPU, or just continuing pushing it on the FIFO
+ */
+static inline boolean nvfx_buffer_seems_static(struct nvfx_buffer* buffer)
+{
+ return buffer->last_update_static
+ || buffer->bytes_to_draw_until_static < 0;
+}
+
struct pipe_resource *
nvfx_buffer_create(struct pipe_screen *pscreen,
const struct pipe_resource *template);
+void
+nvfx_buffer_destroy(struct pipe_screen *pscreen,
+ struct pipe_resource *presource);
+
struct pipe_resource *
nvfx_user_buffer_create(struct pipe_screen *screen,
void *ptr,
unsigned bytes,
unsigned usage);
-
-
void
-nvfx_miptree_surface_del(struct pipe_surface *ps);
-
-struct pipe_surface *
-nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned flags);
-
+nvfx_buffer_upload(struct nvfx_buffer* buffer);
#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index f2525ccb38..65ca265d45 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -8,23 +8,12 @@
#include "nvfx_context.h"
#include "nvfx_screen.h"
#include "nvfx_resource.h"
+#include "nvfx_tex.h"
#define NV30TCL_CHIPSET_3X_MASK 0x00000003
#define NV34TCL_CHIPSET_3X_MASK 0x00000010
#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
-/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h
-* to get the pointer to the context front buffer, so I copied nouveau_winsys here.
-* nv30_screen_surface_format_supported() can then use it to enforce creating fbo
-* with same number of bits everywhere.
-*/
-struct nouveau_winsys {
- struct pipe_winsys base;
-
- struct pipe_screen *pscreen;
-
- struct pipe_surface *front;
-};
#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
#define NV6X_GRCLASS4497_CHIPSETS 0x00000088
@@ -43,7 +32,7 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TWO_SIDED_STENCIL:
return 1;
case PIPE_CAP_GLSL:
- return 0;
+ return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
@@ -162,77 +151,74 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
}
static boolean
-nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
+nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage, unsigned geom_flags)
+ unsigned bind, unsigned geom_flags)
{
struct nvfx_screen *screen = nvfx_screen(pscreen);
- struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
if (sample_count > 1)
return FALSE;
- if (tex_usage & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
switch (format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_B5G6R5_UNORM:
- return TRUE;
- default:
break;
+ default:
+ return FALSE;
}
- } else
- if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
+ }
+
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
switch (format) {
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
- return TRUE;
case PIPE_FORMAT_Z16_UNORM:
- /* TODO: this nv30 limitation probably does not exist */
- if (!screen->is_nv4x && front)
- return (front->format == PIPE_FORMAT_B5G6R5_UNORM);
- return TRUE;
- default:
break;
+ default:
+ return FALSE;
}
- } else {
- switch (format) {
- if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
- switch (format) {
- case PIPE_FORMAT_DXT1_RGB:
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return util_format_s3tc_enabled;
- default:
- break;
- }
+ }
+
+ if (bind & PIPE_BIND_SAMPLER_VIEW) {
+ struct nvfx_texture_format* tf = &nvfx_texture_formats[format];
+ if(util_format_is_s3tc(format) && !util_format_s3tc_enabled)
+ return FALSE;
+
+ if(screen->is_nv4x)
+ {
+ if(tf->fmt[4] < 0)
+ return FALSE;
}
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_B4G4R4A4_UNORM:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_L8_UNORM:
- case PIPE_FORMAT_A8_UNORM:
- case PIPE_FORMAT_I8_UNORM:
- case PIPE_FORMAT_L8A8_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- return TRUE;
- /* TODO: does nv30 support this? */
- case PIPE_FORMAT_R16_SNORM:
- return !!screen->is_nv4x;
- default:
- break;
+ else
+ {
+ if(tf->fmt[0] < 0)
+ return FALSE;
}
}
- return FALSE;
-}
+ // note that we do actually support everything through translate
+ if (bind & PIPE_BIND_VERTEX_BUFFER) {
+ unsigned type = nvfx_vertex_formats[format];
+ if(!type)
+ return FALSE;
+ }
+
+ if (bind & PIPE_BIND_INDEX_BUFFER) {
+ // 8-bit indices supported, but not in hardware index buffer
+ if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED)
+ return FALSE;
+ }
+
+ if(bind & PIPE_BIND_STREAM_OUTPUT)
+ return FALSE;
+ return TRUE;
+}
static void
nvfx_screen_destroy(struct pipe_screen *pscreen)
@@ -245,7 +231,7 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->eng3d);
- nv04_surface_2d_takedown(&screen->eng2d);
+ nvfx_screen_surface_takedown(pscreen);
nouveau_screen_fini(&screen->base);
@@ -374,6 +360,14 @@ nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
}
+static void nvfx_channel_flush_notify(struct nouveau_channel* chan)
+{
+ struct nvfx_screen* screen = chan->user_private;
+ struct nvfx_context* nvfx = screen->cur_ctx;
+ if(nvfx)
+ nvfx->relocs_needed = NVFX_RELOCATE_ALL;
+}
+
struct pipe_screen *
nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
@@ -395,12 +389,15 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
return NULL;
}
chan = screen->base.channel;
+ screen->cur_ctx = NULL;
+ chan->user_private = screen;
+ chan->flush_notify = nvfx_channel_flush_notify;
pscreen->winsys = ws;
pscreen->destroy = nvfx_screen_destroy;
pscreen->get_param = nvfx_screen_get_param;
pscreen->get_paramf = nvfx_screen_get_paramf;
- pscreen->is_format_supported = nvfx_screen_surface_format_supported;
+ pscreen->is_format_supported = nvfx_screen_is_format_supported;
pscreen->context_create = nvfx_create;
switch (dev->chipset & 0xf0) {
@@ -432,6 +429,11 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
+ screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
+
+ screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
+ screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
+ screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
@@ -451,8 +453,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* 2D engine setup */
- screen->eng2d = nv04_surface_2d_init(&screen->base);
- screen->eng2d->buf = nvfx_surface_buffer;
+ nvfx_screen_surface_init(pscreen);
/* Notifier for sync purposes */
ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h
index 5e1c3945ae..1b79235ae0 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.h
+++ b/src/gallium/drivers/nvfx/nvfx_screen.h
@@ -1,11 +1,11 @@
#ifndef __NVFX_SCREEN_H__
#define __NVFX_SCREEN_H__
+#include "pipe/p_compiler.h"
#include "util/u_double_list.h"
#include "nouveau/nouveau_screen.h"
-#include "nv04_surface_2d.h"
-struct nvfx_context;
+struct pipe_screen;
struct nvfx_screen {
struct nouveau_screen base;
@@ -16,11 +16,11 @@ struct nvfx_screen {
unsigned is_nv4x; /* either 0 or ~0 */
boolean force_swtnl;
+ boolean trace_draw;
unsigned vertex_buffer_reloc_flags;
unsigned index_buffer_reloc_flags;
/* HW graphics objects */
- struct nv04_surface_2d *eng2d;
struct nouveau_grobj *eng3d;
struct nouveau_notifier *sync;
@@ -32,6 +32,20 @@ struct nvfx_screen {
/* Vtxprog resources */
struct nouveau_resource *vp_exec_heap;
struct nouveau_resource *vp_data_heap;
+
+ struct nv04_2d_context* eng2d;
+
+ /* Once the amount of bytes drawn from the buffer reaches the updated size times this value,
+ * we will assume that the buffer will be drawn an huge number of times before the
+ * next modification
+ */
+ float static_reuse_threshold;
+
+ /* Cost of allocating a buffer in terms of the cost of copying a byte to an hardware buffer */
+ unsigned buffer_allocation_cost;
+
+ /* inline_cost/hardware_cost conversion ration */
+ float inline_cost_per_hardware_cost;
};
static INLINE struct nvfx_screen *
@@ -40,4 +54,7 @@ nvfx_screen(struct pipe_screen *screen)
return (struct nvfx_screen *)screen;
}
+int nvfx_screen_surface_init(struct pipe_screen *pscreen);
+void nvfx_screen_surface_takedown(struct pipe_screen *pscreen);
+
#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h
index 50830b3916..35006eec3d 100644
--- a/src/gallium/drivers/nvfx/nvfx_shader.h
+++ b/src/gallium/drivers/nvfx/nvfx_shader.h
@@ -1,6 +1,12 @@
#ifndef __NVFX_SHADER_H__
#define __NVFX_SHADER_H__
+#include <stdint.h>
+
+#include "pipe/p_compiler.h"
+
+#define NVFX_SWZ_IDENTITY ((3 << 6) | (2 << 4) | (1 << 2) | (0 << 0))
+
/* this will resolve to either the NV30 or the NV40 version
* depending on the current hardware */
/* unusual, but very fast and compact method */
@@ -71,11 +77,58 @@
/*
* Each fragment program opcode appears to be comprised of 4 32-bit values.
*
- * 0 - Opcode, output reg/mask, ATTRIB source
- * 1 - Source 0
- * 2 - Source 1
- * 3 - Source 2
+ * 0: OPDEST
+ * 0: program end
+ * 1-6: destination register
+ * 7: destination register is fp16?? (use for outputs)
+ * 8: set condition code
+ * 9: writemask x
+ * 10: writemask y
+ * 11: writemask z
+ * 12: writemask w
+ * 13-16: source attribute register number (e.g. COL0)
+ * 17-20: texture unit number
+ * 21: expand value on texture operation (x -> 2x - 1)
+ * 22-23: precision 0 = fp32, 1 = fp16, 2 = s1.10 fixed, 3 = s0.8 fixed (nv40-only))
+ * 24-29: opcode
+ * 30: no destination
+ * 31: saturate
+ * 1 - SRC0
+ * 0-17: see common source fields
+ * 18: execute if condition code less
+ * 19: execute if condition code equal
+ * 20: execute if condition code greater
+ * 21-22: condition code swizzle x source component
+ * 23-24: condition code swizzle y source component
+ * 25-26: condition code swizzle z source component
+ * 27-28: condition code swizzle w source component
+ * 29: source 0 absolute
+ * 30: always 0 in renouveau tests
+ * 31: always 0 in renouveau tests
+ * 2 - SRC1
+ * 0-17: see common source fields
+ * 18: source 1 absolute
+ * 19-20: input precision 0 = fp32, 1 = fp16, 2 = s1.10 fixed, 3 = ???
+ * 21-27: always 0 in renouveau tests
+ * 28-30: scale (0 = 1x, 1 = 2x, 2 = 4x, 3 = 8x, 4 = ???, 5, = 1/2, 6 = 1/4, 7 = 1/8)
+ * 31: opcode is branch
+ * 3 - SRC2
+ * 0-17: see common source fields
+ * 18: source 2 absolute
+ * 19-29: address register displacement
+ * 30: use index register
+ * 31: disable perspective-correct interpolation?
*
+* Common fields of 0, 1, 2 - SRC
+ * 0-1: source register type (0 = temp, 1 = input, 2 = immediate, 3 = ???)
+ * 2-7: source temp register index
+ * 8: source register is fp16??
+ * 9-10: source swizzle x source component
+ * 11-12: source swizzle y source component
+ * 13-14: source swizzle z source component
+ * 15-16: source swizzle w source component
+ * 17: negate
+
* There appears to be no special difference between result regs and temp regs.
* result.color == R0.xyzw
* result.depth == R1.z
@@ -210,6 +263,7 @@
/* NV40 only fragment program opcodes */
#define NVFX_FP_OP_OPCODE_TXL_NV40 0x2F
+
/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
#define NV40_FP_OP_BRA_OPCODE_BRK 0x0
#define NV40_FP_OP_BRA_OPCODE_CAL 0x1
@@ -218,10 +272,11 @@
#define NV40_FP_OP_BRA_OPCODE_REP 0x4
#define NV40_FP_OP_BRA_OPCODE_RET 0x5
+#define NV40_FP_OP_OUT_NONE (1 << 30)
#define NVFX_FP_OP_OUT_SAT (1 << 31)
/* high order bits of SRC0 */
-#define NVFX_FP_OP_OUT_ABS (1 << 29)
+#define NVFX_FP_OP_SRC0_ABS (1 << 29)
#define NVFX_FP_OP_COND_SWZ_W_SHIFT 27
#define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27)
#define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25
@@ -254,6 +309,7 @@
#define NVFX_FP_OP_DST_SCALE_INV_2X 5
#define NVFX_FP_OP_DST_SCALE_INV_4X 6
#define NVFX_FP_OP_DST_SCALE_INV_8X 7
+#define NVFX_FP_OP_SRC1_ABS (1 << 18)
/* SRC1 LOOP */
#define NV40_FP_OP_LOOP_INCR_SHIFT 19
@@ -263,13 +319,13 @@
#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)
-/* SRC1 IF */
-#define NV40_FP_OP_ELSE_ID_SHIFT 2
-#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)
+/* SRC1 IF: absolute offset in dwords */
+#define NV40_FP_OP_ELSE_OFFSET_SHIFT 0
+#define NV40_FP_OP_ELSE_OFFSET_MASK (0x7FFFFFFF << 0)
/* SRC1 CAL */
-#define NV40_FP_OP_IADDR_SHIFT 2
-#define NV40_FP_OP_IADDR_MASK (0xFF << 2)
+#define NV40_FP_OP_SUB_OFFSET_SHIFT 0
+#define NV40_FP_OP_SUB_OFFSET_MASK (0x7FFFFFFF << 0)
/* SRC1 REP
* I have no idea why there are 3 count values here.. but they
@@ -283,9 +339,9 @@
#define NV40_FP_OP_REP_COUNT3_SHIFT 19
#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)
-/* SRC2 REP/IF */
-#define NV40_FP_OP_END_ID_SHIFT 2
-#define NV40_FP_OP_END_ID_MASK (0xFF << 2)
+/* SRC2 REP/IF: absolute offset in dwords */
+#define NV40_FP_OP_END_OFFSET_SHIFT 0
+#define NV40_FP_OP_END_OFFSET_MASK (0x7FFFFFFF << 0)
/* high order bits of SRC2 */
#define NVFX_FP_OP_INDEX_INPUT (1 << 30)
@@ -323,6 +379,7 @@
#define NVFXSR_INPUT 2
#define NVFXSR_TEMP 3
#define NVFXSR_CONST 4
+#define NVFXSR_RELOCATED 5
#define NVFX_COND_FL 0
#define NVFX_COND_LT 1
@@ -352,51 +409,88 @@
#define NVFX_SWZ_Z 2
#define NVFX_SWZ_W 3
-#define swz(s,x,y,z,w) nvfx_sr_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w)
-#define neg(s) nvfx_sr_neg((s))
-#define abs(s) nvfx_sr_abs((s))
-#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)
+#define swz(s,x,y,z,w) nvfx_src_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w)
+#define neg(s) nvfx_src_neg((s))
+#define abs(s) nvfx_src_abs((s))
-struct nvfx_sreg {
- int type;
- int index;
+struct nvfx_reg {
+ uint8_t type;
+ uint32_t index;
+};
- int dst_scale;
+struct nvfx_src {
+ struct nvfx_reg reg;
- int negate;
- int abs;
- int swz[4];
+ /* src only */
+ uint8_t negate : 1;
+ uint8_t abs : 1;
+ uint8_t swz[4];
+};
- int cc_update;
- int cc_update_reg;
- int cc_test;
- int cc_test_reg;
- int cc_swz[4];
+struct nvfx_insn
+{
+ uint8_t op;
+ char scale;
+ int8_t unit;
+ uint8_t mask;
+ uint8_t cc_swz[4];
+
+ uint8_t sat : 1;
+ uint8_t cc_update : 1;
+ uint8_t cc_update_reg : 1;
+ uint8_t cc_test : 3;
+ uint8_t cc_test_reg : 1;
+
+ struct nvfx_reg dst;
+ struct nvfx_src src[3];
};
-static INLINE struct nvfx_sreg
-nvfx_sr(int type, int index)
+static INLINE struct nvfx_insn
+nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
{
- struct nvfx_sreg temp = {
- .type = type,
- .index = index,
- .dst_scale = 0,
- .abs = 0,
- .negate = 0,
- .swz = { 0, 1, 2, 3 },
+ struct nvfx_insn insn = {
+ .op = op,
+ .scale = 0,
+ .unit = unit,
+ .sat = sat,
+ .mask = mask,
.cc_update = 0,
.cc_update_reg = 0,
.cc_test = NVFX_COND_TR,
.cc_test_reg = 0,
.cc_swz = { 0, 1, 2, 3 },
+ .dst = dst,
+ .src = {s0, s1, s2}
+ };
+ return insn;
+}
+
+static INLINE struct nvfx_reg
+nvfx_reg(int type, int index)
+{
+ struct nvfx_reg temp = {
+ .type = type,
+ .index = index,
};
return temp;
}
-static INLINE struct nvfx_sreg
-nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w)
+static INLINE struct nvfx_src
+nvfx_src(struct nvfx_reg reg)
{
- struct nvfx_sreg dst = src;
+ struct nvfx_src temp = {
+ .reg = reg,
+ .abs = 0,
+ .negate = 0,
+ .swz = { 0, 1, 2, 3 },
+ };
+ return temp;
+}
+
+static INLINE struct nvfx_src
+nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w)
+{
+ struct nvfx_src dst = src;
dst.swz[NVFX_SWZ_X] = src.swz[x];
dst.swz[NVFX_SWZ_Y] = src.swz[y];
@@ -405,25 +499,23 @@ nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w)
return dst;
}
-static INLINE struct nvfx_sreg
-nvfx_sr_neg(struct nvfx_sreg src)
+static INLINE struct nvfx_src
+nvfx_src_neg(struct nvfx_src src)
{
src.negate = !src.negate;
return src;
}
-static INLINE struct nvfx_sreg
-nvfx_sr_abs(struct nvfx_sreg src)
+static INLINE struct nvfx_src
+nvfx_src_abs(struct nvfx_src src)
{
src.abs = 1;
return src;
}
-static INLINE struct nvfx_sreg
-nvfx_sr_scale(struct nvfx_sreg src, int scale)
-{
- src.dst_scale = scale;
- return src;
-}
+struct nvfx_relocation {
+ unsigned location;
+ unsigned target;
+};
#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
index cd58e439d7..5bd7dc07f0 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.c
+++ b/src/gallium/drivers/nvfx/nvfx_state.c
@@ -1,6 +1,7 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
#include "draw/draw_context.h"
@@ -81,111 +82,6 @@ nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso)
}
static void *
-nvfx_sampler_state_create(struct pipe_context *pipe,
- const struct pipe_sampler_state *cso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_sampler_state *ps;
-
- ps = MALLOC(sizeof(struct nvfx_sampler_state));
-
- /* on nv30, we use this as an internal flag */
- ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT;
- ps->en = 0;
- ps->filt = nvfx_tex_filter(cso);
- ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
- (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
- (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) |
- nvfx_tex_wrap_compare_mode(cso);
- ps->bcol = nvfx_tex_border_color(cso->border_color);
-
- if(nvfx->is_nv4x)
- nv40_sampler_state_init(pipe, ps, cso);
- else
- nv30_sampler_state_init(pipe, ps, cso);
-
- return (void *)ps;
-}
-
-static void
-nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- unsigned unit;
-
- for (unit = 0; unit < nr; unit++) {
- nvfx->tex_sampler[unit] = sampler[unit];
- nvfx->dirty_samplers |= (1 << unit);
- }
-
- for (unit = nr; unit < nvfx->nr_samplers; unit++) {
- nvfx->tex_sampler[unit] = NULL;
- nvfx->dirty_samplers |= (1 << unit);
- }
-
- nvfx->nr_samplers = nr;
- nvfx->dirty |= NVFX_NEW_SAMPLER;
-}
-
-static void
-nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- FREE(hwcso);
-}
-
-static void
-nvfx_set_fragment_sampler_views(struct pipe_context *pipe,
- unsigned nr,
- struct pipe_sampler_view **views)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- unsigned unit;
-
- for (unit = 0; unit < nr; unit++) {
- pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
- views[unit]);
- nvfx->dirty_samplers |= (1 << unit);
- }
-
- for (unit = nr; unit < nvfx->nr_textures; unit++) {
- pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
- NULL);
- nvfx->dirty_samplers |= (1 << unit);
- }
-
- nvfx->nr_textures = nr;
- nvfx->dirty |= NVFX_NEW_SAMPLER;
-}
-
-
-static struct pipe_sampler_view *
-nvfx_create_sampler_view(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *templ)
-{
- struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
- if (view) {
- *view = *templ;
- view->reference.count = 1;
- view->texture = NULL;
- pipe_resource_reference(&view->texture, texture);
- view->context = pipe;
- }
-
- return view;
-}
-
-
-static void
-nvfx_sampler_view_destroy(struct pipe_context *pipe,
- struct pipe_sampler_view *view)
-{
- pipe_resource_reference(&view->texture, NULL);
- FREE(view);
-}
-
-static void *
nvfx_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
{
@@ -195,6 +91,7 @@ nvfx_rasterizer_state_create(struct pipe_context *pipe,
/*XXX: ignored:
* point_smooth -nohw
* multisample
+ * sprite_coord_origin
*/
sb_method(sb, NV34TCL_SHADE_MODEL, 1);
@@ -254,19 +151,8 @@ nvfx_rasterizer_state_create(struct pipe_context *pipe,
sb_data(sb, fui(cso->offset_units * 2));
}
- sb_method(sb, NV34TCL_POINT_SPRITE, 1);
- if (cso->point_quad_rasterization) {
- unsigned psctl = (1 << 0), i;
-
- for (i = 0; i < 8; i++) {
- if ((cso->sprite_coord_enable >> i) & 1)
- psctl |= (1 << (8 + i));
- }
-
- sb_data(sb, psctl);
- } else {
- sb_data(sb, 0);
- }
+ sb_method(sb, NV34TCL_FLATSHADE_FIRST, 1);
+ sb_data(sb, cso->flatshade_first);
rsso->pipe = *cso;
rsso->sb_len = sb_len(sb, rsso->sb);
@@ -287,11 +173,11 @@ nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
nvfx->draw_dirty |= NVFX_NEW_SCISSOR;
}
- if(((struct nvfx_rasterizer_state*)hwcso)->pipe.poly_stipple_enable
- != nvfx->rasterizer->pipe.poly_stipple_enable)
+ if(((struct nvfx_rasterizer_state*)hwcso)->pipe.point_quad_rasterization != nvfx->rasterizer->pipe.point_quad_rasterization
+ || ((struct nvfx_rasterizer_state*)hwcso)->pipe.sprite_coord_enable != nvfx->rasterizer->pipe.sprite_coord_enable
+ || ((struct nvfx_rasterizer_state*)hwcso)->pipe.sprite_coord_mode != nvfx->rasterizer->pipe.sprite_coord_mode)
{
- nvfx->dirty |= NVFX_NEW_STIPPLE;
- nvfx->draw_dirty |= NVFX_NEW_STIPPLE;
+ nvfx->dirty |= NVFX_NEW_SPRITE;
}
}
@@ -315,10 +201,8 @@ nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe,
struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
struct nouveau_statebuf_builder sb = sb_init(zsaso->sb);
- sb_method(sb, NV34TCL_DEPTH_FUNC, 3);
+ sb_method(sb, NV34TCL_DEPTH_FUNC, 1);
sb_data (sb, nvgl_comparison_op(cso->depth.func));
- sb_data (sb, cso->depth.writemask ? 1 : 0);
- sb_data (sb, cso->depth.enabled ? 1 : 0);
sb_method(sb, NV34TCL_ALPHA_FUNC_ENABLE, 3);
sb_data (sb, cso->alpha.enabled ? 1 : 0);
@@ -377,76 +261,6 @@ nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
FREE(zsaso);
}
-static void *
-nvfx_vp_state_create(struct pipe_context *pipe,
- const struct pipe_shader_state *cso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_vertex_program *vp;
-
- vp = CALLOC(1, sizeof(struct nvfx_vertex_program));
- vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
- vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe);
-
- return (void *)vp;
-}
-
-static void
-nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- nvfx->vertprog = hwcso;
- nvfx->dirty |= NVFX_NEW_VERTPROG;
- nvfx->draw_dirty |= NVFX_NEW_VERTPROG;
-}
-
-static void
-nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_vertex_program *vp = hwcso;
-
- draw_delete_vertex_shader(nvfx->draw, vp->draw);
- nvfx_vertprog_destroy(nvfx, vp);
- FREE((void*)vp->pipe.tokens);
- FREE(vp);
-}
-
-static void *
-nvfx_fp_state_create(struct pipe_context *pipe,
- const struct pipe_shader_state *cso)
-{
- struct nvfx_fragment_program *fp;
-
- fp = CALLOC(1, sizeof(struct nvfx_fragment_program));
- fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
-
- tgsi_scan_shader(fp->pipe.tokens, &fp->info);
-
- return (void *)fp;
-}
-
-static void
-nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- nvfx->fragprog = hwcso;
- nvfx->dirty |= NVFX_NEW_FRAGPROG;
-}
-
-static void
-nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_fragment_program *fp = hwcso;
-
- nvfx_fragprog_destroy(nvfx, fp);
- FREE((void*)fp->pipe.tokens);
- FREE(fp);
-}
-
static void
nvfx_set_blend_color(struct pipe_context *pipe,
const struct pipe_blend_color *bcol)
@@ -507,7 +321,10 @@ nvfx_set_framebuffer_state(struct pipe_context *pipe,
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- nvfx->framebuffer = *fb;
+ if(fb)
+ util_copy_framebuffer_state(&nvfx->framebuffer, fb);
+ else
+ util_unreference_framebuffer_state(&nvfx->framebuffer);
nvfx->dirty |= NVFX_NEW_FB;
}
@@ -542,65 +359,6 @@ nvfx_set_viewport_state(struct pipe_context *pipe,
nvfx->draw_dirty |= NVFX_NEW_VIEWPORT;
}
-static void
-nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
- const struct pipe_vertex_buffer *vb)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count);
- nvfx->vtxbuf_nr = count;
-
- nvfx->dirty |= NVFX_NEW_ARRAYS;
- nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
-}
-
-static void
-nvfx_set_index_buffer(struct pipe_context *pipe,
- const struct pipe_index_buffer *ib)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- if (ib)
- memcpy(&nvfx->idxbuf, ib, sizeof(nvfx->idxbuf));
- else
- memset(&nvfx->idxbuf, 0, sizeof(nvfx->idxbuf));
-
- /* TODO make this more like a state */
-}
-
-static void *
-nvfx_vtxelts_state_create(struct pipe_context *pipe,
- unsigned num_elements,
- const struct pipe_vertex_element *elements)
-{
- struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
-
- assert(num_elements < 16); /* not doing fallbacks yet */
- cso->num_elements = num_elements;
- memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
-
-/* nvfx_vtxelt_construct(cso);*/
-
- return (void *)cso;
-}
-
-static void
-nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- FREE(hwcso);
-}
-
-static void
-nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- nvfx->vtxelt = hwcso;
- nvfx->dirty |= NVFX_NEW_ARRAYS;
- /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/
-}
-
void
nvfx_init_state_functions(struct nvfx_context *nvfx)
{
@@ -608,13 +366,6 @@ nvfx_init_state_functions(struct nvfx_context *nvfx)
nvfx->pipe.bind_blend_state = nvfx_blend_state_bind;
nvfx->pipe.delete_blend_state = nvfx_blend_state_delete;
- nvfx->pipe.create_sampler_state = nvfx_sampler_state_create;
- nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind;
- nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete;
- nvfx->pipe.set_fragment_sampler_views = nvfx_set_fragment_sampler_views;
- nvfx->pipe.create_sampler_view = nvfx_create_sampler_view;
- nvfx->pipe.sampler_view_destroy = nvfx_sampler_view_destroy;
-
nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create;
nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind;
nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete;
@@ -626,14 +377,6 @@ nvfx_init_state_functions(struct nvfx_context *nvfx)
nvfx->pipe.delete_depth_stencil_alpha_state =
nvfx_depth_stencil_alpha_state_delete;
- nvfx->pipe.create_vs_state = nvfx_vp_state_create;
- nvfx->pipe.bind_vs_state = nvfx_vp_state_bind;
- nvfx->pipe.delete_vs_state = nvfx_vp_state_delete;
-
- nvfx->pipe.create_fs_state = nvfx_fp_state_create;
- nvfx->pipe.bind_fs_state = nvfx_fp_state_bind;
- nvfx->pipe.delete_fs_state = nvfx_fp_state_delete;
-
nvfx->pipe.set_blend_color = nvfx_set_blend_color;
nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref;
nvfx->pipe.set_clip_state = nvfx_set_clip_state;
@@ -643,11 +386,4 @@ nvfx_init_state_functions(struct nvfx_context *nvfx)
nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple;
nvfx->pipe.set_scissor_state = nvfx_set_scissor_state;
nvfx->pipe.set_viewport_state = nvfx_set_viewport_state;
-
- nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
- nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
- nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
-
- nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
- nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h
index 9ceb2577ec..e9c1f2c26d 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.h
+++ b/src/gallium/drivers/nvfx/nvfx_state.h
@@ -4,11 +4,11 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
#include "nouveau/nouveau_statebuf.h"
+#include "util/u_dynarray.h"
+#include "util/u_linkage.h"
struct nvfx_vertex_program_exec {
uint32_t data[4];
- boolean has_branch_offset;
- int const_index;
};
struct nvfx_vertex_program_data {
@@ -18,18 +18,20 @@ struct nvfx_vertex_program_data {
struct nvfx_vertex_program {
struct pipe_shader_state pipe;
+ unsigned long long id;
struct draw_vertex_shader *draw;
boolean translated;
- struct pipe_clip_state ucp;
-
struct nvfx_vertex_program_exec *insns;
unsigned nr_insns;
struct nvfx_vertex_program_data *consts;
unsigned nr_consts;
+ char generic_to_fp_input[256];
+ int sprite_fp_input;
+
struct nouveau_resource *exec;
unsigned exec_start;
struct nouveau_resource *data;
@@ -38,7 +40,10 @@ struct nvfx_vertex_program {
uint32_t ir;
uint32_t or;
- uint32_t clip_ctrl;
+ int clip_nr;
+
+ struct util_dynarray branch_relocs;
+ struct util_dynarray const_relocs;
};
struct nvfx_fragment_program_data {
@@ -49,15 +54,14 @@ struct nvfx_fragment_program_data {
struct nvfx_fragment_program_bo {
struct nvfx_fragment_program_bo* next;
struct nouveau_bo* bo;
+ unsigned char* slots;
char insn[] __attribute__((aligned(16)));
};
struct nvfx_fragment_program {
- struct pipe_shader_state pipe;
- struct tgsi_shader_info info;
-
- boolean translated;
unsigned samplers;
+ unsigned point_sprite_control;
+ unsigned or;
uint32_t *insn;
int insn_len;
@@ -65,13 +69,36 @@ struct nvfx_fragment_program {
struct nvfx_fragment_program_data *consts;
unsigned nr_consts;
+ /* the slot at num_slots is for the sprite coordinate, if any */
+ unsigned num_slots; /* how many input semantics? */
+ unsigned char slot_to_generic[10]; /* semantics */
+ unsigned char slot_to_fp_input[11]; /* current assignment of slots for each used semantic */
+ struct util_dynarray slot_relocations[11];
+
+ /* This is reset to progs on any relocation update, and decreases every time we
+ * move to a new prog due to a constant update
+ * When this is the same as progs, applying relocations is no longer necessary.
+ */
+ unsigned progs_left_with_obsolete_slot_assignments;
+
+ unsigned long long last_vp_id;
+ unsigned last_sprite_coord_enable;
+
uint32_t fp_control;
unsigned bo_prog_idx;
unsigned prog_size;
unsigned progs_per_bo;
+ unsigned progs;
+
struct nvfx_fragment_program_bo* fpbo;
};
+struct nvfx_pipe_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ struct nvfx_fragment_program* fps[2];
+};
#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index f91ae19ecd..390bca8cdb 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -1,15 +1,54 @@
#include "nvfx_context.h"
#include "nvfx_state.h"
+#include "nvfx_resource.h"
#include "draw/draw_context.h"
static boolean
nvfx_state_validate_common(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- unsigned dirty = nvfx->dirty;
+ unsigned dirty;
+ unsigned still_dirty = 0;
+ int all_swizzled = -1;
+ boolean flush_tex_cache = FALSE;
+ unsigned render_temps;
if(nvfx != nvfx->screen->cur_ctx)
- dirty = ~0;
+ {
+ nvfx->dirty = ~0;
+ nvfx->hw_vtxelt_nr = 16;
+ nvfx->hw_pointsprite_control = -1;
+ nvfx->hw_vp_output = -1;
+ nvfx->screen->cur_ctx = nvfx;
+ nvfx->relocs_needed = NVFX_RELOCATE_ALL;
+ }
+
+ /* These can trigger use the of 3D engine to copy temporaries.
+ * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop..
+ * This converges to having clean temps, then binding both fragtexes and framebuffers.
+ */
+ while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER))
+ {
+ if(nvfx->dirty & NVFX_NEW_SAMPLER)
+ {
+ nvfx->dirty &=~ NVFX_NEW_SAMPLER;
+ nvfx_fragtex_validate(nvfx);
+
+ // TODO: only set this if really necessary
+ flush_tex_cache = TRUE;
+ }
+
+ if(nvfx->dirty & NVFX_NEW_FB)
+ {
+ nvfx->dirty &=~ NVFX_NEW_FB;
+ all_swizzled = nvfx_framebuffer_prepare(nvfx);
+
+ // TODO: make sure this doesn't happen, i.e. fbs have matching formats
+ assert(all_swizzled >= 0);
+ }
+ }
+
+ dirty = nvfx->dirty;
if(nvfx->render_mode == HW)
{
@@ -19,11 +58,19 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
return FALSE;
}
- if(dirty & (NVFX_NEW_ARRAYS))
+ if(dirty & NVFX_NEW_ARRAYS)
{
if(!nvfx_vbo_validate(nvfx))
return FALSE;
}
+
+ if(dirty & NVFX_NEW_INDEX)
+ {
+ if(nvfx->use_index_buffer)
+ nvfx_idxbuf_validate(nvfx);
+ else
+ still_dirty = NVFX_NEW_INDEX;
+ }
}
else
{
@@ -31,13 +78,10 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
nvfx_vertprog_validate(nvfx);
- if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG))
+ if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG))
nvfx_vtxfmt_validate(nvfx);
}
- if(dirty & NVFX_NEW_FB)
- nvfx_state_framebuffer_validate(nvfx);
-
if(dirty & NVFX_NEW_RAST)
sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len);
@@ -47,11 +91,97 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(dirty & NVFX_NEW_STIPPLE)
nvfx_state_stipple_validate(nvfx);
- if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST))
+ if(nvfx->dirty & NVFX_NEW_UCP)
+ {
+ unsigned enables[7] =
+ {
+ 0,
+ NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0,
+ NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1,
+ NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2,
+ NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3,
+ NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4,
+ NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5,
+ };
+
+ if(!nvfx->use_vp_clipping)
+ {
+ WAIT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+ OUT_RING(chan, 0);
+
+ WAIT_RING(chan, 6 * 4 + 1);
+ OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANE_A(0), nvfx->clip.nr * 4));
+ OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
+ }
+
+ WAIT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+ OUT_RING(chan, enables[nvfx->clip.nr]);
+ }
+
+ if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG)))
+ {
+ unsigned i;
+ struct nvfx_vertex_program* vp = nvfx->vertprog;
+ if(nvfx->clip.nr != vp->clip_nr)
+ {
+ unsigned idx;
+ WAIT_RING(chan, 14);
+
+ /* remove last instruction bit */
+ if(vp->clip_nr >= 0)
+ {
+ idx = vp->nr_insns - 7 + vp->clip_nr;
+ OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1));
+ OUT_RING(chan, vp->exec->start + idx);
+ OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4));
+ OUT_RINGp (chan, vp->insns[idx].data, 4);
+ }
+
+ /* set last instruction bit */
+ idx = vp->nr_insns - 7 + nvfx->clip.nr;
+ OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1));
+ OUT_RING(chan, vp->exec->start + idx);
+ OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4));
+ OUT_RINGp(chan, vp->insns[idx].data, 3);
+ OUT_RING(chan, vp->insns[idx].data[3] | 1);
+ vp->clip_nr = nvfx->clip.nr;
+ }
+
+ // TODO: only do this for the ones changed
+ WAIT_RING(chan, 6 * 6);
+ for(i = 0; i < nvfx->clip.nr; ++i)
+ {
+ OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_CONST_ID, 5));
+ OUT_RING(chan, vp->data->start + i);
+ OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
+ }
+ }
+
+ if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE))
+ {
nvfx_fragprog_validate(nvfx);
+ if(dirty & NVFX_NEW_FRAGPROG)
+ flush_tex_cache = TRUE; // TODO: do we need this?
+ }
- if(dirty & NVFX_NEW_SAMPLER)
- nvfx_fragtex_validate(nvfx);
+ if(nvfx->is_nv4x)
+ {
+ unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or;
+ vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6);
+
+ if(vp_output != nvfx->hw_vp_output)
+ {
+ WAIT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1));
+ OUT_RING(chan, vp_output);
+ nvfx->hw_vp_output = vp_output;
+ }
+ }
+
+ if(all_swizzled >= 0)
+ nvfx_framebuffer_validate(nvfx, all_swizzled);
if(dirty & NVFX_NEW_BLEND)
sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len);
@@ -65,31 +195,62 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(dirty & NVFX_NEW_SR)
nvfx_state_sr_validate(nvfx);
-/* Having this depend on FB looks wrong, but it seems
- necessary to make this work on nv3x
+/* All these dependencies are wrong, but otherwise
+ etracer, neverball, foobillard, glest totally misrender
TODO: find the right fix
*/
- if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB))
+ if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_RAST | NVFX_NEW_ZSA) || (all_swizzled >= 0))
+ {
nvfx_state_viewport_validate(nvfx);
+ }
+
+ if(dirty & NVFX_NEW_ZSA || (all_swizzled >= 0))
+ {
+ WAIT_RING(chan, 3);
+ OUT_RING(chan, RING_3D(NV34TCL_DEPTH_WRITE_ENABLE, 2));
+ OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask);
+ OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled);
+ }
- /* TODO: could nv30 need this or something similar too? */
- if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) {
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
- OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
- OUT_RING(chan, 1);
+ if(flush_tex_cache)
+ {
+ // TODO: what about nv30?
+ if(nvfx->is_nv4x)
+ {
+ WAIT_RING(chan, 4);
+ OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+ OUT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+ OUT_RING(chan, 1);
+ }
}
- nvfx->dirty = 0;
+
+ nvfx->dirty = dirty & still_dirty;
+
+ render_temps = nvfx->state.render_temps;
+ if(render_temps)
+ {
+ for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+ {
+ if(render_temps & (1 << i))
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
+ (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
+ }
+
+ if(render_temps & 0x80)
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
+ (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
+ }
+
return TRUE;
}
-void
-nvfx_state_emit(struct nvfx_context *nvfx)
+inline void
+nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
/* we need to ensure there is enough space to output relocations in one go */
- unsigned max_relocs = 0
+ const unsigned max_relocs = 0
+ 16 /* vertex buffers, incl. dma flag */
+ 2 /* index buffer plus format+dma flag */
+ 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */
@@ -97,18 +258,19 @@ nvfx_state_emit(struct nvfx_context *nvfx)
+ 2 * 4 /* vertex textures plus format+dma flag */
+ 1 /* fragprog incl dma flag */
;
+
MARK_RING(chan, max_relocs * 2, max_relocs * 2);
- nvfx_state_relocate(nvfx);
-}
-void
-nvfx_state_relocate(struct nvfx_context *nvfx)
-{
- nvfx_framebuffer_relocate(nvfx);
- nvfx_fragtex_relocate(nvfx);
- nvfx_fragprog_relocate(nvfx);
- if (nvfx->render_mode == HW)
+ if(relocs & NVFX_RELOCATE_FRAMEBUFFER)
+ nvfx_framebuffer_relocate(nvfx);
+ if(relocs & NVFX_RELOCATE_FRAGTEX)
+ nvfx_fragtex_relocate(nvfx);
+ if(relocs & NVFX_RELOCATE_FRAGPROG)
+ nvfx_fragprog_relocate(nvfx);
+ if(relocs & NVFX_RELOCATE_VTXBUF)
nvfx_vbo_relocate(nvfx);
+ if(relocs & NVFX_RELOCATE_IDXBUF)
+ nvfx_idxbuf_relocate(nvfx);
}
boolean
@@ -173,6 +335,9 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe);
}
+ if (nvfx->draw_dirty & NVFX_NEW_INDEX)
+ draw_set_index_buffer(draw, &nvfx->idxbuf);
+
nvfx_state_validate_common(nvfx);
nvfx->draw_dirty = 0;
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
index 360e569f77..3b869d43a1 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -1,21 +1,55 @@
#include "nvfx_context.h"
#include "nvfx_resource.h"
-#include "nouveau/nouveau_util.h"
+#include "util/u_format.h"
+static inline boolean
+nvfx_surface_linear_renderable(struct pipe_surface* surf)
+{
+ return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)
+ && !(surf->offset & 63)
+ && !(((struct nvfx_surface*)surf)->pitch & 63);
+}
+static inline boolean
+nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf)
+{
+ /* TODO: return FALSE if we have a format not supporting swizzled rendering (e.g. r8); currently those are not supported at all */
+ return !((struct nvfx_miptree*)surf->texture)->linear_pitch
+ && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1)
+ && !(surf->offset & 127)
+ && (surf->width == fb->width)
+ && (surf->height == fb->height)
+ && !((struct nvfx_surface*)surf)->temp;
+}
-void
-nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
+static boolean
+nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target)
+{
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ if(!ns->temp)
+ {
+ target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo;
+ target->offset = surf->offset;
+ target->pitch = align(ns->pitch, 64);
+ assert(target->pitch);
+ return FALSE;
+ }
+ else
+ {
+ target->offset = 0;
+ target->pitch = ns->temp->linear_pitch;
+ target->bo = ns->temp->base.bo;
+ assert(target->pitch);
+ return TRUE;
+ }
+}
+
+int
+nvfx_framebuffer_prepare(struct nvfx_context *nvfx)
{
struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
- struct nouveau_channel *chan = nvfx->screen->base.channel;
- uint32_t rt_enable = 0, rt_format = 0;
- int i, colour_format = 0, zeta_format = 0;
- int depth_only = 0;
- unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
- unsigned w = fb->width;
- unsigned h = fb->height;
- int colour_bits = 32, zeta_bits = 32;
+ int i, color_format = 0, zeta_format = 0;
+ int all_swizzled = 1;
if(!nvfx->is_nv4x)
assert(fb->nr_cbufs <= 2);
@@ -23,113 +57,135 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
assert(fb->nr_cbufs <= 4);
for (i = 0; i < fb->nr_cbufs; i++) {
- if (colour_format)
- assert(colour_format == fb->cbufs[i]->format);
- else
- colour_format = fb->cbufs[i]->format;
-
- rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
- nvfx->hw_rt[i].bo = nvfx_surface_buffer(fb->cbufs[i]);
- nvfx->hw_rt[i].offset = fb->cbufs[i]->offset;
- nvfx->hw_rt[i].pitch = ((struct nv04_surface *)fb->cbufs[i])->pitch;
+ if (color_format) {
+ if(color_format != fb->cbufs[i]->format)
+ return -1;
+ } else
+ color_format = fb->cbufs[i]->format;
+
+ if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i]))
+ all_swizzled = 0;
}
- for(; i < 4; ++i)
- nvfx->hw_rt[i].bo = 0;
+ if (fb->zsbuf) {
+ /* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. r8); currently those are not supported at all */
+ if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf))
+ all_swizzled = 0;
+
+ if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format))
+ all_swizzled = 0;
+ }
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i]))
+ nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]);
+ }
+
+ if(fb->zsbuf) {
+ if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf))
+ nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf);
+ }
+
+ return all_swizzled;
+}
+
+void
+nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
+{
+ struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ uint32_t rt_enable, rt_format;
+ int i;
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ unsigned w = fb->width;
+ unsigned h = fb->height;
+
+ rt_enable = (NV34TCL_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1;
if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 |
NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3))
rt_enable |= NV34TCL_RT_ENABLE_MRT;
+ nvfx->state.render_temps = 0;
+
+ for (i = 0; i < fb->nr_cbufs; i++)
+ nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i;
+
+ for(; i < 4; ++i)
+ nvfx->hw_rt[i].bo = 0;
+
if (fb->zsbuf) {
- zeta_format = fb->zsbuf->format;
- nvfx->hw_zeta.bo = nvfx_surface_buffer(fb->zsbuf);
- nvfx->hw_zeta.offset = fb->zsbuf->offset;
- nvfx->hw_zeta.pitch = ((struct nv04_surface *)fb->zsbuf)->pitch;
- }
- else
- nvfx->hw_zeta.bo = 0;
-
- if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 |
- NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) {
- /* Render to at least a colour buffer */
- if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
- assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
- for (i = 1; i < fb->nr_cbufs; i++)
- assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR));
-
- rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
- (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
- (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
- }
- else
- rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
- } else if (fb->zsbuf) {
- depth_only = 1;
-
- /* Render to depth buffer only */
- if (!(fb->zsbuf->texture->usage & NVFX_RESOURCE_FLAG_LINEAR)) {
- assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
-
- rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
- (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
- (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
- }
- else
- rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
- } else {
- return;
+ nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7;
+
+ assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch);
+ assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size);
}
- switch (colour_format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case 0:
- rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
- break;
- case PIPE_FORMAT_B5G6R5_UNORM:
+ if (prepare_result) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ (util_logbase2(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+ (util_logbase2(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+ } else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+
+ if(fb->nr_cbufs > 0) {
+ switch (fb->cbufs[0]->format) {
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+ } else if(fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) == 2)
rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
- colour_bits = 16;
- break;
- default:
- assert(0);
- }
+ else
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
- switch (zeta_format) {
- case PIPE_FORMAT_Z16_UNORM:
+ if(fb->zsbuf) {
+ switch (fb->zsbuf->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+ break;
+ default:
+ assert(0);
+ }
+ } else if(fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) == 2)
rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
- zeta_bits = 16;
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- case 0:
+ else
rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
- break;
- default:
- assert(0);
- }
- if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) {
- /* TODO: does this limitation really exist?
- TODO: can it be worked around somehow? */
- assert(0);
- }
+ if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || fb->zsbuf) {
+ struct nvfx_render_target *rt0 = &nvfx->hw_rt[0];
+ uint32_t pitch;
- if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0)
- || ((!nvfx->is_nv4x) && depth_only)) {
- struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]);
- uint32_t pitch = rt0->pitch;
+ if(!(rt_enable & NV34TCL_RT_ENABLE_COLOR0))
+ rt0 = &nvfx->hw_zeta;
+
+ pitch = rt0->pitch;
if(!nvfx->is_nv4x)
{
- if (nvfx->hw_zeta.bo) {
+ if (nvfx->hw_zeta.bo)
pitch |= (nvfx->hw_zeta.pitch << 16);
- } else {
+ else
pitch |= (pitch << 16);
- }
}
+ //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);
+
OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1));
OUT_RELOC(chan, rt0->bo, 0,
rt_flags | NOUVEAU_BO_OR,
@@ -182,7 +238,7 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
}
}
- if (zeta_format) {
+ if (fb->zsbuf) {
OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1));
OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
rt_flags | NOUVEAU_BO_OR,
@@ -196,6 +252,10 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
OUT_RING(chan, nvfx->hw_zeta.pitch);
}
}
+ else if(nvfx->is_nv4x) {
+ OUT_RING(chan, RING_3D(NV40TCL_ZETA_PITCH, 1));
+ OUT_RING(chan, 64);
+ }
OUT_RING(chan, RING_3D(NV34TCL_RT_ENABLE, 1));
OUT_RING(chan, rt_enable);
@@ -218,6 +278,7 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1));
OUT_RING(chan, 0);
}
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
}
void
@@ -247,4 +308,5 @@ nvfx_framebuffer_relocate(struct nvfx_context *nvfx)
DO(NV40, 3);
DO_(nvfx->hw_zeta, NV34, ZETA);
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_stipple.c b/src/gallium/drivers/nvfx/nvfx_state_stipple.c
index 4da968f093..b76e9dd382 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_stipple.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_stipple.c
@@ -4,23 +4,8 @@ void
nvfx_state_stipple_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel *chan = nvfx->screen->base.channel;
- struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
- if ((rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0))
- return;
-
- if (rast->poly_stipple_enable) {
- unsigned i;
-
- WAIT_RING(chan, 35);
- OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1));
- OUT_RING(chan, 1);
- OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32));
- for (i = 0; i < 32; i++)
- OUT_RING(chan, nvfx->stipple[i]);
- } else {
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1));
- OUT_RING(chan, 0);
- }
+ WAIT_RING(chan, 33);
+ OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32));
+ OUT_RINGp(chan, nvfx->stipple, 32);
}
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index a605d2b754..a5931b6e15 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -26,33 +26,421 @@
*
**************************************************************************/
+#include "pipe/p_context.h"
+#include "pipe/p_format.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "util/u_blitter.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_screen.h"
#include "nvfx_context.h"
+#include "nvfx_screen.h"
#include "nvfx_resource.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_pack_color.h"
+#include "nv04_2d.h"
+
+#include <nouveau/nouveau_bo.h>
+
+static INLINE void
+nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
+{
+ unsigned bits = util_format_get_blocksizebits(format);
+ switch(bits)
+ {
+ case 8:
+ rgn->bpps = 0;
+ break;
+ case 16:
+ rgn->bpps = 1;
+ break;
+ case 32:
+ rgn->bpps = 2;
+ break;
+ default:
+ {
+ int shift;
+ assert(util_is_power_of_two(bits));
+ shift = util_logbase2(bits) - 3;
+ assert(shift >= 2);
+ rgn->bpps = 2;
+ shift -= 2;
+
+ rgn->x = util_format_get_nblocksx(format, rgn->x) << shift;
+ rgn->y = util_format_get_nblocksy(format, rgn->y);
+ }
+ }
+}
+
+static INLINE void
+nvfx_region_fixup_swizzled(struct nv04_region* rgn, unsigned zslice, unsigned width, unsigned height, unsigned depth)
+{
+ // TODO: move this code to surface creation?
+ if((depth <= 1) && (height <= 1 || width <= 2))
+ rgn->pitch = width << rgn->bpps;
+ else if(depth > 1 && height <= 2 && width <= 2)
+ {
+ rgn->pitch = width << rgn->bpps;
+ rgn->offset += (zslice * width * height) << rgn->bpps;
+ }
+ else
+ {
+ rgn->pitch = 0;
+ rgn->z = zslice;
+ rgn->w = width;
+ rgn->h = height;
+ rgn->d = depth;
+ }
+}
+
+static INLINE void
+nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)
+{
+ rgn->x = x;
+ rgn->y = y;
+ rgn->z = 0;
+ nvfx_region_set_format(rgn, surf->base.base.format);
+
+ if(surf->temp)
+ {
+ rgn->bo = surf->temp->base.bo;
+ rgn->offset = 0;
+ rgn->pitch = surf->temp->linear_pitch;
+
+ if(for_write)
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base);
+ } else {
+ rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo;
+ rgn->offset = surf->base.base.offset;
+ rgn->pitch = surf->pitch;
+
+ if(!(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR))
+ nvfx_region_fixup_swizzled(rgn, surf->base.base.zslice, surf->base.base.width, surf->base.base.height, u_minify(surf->base.base.texture->depth0, surf->base.base.level));
+ }
+}
+
+static INLINE void
+nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)
+{
+ if(pt->target != PIPE_BUFFER)
+ {
+ struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z);
+ if(ns && util_dirty_surface_is_dirty(&ns->base))
+ {
+ nvfx_region_init_for_surface(rgn, ns, x, y, for_write);
+ return;
+ }
+ }
+
+ rgn->bo = ((struct nvfx_resource*)pt)->bo;
+ rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);
+ rgn->pitch = nvfx_subresource_pitch(pt, sub.level);
+ rgn->x = x;
+ rgn->y = y;
+ rgn->z = 0;
+
+ nvfx_region_set_format(rgn, pt->format);
+ if(!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR))
+ nvfx_region_fixup_swizzled(rgn, z, u_minify(pt->width0, sub.level), u_minify(pt->height0, sub.level), u_minify(pt->depth0, sub.level));
+}
+
+// TODO: actually test this for all formats, it's probably wrong for some...
+
+static INLINE int
+nvfx_surface_format(enum pipe_format format)
+{
+ switch(util_format_get_blocksize(format)) {
+ case 1:
+ return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
+ case 2:
+ //return NV04_CONTEXT_SURFACES_2D_FORMAT_Y16;
+ return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
+ case 4:
+ //if(format == PIPE_FORMAT_B8G8R8X8_UNORM || format == PIPE_FORMAT_B8G8R8A8_UNORM)
+ return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
+ //else
+ // return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
+ default:
+ return -1;
+ }
+}
+
+static INLINE int
+nv04_scaled_image_format(enum pipe_format format)
+{
+ switch(util_format_get_blocksize(format)) {
+ case 1:
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
+ case 2:
+ //if(format == PIPE_FORMAT_B5G5R5A1_UNORM)
+ // return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
+ //else
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
+ case 4:
+ if(format == PIPE_FORMAT_B8G8R8X8_UNORM)
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
+ else
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
+ default:
+ return -1;
+ }
+}
+
+// XXX: must save index buffer too!
+static struct blitter_context*
+nvfx_get_blitter(struct pipe_context* pipe, int copy)
+{
+ struct nvfx_context* nvfx = nvfx_context(pipe);
+
+ struct blitter_context* blitter = nvfx->blitter;
+ if(!blitter)
+ nvfx->blitter = blitter = util_blitter_create(pipe);
+
+ util_blitter_save_blend(blitter, nvfx->blend);
+ util_blitter_save_depth_stencil_alpha(blitter, nvfx->zsa);
+ util_blitter_save_stencil_ref(blitter, &nvfx->stencil_ref);
+ util_blitter_save_rasterizer(blitter, nvfx->rasterizer);
+ util_blitter_save_fragment_shader(blitter, nvfx->fragprog);
+ util_blitter_save_vertex_shader(blitter, nvfx->vertprog);
+ util_blitter_save_viewport(blitter, &nvfx->viewport);
+ util_blitter_save_framebuffer(blitter, &nvfx->framebuffer);
+ util_blitter_save_clip(blitter, &nvfx->clip);
+ util_blitter_save_vertex_elements(blitter, nvfx->vtxelt);
+ util_blitter_save_vertex_buffers(blitter, nvfx->vtxbuf_nr, nvfx->vtxbuf);
+
+ if(copy)
+ {
+ util_blitter_save_fragment_sampler_states(blitter, nvfx->nr_samplers, (void**)nvfx->tex_sampler);
+ util_blitter_save_fragment_sampler_views(blitter, nvfx->nr_textures, nvfx->fragment_sampler_views);
+ }
+
+ return blitter;
+}
+
+static unsigned
+nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned w, unsigned h, boolean for_read)
+{
+ unsigned begin = nv04_region_begin(rgn, w, h);
+ unsigned end = nv04_region_end(rgn, w, h);
+ unsigned size = end - begin;
+ struct nouveau_bo* bo = 0;
+ nouveau_bo_new(rgn->bo->device, NOUVEAU_BO_MAP | NOUVEAU_BO_GART, 256, size, &bo);
+
+ if(for_read || (size > ((w * h) << rgn->bpps)))
+ nv04_memcpy(ctx, bo, 0, rgn->bo, rgn->offset + begin, size);
+
+ rgn->bo = bo;
+ rgn->offset = -begin;
+ return begin;
+}
static void
-nvfx_surface_copy(struct pipe_context *pipe,
- struct pipe_resource *dest, struct pipe_subresource subdst,
- unsigned destx, unsigned desty, unsigned destz,
- struct pipe_resource *src, struct pipe_subresource subsrc,
+nvfx_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dstr, struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *srcr, struct pipe_subresource subsrc,
unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ unsigned w, unsigned h)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;
- struct pipe_surface *ps_dst, *ps_src;
+ static int copy_threshold = -1;
+ struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
+ struct nv04_region dst, src;
+ int dst_to_gpu;
+ int src_on_gpu;
+ boolean small;
+ int ret;
+
+ if(!w || !h)
+ return;
+
+ if(copy_threshold < 0)
+ copy_threshold = debug_get_num_option("NOUVEAU_COPY_THRESHOLD", 4);
+
+ dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
+ src_on_gpu = nvfx_resource_on_gpu(srcr);
+
+ nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
+ nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
+ w = util_format_get_stride(dstr->format, w) >> dst.bpps;
+ h = util_format_get_nblocksy(dstr->format, h);
- ps_src = nvfx_miptree_surface_new(pipe->screen, src, subsrc.face,
- subsrc.level, srcz, 0 /* bind flags */);
- ps_dst = nvfx_miptree_surface_new(pipe->screen, dest, subdst.face,
- subdst.level, destz, 0 /* bindflags */);
+ small = (w * h <= copy_threshold);
+ if((!dst_to_gpu || !src_on_gpu) && small)
+ ret = -1; /* use the CPU */
+ else
+ ret = nv04_region_copy_2d(ctx, &dst, &src, w, h,
+ dstr->target == PIPE_BUFFER ? -1 : nvfx_surface_format(dstr->format),
+ dstr->target == PIPE_BUFFER ? -1 : nv04_scaled_image_format(dstr->format),
+ dst_to_gpu, src_on_gpu);
+ if(!ret)
+ {}
+ else if(ret > 0 && dstr->bind & PIPE_BIND_RENDER_TARGET && srcr->bind & PIPE_BIND_SAMPLER_VIEW)
+ {
+ struct blitter_context* blitter = nvfx_get_blitter(pipe, 1);
+ util_blitter_copy_region(blitter, dstr, subdst, dstx, dsty, dstz, srcr, subsrc, srcx, srcy, srcz, w, h, TRUE);
+ }
+ else
+ {
+ struct nv04_region dstt = dst;
+ struct nv04_region srct = src;
+ unsigned dstbegin = 0;
- eng2d->copy(eng2d, ps_dst, destx, desty, ps_src, srcx, srcy, width, height);
+ if(!small)
+ {
+ if(src_on_gpu)
+ nvfx_region_clone(ctx, &srct, w, h, TRUE);
- nvfx_miptree_surface_del(ps_src);
- nvfx_miptree_surface_del(ps_dst);
+ if(dst_to_gpu)
+ dstbegin = nvfx_region_clone(ctx, &dstt, w, h, FALSE);
+ }
+
+ nv04_region_copy_cpu(&dstt, &srct, w, h);
+
+ if(srct.bo != src.bo)
+ nouveau_screen_bo_release(pipe->screen, srct.bo);
+
+ if(dstt.bo != dst.bo)
+ {
+ nv04_memcpy(ctx, dst.bo, dst.offset + dstbegin, dstt.bo, 0, dstt.bo->size);
+ nouveau_screen_bo_release(pipe->screen, dstt.bo);
+ }
+ }
+}
+
+static int
+nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts,
+ unsigned dx, unsigned dy, unsigned w, unsigned h, unsigned value)
+{
+ struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
+ struct nv04_region dst;
+ int ret;
+ /* Always try to use the GPU right now, if possible
+ * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */
+
+ // we don't care about interior pixel order since we set all them to the same value
+ nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy, TRUE);
+
+ w = util_format_get_stride(dsts->format, w) >> dst.bpps;
+ h = util_format_get_nblocksy(dsts->format, h);
+
+ ret = nv04_region_fill_2d(ctx, &dst, w, h, value);
+ if(ret > 0 && dsts->texture->bind & PIPE_BIND_RENDER_TARGET)
+ return 1;
+ else if(ret)
+ {
+ struct nv04_region dstt = dst;
+ unsigned dstbegin = 0;
+
+ if(nvfx_resource_on_gpu(dsts->texture))
+ dstbegin = nvfx_region_clone(ctx, &dstt, w, h, FALSE);
+
+ nv04_region_fill_cpu(&dstt, w, h, value);
+
+ if(dstt.bo != dst.bo)
+ {
+ nv04_memcpy(ctx, dst.bo, dst.offset + dstbegin, dstt.bo, 0, dstt.bo->size);
+ nouveau_screen_bo_release(pipe->screen, dstt.bo);
+ }
+ }
+
+ return 0;
+}
+
+
+void
+nvfx_screen_surface_takedown(struct pipe_screen *pscreen)
+{
+ nv04_2d_context_takedown(nvfx_screen(pscreen)->eng2d);
+ nvfx_screen(pscreen)->eng2d = 0;
+}
+
+int
+nvfx_screen_surface_init(struct pipe_screen *pscreen)
+{
+ struct nv04_2d_context* ctx = nv04_2d_context_init(nouveau_screen(pscreen)->channel);
+ if(!ctx)
+ return -1;
+ nvfx_screen(pscreen)->eng2d = ctx;
+ return 0;
+}
+
+static void
+nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp)
+{
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ struct pipe_subresource tempsr, surfsr;
+ struct nvfx_context* nvfx = nvfx_context(pipe);
+
+ // TODO: we really should do this validation before setting these variable in draw calls
+ unsigned use_vertex_buffers = nvfx->use_vertex_buffers;
+ boolean use_index_buffer = nvfx->use_index_buffer;
+ unsigned base_vertex = nvfx->base_vertex;
+
+ tempsr.face = 0;
+ tempsr.level = 0;
+ surfsr.face = surf->face;
+ surfsr.level = surf->level;
+
+ if(to_temp)
+ nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
+ else
+ nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
+
+ nvfx->use_vertex_buffers = use_vertex_buffers;
+ nvfx->use_index_buffer = use_index_buffer;
+ nvfx->base_vertex = base_vertex;
+
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+}
+
+void
+nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
+{
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ struct pipe_resource template;
+ memset(&template, 0, sizeof(struct pipe_resource));
+ template.target = PIPE_TEXTURE_2D;
+ template.format = surf->format;
+ template.width0 = surf->width;
+ template.height0 = surf->height;
+ template.depth0 = 1;
+ template.nr_samples = surf->texture->nr_samples;
+ template.flags = NVFX_RESOURCE_FLAG_LINEAR;
+
+ ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template);
+ nvfx_surface_copy_temp(pipe, surf, 1);
+}
+
+void
+nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
+{
+ struct nvfx_context* nvfx = (struct nvfx_context*)pipe;
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ boolean bound = FALSE;
+
+ /* must be done before the copy, otherwise the copy will use the temp as destination */
+ util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);
+
+ nvfx_surface_copy_temp(pipe, surf, 0);
+
+ if(nvfx->framebuffer.zsbuf == surf)
+ bound = TRUE;
+ else
+ {
+ for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+ {
+ if(nvfx->framebuffer.cbufs[i] == surf)
+ {
+ bound = TRUE;
+ break;
+ }
+ }
+ }
+
+ if(!bound)
+ pipe_resource_reference((struct pipe_resource**)&ns->temp, 0);
}
static void
@@ -62,12 +450,16 @@ nvfx_clear_render_target(struct pipe_context *pipe,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;
union util_color uc;
util_pack_color(rgba, dst->format, &uc);
- eng2d->fill(eng2d, dst, dstx, dsty, width, height, uc.ui);
+ if(util_format_get_blocksizebits(dst->format) > 32
+ || nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, uc.ui))
+ {
+ // TODO: probably should use hardware clear here instead if possible
+ struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
+ util_blitter_clear_render_target(blitter, dst, rgba, dstx, dsty, width, height);
+ }
}
static void
@@ -79,18 +471,20 @@ nvfx_clear_depth_stencil(struct pipe_context *pipe,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;
-
- eng2d->fill(eng2d, dst, dstx, dsty, width, height,
- util_pack_z_stencil(dst->format, depth, stencil));
+ if(util_format_get_blocksizebits(dst->format) > 32
+ || nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, util_pack_z_stencil(dst->format, depth, stencil)))
+ {
+ // TODO: probably should use hardware clear here instead if possible
+ struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
+ util_blitter_clear_depth_stencil(blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height);
+ }
}
void
nvfx_init_surface_functions(struct nvfx_context *nvfx)
{
- nvfx->pipe.resource_copy_region = nvfx_surface_copy;
+ nvfx->pipe.resource_copy_region = nvfx_resource_copy_region;
nvfx->pipe.clear_render_target = nvfx_clear_render_target;
nvfx->pipe.clear_depth_stencil = nvfx_clear_depth_stencil;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_tex.h b/src/gallium/drivers/nvfx/nvfx_tex.h
index 69187a79e7..34be936a89 100644
--- a/src/gallium/drivers/nvfx/nvfx_tex.h
+++ b/src/gallium/drivers/nvfx/nvfx_tex.h
@@ -1,6 +1,11 @@
#ifndef NVFX_TEX_H_
#define NVFX_TEX_H_
+#include "util/u_math.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include <nouveau/nouveau_class.h>
+
static inline unsigned
nvfx_tex_wrap_mode(unsigned wrap) {
unsigned ret;
@@ -31,7 +36,7 @@ nvfx_tex_wrap_mode(unsigned wrap) {
ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
break;
default:
- NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ assert(0);
ret = NV34TCL_TX_WRAP_S_REPEAT;
break;
}
@@ -40,31 +45,29 @@ nvfx_tex_wrap_mode(unsigned wrap) {
}
static inline unsigned
-nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso)
+nvfx_tex_wrap_compare_mode(unsigned func)
{
- if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- switch (cso->compare_func) {
- case PIPE_FUNC_NEVER:
- return NV34TCL_TX_WRAP_RCOMP_NEVER;
- case PIPE_FUNC_GREATER:
- return NV34TCL_TX_WRAP_RCOMP_GREATER;
- case PIPE_FUNC_EQUAL:
- return NV34TCL_TX_WRAP_RCOMP_EQUAL;
- case PIPE_FUNC_GEQUAL:
- return NV34TCL_TX_WRAP_RCOMP_GEQUAL;
- case PIPE_FUNC_LESS:
- return NV34TCL_TX_WRAP_RCOMP_LESS;
- case PIPE_FUNC_NOTEQUAL:
- return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
- case PIPE_FUNC_LEQUAL:
- return NV34TCL_TX_WRAP_RCOMP_LEQUAL;
- case PIPE_FUNC_ALWAYS:
- return NV34TCL_TX_WRAP_RCOMP_ALWAYS;
- default:
- break;
- }
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ return NV34TCL_TX_WRAP_RCOMP_NEVER;
+ case PIPE_FUNC_GREATER:
+ return NV34TCL_TX_WRAP_RCOMP_GREATER;
+ case PIPE_FUNC_EQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_EQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_GEQUAL;
+ case PIPE_FUNC_LESS:
+ return NV34TCL_TX_WRAP_RCOMP_LESS;
+ case PIPE_FUNC_NOTEQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_LEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return NV34TCL_TX_WRAP_RCOMP_ALWAYS;
+ default:
+ assert(0);
+ return 0;
}
- return 0;
}
static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso)
@@ -128,6 +131,45 @@ struct nvfx_sampler_state {
uint32_t en;
uint32_t filt;
uint32_t bcol;
+ uint32_t min_lod;
+ uint32_t max_lod;
+ boolean compare;
+};
+
+struct nvfx_sampler_view {
+ struct pipe_sampler_view base;
+ int offset;
+ uint32_t swizzle;
+ uint32_t npot_size;
+ uint32_t filt;
+ uint32_t wrap_mask;
+ uint32_t wrap;
+ uint32_t lod_offset;
+ uint32_t max_lod_limit;
+ union
+ {
+ struct
+ {
+ uint32_t fmt[4]; /* nv30 has 4 entries, nv40 one */
+ int rect;
+ } nv30;
+ struct
+ {
+ uint32_t fmt[2]; /* nv30 has 4 entries, nv40 one */
+ uint32_t npot_size2; /* nv40 only */
+ } nv40;
+ uint32_t init_fmt;
+ } u;
};
+struct nvfx_texture_format {
+ int fmt[6];
+ unsigned sign;
+ unsigned wrap;
+ unsigned char src[6];
+ unsigned char comp[6];
+};
+
+extern struct nvfx_texture_format nvfx_texture_formats[PIPE_FORMAT_COUNT];
+
#endif /* NVFX_TEX_H_ */
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c
index 9ff0a93d30..7cb47a20f6 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -4,204 +4,218 @@
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_math.h"
-#include "nouveau/nouveau_winsys.h"
+#include "util/u_staging.h"
#include "nvfx_context.h"
#include "nvfx_screen.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"
#include "nvfx_transfer.h"
-struct nvfx_transfer {
- struct pipe_transfer base;
- struct pipe_surface *surface;
- boolean direct;
-};
-
-static void
-nvfx_compatible_transfer_tex(struct pipe_resource *pt, unsigned width, unsigned height,
- unsigned bind,
- struct pipe_resource *template)
-{
- memset(template, 0, sizeof(struct pipe_resource));
- template->target = pt->target;
- template->format = pt->format;
- template->width0 = width;
- template->height0 = height;
- template->depth0 = 1;
- template->last_level = 0;
- template->nr_samples = pt->nr_samples;
- template->bind = bind;
- template->usage = PIPE_USAGE_DYNAMIC;
- template->flags = NVFX_RESOURCE_FLAG_LINEAR;
-}
-
-
-static unsigned nvfx_transfer_bind_flags( unsigned transfer_usage )
+struct nvfx_staging_transfer
{
- unsigned bind = 0;
+ struct util_staging_transfer base;
-#if 0
- if (transfer_usage & PIPE_TRANSFER_WRITE)
- bind |= PIPE_BIND_BLIT_SOURCE;
-
- if (transfer_usage & PIPE_TRANSFER_READ)
- bind |= PIPE_BIND_BLIT_DESTINATION;
-#endif
-
- return bind;
-}
+ unsigned offset;
+ unsigned map_count;
+};
struct pipe_transfer *
-nvfx_miptree_transfer_new(struct pipe_context *pipe,
+nvfx_transfer_new(struct pipe_context *pipe,
struct pipe_resource *pt,
struct pipe_subresource sr,
unsigned usage,
const struct pipe_box *box)
{
- struct pipe_screen *pscreen = pipe->screen;
- struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
- struct nvfx_transfer *tx;
- struct pipe_resource tx_tex_template, *tx_tex;
- static int no_transfer = -1;
- unsigned bind = nvfx_transfer_bind_flags(usage);
- if(no_transfer < 0)
- no_transfer = debug_get_bool_option("NOUVEAU_NO_TRANSFER", FALSE);
-
-
- tx = CALLOC_STRUCT(nvfx_transfer);
- if (!tx)
- return NULL;
-
- /* Don't handle 3D transfers yet.
- */
- assert(box->depth == 1);
-
- pipe_resource_reference(&tx->base.resource, pt);
- tx->base.sr = sr;
- tx->base.usage = usage;
- tx->base.box = *box;
- tx->base.stride = mt->level[sr.level].pitch;
-
- /* Direct access to texture */
- if ((pt->usage == PIPE_USAGE_DYNAMIC ||
- no_transfer) &&
- pt->flags & NVFX_RESOURCE_FLAG_LINEAR)
+ if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK)
+ {
+ struct nouveau_bo* bo = ((struct nvfx_resource*)pt)->bo;
+ if(bo && nouveau_bo_busy(bo, NOUVEAU_BO_WR))
+ return NULL;
+ }
+
+ if(pt->target == PIPE_BUFFER)
{
- tx->direct = true;
-
- /* XXX: just call the internal nvfx function.
- */
- tx->surface = pscreen->get_tex_surface(pscreen, pt,
- sr.face, sr.level,
- box->z,
- bind);
- return &tx->base;
- }
+ // it would be nice if we could avoid all this ridiculous overhead...
+ struct pipe_transfer* tx;
+ struct nvfx_buffer* buffer = nvfx_buffer(pt);
+
+ tx = CALLOC_STRUCT(pipe_transfer);
+ if (!tx)
+ return NULL;
- tx->direct = false;
+ pipe_resource_reference(&tx->resource, pt);
+ tx->sr = sr;
+ tx->usage = usage;
+ tx->box = *box;
- nvfx_compatible_transfer_tex(pt, box->width, box->height, bind, &tx_tex_template);
+ tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width);
+ tx->data = buffer->data + util_format_get_stride(pt->format, box->x);
- tx_tex = pscreen->resource_create(pscreen, &tx_tex_template);
- if (!tx_tex)
+ return tx;
+ }
+ else
{
- FREE(tx);
- return NULL;
+ struct nvfx_staging_transfer* tx;
+ bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
+
+ tx = CALLOC_STRUCT(nvfx_staging_transfer);
+ if(!tx)
+ return NULL;
+
+ util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base);
+
+ if(direct)
+ {
+ tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level);
+ tx->base.base.slice_stride = tx->base.base.stride * u_minify(pt->height0, sr.level);
+ tx->offset = nvfx_subresource_offset(pt, sr.face, sr.level, box->z)
+ + util_format_get_2d_size(pt->format, tx->base.base.stride, box->y)
+ + util_format_get_stride(pt->format, box->x);
+ }
+ else
+ {
+ tx->base.base.stride = nvfx_subresource_pitch(tx->base.staging_resource, 0);
+ tx->base.base.slice_stride = tx->base.base.stride * tx->base.staging_resource->height0;
+ tx->offset = 0;
+ }
+
+ assert(tx->base.base.stride);
+
+ return &tx->base.base;
}
+}
- tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch;
-
- tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
- 0, 0, 0,
- bind);
-
- pipe_resource_reference(&tx_tex, NULL);
-
- if (!tx->surface)
+static void nvfx_buffer_dirty_interval(struct nvfx_buffer* buffer, unsigned begin, unsigned size, boolean unsynchronized)
+{
+ struct nvfx_screen* screen = nvfx_screen(buffer->base.base.screen);
+ buffer->last_update_static = buffer->bytes_to_draw_until_static < 0;
+ if(buffer->dirty_begin == buffer->dirty_end)
{
- pipe_surface_reference(&tx->surface, NULL);
- FREE(tx);
- return NULL;
+ buffer->dirty_begin = begin;
+ buffer->dirty_end = begin + size;
+ buffer->dirty_unsynchronized = unsynchronized;
+ }
+ else
+ {
+ buffer->dirty_begin = MIN2(buffer->dirty_begin, begin);
+ buffer->dirty_end = MAX2(buffer->dirty_end, begin + size);
+ buffer->dirty_unsynchronized &= unsynchronized;
}
- if (usage & PIPE_TRANSFER_READ) {
- struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
- struct pipe_surface *src;
+ if(unsynchronized)
+ {
+ // TODO: revisit this, it doesn't seem quite right
+ //printf("UNSYNC UPDATE %p %u %u\n", buffer, begin, size);
+ buffer->bytes_to_draw_until_static += size * screen->static_reuse_threshold;
+ }
+ else
+ buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+}
- src = pscreen->get_tex_surface(pscreen, pt,
- sr.face, sr.level, box->z,
- 0 /*PIPE_BIND_BLIT_SOURCE*/);
+static void nvfx_transfer_flush_region( struct pipe_context *pipe,
+ struct pipe_transfer *ptx,
+ const struct pipe_box *box)
+{
+ if(ptx->resource->target == PIPE_BUFFER && (ptx->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+ nvfx_buffer_dirty_interval(buffer,
+ (uint8_t*)ptx->data - buffer->data + util_format_get_stride(buffer->base.base.format, box->x),
+ util_format_get_stride(buffer->base.base.format, box->width),
+ !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+ }
+}
- /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
- /* TODO: Check if SIFM can un-swizzle */
- nvscreen->eng2d->copy(nvscreen->eng2d,
- tx->surface, 0, 0,
- src,
- box->x, box->y,
- box->width, box->height);
+static void
+nvfx_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ if(ptx->resource->target == PIPE_BUFFER)
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+ if((ptx->usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == PIPE_TRANSFER_WRITE)
+ nvfx_buffer_dirty_interval(buffer,
+ (uint8_t*)ptx->data - buffer->data,
+ ptx->stride,
+ !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+ pipe_resource_reference(&ptx->resource, 0);
+ FREE(ptx);
+ }
+ else
+ {
+ struct nouveau_channel* chan = nvfx_context(pipe)->screen->base.channel;
+ util_staging_transfer_destroy(pipe, ptx);
- pipe_surface_reference(&src, NULL);
+ FIRE_RING(chan);
}
+}
- return &tx->base;
+void *
+nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ if(ptx->resource->target == PIPE_BUFFER)
+ return ptx->data;
+ else
+ {
+ struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+ if(!ptx->data)
+ {
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+ uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
+ ptx->data = map + tx->offset;
+ }
+
+ ++tx->map_count;
+ return ptx->data;
+ }
}
void
-nvfx_miptree_transfer_del(struct pipe_context *pipe,
- struct pipe_transfer *ptx)
+nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
{
- struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
-
- if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) {
- struct pipe_screen *pscreen = pipe->screen;
- struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
- struct pipe_surface *dst;
-
- dst = pscreen->get_tex_surface(pscreen,
- ptx->resource,
- ptx->sr.face,
- ptx->sr.level,
- ptx->box.z,
- 0 /*PIPE_BIND_BLIT_DESTINATION*/);
-
- /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
- nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, ptx->box.x, ptx->box.y,
- tx->surface, 0, 0,
- ptx->box.width, ptx->box.height);
-
- pipe_surface_reference(&dst, NULL);
+ if(ptx->resource->target != PIPE_BUFFER)
+ {
+ struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+
+ if(!--tx->map_count)
+ {
+ nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+ ptx->data = 0;
+ }
}
-
- pipe_surface_reference(&tx->surface, NULL);
- pipe_resource_reference(&ptx->resource, NULL);
- FREE(ptx);
}
-void *
-nvfx_miptree_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
+static void nvfx_transfer_inline_write( struct pipe_context *pipe,
+ struct pipe_resource *pr,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride)
{
- struct pipe_screen *pscreen = pipe->screen;
- struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
- struct nv04_surface *ns = (struct nv04_surface *)tx->surface;
- struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;
- uint8_t *map = nouveau_screen_bo_map(pscreen, mt->base.bo,
- nouveau_screen_transfer_flags(ptx->usage));
-
- if(!tx->direct)
- return map + ns->base.offset;
+ if(pr->target != PIPE_BUFFER)
+ {
+ u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride);
+ }
else
- return (map + ns->base.offset +
- ptx->box.y * ns->pitch +
- ptx->box.x * util_format_get_blocksize(ptx->resource->format));
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(pr);
+ unsigned begin = util_format_get_stride(pr->format, box->x);
+ unsigned size = util_format_get_stride(pr->format, box->width);
+ memcpy(buffer->data + begin, data, size);
+ nvfx_buffer_dirty_interval(buffer, begin, size,
+ !!(pr->flags & PIPE_TRANSFER_UNSYNCHRONIZED));
+ }
}
void
-nvfx_miptree_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
+nvfx_init_transfer_functions(struct pipe_context *pipe)
{
- struct pipe_screen *pscreen = pipe->screen;
- struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
- struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;
-
- nouveau_screen_bo_unmap(pscreen, mt->base.bo);
+ pipe->get_transfer = nvfx_transfer_new;
+ pipe->transfer_map = nvfx_transfer_map;
+ pipe->transfer_flush_region = nvfx_transfer_flush_region;
+ pipe->transfer_unmap = nvfx_transfer_unmap;
+ pipe->transfer_destroy = nvfx_transfer_destroy;
+ pipe->transfer_inline_write = nvfx_transfer_inline_write;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.h b/src/gallium/drivers/nvfx/nvfx_transfer.h
index 3e3317b2c7..20f20d5b0b 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.h
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.h
@@ -7,19 +7,17 @@
struct pipe_transfer *
-nvfx_miptree_transfer_new(struct pipe_context *pcontext,
+nvfx_transfer_new(struct pipe_context *pcontext,
struct pipe_resource *pt,
struct pipe_subresource sr,
unsigned usage,
const struct pipe_box *box);
-void
-nvfx_miptree_transfer_del(struct pipe_context *pcontext,
- struct pipe_transfer *ptx);
+
void *
-nvfx_miptree_transfer_map(struct pipe_context *pcontext,
+nvfx_transfer_map(struct pipe_context *pcontext,
struct pipe_transfer *ptx);
void
-nvfx_miptree_transfer_unmap(struct pipe_context *pcontext,
+nvfx_transfer_unmap(struct pipe_context *pcontext,
struct pipe_transfer *ptx);
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 4aa3793842..e6e9a8f2e4 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -2,6 +2,7 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "translate/translate.h"
#include "nvfx_context.h"
#include "nvfx_state.h"
@@ -10,646 +11,595 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_class.h"
#include "nouveau/nouveau_pushbuf.h"
-#include "nouveau/nouveau_util.h"
-static INLINE int
-nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+static inline unsigned
+util_guess_unique_indices_count(unsigned mode, unsigned indices)
{
- switch (pipe) {
- case PIPE_FORMAT_R32_FLOAT:
- case PIPE_FORMAT_R32G32_FLOAT:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
- break;
- case PIPE_FORMAT_R16_FLOAT:
- case PIPE_FORMAT_R16G16_FLOAT:
- case PIPE_FORMAT_R16G16B16_FLOAT:
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- *fmt = NV34TCL_VTXFMT_TYPE_HALF;
- break;
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R8G8_UNORM:
- case PIPE_FORMAT_R8G8B8_UNORM:
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
- break;
- case PIPE_FORMAT_R16_SSCALED:
- case PIPE_FORMAT_R16G16_SSCALED:
- case PIPE_FORMAT_R16G16B16_SSCALED:
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
- break;
- default:
- NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
- return 1;
+ /* Euler's formula gives V =
+ * = E - F + 2 =
+ * = F * (polygon_edges / 2 - 1) + 2 =
+ * = F * (polygon_edges - 2) / 2 + 2 =
+ * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
+ * = indices * (1 / 2 - 1 / polygon_edges) + 2
+ */
+ switch(mode)
+ {
+ case PIPE_PRIM_LINES:
+ return indices >> 1;
+ case PIPE_PRIM_TRIANGLES:
+ {
+ // avoid an expensive division by 3 using the multiplicative inverse mod 2^32
+ unsigned q;
+ unsigned inv3 = 2863311531;
+ indices >>= 1;
+ q = indices * inv3;
+ if(unlikely(q >= indices))
+ {
+ q += inv3;
+ if(q >= indices)
+ q += inv3;
+ }
+ return indices + 2;
+ //return indices / 6 + 2;
}
-
- switch (pipe) {
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R32_FLOAT:
- case PIPE_FORMAT_R16_FLOAT:
- case PIPE_FORMAT_R16_SSCALED:
- *ncomp = 1;
- break;
- case PIPE_FORMAT_R8G8_UNORM:
- case PIPE_FORMAT_R32G32_FLOAT:
- case PIPE_FORMAT_R16G16_FLOAT:
- case PIPE_FORMAT_R16G16_SSCALED:
- *ncomp = 2;
- break;
- case PIPE_FORMAT_R8G8B8_UNORM:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- case PIPE_FORMAT_R16G16B16_FLOAT:
- case PIPE_FORMAT_R16G16B16_SSCALED:
- *ncomp = 3;
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- *ncomp = 4;
- break;
+ // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
+ case PIPE_PRIM_QUADS:
+ return (indices >> 1) + 2;
+ // return (indices >> 2) + 2; // if it is a closed mesh
default:
- NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
- return 1;
+ return indices;
}
-
- return 0;
}
-static boolean
-nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
- unsigned ib_size)
+static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
- unsigned type;
-
- if (!ib) {
- nvfx->idxbuf_buffer = NULL;
- nvfx->idxbuf_format = 0xdeadbeef;
- return FALSE;
+ struct nvfx_context* nvfx = nvfx_context(pipe);
+ unsigned hardware_cost = 0;
+ unsigned inline_cost = 0;
+ unsigned unique_vertices;
+ unsigned upload_mode;
+ float best_index_cost_for_hardware_vertices_as_inline_cost;
+ boolean prefer_hardware_indices;
+ unsigned index_inline_cost;
+ unsigned index_hardware_cost;
+ if (info->indexed)
+ unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
+ else
+ unique_vertices = info->count;
+
+ /* Here we try to figure out if we are better off writing vertex data directly on the FIFO,
+ * or create hardware buffer objects and pointing the hardware to them.
+ *
+ * This is done by computing the total memcpy cost of each option, ignoring uploads
+ * if we think that the buffer is static and thus the upload cost will be amortized over
+ * future draw calls.
+ *
+ * For instance, if everything looks static, we will always create buffer objects, while if
+ * everything is a user buffer and we are not doing indexed drawing, we never do.
+ *
+ * Other interesting cases are where a small user vertex buffer, but a huge user index buffer,
+ * where we will upload the vertex buffer, so that we can use hardware index lookup, and
+ * the opposite case, where we instead do index lookup in software to avoid uploading
+ * a huge amount of vertex data that is not going to be used.
+ *
+ * Otherwise, we generally move to the GPU the after it has been pushed
+ * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having
+ * been updated with a transfer (or just the buffer having been destroyed).
+ *
+ * There is no special handling for user buffers, since applications can use
+ * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this
+ * by the way.
+ *
+ * Note that currently we don't support only putting some data on the FIFO, and
+ * some on vertex buffers (constant and instanced data is independent from this).
+ *
+ * nVidia doesn't seem to do this either, even though it should be at least
+ * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed.
+ */
+
+ for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+ buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
+ if (!nvfx_buffer_seems_static(buffer))
+ {
+ hardware_cost += buffer->dirty_end - buffer->dirty_begin;
+ if (!buffer->base.bo)
+ hardware_cost += nvfx->screen->buffer_allocation_cost;
+ }
+ inline_cost += vbi->per_vertex_size * info->count;
}
- if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1)
- return FALSE;
+ best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
+ prefer_hardware_indices = FALSE;
+ index_inline_cost = 0;
+ index_hardware_cost = 0;
- switch (ib_size) {
- case 2:
- type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
- break;
- case 4:
- type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
- break;
- default:
- return FALSE;
- }
+ if (info->indexed)
+ {
+ index_inline_cost = nvfx->idxbuf.index_size * info->count;
+ if (nvfx->screen->index_buffer_reloc_flags
+ && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
+ && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
+ buffer->bytes_to_draw_until_static -= index_inline_cost;
- if (ib != nvfx->idxbuf_buffer ||
- type != nvfx->idxbuf_format) {
- nvfx->dirty |= NVFX_NEW_ARRAYS;
- nvfx->idxbuf_buffer = ib;
- nvfx->idxbuf_format = type;
- }
+ prefer_hardware_indices = TRUE;
- return TRUE;
-}
+ if (!nvfx_buffer_seems_static(buffer))
+ {
+ index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
+ if (!buffer->base.bo)
+ index_hardware_cost += nvfx->screen->buffer_allocation_cost;
+ }
-// type must be floating point
-static inline void
-nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
- int attrib, struct pipe_vertex_element *ve,
- struct pipe_vertex_buffer *vb, unsigned ncomp)
-{
- struct pipe_transfer *transfer;
- struct nouveau_channel* chan = nvfx->screen->base.channel;
- void *map;
- float *v;
-
- map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
- map = (uint8_t *) map + vb->buffer_offset + ve->src_offset;
-
- v = map;
-
- switch (ncomp) {
- case 4:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
- OUT_RING(chan, fui(v[0]));
- OUT_RING(chan, fui(v[1]));
- OUT_RING(chan, fui(v[2]));
- OUT_RING(chan, fui(v[3]));
- break;
- case 3:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
- OUT_RING(chan, fui(v[0]));
- OUT_RING(chan, fui(v[1]));
- OUT_RING(chan, fui(v[2]));
- break;
- case 2:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
- OUT_RING(chan, fui(v[0]));
- OUT_RING(chan, fui(v[1]));
- break;
- case 1:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
- OUT_RING(chan, fui(v[0]));
- break;
+ if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
+ {
+ best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
+ }
+ else
+ {
+ best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
+ prefer_hardware_indices = TRUE;
+ }
+ }
}
- pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
+ /* let's finally figure out which of the 3 paths we want to take */
+ if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
+ upload_mode = 1 + prefer_hardware_indices;
+ else
+ upload_mode = 0;
+
+#ifdef DEBUG
+ if (unlikely(nvfx->screen->trace_draw))
+ {
+ fprintf(stderr, "DRAW");
+ if (info->indexed)
+ {
+ fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
+ if (info->index_bias)
+ fprintf(stderr, " biased %u", info->index_bias);
+ fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
+ }
+ if (info->instance_count > 1)
+ fprintf(stderr, " %u instances from %u", info->instance_count, info->indexed);
+ fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
+ if (!upload_mode)
+ fprintf(stderr, " -> inline vertex data");
+ else if (upload_mode == 2 || !info->indexed)
+ fprintf(stderr, " -> buffer range");
+ else
+ fprintf(stderr, " -> inline indices");
+ fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
+ for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+ if (i)
+ fprintf(stderr, ", ");
+ fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
+ }
+ fprintf(stderr, ">\n");
+ }
+#endif
+
+ return upload_mode;
}
-static void
-nvfx_draw_arrays(struct pipe_context *pipe,
- unsigned mode, unsigned start, unsigned count)
+void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
- unsigned restart = 0;
-
- nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
- if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
- nvfx_draw_elements_swtnl(pipe, NULL, 0, 0,
- mode, start, count);
- return;
- }
+ unsigned upload_mode = 0;
- while (count) {
- unsigned vc, nr, avail;
+ if (!nvfx->vtxelt->needs_translate)
+ upload_mode = nvfx_decide_upload_mode(pipe, info);
- nvfx_state_emit(nvfx);
+ nvfx->use_index_buffer = upload_mode > 1;
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+ if ((upload_mode > 0) != nvfx->use_vertex_buffers)
+ {
+ nvfx->use_vertex_buffers = (upload_mode > 0);
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+ }
- vc = nouveau_vbuf_split(avail, 6, 256,
- mode, start, count, &restart);
- if (!vc) {
- FIRE_RING(chan);
- continue;
+ if (upload_mode > 0)
+ {
+ for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ nvfx_buffer_upload(nvfx_buffer(vb->buffer));
}
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ if (upload_mode > 1)
+ {
+ nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));
- nr = (vc & 0xff);
- if (nr) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
- OUT_RING (chan, ((nr - 1) << 24) | start);
- start += nr;
+ if (unlikely(info->index_bias != nvfx->base_vertex))
+ {
+ nvfx->base_vertex = info->index_bias;
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ }
}
-
- nr = vc >> 8;
- while (nr) {
- unsigned push = nr > 2047 ? 2047 : nr;
-
- nr -= push;
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
- while (push--) {
- OUT_RING(chan, ((0x100 - 1) << 24) | start);
- start += 0x100;
+ else
+ {
+ if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
+ {
+ nvfx->base_vertex = 0;
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
}
}
-
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
-
- count -= vc;
- start = restart;
}
- pipe->flush(pipe, 0, NULL);
+ if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
+ nvfx_draw_vbo_swtnl(pipe, info);
+ else
+ nvfx_push_vbo(pipe, info);
}
-static INLINE void
-nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
- unsigned mode, unsigned start, unsigned count)
+boolean
+nvfx_vbo_validate(struct nvfx_context *nvfx)
{
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ int i;
+ int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
+ unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
- while (count) {
- uint8_t *elts = (uint8_t *)ib + start;
- unsigned vc, push, restart = 0, avail;
+ if (!elements)
+ return TRUE;
- nvfx_state_emit(nvfx);
+ MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
+ for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
+ {
+ struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+ struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+ float v[4];
+ ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
+ nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
+ }
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
- vc = nouveau_vbuf_split(avail, 6, 2,
- mode, start, count, &restart);
- if (vc == 0) {
- FIRE_RING(chan);
- continue;
- }
- count -= vc;
+ OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+ if(nvfx->use_vertex_buffers)
+ {
+ unsigned idx = 0;
+ for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+ struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ if(idx != ve->idx)
+ {
+ assert(idx < ve->idx);
+ OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
+ idx = ve->idx;
+ }
- if (vc & 1) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
- OUT_RING (chan, elts[0]);
- elts++; vc--;
+ OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT));
+ ++idx;
}
+ if(idx != nvfx->vtxelt->num_elements)
+ OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
+ }
+ else
+ OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);
- while (vc) {
- unsigned i;
-
- push = MIN2(vc, 2047 * 2);
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
- for (i = 0; i < push; i+=2)
- OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
+ for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
+ OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT);
- vc -= push;
- elts += push;
+ if(nvfx->is_nv4x) {
+ unsigned i;
+ /* seems to be some kind of cache flushing */
+ for(i = 0; i < 3; ++i) {
+ OUT_RING(chan, RING_3D(0x1718, 1));
+ OUT_RING(chan, 0);
}
-
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
-
- start = restart;
}
-}
-
-static INLINE void
-nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
- unsigned mode, unsigned start, unsigned count)
-{
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
-
- while (count) {
- uint16_t *elts = (uint16_t *)ib + start;
- unsigned vc, push, restart = 0, avail;
-
- nvfx_state_emit(nvfx);
-
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
- vc = nouveau_vbuf_split(avail, 6, 2,
- mode, start, count, &restart);
- if (vc == 0) {
- FIRE_RING(chan);
- continue;
- }
- count -= vc;
+ OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+ if(nvfx->use_vertex_buffers)
+ {
+ unsigned idx = 0;
+ for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+ struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ for(; idx < ve->idx; ++idx)
+ OUT_RING(chan, 0);
- if (vc & 1) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
- OUT_RING (chan, elts[0]);
- elts++; vc--;
+ OUT_RELOC(chan, bo,
+ vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+ ++idx;
}
- while (vc) {
- unsigned i;
-
- push = MIN2(vc, 2047 * 2);
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
- for (i = 0; i < push; i+=2)
- OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
-
- vc -= push;
- elts += push;
- }
+ for(; idx < elements; ++idx)
+ OUT_RING(chan, 0);
+ }
+ else
+ {
+ for (i = 0; i < elements; i++)
+ OUT_RING(chan, 0);
+ }
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
+ OUT_RING(chan, RING_3D(0x1710, 1));
+ OUT_RING(chan, 0);
- start = restart;
- }
+ nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
+ return TRUE;
}
-static INLINE void
-nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
- unsigned mode, unsigned start, unsigned count)
+void
+nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
-
- while (count) {
- uint32_t *elts = (uint32_t *)ib + start;
- unsigned vc, push, restart = 0, avail;
-
- nvfx_state_emit(nvfx);
-
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
- vc = nouveau_vbuf_split(avail, 5, 1,
- mode, start, count, &restart);
- if (vc == 0) {
- FIRE_RING(chan);
- continue;
- }
- count -= vc;
-
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
-
- while (vc) {
- push = MIN2(vc, 2047);
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
- OUT_RINGp (chan, elts, push);
+ struct nouveau_channel* chan;
+ unsigned vb_flags;
+ int i;
- vc -= push;
- elts += push;
- }
+ if(!nvfx->use_vertex_buffers)
+ return;
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
+ chan = nvfx->screen->base.channel;
+ vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
- start = restart;
+ MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
+ for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+ struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1),
+ vb_flags, 0, 0);
+ OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
}
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
}
static void
-nvfx_draw_elements_inline(struct pipe_context *pipe,
- struct pipe_resource *ib,
- unsigned ib_size, int ib_bias,
- unsigned mode, unsigned start, unsigned count)
+nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct pipe_transfer *transfer;
- void *map;
-
- map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
- if (!ib) {
- NOUVEAU_ERR("failed mapping ib\n");
- return;
- }
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+ struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
+ ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
- assert(ib_bias == 0);
-
- switch (ib_size) {
- case 1:
- nvfx_draw_elements_u08(nvfx, map, mode, start, count);
- break;
- case 2:
- nvfx_draw_elements_u16(nvfx, map, mode, start, count);
- break;
- case 4:
- nvfx_draw_elements_u32(nvfx, map, mode, start, count);
- break;
- default:
- NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
- break;
- }
+ assert(nvfx->screen->index_buffer_reloc_flags);
- pipe_buffer_unmap(pipe, ib, transfer);
+ MARK_RING(chan, 3, 3);
+ if(ib_flags & NOUVEAU_BO_DUMMY)
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0);
+ else
+ OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
+ OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
+ 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+ nvfx->relocs_needed &=~ NVFX_RELOCATE_IDXBUF;
}
-static void
-nvfx_draw_elements_vbo(struct pipe_context *pipe,
- unsigned mode, unsigned start, unsigned count)
+void
+nvfx_idxbuf_validate(struct nvfx_context* nvfx)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
- unsigned restart = 0;
+ nvfx_idxbuf_emit(nvfx, 0);
+}
- while (count) {
- unsigned nr, vc, avail;
+void
+nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
+{
+ nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
+}
- nvfx_state_emit(nvfx);
+unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R32G32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R32G32B32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R16G16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R16G16B16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8B8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_USCALED] = NV34TCL_VTXFMT_TYPE_8_USCALED,
+ [PIPE_FORMAT_R16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16G16B16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+ [PIPE_FORMAT_R16G16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+ [PIPE_FORMAT_R16G16B16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+ [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+};
+
+static void *
+nvfx_vtxelts_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
+ struct translate_key transkey;
+ unsigned per_vertex_size[16];
+ unsigned vb_compacted_index[16];
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+ if(num_elements > 16)
+ {
+ _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements);
+ num_elements = 16;
+ }
- vc = nouveau_vbuf_split(avail, 6, 256,
- mode, start, count, &restart);
- if (!vc) {
- FIRE_RING(chan);
- continue;
- }
+ memset(per_vertex_size, 0, sizeof(per_vertex_size));
+ memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
+ cso->num_elements = num_elements;
+ cso->needs_translate = FALSE;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for(unsigned i = 0; i < num_elements; ++i)
+ {
+ const struct pipe_vertex_element* ve = &elements[i];
+ if(!ve->instance_divisor)
+ per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
+ }
+
+ for(unsigned i = 0; i < 16; ++i)
+ {
+ if(per_vertex_size[i])
+ {
+ unsigned idx = cso->num_per_vertex_buffer_infos++;
+ cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
+ cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
+ vb_compacted_index[i] = idx;
+ }
+ }
+
+ for(unsigned i = 0; i < num_elements; ++i)
+ {
+ const struct pipe_vertex_element* ve = &elements[i];
+ unsigned type = nvfx_vertex_formats[ve->src_format];
+ unsigned ncomp = util_format_get_nr_components(ve->src_format);
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
+ if(ve->instance_divisor)
+ {
+ struct nvfx_low_frequency_element* lfve;
+ cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT;
+
+ //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
+ if(0)
+ lfve = &cso->constant[cso->num_constant++];
+ else
+ {
+ lfve = &cso->per_instance[cso->num_per_instance++].base;
+ ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
+ }
- nr = (vc & 0xff);
- if (nr) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
- OUT_RING (chan, ((nr - 1) << 24) | start);
- start += nr;
+ lfve->idx = i;
+ lfve->vertex_buffer_index = ve->vertex_buffer_index;
+ lfve->src_offset = ve->src_offset;
+ lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
+ lfve->ncomp = ncomp;
}
-
- nr = vc >> 8;
- while (nr) {
- unsigned push = nr > 2047 ? 2047 : nr;
-
- nr -= push;
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
- while (push--) {
- OUT_RING(chan, ((0x100 - 1) << 24) | start);
- start += 0x100;
+ else
+ {
+ unsigned idx;
+
+ idx = cso->num_per_vertex++;
+ cso->per_vertex[idx].idx = i;
+ cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
+ cso->per_vertex[idx].src_offset = ve->src_offset;
+
+ idx = transkey.nr_elements++;
+ transkey.element[idx].input_format = ve->src_format;
+ transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
+ transkey.element[idx].input_offset = ve->src_offset;
+ transkey.element[idx].instance_divisor = 0;
+ transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
+ if(type)
+ {
+ transkey.element[idx].output_format = ve->src_format;
+ cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type;
+ }
+ else
+ {
+ unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
+ transkey.element[idx].output_format = float32[ncomp - 1];
+ cso->needs_translate = TRUE;
+ cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT;
}
+ transkey.element[idx].output_offset = transkey.output_stride;
+ transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
}
+ }
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
+ cso->translate = translate_create(&transkey);
+ cso->vertex_length = transkey.output_stride >> 2;
+ cso->max_vertices_per_packet = 2047 / cso->vertex_length;
- count -= vc;
- start = restart;
- }
+ return (void *)cso;
}
static void
-nvfx_draw_elements(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer,
- unsigned indexSize, int indexBias,
- unsigned mode, unsigned start, unsigned count)
+nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- boolean idxbuf;
-
- idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
- if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
- nvfx_draw_elements_swtnl(pipe,
- indexBuffer, indexSize, indexBias,
- mode, start, count);
- return;
- }
-
- if (idxbuf) {
- nvfx_draw_elements_vbo(pipe, mode, start, count);
- } else {
- nvfx_draw_elements_inline(pipe,
- indexBuffer, indexSize, indexBias,
- mode, start, count);
- }
-
- pipe->flush(pipe, 0, NULL);
+ FREE(hwcso);
}
-void
-nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+static void
+nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- if (info->indexed && nvfx->idxbuf.buffer) {
- unsigned offset;
-
- assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0);
- offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size;
-
- nvfx_draw_elements(pipe,
- nvfx->idxbuf.buffer,
- nvfx->idxbuf.index_size,
- info->index_bias,
- info->mode,
- info->start + offset,
- info->count);
- }
- else {
- nvfx_draw_arrays(pipe,
- info->mode,
- info->start,
- info->count);
- }
+ nvfx->vtxelt = hwcso;
+ nvfx->use_vertex_buffers = -1;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
-boolean
-nvfx_vbo_validate(struct nvfx_context *nvfx)
+static void
+nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
{
- struct nouveau_channel* chan = nvfx->screen->base.channel;
- struct pipe_resource *ib = nvfx->idxbuf_buffer;
- unsigned ib_format = nvfx->idxbuf_format;
- int i;
- int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
- uint32_t vtxfmt[16];
- unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
-
- if (!elements)
- return TRUE;
-
- nvfx->vbo_bo = 0;
-
- MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
- for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
- struct pipe_vertex_element *ve;
- struct pipe_vertex_buffer *vb;
- unsigned type, ncomp;
-
- ve = &nvfx->vtxelt->pipe[i];
- vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-
- if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
- MARK_UNDO(chan);
- nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
- return FALSE;
- }
+ struct nvfx_context *nvfx = nvfx_context(pipe);
- if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
- nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
- vtxfmt[i] = type;
- } else {
- vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
- (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
- nvfx->vbo_bo |= (1 << i);
- }
+ for(unsigned i = 0; i < count; ++i)
+ {
+ pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
+ nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
+ nvfx->vtxbuf[i].max_index = vb[i].max_index;
+ nvfx->vtxbuf[i].stride = vb[i].stride;
}
- for(; i < elements; ++i)
- vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
+ for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
+ pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
- OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
- OUT_RINGp(chan, vtxfmt, elements);
-
- if(nvfx->is_nv4x) {
- unsigned i;
- /* seems to be some kind of cache flushing */
- for(i = 0; i < 3; ++i) {
- OUT_RING(chan, RING_3D(0x1718, 1));
- OUT_RING(chan, 0);
- }
- }
-
- OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
- for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
- struct pipe_vertex_element *ve;
- struct pipe_vertex_buffer *vb;
+ nvfx->vtxbuf_nr = count;
+ nvfx->use_vertex_buffers = -1;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+}
- ve = &nvfx->vtxelt->pipe[i];
- vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+static void
+nvfx_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
- if (!(nvfx->vbo_bo & (1 << i)))
- OUT_RING(chan, 0);
- else
- {
- struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
- OUT_RELOC(chan, bo,
- vb->buffer_offset + ve->src_offset,
- vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
- 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
- }
+ if(ib)
+ {
+ pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
+ nvfx->idxbuf.index_size = ib->index_size;
+ nvfx->idxbuf.offset = ib->offset;
}
-
- for (; i < elements; i++)
- OUT_RING(chan, 0);
-
- OUT_RING(chan, RING_3D(0x1710, 1));
- OUT_RING(chan, 0);
-
- if (ib) {
- unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
- struct nouveau_bo* bo = nvfx_resource(ib)->bo;
-
- assert(nvfx->screen->index_buffer_reloc_flags);
-
- OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
- OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0);
- OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
- 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+ else
+ {
+ pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
+ nvfx->idxbuf.index_size = 0;
+ nvfx->idxbuf.offset = 0;
}
- nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
- return TRUE;
+ nvfx->dirty |= NVFX_NEW_INDEX;
+ nvfx->draw_dirty |= NVFX_NEW_INDEX;
}
void
-nvfx_vbo_relocate(struct nvfx_context *nvfx)
+nvfx_init_vbo_functions(struct nvfx_context *nvfx)
{
- struct nouveau_channel* chan = nvfx->screen->base.channel;
- unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
- int i;
+ nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
+ nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
- MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
- for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
- if(nvfx->vbo_bo & (1 << i)) {
- struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
- struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
- struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
- OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
- vb_flags, 0, 0);
- OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
- vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
- 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
- }
- }
-
- if(nvfx->idxbuf_buffer)
- {
- unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
- struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo;
-
- assert(nvfx->screen->index_buffer_reloc_flags);
-
- OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
- ib_flags, 0, 0);
- OUT_RELOC(chan, bo, 0,
- ib_flags | NOUVEAU_BO_LOW, 0, 0);
- OUT_RELOC(chan, bo, nvfx->idxbuf_format,
- ib_flags | NOUVEAU_BO_OR,
- 0, NV34TCL_IDXBUF_FORMAT_DMA1);
- }
+ nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
+ nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
+ nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index 24d9846310..ea7e88c561 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -1,15 +1,19 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
-#include "util/u_inlines.h"
+#include "util/u_linkage.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_util.h"
+#include "draw/draw_context.h"
+
#include "nvfx_context.h"
#include "nvfx_state.h"
+#include "nvfx_resource.h"
/* TODO (at least...):
* 1. Indexed consts + ARL
@@ -25,26 +29,34 @@
#include "nv30_vertprog.h"
#include "nv40_vertprog.h"
-#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+struct nvfx_loop_entry
+{
+ unsigned brk_target;
+ unsigned cont_target;
+};
struct nvfx_vpc {
+ struct nvfx_context* nvfx;
struct nvfx_vertex_program *vp;
struct nvfx_vertex_program_exec *vpi;
unsigned r_temps;
unsigned r_temps_discard;
- struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
- struct nvfx_sreg *r_address;
- struct nvfx_sreg *r_temp;
+ struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nvfx_reg *r_address;
+ struct nvfx_reg *r_temp;
- struct nvfx_sreg *imm;
+ struct nvfx_reg *imm;
unsigned nr_imm;
unsigned hpos_idx;
+
+ struct util_dynarray label_relocs;
+ struct util_dynarray loop_stack;
};
-static struct nvfx_sreg
+static struct nvfx_reg
temp(struct nvfx_vpc *vpc)
{
int idx = ffs(~vpc->r_temps) - 1;
@@ -52,22 +64,22 @@ temp(struct nvfx_vpc *vpc)
if (idx < 0) {
NOUVEAU_ERR("out of temps!!\n");
assert(0);
- return nvfx_sr(NVFXSR_TEMP, 0);
+ return nvfx_reg(NVFXSR_TEMP, 0);
}
vpc->r_temps |= (1 << idx);
vpc->r_temps_discard |= (1 << idx);
- return nvfx_sr(NVFXSR_TEMP, idx);
+ return nvfx_reg(NVFXSR_TEMP, idx);
}
-static INLINE void
+static inline void
release_temps(struct nvfx_vpc *vpc)
{
vpc->r_temps &= ~vpc->r_temps_discard;
vpc->r_temps_discard = 0;
}
-static struct nvfx_sreg
+static struct nvfx_reg
constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
{
struct nvfx_vertex_program *vp = vpc->vp;
@@ -77,7 +89,7 @@ constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
if (pipe >= 0) {
for (idx = 0; idx < vp->nr_consts; idx++) {
if (vp->consts[idx].index == pipe)
- return nvfx_sr(NVFXSR_CONST, idx);
+ return nvfx_reg(NVFXSR_CONST, idx);
}
}
@@ -90,35 +102,36 @@ constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
vpd->value[1] = y;
vpd->value[2] = z;
vpd->value[3] = w;
- return nvfx_sr(NVFXSR_CONST, idx);
+ return nvfx_reg(NVFXSR_CONST, idx);
}
-#define arith(cc,s,o,d,m,s0,s1,s2) \
- nvfx_vp_arith(nvfx, (cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2))
+#define arith(s,o,d,m,s0,s1,s2) \
+ nvfx_insn(0, (NVFX_VP_INST_SLOT_##s << 7) | NVFX_VP_INST_##s##_OP_##o, -1, (d), (m), (s0), (s1), (s2))
static void
-emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src)
+emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_src src)
{
struct nvfx_vertex_program *vp = vpc->vp;
uint32_t sr = 0;
+ struct nvfx_relocation reloc;
- switch (src.type) {
+ switch (src.reg.type) {
case NVFXSR_TEMP:
sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));
- sr |= (src.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
+ sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
break;
case NVFXSR_INPUT:
sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
NVFX_VP(SRC_REG_TYPE_SHIFT));
- vp->ir |= (1 << src.index);
- hw[1] |= (src.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
+ vp->ir |= (1 << src.reg.index);
+ hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
break;
case NVFXSR_CONST:
sr |= (NVFX_VP(SRC_REG_TYPE_CONST) <<
NVFX_VP(SRC_REG_TYPE_SHIFT));
- assert(vpc->vpi->const_index == -1 ||
- vpc->vpi->const_index == src.index);
- vpc->vpi->const_index = src.index;
+ reloc.location = vp->nr_insns - 1;
+ reloc.target = src.reg.index;
+ util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);
break;
case NVFXSR_NONE:
sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
@@ -161,100 +174,67 @@ emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos,
}
static void
-emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst)
+emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_reg dst)
{
struct nvfx_vertex_program *vp = vpc->vp;
switch (dst.type) {
+ case NVFXSR_NONE:
+ if(!nvfx->is_nv4x)
+ hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK;
+ else {
+ hw[3] |= NV40_VP_INST_DEST_MASK;
+ if (slot == 0)
+ hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
+ else
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ }
+ break;
case NVFXSR_TEMP:
if(!nvfx->is_nv4x)
hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
else {
hw[3] |= NV40_VP_INST_DEST_MASK;
- if (slot == 0) {
- hw[0] |= (dst.index <<
- NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
- } else {
- hw[3] |= (dst.index <<
- NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
- }
+ if (slot == 0)
+ hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+ else
+ hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
}
break;
case NVFXSR_OUTPUT:
/* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
- switch (dst.index) {
- case NVFX_VP_INST_DEST_CLIP(0):
- vp->or |= (1 << 6);
- vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0;
- dst.index = NVFX_VP(INST_DEST_FOGC);
- break;
- case NVFX_VP_INST_DEST_CLIP(1):
- vp->or |= (1 << 7);
- vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1;
- dst.index = NVFX_VP(INST_DEST_FOGC);
- break;
- case NVFX_VP_INST_DEST_CLIP(2):
- vp->or |= (1 << 8);
- vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2;
- dst.index = NVFX_VP(INST_DEST_FOGC);
- break;
- case NVFX_VP_INST_DEST_CLIP(3):
- vp->or |= (1 << 9);
- vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3;
- dst.index = NVFX_VP(INST_DEST_PSZ);
- break;
- case NVFX_VP_INST_DEST_CLIP(4):
- vp->or |= (1 << 10);
- vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4;
- dst.index = NVFX_VP(INST_DEST_PSZ);
- break;
- case NVFX_VP_INST_DEST_CLIP(5):
- vp->or |= (1 << 11);
- vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5;
- dst.index = NVFX_VP(INST_DEST_PSZ);
- break;
- default:
- if(!nvfx->is_nv4x) {
- switch (dst.index) {
- case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
- case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
- case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
- case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
- case NV30_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
- case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
- case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
- case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
- case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
- case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
- case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
- case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
- case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
- case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
- }
- } else {
- switch (dst.index) {
- case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
- case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
- case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
- case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
- case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
- case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
- case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
- case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
- case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
- case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
- case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
- case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
- case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
- case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
- }
+ if(nvfx->is_nv4x) {
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_CLP(0):
+ dst.index = NVFX_VP(INST_DEST_FOGC);
+ break;
+ case NV30_VP_INST_DEST_CLP(1):
+ dst.index = NVFX_VP(INST_DEST_FOGC);
+ break;
+ case NV30_VP_INST_DEST_CLP(2):
+ dst.index = NVFX_VP(INST_DEST_FOGC);
+ break;
+ case NV30_VP_INST_DEST_CLP(3):
+ dst.index = NVFX_VP(INST_DEST_PSZ);
+ break;
+ case NV30_VP_INST_DEST_CLP(4):
+ dst.index = NVFX_VP(INST_DEST_PSZ);
+ break;
+ case NV30_VP_INST_DEST_CLP(5):
+ dst.index = NVFX_VP(INST_DEST_PSZ);
+ break;
+ case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
+ case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
}
- break;
}
if(!nvfx->is_nv4x) {
hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
- hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK;
/*XXX: no way this is entirely correct, someone needs to
* figure out what exactly it is.
@@ -264,7 +244,7 @@ emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot
hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
if (slot == 0) {
hw[0] |= NV40_VP_INST_VEC_RESULT;
- hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+ hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
} else {
hw[3] |= NV40_VP_INST_SCA_RESULT;
hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
@@ -277,26 +257,27 @@ emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot
}
static void
-nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op,
- struct nvfx_sreg dst, int mask,
- struct nvfx_sreg s0, struct nvfx_sreg s1,
- struct nvfx_sreg s2)
+nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn)
{
+ struct nvfx_context* nvfx = vpc->nvfx;
struct nvfx_vertex_program *vp = vpc->vp;
+ unsigned slot = insn.op >> 7;
+ unsigned op = insn.op & 0x7f;
uint32_t *hw;
vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
vpc->vpi = &vp->insns[vp->nr_insns - 1];
memset(vpc->vpi, 0, sizeof(*vpc->vpi));
- vpc->vpi->const_index = -1;
hw = vpc->vpi->data;
- hw[0] |= (NVFX_COND_TR << NVFX_VP(INST_COND_SHIFT));
- hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
- (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
- (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
- (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
+ hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT));
+ hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
+ (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
+ (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
+ (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
+ if(insn.cc_update)
+ hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);
if(!nvfx->is_nv4x) {
if(slot == 0)
@@ -309,54 +290,56 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op,
// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));
- if (dst.type == NVFXSR_OUTPUT) {
+ if (insn.dst.type == NVFXSR_OUTPUT) {
if (slot)
- hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
else
- hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
} else {
if (slot)
- hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
else
- hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
}
} else {
if (slot == 0) {
hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
- hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
+ hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
} else {
hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
- hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20));
- hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
+ hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK ;
+ hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
}
}
- emit_dst(nvfx, vpc, hw, slot, dst);
- emit_src(nvfx, vpc, hw, 0, s0);
- emit_src(nvfx, vpc, hw, 1, s1);
- emit_src(nvfx, vpc, hw, 2, s2);
+ emit_dst(nvfx, vpc, hw, slot, insn.dst);
+ emit_src(nvfx, vpc, hw, 0, insn.src[0]);
+ emit_src(nvfx, vpc, hw, 1, insn.src[1]);
+ emit_src(nvfx, vpc, hw, 2, insn.src[2]);
}
-static INLINE struct nvfx_sreg
+static inline struct nvfx_src
tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
- struct nvfx_sreg src = { 0 };
+ struct nvfx_src src;
switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index);
+ src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index);
break;
case TGSI_FILE_CONSTANT:
- src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
+ src.reg = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
break;
case TGSI_FILE_IMMEDIATE:
- src = vpc->imm[fsrc->Register.Index];
+ src.reg = vpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- src = vpc->r_temp[fsrc->Register.Index];
+ src.reg = vpc->r_temp[fsrc->Register.Index];
break;
default:
NOUVEAU_ERR("bad src file\n");
+ src.reg.index = 0;
+ src.reg.type = 0;
break;
}
@@ -369,11 +352,14 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
return src;
}
-static INLINE struct nvfx_sreg
+static INLINE struct nvfx_reg
tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
- struct nvfx_sreg dst = { 0 };
+ struct nvfx_reg dst;
switch (fdst->Register.File) {
+ case TGSI_FILE_NULL:
+ dst = nvfx_reg(NVFXSR_NONE, 0);
+ break;
case TGSI_FILE_OUTPUT:
dst = vpc->r_result[fdst->Register.Index];
break;
@@ -384,14 +370,16 @@ tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
dst = vpc->r_address[fdst->Register.Index];
break;
default:
- NOUVEAU_ERR("bad dst file\n");
+ NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File);
+ dst.index = 0;
+ dst.type = 0;
break;
}
return dst;
}
-static INLINE int
+static inline int
tgsi_mask(uint tgsi)
{
int mask = 0;
@@ -405,10 +393,14 @@ tgsi_mask(uint tgsi)
static boolean
nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
- const struct tgsi_full_instruction *finst)
+ unsigned idx, const struct tgsi_full_instruction *finst)
{
- struct nvfx_sreg src[3], dst, tmp;
- struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+ struct nvfx_src src[3], tmp;
+ struct nvfx_reg dst;
+ struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
+ struct nvfx_insn insn;
+ struct nvfx_relocation reloc;
+ struct nvfx_loop_entry loop;
int mask;
int ai = -1, ci = -1, ii = -1;
int i;
@@ -436,9 +428,8 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
ai = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
- src[i] = temp(vpc);
- arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL,
- tgsi_src(vpc, fsrc), none, none);
+ src[i] = nvfx_src(temp(vpc));
+ nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
}
break;
case TGSI_FILE_CONSTANT:
@@ -447,9 +438,8 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
ci = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
- src[i] = temp(vpc);
- arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL,
- tgsi_src(vpc, fsrc), none, none);
+ src[i] = nvfx_src(temp(vpc));
+ nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
}
break;
case TGSI_FILE_IMMEDIATE:
@@ -458,9 +448,8 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
ii = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
- src[i] = temp(vpc);
- arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL,
- tgsi_src(vpc, fsrc), none, none);
+ src[i] = nvfx_src(temp(vpc));
+ nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
}
break;
case TGSI_FILE_TEMPORARY:
@@ -477,128 +466,231 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
- arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none);
+ nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, abs(src[0]), none, none));
break;
case TGSI_OPCODE_ADD:
- arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]);
+ nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, src[0], none, src[1]));
break;
case TGSI_OPCODE_ARL:
- arith(vpc, VEC, ARL, dst, mask, src[0], none, none);
+ nvfx_vp_emit(vpc, arith(VEC, ARL, dst, mask, src[0], none, none));
+ break;
+ case TGSI_OPCODE_CMP:
+ insn = arith(VEC, MOV, none.reg, mask, src[0], none, none);
+ insn.cc_update = 1;
+ nvfx_vp_emit(vpc, insn);
+
+ insn = arith(VEC, MOV, dst, mask, src[2], none, none);
+ insn.cc_test = NVFX_COND_GE;
+ nvfx_vp_emit(vpc, insn);
+
+ insn = arith(VEC, MOV, dst, mask, src[1], none, none);
+ insn.cc_test = NVFX_COND_LT;
+ nvfx_vp_emit(vpc, insn);
break;
case TGSI_OPCODE_COS:
- arith(vpc, SCA, COS, dst, mask, none, none, src[0]);
+ nvfx_vp_emit(vpc, arith(SCA, COS, dst, mask, none, none, src[0]));
break;
+ case TGSI_OPCODE_DP2:
+ tmp = nvfx_src(temp(vpc));
+ nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none));
+ nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
+ break;
case TGSI_OPCODE_DP3:
- arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, DP3, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_DP4:
- arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, DP4, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_DPH:
- arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, DPH, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_DST:
- arith(vpc, VEC, DST, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, DST, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_EX2:
- arith(vpc, SCA, EX2, dst, mask, none, none, src[0]);
+ nvfx_vp_emit(vpc, arith(SCA, EX2, dst, mask, none, none, src[0]));
break;
case TGSI_OPCODE_EXP:
- arith(vpc, SCA, EXP, dst, mask, none, none, src[0]);
+ nvfx_vp_emit(vpc, arith(SCA, EXP, dst, mask, none, none, src[0]));
break;
case TGSI_OPCODE_FLR:
- arith(vpc, VEC, FLR, dst, mask, src[0], none, none);
+ nvfx_vp_emit(vpc, arith(VEC, FLR, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_FRC:
- arith(vpc, VEC, FRC, dst, mask, src[0], none, none);
+ nvfx_vp_emit(vpc, arith(VEC, FRC, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_LG2:
- arith(vpc, SCA, LG2, dst, mask, none, none, src[0]);
+ nvfx_vp_emit(vpc, arith(SCA, LG2, dst, mask, none, none, src[0]));
break;
case TGSI_OPCODE_LIT:
- arith(vpc, SCA, LIT, dst, mask, none, none, src[0]);
+ nvfx_vp_emit(vpc, arith(SCA, LIT, dst, mask, none, none, src[0]));
break;
case TGSI_OPCODE_LOG:
- arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);
+ nvfx_vp_emit(vpc, arith(SCA, LOG, dst, mask, none, none, src[0]));
break;
case TGSI_OPCODE_LRP:
- tmp = temp(vpc);
- arith(vpc, VEC, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
- arith(vpc, VEC, MAD, dst, mask, src[0], src[1], tmp);
+ tmp = nvfx_src(temp(vpc));
+ nvfx_vp_emit(vpc, arith(VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
+ nvfx_vp_emit(vpc, arith(VEC, MAD, dst, mask, src[0], src[1], tmp));
break;
case TGSI_OPCODE_MAD:
- arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);
+ nvfx_vp_emit(vpc, arith(VEC, MAD, dst, mask, src[0], src[1], src[2]));
break;
case TGSI_OPCODE_MAX:
- arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, MAX, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_MIN:
- arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, MIN, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_MOV:
- arith(vpc, VEC, MOV, dst, mask, src[0], none, none);
+ nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, src[0], none, none));
break;
case TGSI_OPCODE_MUL:
- arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, MUL, dst, mask, src[0], src[1], none));
+ break;
+ case TGSI_OPCODE_NOP:
break;
case TGSI_OPCODE_POW:
- tmp = temp(vpc);
- arith(vpc, SCA, LG2, tmp, NVFX_VP_MASK_X, none, none,
- swz(src[0], X, X, X, X));
- arith(vpc, VEC, MUL, tmp, NVFX_VP_MASK_X, swz(tmp, X, X, X, X),
- swz(src[1], X, X, X, X), none);
- arith(vpc, SCA, EX2, dst, mask, none, none,
- swz(tmp, X, X, X, X));
+ tmp = nvfx_src(temp(vpc));
+ nvfx_vp_emit(vpc, arith(SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)));
+ nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
+ nvfx_vp_emit(vpc, arith(SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)));
break;
case TGSI_OPCODE_RCP:
- arith(vpc, SCA, RCP, dst, mask, none, none, src[0]);
- break;
- case TGSI_OPCODE_RET:
+ nvfx_vp_emit(vpc, arith(SCA, RCP, dst, mask, none, none, src[0]));
break;
case TGSI_OPCODE_RSQ:
- arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0]));
+ nvfx_vp_emit(vpc, arith(SCA, RSQ, dst, mask, none, none, abs(src[0])));
break;
case TGSI_OPCODE_SEQ:
- arith(vpc, VEC, SEQ, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SEQ, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SFL:
- arith(vpc, VEC, SFL, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SFL, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SGE:
- arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SGE, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SGT:
- arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SGT, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SIN:
- arith(vpc, SCA, SIN, dst, mask, none, none, src[0]);
+ nvfx_vp_emit(vpc, arith(SCA, SIN, dst, mask, none, none, src[0]));
break;
case TGSI_OPCODE_SLE:
- arith(vpc, VEC, SLE, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SLE, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SLT:
- arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SLT, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SNE:
- arith(vpc, VEC, SNE, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SNE, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SSG:
- arith(vpc, VEC, SSG, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, SSG, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_STR:
- arith(vpc, VEC, STR, dst, mask, src[0], src[1], none);
+ nvfx_vp_emit(vpc, arith(VEC, STR, dst, mask, src[0], src[1], none));
break;
case TGSI_OPCODE_SUB:
- arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));
+ nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, src[0], none, neg(src[1])));
break;
+ case TGSI_OPCODE_TRUNC:
+ tmp = nvfx_src(temp(vpc));
+ insn = arith(VEC, MOV, none.reg, mask, src[0], none, none);
+ insn.cc_update = 1;
+ nvfx_vp_emit(vpc, insn);
+
+ nvfx_vp_emit(vpc, arith(VEC, FLR, tmp.reg, mask, abs(src[0]), none, none));
+ nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, tmp, none, none));
+
+ insn = arith(VEC, MOV, dst, mask, neg(tmp), none, none);
+ insn.cc_test = NVFX_COND_LT;
+ nvfx_vp_emit(vpc, insn);
+ break;
case TGSI_OPCODE_XPD:
- tmp = temp(vpc);
- arith(vpc, VEC, MUL, tmp, mask,
- swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
- arith(vpc, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W),
- swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
- neg(tmp));
+ tmp = nvfx_src(temp(vpc));
+ nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
+ nvfx_vp_emit(vpc, arith(VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
+ break;
+
+ case TGSI_OPCODE_IF:
+ insn = arith(VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
+ insn.cc_update = 1;
+ nvfx_vp_emit(vpc, insn);
+
+ reloc.location = vpc->vp->nr_insns;
+ reloc.target = finst->Label.Label + 1;
+ util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
+
+ insn = arith(SCA, BRA, none.reg, 0, none, none, none);
+ insn.cc_test = NVFX_COND_EQ;
+ insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0;
+ nvfx_vp_emit(vpc, insn);
break;
+
+ case TGSI_OPCODE_ELSE:
+ case TGSI_OPCODE_BRA:
+ case TGSI_OPCODE_CAL:
+ reloc.location = vpc->vp->nr_insns;
+ reloc.target = finst->Label.Label;
+ util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
+
+ if(finst->Instruction.Opcode == TGSI_OPCODE_CAL)
+ insn = arith(SCA, CAL, none.reg, 0, none, none, none);
+ else
+ insn = arith(SCA, BRA, none.reg, 0, none, none, none);
+ nvfx_vp_emit(vpc, insn);
+ break;
+
+ case TGSI_OPCODE_RET:
+ tmp = none;
+ tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;
+ nvfx_vp_emit(vpc, arith(SCA, RET, none.reg, 0, none, none, tmp));
+ break;
+
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ case TGSI_OPCODE_ENDIF:
+ /* nothing to do here */
+ break;
+
+ case TGSI_OPCODE_BGNLOOP:
+ loop.cont_target = idx;
+ loop.brk_target = finst->Label.Label + 1;
+ util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop);
+ break;
+
+ case TGSI_OPCODE_ENDLOOP:
+ loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry);
+
+ reloc.location = vpc->vp->nr_insns;
+ reloc.target = loop.cont_target;
+ util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
+
+ nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
+ break;
+
+ case TGSI_OPCODE_CONT:
+ loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);
+
+ reloc.location = vpc->vp->nr_insns;
+ reloc.target = loop.cont_target;
+ util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
+
+ nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
+ break;
+
+ case TGSI_OPCODE_BRK:
+ loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);
+
+ reloc.location = vpc->vp->nr_insns;
+ reloc.target = loop.brk_target;
+ util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
+
+ nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
+ break;
+
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
return FALSE;
@@ -649,12 +741,8 @@ nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
hw = NVFX_VP(INST_DEST_PSZ);
break;
case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.Index <= 7) {
- hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index));
- } else {
- NOUVEAU_ERR("bad generic semantic index\n");
- return FALSE;
- }
+ hw = (vpc->vp->generic_to_fp_input[fdec->Semantic.Index] & 0xf)
+ + NVFX_VP(INST_DEST_TC(0)) - NVFX_FP_OP_INPUT_SRC_TC(0);
break;
case TGSI_SEMANTIC_EDGEFLAG:
/* not really an error just a fallback */
@@ -665,7 +753,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
return FALSE;
}
- vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
+ vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
return TRUE;
}
@@ -674,6 +762,36 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
{
struct tgsi_parse_context p;
int high_temp = -1, high_addr = -1, nr_imm = 0, i;
+ struct util_semantic_set set;
+ unsigned char sem_layout[8];
+ unsigned num_outputs;
+
+ num_outputs = util_semantic_set_from_program_file(&set, vpc->vp->pipe.tokens, TGSI_FILE_OUTPUT);
+
+ if(num_outputs > 8) {
+ NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs);
+ return FALSE;
+ }
+ util_semantic_layout_from_set(sem_layout, &set, 8, 8);
+
+ /* hope 0xf is (0, 0, 0, 1) initialized; otherwise, we are _probably_ not required to do this */
+ memset(vpc->vp->generic_to_fp_input, 0x0f, sizeof(vpc->vp->generic_to_fp_input));
+ for(int i = 0; i < 8; ++i) {
+ if(sem_layout[i] == 0xff)
+ continue;
+ //printf("vp: GENERIC[%i] to fpreg %i\n", sem_layout[i], NVFX_FP_OP_INPUT_SRC_TC(0) + i);
+ vpc->vp->generic_to_fp_input[sem_layout[i]] = 0xf0 | NVFX_FP_OP_INPUT_SRC_TC(i);
+ }
+
+ vpc->vp->sprite_fp_input = -1;
+ for(int i = 0; i < 8; ++i)
+ {
+ if(sem_layout[i] == 0xff)
+ {
+ vpc->vp->sprite_fp_input = NVFX_FP_OP_INPUT_SRC_TC(i);
+ break;
+ }
+ }
tgsi_parse_init(&p, vpc->vp->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
@@ -737,18 +855,18 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
tgsi_parse_free(&p);
if (nr_imm) {
- vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg));
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg));
assert(vpc->imm);
}
if (++high_temp) {
- vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
+ vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
for (i = 0; i < high_temp; i++)
vpc->r_temp[i] = temp(vpc);
}
if (++high_addr) {
- vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg));
+ vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg));
for (i = 0; i < high_addr; i++)
vpc->r_address[i] = temp(vpc);
}
@@ -757,20 +875,31 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
return TRUE;
}
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)
+
static void
nvfx_vertprog_translate(struct nvfx_context *nvfx,
struct nvfx_vertex_program *vp)
{
struct tgsi_parse_context parse;
struct nvfx_vpc *vpc = NULL;
- struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+ struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
+ struct util_dynarray insns;
int i;
vpc = CALLOC(1, sizeof(struct nvfx_vpc));
if (!vpc)
return;
+ vpc->nvfx = nvfx;
vpc->vp = vp;
+ /* reserve space for ucps */
+ if(nvfx->use_vp_clipping)
+ {
+ for(i = 0; i < 6; ++i)
+ constant(vpc, -1, 0, 0, 0, 0);
+ }
+
if (!nvfx_vertprog_prepare(nvfx, vpc)) {
FREE(vpc);
return;
@@ -780,13 +909,15 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
* planes are enabled. We need to append code to the vtxprog
* to handle clip planes later.
*/
- if (vp->ucp.nr) {
+ /* TODO: maybe support patching this depending on whether there are ucps: not sure if it is really matters much */
+ if (nvfx->use_vp_clipping) {
vpc->r_result[vpc->hpos_idx] = temp(vpc);
vpc->r_temps_discard = 0;
}
tgsi_parse_init(&parse, vp->pipe.tokens);
+ util_dynarray_init(&insns);
while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
@@ -809,8 +940,10 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
case TGSI_TOKEN_TYPE_INSTRUCTION:
{
const struct tgsi_full_instruction *finst;
+ unsigned idx = insns.size >> 2;
+ util_dynarray_append(&insns, unsigned, vp->nr_insns);
finst = &parse.FullToken.FullInstruction;
- if (!nvfx_vertprog_parse_instruction(nvfx, vpc, finst))
+ if (!nvfx_vertprog_parse_instruction(nvfx, vpc, idx, finst))
goto out_err;
}
break;
@@ -819,43 +952,87 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
}
}
+ util_dynarray_append(&insns, unsigned, vp->nr_insns);
+
+ for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
+ {
+ struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i);
+ struct nvfx_relocation hw_reloc;
+
+ hw_reloc.location = label_reloc->location;
+ hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target];
+
+ //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target);
+
+ util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc);
+ }
+ util_dynarray_fini(&insns);
+ util_dynarray_trim(&vp->branch_relocs);
+
+ /* XXX: what if we add a RET before?! make sure we jump here...*/
+
/* Write out HPOS if it was redirected to a temp earlier */
if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
- struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT,
+ struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT,
NVFX_VP(INST_DEST_POS));
- struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
+ struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
- arith(vpc, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none);
+ nvfx_vp_emit(vpc, arith(VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none));
}
/* Insert code to handle user clip planes */
- for (i = 0; i < vp->ucp.nr; i++) {
- struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT,
- NVFX_VP_INST_DEST_CLIP(i));
- struct nvfx_sreg ceqn = constant(vpc, -1,
- nvfx->clip.ucp[i][0],
- nvfx->clip.ucp[i][1],
- nvfx->clip.ucp[i][2],
- nvfx->clip.ucp[i][3]);
- struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
- unsigned mask;
-
- switch (i) {
- case 0: case 3: mask = NVFX_VP_MASK_Y; break;
- case 1: case 4: mask = NVFX_VP_MASK_Z; break;
- case 2: case 5: mask = NVFX_VP_MASK_W; break;
- default:
- NOUVEAU_ERR("invalid clip dist #%d\n", i);
- goto out_err;
+ if(nvfx->use_vp_clipping)
+ {
+ for (i = 0; i < 6; i++) {
+ struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i));
+ struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, i));
+ struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
+ unsigned mask;
+
+ if(nvfx->is_nv4x)
+ {
+ switch (i) {
+ case 0: case 3: mask = NVFX_VP_MASK_Y; break;
+ case 1: case 4: mask = NVFX_VP_MASK_Z; break;
+ case 2: case 5: mask = NVFX_VP_MASK_W; break;
+ default:
+ NOUVEAU_ERR("invalid clip dist #%d\n", i);
+ goto out_err;
+ }
+ }
+ else
+ mask = NVFX_VP_MASK_X;
+
+ nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none));
}
+ }
+ else
+ {
+ if(vp->nr_insns)
+ vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
- arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none);
+ nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none));
+ vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
}
- vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
+ if(debug_get_option_nvfx_dump_vp())
+ {
+ debug_printf("\n");
+ tgsi_dump(vp->pipe.tokens, 0);
+
+ debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
+ for (i = 0; i < vp->nr_insns; i++)
+ debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
+ debug_printf("\n");
+ }
+
+ vp->clip_nr = -1;
+ vp->exec_start = -1;
vp->translated = TRUE;
out_err:
tgsi_parse_free(&parse);
+ util_dynarray_fini(&vpc->label_relocs);
+ util_dynarray_fini(&vpc->loop_stack);
if (vpc->r_temp)
FREE(vpc->r_temp);
if (vpc->r_address)
@@ -868,26 +1045,17 @@ out_err:
boolean
nvfx_vertprog_validate(struct nvfx_context *nvfx)
{
- struct pipe_context *pipe = &nvfx->pipe;
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *eng3d = screen->eng3d;
struct nvfx_vertex_program *vp;
struct pipe_resource *constbuf;
- struct pipe_transfer *transfer = NULL;
boolean upload_code = FALSE, upload_data = FALSE;
int i;
if (nvfx->render_mode == HW) {
vp = nvfx->vertprog;
constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
-
- // TODO: ouch! can't we just use constant slots for these?!
- if ((nvfx->dirty & NVFX_NEW_UCP) ||
- memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) {
- nvfx_vertprog_destroy(nvfx, vp);
- memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp));
- }
} else {
vp = nvfx->swtnl.vertprog;
constbuf = NULL;
@@ -918,7 +1086,11 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
}
if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
- assert(0);
+ {
+ debug_printf("Vertex shader too long: %u instructions\n", vplen);
+ nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
+ return FALSE;
+ }
}
upload_code = TRUE;
@@ -937,7 +1109,11 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
}
if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
- assert(0);
+ {
+ debug_printf("Vertex shader uses too many constants: %u constants\n", vp->nr_consts);
+ nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
+ return FALSE;
+ }
}
/*XXX: handle this some day */
@@ -952,44 +1128,57 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
* fixup offsets and register IDs.
*/
if (vp->exec_start != vp->exec->start) {
- for (i = 0; i < vp->nr_insns; i++) {
- struct nvfx_vertex_program_exec *vpi = &vp->insns[i];
+ //printf("vp_relocs %u -> %u\n", vp->exec_start, vp->exec->start);
+ for(unsigned i = 0; i < vp->branch_relocs.size; i += sizeof(struct nvfx_relocation))
+ {
+ struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->branch_relocs.data + i);
+ uint32_t* hw = vp->insns[reloc->location].data;
+ unsigned target = vp->exec->start + reloc->target;
+
+ //debug_printf("vp_reloc hw %u -> hw %u\n", reloc->location, target);
- if (vpi->has_branch_offset) {
- assert(0);
+ if(!nvfx->is_nv4x)
+ {
+ hw[2] &=~ NV30_VP_INST_IADDR_MASK;
+ hw[2] |= (target & 0x1ff) << NV30_VP_INST_IADDR_SHIFT;
+ }
+ else
+ {
+ hw[3] &=~ NV40_VP_INST_IADDRL_MASK;
+ hw[3] |= (target & 7) << NV40_VP_INST_IADDRL_SHIFT;
+
+ hw[2] &=~ NV40_VP_INST_IADDRH_MASK;
+ hw[2] |= ((target >> 3) & 0x3f) << NV40_VP_INST_IADDRH_SHIFT;
}
}
vp->exec_start = vp->exec->start;
}
- if (vp->nr_consts && vp->data_start != vp->data->start) {
- for (i = 0; i < vp->nr_insns; i++) {
- struct nvfx_vertex_program_exec *vpi = &vp->insns[i];
+ if (vp->data_start != vp->data->start) {
+ for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation))
+ {
+ struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i);
+ struct nvfx_vertex_program_exec *vpi = &vp->insns[reloc->location];
- if (vpi->const_index >= 0) {
- vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK);
- vpi->data[1] |=
- (vpi->const_index + vp->data->start) <<
+ vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK);
+ vpi->data[1] |=
+ (reloc->target + vp->data->start) <<
NVFX_VP(INST_CONST_SRC_SHIFT);
-
- }
}
vp->data_start = vp->data->start;
+ upload_code = TRUE;
}
/* Update + Upload constant values */
if (vp->nr_consts) {
float *map = NULL;
- if (constbuf) {
- map = pipe_buffer_map(pipe, constbuf,
- PIPE_TRANSFER_READ,
- &transfer);
- }
+ if (constbuf)
+ map = (float*)nvfx_buffer(constbuf)->data;
- for (i = 0; i < vp->nr_consts; i++) {
+ for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
struct nvfx_vertex_program_data *vpd = &vp->consts[i];
if (vpd->index >= 0) {
@@ -1005,41 +1194,28 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
OUT_RING (chan, i + vp->data->start);
OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
}
-
- if (constbuf)
- pipe_buffer_unmap(pipe, constbuf, transfer);
}
/* Upload vtxprog */
if (upload_code) {
-#if 0
- for (i = 0; i < vp->nr_insns; i++) {
- NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]);
- NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]);
- NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]);
- NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
- }
-#endif
BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1);
OUT_RING (chan, vp->exec->start);
for (i = 0; i < vp->nr_insns; i++) {
BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4);
OUT_RINGp (chan, vp->insns[i].data, 4);
}
+ vp->clip_nr = -1;
}
- if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
+ if(nvfx->dirty & (NVFX_NEW_VERTPROG))
{
- WAIT_RING(chan, 7);
+ WAIT_RING(chan, 6);
OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
OUT_RING(chan, vp->exec->start);
if(nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 2));
+ OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 1));
OUT_RING(chan, vp->ir);
- OUT_RING(chan, vp->or);
}
- OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
- OUT_RING(chan, vp->clip_ctrl);
}
return TRUE;
@@ -1048,25 +1224,63 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
void
nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
{
- vp->translated = FALSE;
-
- if (vp->nr_insns) {
+ if (vp->nr_insns)
FREE(vp->insns);
- vp->insns = NULL;
- vp->nr_insns = 0;
- }
- if (vp->nr_consts) {
+ if (vp->nr_consts)
FREE(vp->consts);
- vp->consts = NULL;
- vp->nr_consts = 0;
- }
nouveau_resource_free(&vp->exec);
- vp->exec_start = 0;
nouveau_resource_free(&vp->data);
- vp->data_start = 0;
- vp->data_start_min = 0;
- vp->ir = vp->or = vp->clip_ctrl = 0;
+ util_dynarray_fini(&vp->branch_relocs);
+ util_dynarray_fini(&vp->const_relocs);
+}
+
+static void *
+nvfx_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_vertex_program *vp;
+
+ // TODO: use a 64-bit atomic here!
+ static unsigned long long id = 0;
+
+ vp = CALLOC(1, sizeof(struct nvfx_vertex_program));
+ vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe);
+ vp->id = ++id;
+
+ return (void *)vp;
+}
+
+static void
+nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->vertprog = hwcso;
+ nvfx->dirty |= NVFX_NEW_VERTPROG;
+ nvfx->draw_dirty |= NVFX_NEW_VERTPROG;
+}
+
+static void
+nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_vertex_program *vp = hwcso;
+
+ draw_delete_vertex_shader(nvfx->draw, vp->draw);
+ nvfx_vertprog_destroy(nvfx, vp);
+ FREE((void*)vp->pipe.tokens);
+ FREE(vp);
+}
+
+void
+nvfx_init_vertprog_functions(struct nvfx_context *nvfx)
+{
+ nvfx->pipe.create_vs_state = nvfx_vp_state_create;
+ nvfx->pipe.bind_vs_state = nvfx_vp_state_bind;
+ nvfx->pipe.delete_vs_state = nvfx_vp_state_delete;
}
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e8b6c4f7af..624dadd07d 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -65,7 +65,7 @@ static void r300_release_referenced_objects(struct r300_context *r300)
unsigned i;
/* Framebuffer state. */
- util_assign_framebuffer_state(fb, NULL);
+ util_unreference_framebuffer_state(fb);
/* Textures. */
for (i = 0; i < textures->sampler_view_count; i++)
@@ -99,8 +99,10 @@ static void r300_destroy_context(struct pipe_context* context)
struct r300_context* r300 = r300_context(context);
struct r300_atom *atom;
- util_blitter_destroy(r300->blitter);
- draw_destroy(r300->draw);
+ if (r300->blitter)
+ util_blitter_destroy(r300->blitter);
+ if (r300->draw)
+ draw_destroy(r300->draw);
/* Print stats, if enabled. */
if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
@@ -112,40 +114,48 @@ static void r300_destroy_context(struct pipe_context* context)
}
}
- u_upload_destroy(r300->upload_vb);
- u_upload_destroy(r300->upload_ib);
+ if (r300->upload_vb)
+ u_upload_destroy(r300->upload_vb);
+ if (r300->upload_ib)
+ u_upload_destroy(r300->upload_ib);
- /* setup hyper-z mm */
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_hyperz_destroy_mm(r300);
-
- translate_cache_destroy(r300->tran.translate_cache);
+ if (r300->tran.translate_cache)
+ translate_cache_destroy(r300->tran.translate_cache);
+ /* XXX: This function assumes r300->query_list was initialized */
r300_release_referenced_objects(r300);
- r300->rws->cs_destroy(r300->cs);
+ if (r300->zmask_mm)
+ r300_hyperz_destroy_mm(r300);
+
+ if (r300->cs)
+ r300->rws->cs_destroy(r300->cs);
+ /* XXX: No way to tell if this was initialized or not? */
util_mempool_destroy(&r300->pool_transfers);
r300_update_num_contexts(r300->screen, -1);
- FREE(r300->aa_state.state);
- FREE(r300->blend_color_state.state);
- FREE(r300->clip_state.state);
- FREE(r300->fb_state.state);
- FREE(r300->gpu_flush.state);
- FREE(r300->hyperz_state.state);
- FREE(r300->invariant_state.state);
- FREE(r300->rs_block_state.state);
- FREE(r300->scissor_state.state);
- FREE(r300->textures_state.state);
- FREE(r300->vap_invariant_state.state);
- FREE(r300->viewport_state.state);
- FREE(r300->ztop_state.state);
- FREE(r300->fs_constants.state);
- FREE(r300->vs_constants.state);
- if (!r300->screen->caps.has_tcl) {
- FREE(r300->vertex_stream_state.state);
+ /* Free the structs allocated in r300_setup_atoms() */
+ if (r300->aa_state.state) {
+ FREE(r300->aa_state.state);
+ FREE(r300->blend_color_state.state);
+ FREE(r300->clip_state.state);
+ FREE(r300->fb_state.state);
+ FREE(r300->gpu_flush.state);
+ FREE(r300->hyperz_state.state);
+ FREE(r300->invariant_state.state);
+ FREE(r300->rs_block_state.state);
+ FREE(r300->scissor_state.state);
+ FREE(r300->textures_state.state);
+ FREE(r300->vap_invariant_state.state);
+ FREE(r300->viewport_state.state);
+ FREE(r300->ztop_state.state);
+ FREE(r300->fs_constants.state);
+ FREE(r300->vs_constants.state);
+ if (!r300->screen->caps.has_tcl) {
+ FREE(r300->vertex_stream_state.state);
+ }
}
FREE(r300);
}
@@ -158,12 +168,14 @@ void r300_flush_cb(void *data)
}
#define R300_INIT_ATOM(atomname, atomsize) \
+ do { \
r300->atomname.name = #atomname; \
r300->atomname.state = NULL; \
r300->atomname.size = atomsize; \
r300->atomname.emit = r300_emit_##atomname; \
r300->atomname.dirty = FALSE; \
- insert_at_tail(&r300->atom_list, &r300->atomname);
+ insert_at_tail(&r300->atom_list, &r300->atomname); \
+ } while (0)
static void r300_setup_atoms(struct r300_context* r300)
{
@@ -404,19 +416,21 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.destroy = r300_destroy_context;
- r300->cs = rws->cs_create(rws);
+ make_empty_list(&r300->query_list);
util_mempool_create(&r300->pool_transfers,
sizeof(struct pipe_transfer), 64,
UTIL_MEMPOOL_SINGLETHREADED);
+ r300->cs = rws->cs_create(rws);
+ if (r300->cs == NULL)
+ goto fail;
+
if (!r300screen->caps.has_tcl) {
/* Create a Draw. This is used for SW TCL. */
r300->draw = draw_create(&r300->context);
/* Enable our renderer. */
draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
- /* Enable Draw's clipping. */
- draw_set_driver_clipping(r300->draw, FALSE);
/* Disable converting points/lines to triangles. */
draw_wide_line_threshold(r300->draw, 10000000.f);
draw_wide_point_threshold(r300->draw, 10000000.f);
@@ -424,8 +438,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_setup_atoms(r300);
- make_empty_list(&r300->query_list);
-
r300_init_blit_functions(r300);
r300_init_flush_functions(r300);
r300_init_query_functions(r300);
@@ -433,6 +445,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_init_resource_functions(r300);
r300->blitter = util_blitter_create(&r300->context);
+ if (r300->blitter == NULL)
+ goto fail;
/* Render functions must be initialized after blitter. */
r300_init_render_functions(r300);
@@ -441,22 +455,25 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
/* setup hyper-z mm */
if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_hyperz_init_mm(r300);
+ if (!r300_hyperz_init_mm(r300))
+ goto fail;
r300->upload_ib = u_upload_create(&r300->context,
32 * 1024, 16,
PIPE_BIND_INDEX_BUFFER);
if (r300->upload_ib == NULL)
- goto no_upload_ib;
+ goto fail;
r300->upload_vb = u_upload_create(&r300->context,
128 * 1024, 16,
PIPE_BIND_VERTEX_BUFFER);
if (r300->upload_vb == NULL)
- goto no_upload_vb;
+ goto fail;
r300->tran.translate_cache = translate_cache_create();
+ if (r300->tran.translate_cache == NULL)
+ goto fail;
r300_init_states(&r300->context);
@@ -486,10 +503,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
return &r300->context;
- no_upload_ib:
- u_upload_destroy(r300->upload_ib);
- no_upload_vb:
- FREE(r300);
+ fail:
+ r300_destroy_context(&r300->context);
return NULL;
}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 6fa7f470f9..8f0e86fd37 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -254,8 +254,8 @@ struct r300_ztop_state {
struct r300_constant_buffer {
/* Buffer of constants */
uint32_t *ptr;
- /* Total number of vec4s */
- unsigned count;
+ /* Remapping table. */
+ unsigned *remap_table;
};
/* Query object.
@@ -449,6 +449,7 @@ struct r300_context {
struct r300_screen *screen;
/* Draw module. Used mostly for SW TCL. */
struct draw_context* draw;
+ size_t draw_vbo_size;
/* Accelerated blit support. */
struct blitter_context* blitter;
/* Stencil two-sided reference value fallback. */
@@ -649,6 +650,11 @@ void r300_translate_index_buffer(struct r300_context *r300,
/* r300_render_stencilref.c */
void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
+/* r300_render.c */
+void r300_draw_flush_vbuf(struct r300_context *r300);
+boolean r500_index_bias_supported(struct r300_context *r300);
+void r500_emit_index_bias(struct r300_context *r300, int index_bias);
+
/* r300_state.c */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0,
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d0fd45349e..232259e21d 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -180,9 +180,18 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
BEGIN_CS(size);
OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4);
- for (i = 0; i < count; i++)
- for (j = 0; j < 4; j++)
- OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j]));
+ if (buf->remap_table){
+ for (i = 0; i < count; i++) {
+ float *data = (float*)&buf->ptr[buf->remap_table[i]*4];
+ for (j = 0; j < 4; j++)
+ OUT_CS(pack_float24(data[j]));
+ }
+ } else {
+ for (i = 0; i < count; i++)
+ for (j = 0; j < 4; j++)
+ OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j]));
+ }
+
END_CS;
}
@@ -226,7 +235,7 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
{
struct r300_fragment_shader *fs = r300_fs(r300);
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
- unsigned count = fs->shader->externals_count * 4;
+ unsigned count = fs->shader->externals_count;
CS_LOCALS(r300);
if (count == 0)
@@ -234,8 +243,15 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
BEGIN_CS(size);
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
- OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count);
- OUT_CS_TABLE(buf->ptr, count);
+ OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4);
+ if (buf->remap_table){
+ for (unsigned i = 0; i < count; i++) {
+ uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
+ OUT_CS_TABLE(data, 4);
+ }
+ } else {
+ OUT_CS_TABLE(buf->ptr, count * 4);
+ }
END_CS;
}
@@ -893,7 +909,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count,
vtx_mem_size / output_count, 10);
- unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6);
+ unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5);
unsigned imm_first = vs->externals_count;
unsigned imm_end = vs->code.constants.Count;
@@ -961,6 +977,7 @@ void r300_emit_vs_constants(struct r300_context* r300,
unsigned count =
((struct r300_vertex_shader*)r300->vs_state.state)->externals_count;
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
+ unsigned i;
CS_LOCALS(r300);
if (!count)
@@ -971,7 +988,14 @@ void r300_emit_vs_constants(struct r300_context* r300,
(r300->screen->caps.is_r500 ?
R500_PVS_CONST_START : R300_PVS_CONST_START));
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
- OUT_CS_TABLE(buf->ptr, count * 4);
+ if (buf->remap_table){
+ for (i = 0; i < count; i++) {
+ uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
+ OUT_CS_TABLE(data, 4);
+ }
+ } else {
+ OUT_CS_TABLE(buf->ptr, count * 4);
+ }
END_CS;
}
@@ -1219,6 +1243,8 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
/* Emitted in flush. */
dwords += 26; /* emit_query_end */
dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
+ if (r500_index_bias_supported(r300))
+ dwords += 2;
return dwords;
}
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index fe182b6615..2b5d2e42ba 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -43,9 +43,14 @@ static void r300_flush(struct pipe_context* pipe,
u_upload_flush(r300->upload_vb);
u_upload_flush(r300->upload_ib);
+ if (r300->draw)
+ r300_draw_flush_vbuf(r300);
+
if (r300->dirty_hw) {
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
+ if (r500_index_bias_supported(r300))
+ r500_emit_index_bias(r300, 0);
r300->flush_counter++;
r300->rws->cs_flush(r300->cs);
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 2a0c30620a..9845e54610 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -257,17 +257,17 @@ static void r300_emit_fs_code_to_buffer(
shader->cb_code_size = 19 +
((code->inst_end + 1) * 6) +
imm_count * 7 +
- code->int_constant_count * 2;
+ code->int_constant_count * 2;
NEW_CB(shader->cb_code, shader->cb_code_size);
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
for(i = 0; i < code->int_constant_count; i++){
- OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4),
- code->int_constants[i]);
- }
- OUT_CB_REG(R500_US_CODE_RANGE,
+ OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4),
+ code->int_constants[i]);
+ }
+ OUT_CB_REG(R500_US_CODE_RANGE,
R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
OUT_CB_REG(R500_US_CODE_OFFSET, 0);
OUT_CB_REG(R500_US_CODE_ADDR,
@@ -386,6 +386,7 @@ static void r300_translate_fragment_shader(
compiler.state = shader->compare_state;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
+ compiler.Base.remove_unused_constants = TRUE;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;
@@ -431,9 +432,8 @@ static void r300_translate_fragment_shader(
}
if (compiler.Base.Error) {
- DBG(r300, DBG_FP, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
- " instead.\nIf there's an 'unknown opcode' message, please"
- " file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
+ fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
+ " instead.\n", compiler.Base.ErrorMsg);
if (shader->dummy) {
fprintf(stderr, "r300 FP: Cannot compile the dummy shader! "
@@ -447,7 +447,12 @@ static void r300_translate_fragment_shader(
}
/* Initialize numbers of constants for each type. */
- shader->externals_count = ttr.immediate_offset;
+ shader->externals_count = 0;
+ for (i = 0;
+ i < shader->code.constants.Count &&
+ shader->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) {
+ shader->externals_count = i+1;
+ }
shader->immediates_count = 0;
shader->rc_state_count = 0;
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 811b5646e1..eb5b0c36f8 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -354,7 +354,12 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
/* We currently don't handle decompression for 3D textures and cubemaps
* correctly. */
if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
- tex->desc.b.b.target != PIPE_TEXTURE_2D)
+ tex->desc.b.b.target != PIPE_TEXTURE_2D &&
+ tex->desc.b.b.target != PIPE_TEXTURE_RECT)
+ return;
+
+ /* Cannot flush zmask of 16-bit zbuffers. */
+ if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16)
return;
if (tex->zmask_mem[level])
@@ -373,23 +378,36 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
return;
}
-void r300_hyperz_init_mm(struct r300_context *r300)
+boolean r300_hyperz_init_mm(struct r300_context *r300)
{
struct r300_screen* r300screen = r300->screen;
int frag_pipes = r300screen->caps.num_frag_pipes;
- if (r300screen->caps.hiz_ram)
+ r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
+ if (!r300->zmask_mm)
+ return FALSE;
+
+ if (r300screen->caps.hiz_ram) {
r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
+ if (!r300->hiz_mm) {
+ u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
+ return FALSE;
+ }
+ }
- r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
+ return TRUE;
}
void r300_hyperz_destroy_mm(struct r300_context *r300)
{
struct r300_screen* r300screen = r300->screen;
- if (r300screen->caps.hiz_ram)
+ if (r300screen->caps.hiz_ram) {
u_mmDestroy(r300->hiz_mm);
+ r300->hiz_mm = NULL;
+ }
u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
index 09e1ff6625..30a23ec649 100644
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -30,6 +30,6 @@ void r300_update_hyperz_state(struct r300_context* r300);
void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
-void r300_hyperz_init_mm(struct r300_context *r300);
+boolean r300_hyperz_init_mm(struct r300_context *r300);
void r300_hyperz_destroy_mm(struct r300_context *r300);
#endif
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 60d3b600cb..6bea783f69 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1607,6 +1607,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_FORMAT_3D (1 << 25)
# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
+# define R300_TX_FORMAT_TEX_COORD_TYPE_MASK (0x3 << 25)
/* alpha modes, convenience mostly */
/* if you have alpha, pick constant appropriate to the
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 86b11ca045..20bad2c56f 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -118,13 +118,13 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
return color_control;
}
-static boolean index_bias_supported(struct r300_context *r300)
+boolean r500_index_bias_supported(struct r300_context *r300)
{
return r300->screen->caps.is_r500 &&
r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
}
-static void r500_emit_index_bias(struct r300_context *r300, int index_bias)
+void r500_emit_index_bias(struct r300_context *r300, int index_bias)
{
CS_LOCALS(r300);
@@ -199,7 +199,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
boolean emit_aos = flags & PREP_EMIT_AOS;
boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
boolean indexed = flags & PREP_INDEXED;
- boolean hw_index_bias = index_bias_supported(r300);
+ boolean hw_index_bias = r500_index_bias_supported(r300);
/* Add dirty state, index offset, and AOS. */
if (first_draw) {
@@ -506,7 +506,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
translate = TRUE;
}
- if (indexBias && !index_bias_supported(r300)) {
+ if (indexBias && !r500_index_bias_supported(r300)) {
r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset);
}
@@ -680,18 +680,11 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
if (info->indexed && r300->index_buffer.buffer) {
indices = pipe_buffer_map(pipe, r300->index_buffer.buffer,
PIPE_TRANSFER_READ, &ib_transfer);
- if (indices)
- indices = (void *) ((char *) indices + r300->index_buffer.offset);
}
- draw_set_mapped_element_buffer_range(r300->draw, (indices) ?
- r300->index_buffer.index_size : 0,
- info->index_bias,
- info->min_index,
- info->max_index,
- indices);
+ draw_set_mapped_index_buffer(r300->draw, indices);
- draw_arrays(r300->draw, info->mode, info->start, count);
+ draw_vbo(r300->draw, info);
/* XXX Not sure whether this is the best fix.
* It prevents CS from being rejected and weird assertion failures. */
@@ -707,8 +700,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
if (ib_transfer) {
pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer);
- draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start,
- info->start + count - 1, NULL);
+ draw_set_mapped_index_buffer(r300->draw, NULL);
}
}
@@ -726,8 +718,6 @@ struct r300_render {
unsigned hwprim;
/* VBO */
- struct pipe_resource* vbo;
- size_t vbo_size;
size_t vbo_offset;
size_t vbo_max_used;
void * vbo_ptr;
@@ -759,31 +749,31 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
struct pipe_screen* screen = r300->context.screen;
size_t size = (size_t)vertex_size * (size_t)count;
- if (size + r300render->vbo_offset > r300render->vbo_size)
+ if (size + r300render->vbo_offset > r300->draw_vbo_size)
{
- pipe_resource_reference(&r300->vbo, NULL);
- r300render->vbo = pipe_buffer_create(screen,
- PIPE_BIND_VERTEX_BUFFER,
- R300_MAX_DRAW_VBO_SIZE);
+ pipe_resource_reference(&r300->vbo, NULL);
+ r300->vbo = pipe_buffer_create(screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ R300_MAX_DRAW_VBO_SIZE);
r300render->vbo_offset = 0;
- r300render->vbo_size = R300_MAX_DRAW_VBO_SIZE;
+ r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE;
}
r300render->vertex_size = vertex_size;
- r300->vbo = r300render->vbo;
r300->vbo_offset = r300render->vbo_offset;
- return (r300render->vbo) ? TRUE : FALSE;
+ return (r300->vbo) ? TRUE : FALSE;
}
static void* r300_render_map_vertices(struct vbuf_render* render)
{
struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
assert(!r300render->vbo_transfer);
r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context,
- r300render->vbo,
+ r300->vbo,
PIPE_TRANSFER_WRITE,
&r300render->vbo_transfer);
@@ -798,12 +788,13 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
{
struct r300_render* r300render = r300_render(render);
struct pipe_context* context = &r300render->r300->context;
+ struct r300_context* r300 = r300render->r300;
assert(r300render->vbo_transfer);
r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
r300render->vertex_size * (max + 1));
- pipe_buffer_unmap(context, r300render->vbo, r300render->vbo_transfer);
+ pipe_buffer_unmap(context, r300->vbo, r300render->vbo_transfer);
r300render->vbo_transfer = NULL;
}
@@ -880,7 +871,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
struct r300_context* r300 = r300render->r300;
int i;
unsigned end_cs_dwords;
- unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) /
+ unsigned max_index = (r300->draw_vbo_size - r300render->vbo_offset) /
(r300render->r300->vertex_info.size * 4) - 1;
unsigned short_count;
unsigned free_dwords;
@@ -956,8 +947,6 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
r300render->base.release_vertices = r300_render_release_vertices;
r300render->base.destroy = r300_render_destroy;
- r300render->vbo = NULL;
- r300render->vbo_size = 0;
r300render->vbo_offset = 0;
return &r300render->base;
@@ -986,6 +975,12 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300)
return stage;
}
+void r300_draw_flush_vbuf(struct r300_context *r300)
+{
+ pipe_resource_reference(&r300->vbo, NULL);
+ r300->draw_vbo_size = 0;
+}
+
/****************************************************************************
* End of SW TCL functions *
***************************************************************************/
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 239edd98e3..8ccb63964e 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -23,7 +23,7 @@
#include "draw/draw_context.h"
-#include "util/u_blitter.h"
+#include "util/u_framebuffer.h"
#include "util/u_math.h"
#include "util/u_mm.h"
#include "util/u_memory.h"
@@ -748,7 +748,7 @@ static void
/* The tiling flags are dependent on the surface miplevel, unfortunately. */
r300_fb_set_tiling_flags(r300, state);
- util_assign_framebuffer_state(r300->fb_state.state, state);
+ util_copy_framebuffer_state(r300->fb_state.state, state);
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
@@ -865,6 +865,9 @@ void r300_mark_fs_code_dirty(struct r300_context *r300)
r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
}
+
+ ((struct r300_constant_buffer*)r300->fs_constants.state)->remap_table =
+ fs->shader->code.constants_remap_table;
}
/* Bind fragment shader state. */
@@ -937,9 +940,9 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */
/* Point sprites texture coordinates, 0: lower left, 1: upper right */
- float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */
+ float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */
float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
- float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */
+ float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */
CB_LOCALS;
@@ -947,6 +950,11 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
rs->rs = *state;
rs->rs_draw = *state;
+ /* Generate point sprite texture coordinates in GENERIC0
+ * if point_quad_rasterization is TRUE. */
+ rs->rs.sprite_coord_enable = state->point_quad_rasterization *
+ (state->sprite_coord_enable | 1);
+
/* Override some states for Draw. */
rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
@@ -1048,16 +1056,13 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
/* Point sprites */
stuffing_enable = 0;
- if (state->sprite_coord_enable) {
+ if (rs->rs.sprite_coord_enable) {
stuffing_enable = R300_GB_POINT_STUFF_ENABLE;
- for (i = 0; i < 8; i++) {
- if (state->sprite_coord_enable & (1 << i))
+ for (i = 0; i < 8; i++) {
+ if (rs->rs.sprite_coord_enable & (1 << i))
stuffing_enable |=
R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (i*2));
- }
-
- point_texcoord_left = 0.0f;
- point_texcoord_right = 1.0f;
+ }
switch (state->sprite_coord_mode) {
case PIPE_SPRITE_COORD_UPPER_LEFT:
@@ -1208,8 +1213,8 @@ static void*
/* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
/* We must pass these to the merge function to clamp them properly. */
- sampler->min_lod = MAX2((unsigned)state->min_lod, 0);
- sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0);
+ sampler->min_lod = (unsigned)MAX2(state->min_lod, 0);
+ sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0);
lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);
@@ -1548,7 +1553,12 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
memset(&r300->index_buffer, 0, sizeof(r300->index_buffer));
}
- /* TODO make this more like a state */
+ if (r300->screen->caps.has_tcl) {
+ /* TODO make this more like a state */
+ }
+ else {
+ draw_set_index_buffer(r300->draw, ib);
+ }
}
/* Initialize the PSC tables. */
@@ -1765,6 +1775,9 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
r300->vs_constants.size = 0;
}
+ ((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table =
+ vs->code.constants_remap_table;
+
r300->pvs_flush.dirty = TRUE;
} else {
draw_bind_vertex_shader(r300->draw,
@@ -1779,6 +1792,8 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
if (r300->screen->caps.has_tcl) {
rc_constants_destroy(&vs->code.constants);
+ if (vs->code.constants_remap_table)
+ FREE(vs->code.constants_remap_table);
} else {
draw_delete_vertex_shader(r300->draw,
(struct draw_vertex_shader*)vs->draw_vs);
@@ -1795,47 +1810,28 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_constant_buffer *cbuf;
uint32_t *mapped = r300_buffer(buf)->user_buffer;
- int max_size = 0, max_size_bytes = 0, clamped_size = 0;
switch (shader) {
case PIPE_SHADER_VERTEX:
cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
- max_size = 256;
break;
case PIPE_SHADER_FRAGMENT:
cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
- if (r300->screen->caps.is_r500) {
- max_size = 256;
- } else {
- max_size = 32;
- }
break;
default:
assert(0);
return;
}
- max_size_bytes = max_size * 4 * sizeof(float);
if (buf == NULL || buf->width0 == 0 ||
(mapped = r300_buffer(buf)->constant_buffer) == NULL) {
- cbuf->count = 0;
return;
}
if (shader == PIPE_SHADER_FRAGMENT ||
(shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) {
assert((buf->width0 % (4 * sizeof(float))) == 0);
-
- /* Check the size of the constant buffer. */
- /* XXX Subtract immediates and RC_STATE_* variables. */
- if (buf->width0 > max_size_bytes) {
- fprintf(stderr, "r300: Max size of the constant buffer is "
- "%i*4 floats.\n", max_size);
- }
-
- clamped_size = MIN2(buf->width0, max_size_bytes);
- cbuf->count = clamped_size / (4 * sizeof(float));
- cbuf->ptr = mapped;
+ cbuf->ptr = mapped + index*4;
}
if (shader == PIPE_SHADER_VERTEX) {
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 4a63ed7fc1..960dfdbaf0 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -211,7 +211,7 @@ static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
enum r300_rs_col_write_type type)
{
- assert(type != WRITE_COLOR);
+ assert(type == WRITE_COLOR);
rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
R300_RS_INST_COL_ADDR(fp_offset);
}
@@ -592,6 +592,25 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
texstate->filter1 = sampler->filter1;
texstate->border_color = sampler->border_color;
+ /* determine min/max levels */
+ max_level = MIN3(sampler->max_lod + view->base.first_level,
+ tex->desc.b.b.last_level, view->base.last_level);
+ min_level = MIN2(sampler->min_lod + view->base.first_level,
+ max_level);
+
+ if (tex->desc.is_npot && min_level > 0) {
+ /* Even though we do not implement mipmapping for NPOT
+ * textures, we should at least honor the minimum level
+ * which is allowed to be displayed. We do this by setting up
+ * an i-th mipmap level as the zero level. */
+ r300_texture_setup_format_state(r300->screen, &tex->desc,
+ min_level,
+ &texstate->format);
+ texstate->format.tile_config |=
+ tex->desc.offset_in_bytes[min_level] & 0xffffffe0;
+ assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0);
+ }
+
/* Assign a texture cache region. */
texstate->format.format1 |= view->texcache_region;
@@ -654,12 +673,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
} else {
- /* determine min/max levels */
/* the MAX_MIP level is the largest (finest) one */
- max_level = MIN3(sampler->max_lod + view->base.first_level,
- tex->desc.b.b.last_level, view->base.last_level);
- min_level = MIN2(sampler->min_lod + view->base.first_level,
- max_level);
texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level);
texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index da8eadd3b5..66f6d80bd0 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -541,48 +541,55 @@ boolean r300_is_sampler_format_supported(enum pipe_format format)
return r300_translate_texformat(format, 0, TRUE) != ~0;
}
-static void r300_texture_setup_immutable_state(struct r300_screen* screen,
- struct r300_texture* tex)
+void r300_texture_setup_format_state(struct r300_screen *screen,
+ struct r300_texture_desc *desc,
+ unsigned level,
+ struct r300_texture_format_state *out)
{
- struct r300_texture_format_state* f = &tex->tx_format;
- struct pipe_resource *pt = &tex->desc.b.b;
+ struct pipe_resource *pt = &desc->b.b;
boolean is_r500 = screen->caps.is_r500;
+ /* Mask out all the fields we change. */
+ out->format0 = 0;
+ out->format1 &= ~R300_TX_FORMAT_TEX_COORD_TYPE_MASK;
+ out->format2 &= R500_TXFORMAT_MSB;
+ out->tile_config = 0;
+
/* Set sampler state. */
- f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
- R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
+ out->format0 = R300_TX_WIDTH((u_minify(pt->width0, level) - 1) & 0x7ff) |
+ R300_TX_HEIGHT((u_minify(pt->height0, level) - 1) & 0x7ff);
- if (tex->desc.uses_stride_addressing) {
+ if (desc->uses_stride_addressing) {
/* rectangles love this */
- f->format0 |= R300_TX_PITCH_EN;
- f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff;
+ out->format0 |= R300_TX_PITCH_EN;
+ out->format2 = (desc->stride_in_pixels[level] - 1) & 0x1fff;
} else {
/* Power of two textures (3D, mipmaps, and no pitch),
* also NPOT textures with a width being POT. */
- f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf);
+ out->format0 |=
+ R300_TX_DEPTH(util_logbase2(u_minify(pt->depth0, level)) & 0xf);
}
- f->format1 = 0;
if (pt->target == PIPE_TEXTURE_CUBE) {
- f->format1 |= R300_TX_FORMAT_CUBIC_MAP;
+ out->format1 |= R300_TX_FORMAT_CUBIC_MAP;
}
if (pt->target == PIPE_TEXTURE_3D) {
- f->format1 |= R300_TX_FORMAT_3D;
+ out->format1 |= R300_TX_FORMAT_3D;
}
/* large textures on r500 */
if (is_r500)
{
if (pt->width0 > 2048) {
- f->format2 |= R500_TXWIDTH_BIT11;
+ out->format2 |= R500_TXWIDTH_BIT11;
}
if (pt->height0 > 2048) {
- f->format2 |= R500_TXHEIGHT_BIT11;
+ out->format2 |= R500_TXHEIGHT_BIT11;
}
}
- f->tile_config = R300_TXO_MACRO_TILE(tex->desc.macrotile[0]) |
- R300_TXO_MICRO_TILE(tex->desc.microtile);
+ out->tile_config = R300_TXO_MACRO_TILE(desc->macrotile[level]) |
+ R300_TXO_MICRO_TILE(desc->microtile);
}
static void r300_texture_setup_fb_state(struct r300_screen* screen,
@@ -716,7 +723,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
return NULL;
}
/* Initialize the hardware state. */
- r300_texture_setup_immutable_state(rscreen, tex);
+ r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format);
r300_texture_setup_fb_state(rscreen, tex);
tex->desc.b.vtbl = &r300_texture_vtbl;
@@ -754,7 +761,8 @@ struct pipe_resource *r300_texture_create(struct pipe_screen *screen,
/* Refuse to create a texture with size 0. */
if (!base->width0 ||
(!base->height0 && (base->target == PIPE_TEXTURE_2D ||
- base->target == PIPE_TEXTURE_CUBE)) ||
+ base->target == PIPE_TEXTURE_CUBE ||
+ base->target == PIPE_TEXTURE_RECT)) ||
(!base->depth0 && base->target == PIPE_TEXTURE_3D)) {
fprintf(stderr, "r300: texture_create: "
"Got invalid texture dimensions: %ix%ix%i\n",
@@ -787,7 +795,8 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
unsigned stride, size;
/* Support only 2D textures without mipmaps */
- if (base->target != PIPE_TEXTURE_2D ||
+ if ((base->target != PIPE_TEXTURE_2D &&
+ base->target != PIPE_TEXTURE_RECT) ||
base->depth0 != 1 ||
base->last_level != 0) {
return NULL;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index a4524320fd..c4588a0c90 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -23,11 +23,14 @@
#ifndef R300_TEXTURE_H
#define R300_TEXTURE_H
+#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
struct pipe_screen;
struct pipe_resource;
struct winsys_handle;
+struct r300_texture_format_state;
+struct r300_texture_desc;
struct r300_texture;
struct r300_screen;
@@ -50,6 +53,10 @@ boolean r300_is_zs_format_supported(enum pipe_format format);
boolean r300_is_sampler_format_supported(enum pipe_format format);
+void r300_texture_setup_format_state(struct r300_screen *screen,
+ struct r300_texture_desc *desc,
+ unsigned level,
+ struct r300_texture_format_state *out);
struct pipe_resource*
r300_texture_from_handle(struct pipe_screen* screen,
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 5d690e8c33..2fe5d72188 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -184,7 +184,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
/* This is needed for the kernel checker, unfortunately. */
if ((desc->b.b.target != PIPE_TEXTURE_1D &&
- desc->b.b.target != PIPE_TEXTURE_2D) ||
+ desc->b.b.target != PIPE_TEXTURE_2D &&
+ desc->b.b.target != PIPE_TEXTURE_RECT) ||
desc->b.b.last_level != 0) {
height = util_next_power_of_two(height);
}
@@ -202,7 +203,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
* Do so for 3 or more macrotiles in the Y direction. */
if (level == 0 && desc->b.b.last_level == 0 &&
(desc->b.b.target == PIPE_TEXTURE_1D ||
- desc->b.b.target == PIPE_TEXTURE_2D) &&
+ desc->b.b.target == PIPE_TEXTURE_2D ||
+ desc->b.b.target == PIPE_TEXTURE_RECT) &&
height >= tile_height * 3) {
height = align(height, tile_height * 2);
}
diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h
index 95de66f654..3d7fe1fb47 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.h
+++ b/src/gallium/drivers/r300/r300_texture_desc.h
@@ -24,6 +24,7 @@
#ifndef R300_TEXTURE_DESC_H
#define R300_TEXTURE_DESC_H
+#include "pipe/p_format.h"
#include "r300_defines.h"
struct pipe_resource;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index dd697b9c37..a4911b9a2a 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -97,13 +97,13 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */
/* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
- /* case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; */
+ case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
/* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
- /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
+ case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
case TGSI_OPCODE_IF: return RC_OPCODE_IF;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 54c8de1241..5f8dbb28d0 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -196,6 +196,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
{
struct r300_vertex_program_compiler compiler;
struct tgsi_to_rc ttr;
+ unsigned i;
/* Setup the compiler */
rc_init(&compiler.Base);
@@ -205,6 +206,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.UserData = vs;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.max_temp_regs = 32;
+ compiler.Base.remove_unused_constants = TRUE;
if (compiler.Base.Debug) {
DBG(r300, DBG_VP, "r300: Initial vertex program\n");
@@ -227,9 +229,8 @@ void r300_translate_vertex_shader(struct r300_context *r300,
/* Invoke the compiler */
r3xx_compile_vertex_program(&compiler);
if (compiler.Base.Error) {
- DBG(r300, DBG_VP, "r300 VP: Compiler error:\n%sUsing a dummy shader"
- " instead.\nIf there's an 'unknown opcode' message, please"
- " file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
+ fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader"
+ " instead.\n", compiler.Base.ErrorMsg);
if (vs->dummy) {
fprintf(stderr, "r300 VP: Cannot compile the dummy shader! "
@@ -243,7 +244,15 @@ void r300_translate_vertex_shader(struct r300_context *r300,
}
/* Initialize numbers of constants for each type. */
- vs->externals_count = ttr.immediate_offset;
+ vs->externals_count = 0;
+ for (i = 0;
+ i < vs->code.constants.Count &&
+ vs->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) {
+ vs->externals_count = i+1;
+ }
+ for (; i < vs->code.constants.Count; i++) {
+ assert(vs->code.constants.Constants[i].Type == RC_CONSTANT_IMMEDIATE);
+ }
vs->immediates_count = vs->code.constants.Count - vs->externals_count;
/* And, finally... */
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 187780750f..4597332399 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -33,6 +33,7 @@
#include "r300_defines.h"
+struct winsys_handle;
struct r300_winsys_screen;
struct r300_winsys_buffer;
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 9ea9d4354d..6483dac703 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -76,6 +76,27 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
{
LIST_INITHEAD(&bc->cf);
bc->family = family;
+ switch (bc->family) {
+ case CHIP_R600:
+ case CHIP_RV610:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ bc->chiprev = 0;
+ break;
+ case CHIP_RV770:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ bc->chiprev = 1;
+ break;
+ default:
+ R600_ERR("unknown family %d\n", bc->family);
+ return -EINVAL;
+ }
return 0;
}
@@ -107,7 +128,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
return 0;
}
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
{
struct r600_bc_alu *nalu = r600_bc_alu();
struct r600_bc_alu *lalu;
@@ -119,7 +140,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
nalu->nliteral = 0;
/* cf can contains only alu or only vtx or only tex */
- if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+ if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
bc->force_add_cf) {
/* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */
r = r600_bc_add_cf(bc);
@@ -127,7 +148,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
free(nalu);
return r;
}
- bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3;
+ bc->cf_last->inst = (type << 3);
}
if (alu->last && (bc->cf_last->ndw >> 1) >= 124) {
bc->force_add_cf = 1;
@@ -162,6 +183,11 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
return 0;
}
+int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+{
+ return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
+}
+
int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
{
struct r600_bc_alu *alu;
@@ -172,7 +198,17 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
return 0;
}
- if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+ if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
+ return 0;
+ }
+ if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
+ (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
LIST_IS_EMPTY(&bc->cf_last->alu)) {
R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
return -EINVAL;
@@ -241,6 +277,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
return 0;
}
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
+{
+ int r;
+ r = r600_bc_add_cf(bc);
+ if (r)
+ return r;
+
+ bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
+ bc->cf_last->inst = inst;
+ return 0;
+}
+
static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
{
bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
@@ -292,38 +340,44 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
unsigned i;
/* don't replace gpr by pv or ps for destination register */
+ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+ S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
+ S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+ S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
+ S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+ S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
+ S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+ S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
+ S_SQ_ALU_WORD0_LAST(alu->last);
+
if (alu->is_op3) {
- bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
- S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
- S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
- S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
- S_SQ_ALU_WORD0_LAST(alu->last);
bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+ S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+ S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
} else {
- bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
- S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
- S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
- S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
- S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
- S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
- S_SQ_ALU_WORD0_LAST(alu->last);
bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+ S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
- S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+ S_SQ_ALU_WORD1_BANK_SWIZZLE(0) |
+ S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
+ S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
if (alu->last) {
+ if (alu->nliteral && !alu->literal_added) {
+ R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n");
+ }
for (i = 0; i < alu->nliteral; i++) {
bc->bytecode[id++] = alu->value[i];
}
@@ -337,6 +391,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
@@ -364,6 +419,20 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
+ bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COND(cf->cond) |
+ S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+
+ break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
return -EINVAL;
@@ -380,6 +449,8 @@ int r600_bc_build(struct r600_bc *bc)
unsigned addr;
int r;
+ if (bc->callstack[0].max > 0)
+ bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
/* first path compute addr of each CF block */
/* addr start after all the CF instructions */
@@ -387,6 +458,7 @@ int r600_bc_build(struct r600_bc *bc)
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
break;
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -398,6 +470,14 @@ int r600_bc_build(struct r600_bc *bc)
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
return -EINVAL;
@@ -417,22 +497,13 @@ int r600_bc_build(struct r600_bc *bc)
return r;
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
- switch (bc->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
+ switch(bc->chiprev) {
+ case 0:
r = r600_bc_alu_build(bc, alu, addr);
break;
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
+ case 1:
r = r700_bc_alu_build(bc, alu, addr);
break;
default:
@@ -466,6 +537,13 @@ int r600_bc_build(struct r600_bc *bc)
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 10d98afaf0..9e65fcdd4f 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -31,6 +31,7 @@ struct r600_bc_alu_src {
unsigned chan;
unsigned neg;
unsigned abs;
+ unsigned rel;
};
struct r600_bc_alu_dst {
@@ -38,6 +39,7 @@ struct r600_bc_alu_dst {
unsigned chan;
unsigned clamp;
unsigned write;
+ unsigned rel;
};
struct r600_bc_alu {
@@ -47,6 +49,7 @@ struct r600_bc_alu {
unsigned inst;
unsigned last;
unsigned is_op3;
+ unsigned predicate;
unsigned nliteral;
unsigned literal_added;
u32 value[4];
@@ -114,22 +117,55 @@ struct r600_bc_cf {
unsigned addr;
unsigned ndw;
unsigned id;
+ unsigned cond;
+ unsigned pop_count;
+ unsigned cf_addr; /* control flow addr */
struct list_head alu;
struct list_head tex;
struct list_head vtx;
struct r600_bc_output output;
};
+#define FC_NONE 0
+#define FC_IF 1
+#define FC_LOOP 2
+#define FC_REP 3
+#define FC_PUSH_VPM 4
+#define FC_PUSH_WQM 5
+
+struct r600_cf_stack_entry {
+ int type;
+ struct r600_bc_cf *start;
+ struct r600_bc_cf **mid; /* used to store the else point */
+ int num_mid;
+};
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+struct r600_cf_callstack {
+ unsigned fc_sp_before_entry;
+ int sub_desc_index;
+ int current;
+ int max;
+};
+
struct r600_bc {
enum radeon_family family;
+ int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */
struct list_head cf;
struct r600_bc_cf *cf_last;
unsigned ndw;
unsigned ncf;
unsigned ngpr;
+ unsigned nstack;
unsigned nresource;
unsigned force_add_cf;
u32 *bytecode;
+
+ u32 fc_sp;
+ struct r600_cf_stack_entry fc_stack[32];
+
+ unsigned call_sp;
+ struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
};
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
@@ -139,5 +175,6 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
int r600_bc_build(struct r600_bc *bc);
-
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
#endif
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index f4eedfe4cb..e6ded342e5 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -24,6 +24,7 @@
* Jerome Glisse
* Marek Olšák
*/
+#include <errno.h>
#include <pipe/p_screen.h>
#include <util/u_blitter.h>
#include <util/u_inlines.h>
@@ -31,9 +32,12 @@
#include "util/u_surface.h"
#include "r600_screen.h"
#include "r600_context.h"
+#include "r600d.h"
-static void r600_blitter_save_states(struct r600_context *rctx)
+static void r600_blitter_save_states(struct pipe_context *ctx)
{
+ struct r600_context *rctx = r600_context(ctx);
+
util_blitter_save_blend(rctx->blitter, rctx->blend);
util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa);
if (rctx->stencil_ref) {
@@ -47,48 +51,58 @@ static void r600_blitter_save_states(struct r600_context *rctx)
if (rctx->viewport) {
util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport);
}
- /* XXX util_blitter_save_clip(rctx->blitter, &rctx->clip); */
+ if (rctx->clip) {
+ util_blitter_save_clip(rctx->blitter, &rctx->clip->state.clip);
+ }
util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer,
rctx->vertex_buffer);
/* remove ptr so they don't get deleted */
rctx->blend = NULL;
+ rctx->clip = NULL;
rctx->vs_shader = NULL;
rctx->ps_shader = NULL;
rctx->rasterizer = NULL;
rctx->dsa = NULL;
rctx->vertex_elements = NULL;
+
+ /* suspend queries */
+ r600_queries_suspend(ctx);
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
- const float *rgba, double depth, unsigned stencil)
+ const float *rgba, double depth, unsigned stencil)
{
struct r600_context *rctx = r600_context(ctx);
struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- r600_blitter_save_states(rctx);
+ r600_blitter_save_states(ctx);
util_blitter_clear(rctx->blitter, fb->width, fb->height,
fb->nr_cbufs, buffers, rgba, depth,
stencil);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
-static void r600_clear_render_target(struct pipe_context *pipe,
+static void r600_clear_render_target(struct pipe_context *ctx,
struct pipe_surface *dst,
const float *rgba,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct r600_context *rctx = r600_context(pipe);
+ struct r600_context *rctx = r600_context(ctx);
struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- r600_blitter_save_states(rctx);
+ r600_blitter_save_states(ctx);
util_blitter_save_framebuffer(rctx->blitter, fb);
util_blitter_clear_render_target(rctx->blitter, dst, rgba,
dstx, dsty, width, height);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
-static void r600_clear_depth_stencil(struct pipe_context *pipe,
+static void r600_clear_depth_stencil(struct pipe_context *ctx,
struct pipe_surface *dst,
unsigned clear_flags,
double depth,
@@ -96,17 +110,20 @@ static void r600_clear_depth_stencil(struct pipe_context *pipe,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct r600_context *rctx = r600_context(pipe);
+ struct r600_context *rctx = r600_context(ctx);
struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- r600_blitter_save_states(rctx);
+ r600_blitter_save_states(ctx);
util_blitter_save_framebuffer(rctx->blitter, fb);
util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
-static void r600_resource_copy_region(struct pipe_context *pipe,
+
+static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_subresource subdst,
unsigned dstx, unsigned dsty, unsigned dstz,
@@ -115,7 +132,7 @@ static void r600_resource_copy_region(struct pipe_context *pipe,
unsigned srcx, unsigned srcy, unsigned srcz,
unsigned width, unsigned height)
{
- util_resource_copy_region(pipe, dst, subdst, dstx, dsty, dstz,
+ util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
src, subsrc, srcx, srcy, srcz, width, height);
}
@@ -126,3 +143,446 @@ void r600_init_blit_functions(struct r600_context *rctx)
rctx->context.clear_depth_stencil = r600_clear_depth_stencil;
rctx->context.resource_copy_region = r600_resource_copy_region;
}
+
+
+struct r600_blit_states {
+ struct radeon_state rasterizer;
+ struct radeon_state dsa;
+ struct radeon_state blend;
+ struct radeon_state cb_cntl;
+ struct radeon_state vgt;
+ struct radeon_state draw;
+ struct radeon_state vs_constant0;
+ struct radeon_state vs_constant1;
+ struct radeon_state vs_constant2;
+ struct radeon_state vs_constant3;
+ struct radeon_state ps_shader;
+ struct radeon_state vs_shader;
+ struct radeon_state vs_resource0;
+ struct radeon_state vs_resource1;
+};
+
+static int r600_blit_state_vs_resources(struct r600_screen *rscreen, struct r600_blit_states *bstates)
+{
+ struct radeon_state *rstate;
+ struct radeon_bo *bo;
+ u32 vbo[] = {
+ 0xBF800000, 0xBF800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000,
+ 0x3F800000, 0xBF800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000,
+ 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000,
+ 0xBF800000, 0x3F800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000
+ };
+
+ /* simple shader */
+ bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL);
+ if (bo == NULL) {
+ return -ENOMEM;
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
+ radeon_bo_decref(rscreen->rw, bo);
+ return -ENOMEM;
+ }
+ memcpy(bo->data, vbo, 128);
+ radeon_bo_unmap(rscreen->rw, bo);
+
+ rstate = &bstates->vs_resource0;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 0, R600_SHADER_VS);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000080;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000;
+ rstate->bo[0] = bo;
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ return -ENOMEM;
+ }
+
+ rstate = &bstates->vs_resource1;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 1, R600_SHADER_VS);
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000010;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000070;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000;
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, bo);
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void r600_blit_state_vs_shader(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ struct radeon_bo *bo;
+ u32 shader_bc_r600[] = {
+ 0x00000004, 0x81000400,
+ 0x00000008, 0xA01C0000,
+ 0xC001A03C, 0x94000688,
+ 0xC0024000, 0x94200688,
+ 0x7C000000, 0x002D1001,
+ 0x00080000, 0x00000000,
+ 0x7C000100, 0x002D1002,
+ 0x00080000, 0x00000000,
+ 0x00000001, 0x00601910,
+ 0x00000401, 0x20601910,
+ 0x00000801, 0x40601910,
+ 0x80000C01, 0x60601910,
+ 0x00000002, 0x00801910,
+ 0x00000402, 0x20801910,
+ 0x00000802, 0x40801910,
+ 0x80000C02, 0x60801910
+ };
+ u32 shader_bc_r700[] = {
+ 0x00000004, 0x81000400,
+ 0x00000008, 0xA01C0000,
+ 0xC001A03C, 0x94000688,
+ 0xC0024000, 0x94200688,
+ 0x7C000000, 0x002D1001,
+ 0x00080000, 0x00000000,
+ 0x7C000100, 0x002D1002,
+ 0x00080000, 0x00000000,
+ 0x00000001, 0x00600C90,
+ 0x00000401, 0x20600C90,
+ 0x00000801, 0x40600C90,
+ 0x80000C01, 0x60600C90,
+ 0x00000002, 0x00800C90,
+ 0x00000402, 0x20800C90,
+ 0x00000802, 0x40800C90,
+ 0x80000C02, 0x60800C90
+ };
+
+ /* simple shader */
+ bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL);
+ if (bo == NULL) {
+ return;
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ switch (rscreen->chip_class) {
+ case R600:
+ memcpy(bo->data, shader_bc_r600, 128);
+ break;
+ case R700:
+ memcpy(bo->data, shader_bc_r700, 128);
+ break;
+ default:
+ R600_ERR("unsupported chip family\n");
+ radeon_bo_unmap(rscreen->rw, bo);
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ radeon_bo_unmap(rscreen->rw, bo);
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_0] = 0x03020100;
+ rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_1] = 0x07060504;
+ rstate->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = 0x00000005;
+
+ rstate->bo[0] = bo;
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, bo);
+ rstate->nbo = 2;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_ps_shader(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ struct radeon_bo *bo;
+ u32 shader_bc_r600[] = {
+ 0x00000002, 0xA00C0000,
+ 0xC0008000, 0x94200688,
+ 0x00000000, 0x00201910,
+ 0x00000400, 0x20201910,
+ 0x00000800, 0x40201910,
+ 0x80000C00, 0x60201910
+ };
+ u32 shader_bc_r700[] = {
+ 0x00000002, 0xA00C0000,
+ 0xC0008000, 0x94200688,
+ 0x00000000, 0x00200C90,
+ 0x00000400, 0x20200C90,
+ 0x00000800, 0x40200C90,
+ 0x80000C00, 0x60200C90
+ };
+
+ /* simple shader */
+ bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL);
+ if (bo == NULL) {
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
+ return;
+ }
+ switch (rscreen->chip_class) {
+ case R600:
+ memcpy(bo->data, shader_bc_r600, 48);
+ break;
+ case R700:
+ memcpy(bo->data, shader_bc_r700, 48);
+ break;
+ default:
+ R600_ERR("unsupported chip family\n");
+ radeon_bo_unmap(rscreen->rw, bo);
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ radeon_bo_unmap(rscreen->rw, bo);
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0] = 0x00000C00;
+ rstate->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = 0x10000001;
+ rstate->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
+ rstate->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = 0x00000002;
+
+ rstate->bo[0] = bo;
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_vgt(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_VGT, 0, 0);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
+ rstate->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF;
+ rstate->states[R600_VGT__VGT_PRIMITIVE_TYPE] = 0x00000005;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_draw(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DRAW, 0, 0);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_DRAW__VGT_DRAW_INITIATOR] = 0x00000002;
+ rstate->states[R600_DRAW__VGT_NUM_INDICES] = 0x00000004;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_vs_constant(struct r600_screen *rscreen, struct radeon_state *rstate,
+ unsigned id, float c0, float c1, float c2, float c3)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CONSTANT, id, R600_SHADER_VS);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256] = fui(c0);
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256] = fui(c1);
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256] = fui(c2);
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256] = fui(c3);
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_rasterizer(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400;
+ rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005;
+ rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008;
+ rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000;
+ rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080004;
+ rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_dsa(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00;
+ rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000;
+ rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060;
+ rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A;
+ rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_blend(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0);
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_cb_cntl(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0);
+ rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000;
+ rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF;
+ rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF;
+ rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0080;
+ rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F;
+ rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F;
+ rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF;
+ radeon_state_pm4(rstate);
+}
+
+static int r600_blit_states_init(struct pipe_context *ctx, struct r600_blit_states *bstates)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ r600_blit_state_ps_shader(rscreen, &bstates->ps_shader);
+ r600_blit_state_vs_shader(rscreen, &bstates->vs_shader);
+ r600_blit_state_vgt(rscreen, &bstates->vgt);
+ r600_blit_state_draw(rscreen, &bstates->draw);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant0, 0, 1.0, 0.0, 0.0, 0.0);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant1, 1, 0.0, 1.0, 0.0, 0.0);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant2, 2, 0.0, 0.0, -0.00199900055, 0.0);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant3, 3, 0.0, 0.0, -0.99900049, 1.0);
+ r600_blit_state_rasterizer(rscreen, &bstates->rasterizer);
+ r600_blit_state_dsa(rscreen, &bstates->dsa);
+ r600_blit_state_blend(rscreen, &bstates->blend);
+ r600_blit_state_cb_cntl(rscreen, &bstates->cb_cntl);
+ r600_blit_state_vs_resources(rscreen, bstates);
+ return 0;
+}
+
+static void r600_blit_states_destroy(struct pipe_context *ctx, struct r600_blit_states *bstates)
+{
+ radeon_state_fini(&bstates->ps_shader);
+ radeon_state_fini(&bstates->vs_shader);
+ radeon_state_fini(&bstates->vs_resource0);
+ radeon_state_fini(&bstates->vs_resource1);
+}
+
+int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_context *rctx = r600_context(ctx);
+ struct radeon_draw draw;
+ struct r600_blit_states bstates;
+ int r;
+
+ r = r600_texture_scissor(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+ r = r600_texture_cb(ctx, rtexture, 0, level);
+ if (r) {
+ return r;
+ }
+ r = r600_texture_db(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+ r = r600_texture_viewport(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+
+ r = r600_blit_states_init(ctx, &bstates);
+ if (r) {
+ return r;
+ }
+ bstates.dsa.states[R600_DSA__DB_RENDER_CONTROL] = 0x0000008C;
+ bstates.cb_cntl.states[R600_CB_CNTL__CB_TARGET_MASK] = 0x00000001;
+ /* force rebuild */
+ bstates.dsa.cpm4 = bstates.cb_cntl.cpm4 = 0;
+ if (radeon_state_pm4(&bstates.dsa)) {
+ goto out;
+ }
+ if (radeon_state_pm4(&bstates.cb_cntl)) {
+ goto out;
+ }
+
+ r = radeon_draw_init(&draw, rscreen->rw);
+ if (r) {
+ R600_ERR("failed creating draw for uncompressing textures\n");
+ goto out;
+ }
+
+ radeon_draw_bind(&draw, &bstates.vs_shader);
+ radeon_draw_bind(&draw, &bstates.ps_shader);
+ radeon_draw_bind(&draw, &bstates.rasterizer);
+ radeon_draw_bind(&draw, &bstates.dsa);
+ radeon_draw_bind(&draw, &bstates.blend);
+ radeon_draw_bind(&draw, &bstates.cb_cntl);
+ radeon_draw_bind(&draw, &rctx->config);
+ radeon_draw_bind(&draw, &bstates.vgt);
+ radeon_draw_bind(&draw, &bstates.draw);
+ radeon_draw_bind(&draw, &bstates.vs_resource0);
+ radeon_draw_bind(&draw, &bstates.vs_resource1);
+ radeon_draw_bind(&draw, &bstates.vs_constant0);
+ radeon_draw_bind(&draw, &bstates.vs_constant1);
+ radeon_draw_bind(&draw, &bstates.vs_constant2);
+ radeon_draw_bind(&draw, &bstates.vs_constant3);
+ radeon_draw_bind(&draw, &rtexture->viewport[level]);
+ radeon_draw_bind(&draw, &rtexture->scissor[level]);
+ radeon_draw_bind(&draw, &rtexture->cb[0][level]);
+ radeon_draw_bind(&draw, &rtexture->db[level]);
+
+ /* suspend queries */
+ r600_queries_suspend(ctx);
+
+ /* schedule draw*/
+ r = radeon_ctx_set_draw(&rctx->ctx, &draw);
+ if (r == -EBUSY) {
+ r600_flush(ctx, 0, NULL);
+ r = radeon_ctx_set_draw(&rctx->ctx, &draw);
+ }
+ if (r) {
+ goto out;
+ }
+
+ /* resume queries */
+ r600_queries_resume(ctx);
+
+out:
+ r600_blit_states_destroy(ctx, &bstates);
+ return r;
+}
diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c
index edde80c660..7a0e5b4049 100644
--- a/src/gallium/drivers/r600/r600_context.c
+++ b/src/gallium/drivers/r600/r600_context.c
@@ -34,10 +34,26 @@
#include "r600_resource.h"
#include "r600d.h"
+
static void r600_destroy_context(struct pipe_context *context)
{
struct r600_context *rctx = r600_context(context);
+ rctx->rasterizer = r600_context_state_decref(rctx->rasterizer);
+ rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple);
+ rctx->scissor = r600_context_state_decref(rctx->scissor);
+ rctx->clip = r600_context_state_decref(rctx->clip);
+ rctx->ps_shader = r600_context_state_decref(rctx->ps_shader);
+ rctx->vs_shader = r600_context_state_decref(rctx->vs_shader);
+ rctx->depth = r600_context_state_decref(rctx->depth);
+ rctx->stencil = r600_context_state_decref(rctx->stencil);
+ rctx->alpha = r600_context_state_decref(rctx->alpha);
+ rctx->dsa = r600_context_state_decref(rctx->dsa);
+ rctx->blend = r600_context_state_decref(rctx->blend);
+ rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref);
+ rctx->viewport = r600_context_state_decref(rctx->viewport);
+ rctx->framebuffer = r600_context_state_decref(rctx->framebuffer);
+ radeon_ctx_fini(&rctx->ctx);
FREE(rctx);
}
@@ -45,27 +61,35 @@ void r600_flush(struct pipe_context *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_context *rctx = r600_context(ctx);
- struct r600_screen *rscreen = rctx->screen;
+ struct r600_query *rquery;
static int dc = 0;
char dname[256];
- if (radeon_ctx_pm4(rctx->ctx))
- return;
+ /* suspend queries */
+ r600_queries_suspend(ctx);
/* FIXME dumping should be removed once shader support instructions
* without throwing bad code
*/
- if (!rctx->ctx->cpm4)
+ if (!rctx->ctx.cdwords)
goto out;
+#if 0
sprintf(dname, "gallium-%08d.bof", dc);
- if (dc < 1)
- radeon_ctx_dump_bof(rctx->ctx, dname);
+ if (dc < 2) {
+ radeon_ctx_dump_bof(&rctx->ctx, dname);
+ R600_ERR("dumped %s\n", dname);
+ }
+#endif
#if 1
- radeon_ctx_submit(rctx->ctx);
+ radeon_ctx_submit(&rctx->ctx);
#endif
+ LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
+ rquery->flushed = true;
+ }
dc++;
out:
- rctx->ctx = radeon_ctx_decref(rctx->ctx);
- rctx->ctx = radeon_ctx(rscreen->rw);
+ radeon_ctx_clear(&rctx->ctx);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
static void r600_init_config(struct r600_context *rctx)
@@ -207,9 +231,9 @@ static void r600_init_config(struct r600_context *rctx)
num_es_stack_entries = 0;
break;
}
- rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG);
+ radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
switch (family) {
case CHIP_RV610:
case CHIP_RV620:
@@ -218,75 +242,85 @@ static void r600_init_config(struct r600_context *rctx)
case CHIP_RV710:
break;
default:
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1);
break;
}
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio);
+
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0;
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0;
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0;
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0;
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0;
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
+ rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000;
- rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002;
- rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204;
- rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001;
- rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003;
- rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001;
- rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000;
- radeon_state_pm4(rctx->hw_states.config);
+ if (family >= CHIP_RV770) {
+ rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000;
+ rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002;
+ rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000;
+ rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204;
+ rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000;
+ rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000;
+ } else {
+ rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000;
+ rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000003;
+ rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000;
+ rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x01020204;
+ rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001;
+ rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00004010;
+ }
+ rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003;
+ rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001;
+ rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000;
+ radeon_state_pm4(&rctx->config);
}
struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
@@ -320,7 +354,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
r600_init_config(rctx);
- rctx->ctx = radeon_ctx(rscreen->rw);
- rctx->draw = radeon_draw(rscreen->rw);
+ radeon_ctx_init(&rctx->ctx, rscreen->rw);
+ radeon_draw_init(&rctx->draw, rscreen->rw);
return &rctx->context;
}
diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h
index 76d5de8653..cea0813054 100644
--- a/src/gallium/drivers/r600/r600_context.h
+++ b/src/gallium/drivers/r600/r600_context.h
@@ -30,9 +30,32 @@
#include <tgsi/tgsi_parse.h>
#include <tgsi/tgsi_util.h>
#include <util/u_blitter.h>
+#include <util/u_double_list.h>
#include "radeon.h"
#include "r600_shader.h"
+#define R600_QUERY_STATE_STARTED (1 << 0)
+#define R600_QUERY_STATE_ENDED (1 << 1)
+#define R600_QUERY_STATE_SUSPENDED (1 << 2)
+
+struct r600_query {
+ u64 result;
+ /* The kind of query. Currently only OQ is supported. */
+ unsigned type;
+ /* How many results have been written, in dwords. It's incremented
+ * after end_query and flush. */
+ unsigned num_results;
+ /* if we've flushed the query */
+ boolean flushed;
+ unsigned state;
+ /* The buffer where query results are stored. */
+ struct radeon_bo *buffer;
+ unsigned buffer_size;
+ /* linked list of queries */
+ struct list_head list;
+ struct radeon_state rstate;
+};
+
/* XXX move this to a more appropriate place */
union pipe_states {
struct pipe_rasterizer_state rasterizer;
@@ -72,13 +95,16 @@ enum pipe_state_type {
pipe_type_count
};
+#define R600_MAX_RSTATE 16
+
struct r600_context_state {
union pipe_states state;
unsigned refcount;
unsigned type;
- struct radeon_state *rstate;
+ struct radeon_state rstate[R600_MAX_RSTATE];
struct r600_shader shader;
struct radeon_bo *bo;
+ unsigned nrstate;
};
struct r600_vertex_element
@@ -89,28 +115,25 @@ struct r600_vertex_element
};
struct r600_context_hw_states {
- struct radeon_state *rasterizer;
- struct radeon_state *scissor;
- struct radeon_state *dsa;
- struct radeon_state *blend;
- struct radeon_state *viewport;
- struct radeon_state *cb[8];
- struct radeon_state *config;
- struct radeon_state *cb_cntl;
- struct radeon_state *db;
- unsigned ps_nresource;
- unsigned ps_nsampler;
- struct radeon_state *ps_resource[160];
- struct radeon_state *ps_sampler[16];
+ struct radeon_state rasterizer;
+ struct radeon_state scissor;
+ struct radeon_state dsa;
+ struct radeon_state cb_cntl;
};
struct r600_context {
struct pipe_context context;
struct r600_screen *screen;
struct radeon *rw;
- struct radeon_ctx *ctx;
+ struct radeon_ctx ctx;
struct blitter_context *blitter;
- struct radeon_draw *draw;
+ struct radeon_draw draw;
+ struct radeon_state config;
+ /* FIXME get rid of those vs_resource,vs/ps_constant */
+ struct radeon_state vs_resource[160];
+ unsigned vs_nresource;
+ struct radeon_state vs_constant[256];
+ struct radeon_state ps_constant[256];
/* hw states */
struct r600_context_hw_states hw_states;
/* pipe states */
@@ -134,14 +157,15 @@ struct r600_context {
struct r600_context_state *stencil_ref;
struct r600_context_state *viewport;
struct r600_context_state *framebuffer;
- struct r600_context_state *ps_sampler[PIPE_MAX_ATTRIBS];
- struct r600_context_state *vs_sampler[PIPE_MAX_ATTRIBS];
- struct r600_context_state *ps_sampler_view[PIPE_MAX_ATTRIBS];
- struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS];
+ struct radeon_state *ps_sampler[PIPE_MAX_ATTRIBS];
+ struct radeon_state *vs_sampler[PIPE_MAX_ATTRIBS];
+ struct radeon_state *ps_sampler_view[PIPE_MAX_ATTRIBS];
+ struct radeon_state *vs_sampler_view[PIPE_MAX_ATTRIBS];
struct r600_vertex_element *vertex_elements;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
- struct pipe_blend_color blend_color;
+ struct pipe_blend_color blend_color;
+ struct list_head query_list;
};
/* Convenience cast wrapper. */
@@ -150,13 +174,18 @@ static INLINE struct r600_context *r600_context(struct pipe_context *pipe)
return (struct r600_context*)pipe;
}
+static INLINE struct r600_query* r600_query(struct pipe_query* q)
+{
+ return (struct r600_query*)q;
+}
+
struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state);
struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate);
struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate);
void r600_flush(struct pipe_context *ctx, unsigned flags,
struct pipe_fence_handle **fence);
-int r600_context_hw_states(struct r600_context *rctx);
+int r600_context_hw_states(struct pipe_context *ctx);
void r600_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info);
@@ -178,4 +207,10 @@ extern int r600_pipe_shader_update(struct pipe_context *ctx,
uint32_t r600_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
+
+/* query */
+extern void r600_queries_resume(struct pipe_context *ctx);
+extern void r600_queries_suspend(struct pipe_context *ctx);
+
+
#endif
diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c
index f058455162..fabd337d23 100644
--- a/src/gallium/drivers/r600/r600_draw.c
+++ b/src/gallium/drivers/r600/r600_draw.c
@@ -31,6 +31,7 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
+#include "radeon.h"
#include "r600_screen.h"
#include "r600_context.h"
#include "r600_resource.h"
@@ -38,8 +39,8 @@
struct r600_draw {
struct pipe_context *ctx;
- struct radeon_state *draw;
- struct radeon_state *vgt;
+ struct radeon_state draw;
+ struct radeon_state vgt;
unsigned mode;
unsigned start;
unsigned count;
@@ -51,6 +52,7 @@ static int r600_draw_common(struct r600_draw *draw)
{
struct r600_context *rctx = r600_context(draw->ctx);
struct r600_screen *rscreen = rctx->screen;
+ /* FIXME vs_resource */
struct radeon_state *vs_resource;
struct r600_resource *rbuffer;
unsigned i, j, offset, format, prim;
@@ -58,7 +60,7 @@ static int r600_draw_common(struct r600_draw *draw)
struct pipe_vertex_buffer *vertex_buffer;
int r;
- r = r600_context_hw_states(rctx);
+ r = r600_context_hw_states(draw->ctx);
if (r)
return r;
switch (draw->index_size) {
@@ -81,6 +83,7 @@ static int r600_draw_common(struct r600_draw *draw)
r = r600_conv_pipe_prim(draw->mode, &prim);
if (r)
return r;
+
/* rebuild vertex shader if input format changed */
r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader);
if (r)
@@ -88,26 +91,24 @@ static int r600_draw_common(struct r600_draw *draw)
r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader);
if (r)
return r;
- r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate);
- if (r)
- return r;
+ radeon_draw_bind(&rctx->draw, &rctx->vs_shader->rstate[0]);
+ radeon_draw_bind(&rctx->draw, &rctx->ps_shader->rstate[0]);
+ for (i = 0 ; i < rctx->vs_nresource; i++) {
+ radeon_state_fini(&rctx->vs_resource[i]);
+ }
for (i = 0 ; i < rctx->vertex_elements->count; i++) {
+ vs_resource = &rctx->vs_resource[i];
j = rctx->vertex_elements->elements[i].vertex_buffer_index;
vertex_buffer = &rctx->vertex_buffer[j];
rbuffer = (struct r600_resource*)vertex_buffer->buffer;
offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset;
format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format);
- vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i);
- if (vs_resource == NULL)
- return -ENOMEM;
+ radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, i, R600_SHADER_VS);
vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
vs_resource->nbo = 1;
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset;
+ vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset - 1;
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) |
S_038008_DATA_FORMAT(format);
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000;
@@ -116,59 +117,61 @@ static int r600_draw_common(struct r600_draw *draw)
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000;
vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT;
vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT;
- r = radeon_draw_set_new(rctx->draw, vs_resource);
- if (r)
+ r = radeon_state_pm4(vs_resource);
+ if (r) {
return r;
+ }
+ radeon_draw_bind(&rctx->draw, vs_resource);
}
+ rctx->vs_nresource = rctx->vertex_elements->count;
/* FIXME start need to change winsys */
- draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW);
- if (draw->draw == NULL)
- return -ENOMEM;
- draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count;
- draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
+ radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0);
+ draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count;
+ draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
if (draw->index_buffer) {
rbuffer = (struct r600_resource*)draw->index_buffer;
- draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
- draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT;
- draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT;
- draw->draw->nbo = 1;
+ draw->draw.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT;
+ draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT;
+ draw->draw.nbo = 1;
}
- r = radeon_draw_set_new(rctx->draw, draw->draw);
- if (r)
+ r = radeon_state_pm4(&draw->draw);
+ if (r) {
return r;
- draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT);
- if (draw->vgt == NULL)
- return -ENOMEM;
- draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim;
- draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF;
- draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start;
- draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type;
- draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
- draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000;
- r = radeon_draw_set_new(rctx->draw, draw->vgt);
- if (r)
+ }
+ radeon_draw_bind(&rctx->draw, &draw->draw);
+
+ radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0);
+ draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim;
+ draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF;
+ draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start;
+ draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type;
+ draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
+ draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000;
+ r = radeon_state_pm4(&draw->vgt);
+ if (r) {
return r;
- /* FIXME */
- r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw);
+ }
+ radeon_draw_bind(&rctx->draw, &draw->vgt);
+
+ r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw);
if (r == -EBUSY) {
r600_flush(draw->ctx, 0, NULL);
- r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw);
+ r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw);
}
- if (r)
- return r;
- rctx->draw = radeon_draw_duplicate(rctx->draw);
- return 0;
+ return r;
}
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct r600_context *rctx = r600_context(ctx);
struct r600_draw draw;
+ int r;
assert(info->index_bias == 0);
@@ -189,5 +192,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
draw.index_size = 0;
draw.index_buffer = NULL;
}
- r600_draw_common(&draw);
+ r = r600_draw_common(&draw);
+ if (r)
+ fprintf(stderr,"draw common failed %d\n", r);
}
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 9b02ae680e..530940ed84 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -24,39 +24,225 @@
* Jerome Glisse
* Corbin Simpson
*/
+#include <errno.h>
#include <util/u_inlines.h>
#include <util/u_format.h>
#include <util/u_memory.h>
#include "r600_screen.h"
#include "r600_context.h"
-static struct pipe_query *r600_create_query(struct pipe_context *pipe, unsigned query_type)
+static void r600_query_begin(struct r600_context *rctx, struct r600_query *rquery)
{
- return NULL;
+ struct r600_screen *rscreen = rctx->screen;
+ struct radeon_state *rstate = &rquery->rstate;
+
+ radeon_state_fini(rstate);
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_BEGIN, 0, 0);
+ rstate->states[R600_QUERY__OFFSET] = rquery->num_results;
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rquery->buffer);
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ }
+}
+
+static void r600_query_end(struct r600_context *rctx, struct r600_query *rquery)
+{
+ struct r600_screen *rscreen = rctx->screen;
+ struct radeon_state *rstate = &rquery->rstate;
+
+ radeon_state_fini(rstate);
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_END, 0, 0);
+ rstate->states[R600_QUERY__OFFSET] = rquery->num_results + 8;
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rquery->buffer);
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ }
}
-static void r600_destroy_query(struct pipe_context *pipe, struct pipe_query *query)
+static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *q;
+
+ if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
+ return NULL;
+
+ q = CALLOC_STRUCT(r600_query);
+ if (!q)
+ return NULL;
+
+ q->type = query_type;
+ LIST_ADDTAIL(&q->list, &rctx->query_list);
+ q->buffer_size = 4096;
+
+ q->buffer = radeon_bo(rscreen->rw, 0, q->buffer_size, 1, NULL);
+ if (!q->buffer) {
+ FREE(q);
+ return NULL;
+ }
+ return (struct pipe_query *)q;
+}
+
+static void r600_destroy_query(struct pipe_context *ctx,
+ struct pipe_query *query)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_query *q = r600_query(query);
+
+ radeon_bo_decref(rscreen->rw, q->buffer);
+ LIST_DEL(&q->list);
FREE(query);
}
-static void r600_begin_query(struct pipe_context *pipe, struct pipe_query *query)
+static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquery)
{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ u64 start, end;
+ u32 *results;
+ int i;
+
+ radeon_bo_wait(rscreen->rw, rquery->buffer);
+ radeon_bo_map(rscreen->rw, rquery->buffer);
+ results = rquery->buffer->data;
+ for (i = 0; i < rquery->num_results; i += 4) {
+ start = (u64)results[i] | (u64)results[i + 1] << 32;
+ end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
+ if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
+ rquery->result += end - start;
+ }
+ }
+ radeon_bo_unmap(rscreen->rw, rquery->buffer);
+ rquery->num_results = 0;
}
-static void r600_end_query(struct pipe_context *pipe, struct pipe_query *query)
+static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquery)
{
+ struct r600_context *rctx = r600_context(ctx);
+
+ if (rquery->num_results >= ((rquery->buffer_size >> 2) - 2)) {
+ /* running out of space */
+ if (!rquery->flushed) {
+ ctx->flush(ctx, 0, NULL);
+ }
+ r600_query_result(ctx, rquery);
+ }
+ r600_query_begin(rctx, rquery);
+ rquery->flushed = false;
+}
+
+static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery)
+{
+ struct r600_context *rctx = r600_context(ctx);
+
+ r600_query_end(rctx, rquery);
+ rquery->num_results += 16;
}
-static boolean r600_get_query_result(struct pipe_context *pipe,
+static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery = r600_query(query);
+ int r;
+
+ rquery->state = R600_QUERY_STATE_STARTED;
+ rquery->num_results = 0;
+ rquery->flushed = false;
+ r600_query_resume(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+}
+
+static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery = r600_query(query);
+ int r;
+
+ rquery->state &= ~R600_QUERY_STATE_STARTED;
+ rquery->state |= R600_QUERY_STATE_ENDED;
+ r600_query_suspend(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+}
+
+void r600_queries_suspend(struct pipe_context *ctx)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery;
+ int r;
+
+ LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
+ if (rquery->state & R600_QUERY_STATE_STARTED) {
+ r600_query_suspend(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+ }
+ rquery->state |= R600_QUERY_STATE_SUSPENDED;
+ }
+}
+
+void r600_queries_resume(struct pipe_context *ctx)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery;
+ int r;
+
+ LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
+ if (rquery->state & R600_QUERY_STATE_STARTED) {
+ r600_query_resume(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+ }
+ rquery->state &= ~R600_QUERY_STATE_SUSPENDED;
+ }
+}
+
+static boolean r600_get_query_result(struct pipe_context *ctx,
struct pipe_query *query,
- boolean wait, void *result)
+ boolean wait, void *vresult)
{
+ struct r600_query *rquery = r600_query(query);
+ uint64_t *result = (uint64_t*)vresult;
+
+ if (!rquery->flushed) {
+ ctx->flush(ctx, 0, NULL);
+ rquery->flushed = true;
+ }
+ r600_query_result(ctx, rquery);
+ *result = rquery->result;
+ rquery->result = 0;
return TRUE;
}
void r600_init_query_functions(struct r600_context* rctx)
{
+ LIST_INITHEAD(&rctx->query_list);
+
rctx->context.create_query = r600_create_query;
rctx->context.destroy_query = r600_destroy_query;
rctx->context.begin_query = r600_begin_query;
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index bb90e76fb7..129667ad20 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -44,10 +44,22 @@ struct r600_resource_texture {
struct r600_resource resource;
unsigned long offset[PIPE_MAX_TEXTURE_LEVELS];
unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned long width[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned long height[PIPE_MAX_TEXTURE_LEVELS];
unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS];
unsigned long pitch_override;
unsigned long bpt;
unsigned long size;
+ unsigned tilled;
+ unsigned array_mode;
+ unsigned tile_type;
+ unsigned depth;
+ unsigned dirty;
+ struct radeon_bo *uncompressed;
+ struct radeon_state scissor[PIPE_MAX_TEXTURE_LEVELS];
+ struct radeon_state cb[8][PIPE_MAX_TEXTURE_LEVELS];
+ struct radeon_state db[PIPE_MAX_TEXTURE_LEVELS];
+ struct radeon_state viewport[PIPE_MAX_TEXTURE_LEVELS];
};
void r600_init_context_resource_functions(struct r600_context *r600);
diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c
index cdaca9ed7d..a047a49a6c 100644
--- a/src/gallium/drivers/r600/r600_screen.c
+++ b/src/gallium/drivers/r600/r600_screen.c
@@ -69,6 +69,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ case PIPE_CAP_DEPTH_CLAMP:
return 1;
/* Unsupported features (boolean caps). */
@@ -77,7 +78,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
case PIPE_CAP_GEOMETRY_SHADER4:
- case PIPE_CAP_DEPTH_CLAMP: /* FIXME allow this */
return 0;
/* Texturing. */
@@ -234,11 +234,34 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
struct pipe_screen *r600_screen_create(struct radeon *rw)
{
struct r600_screen* rscreen;
+ enum radeon_family family = radeon_get_family(rw);
rscreen = CALLOC_STRUCT(r600_screen);
if (rscreen == NULL) {
return NULL;
}
+
+ switch (family) {
+ case CHIP_R600:
+ case CHIP_RV610:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ rscreen->chip_class = R600;
+ break;
+ case CHIP_RV770:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ rscreen->chip_class = R700;
+ break;
+ default:
+ FREE(rscreen);
+ return NULL;
+ }
rscreen->rw = rw;
rscreen->screen.winsys = (struct pipe_winsys*)rw;
rscreen->screen.destroy = r600_destroy_screen;
diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h
index 53b560c617..b9938f117a 100644
--- a/src/gallium/drivers/r600/r600_screen.h
+++ b/src/gallium/drivers/r600/r600_screen.h
@@ -30,6 +30,7 @@
#include <radeon_drm.h>
#include "radeon.h"
#include "util/u_transfer.h"
+#include "r600_resource.h"
/* Texture transfer. */
struct r600_transfer {
@@ -38,11 +39,19 @@ struct r600_transfer {
/* Buffer transfer. */
struct pipe_transfer *buffer_transfer;
unsigned offset;
+ struct pipe_resource *linear_texture;
+};
+
+enum chip_class {
+ R600,
+ R700,
+ EVERGREEN,
};
struct r600_screen {
struct pipe_screen screen;
struct radeon *rw;
+ enum chip_class chip_class;
};
static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen)
@@ -62,7 +71,7 @@ unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle);
-/* Texture transfer functions. */
+/* r600_texture.c texture transfer functions. */
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
struct pipe_subresource sr,
@@ -74,7 +83,14 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_transfer* transfer);
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
+int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level);
+int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+/* r600_blit.c */
+int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
/* helpers */
int r600_conv_pipe_format(unsigned pformat, unsigned *format);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 956c7e7930..0ba26a2311 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -48,6 +48,9 @@ struct r600_shader_ctx {
struct r600_bc *bc;
struct r600_shader *shader;
u32 value[4];
+ u32 *literals;
+ u32 nliterals;
+ u32 max_driver_temp_used;
};
struct r600_shader_tgsi_instruction {
@@ -105,8 +108,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_screen *rscreen = r600_screen(ctx->screen);
int r;
-fprintf(stderr, "--------------------------------------------------------------\n");
-tgsi_dump(tokens, 0);
+//fprintf(stderr, "--------------------------------------------------------------\n");
+//tgsi_dump(tokens, 0);
if (rpshader == NULL)
return -ENOMEM;
rpshader->shader.family = radeon_get_family(rscreen->rw);
@@ -120,7 +123,7 @@ tgsi_dump(tokens, 0);
R600_ERR("building bytecode failed !\n");
return r;
}
-fprintf(stderr, "______________________________________________________________\n");
+//fprintf(stderr, "______________________________________________________________\n");
return 0;
}
@@ -131,10 +134,9 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
struct radeon_state *state;
unsigned i, tmp;
- rpshader->rstate = radeon_state_decref(rpshader->rstate);
- state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
- if (state == NULL)
- return -ENOMEM;
+ state = &rpshader->rstate[0];
+ radeon_state_fini(&rpshader->rstate[0]);
+ radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
for (i = 0; i < 10; i++) {
state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
}
@@ -144,12 +146,13 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
}
state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
- state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
- rpshader->rstate = state;
- rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
- rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
- rpshader->rstate->nbo = 2;
- rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack);
+ state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+ state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+ state->nbo = 2;
+ state->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ state->placement[2] = RADEON_GEM_DOMAIN_GTT;
return radeon_state_pm4(state);
}
@@ -161,17 +164,20 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
struct r600_context *rctx = r600_context(ctx);
struct radeon_state *state;
unsigned i, tmp, exports_ps, num_cout;
+ boolean have_pos = FALSE;
+ state = &rpshader->rstate[0];
rasterizer = &rctx->rasterizer->state.rasterizer;
- rpshader->rstate = radeon_state_decref(rpshader->rstate);
- state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
- if (state == NULL)
- return -ENOMEM;
+ radeon_state_fini(state);
+ radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
for (i = 0; i < rshader->ninput; i++) {
tmp = S_028644_SEMANTIC(i);
tmp |= S_028644_SEL_CENTROID(1);
+ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+ have_pos = TRUE;
if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
}
if (rasterizer->sprite_coord_enable & (1 << i)) {
@@ -190,15 +196,24 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
num_cout++;
}
}
+ if (!exports_ps) {
+ /* always at least export 1 component per pixel */
+ exports_ps = 2;
+ }
state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
S_0286CC_PERSP_GRADIENT_ENA(1);
+ if (have_pos) {
+ state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) |
+ S_0286CC_BARYC_SAMPLE_CNTL(1);
+ state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1;
+ }
state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
- state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
+ state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack);
state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
- rpshader->rstate = state;
- rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
- rpshader->rstate->nbo = 1;
- rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+ state->nbo = 1;
+ state->placement[0] = RADEON_GEM_DOMAIN_GTT;
return radeon_state_pm4(state);
}
@@ -268,21 +283,24 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
R600_ERR("predicate unsupported\n");
return -EINVAL;
}
+#if 0
if (i->Instruction.Label) {
R600_ERR("label unsupported\n");
return -EINVAL;
}
+#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
- if (i->Src[j].Register.Indirect ||
- i->Src[j].Register.Dimension ||
+ if (i->Src[j].Register.Dimension ||
i->Src[j].Register.Absolute) {
- R600_ERR("unsupported src (indirect|dimension|absolute)\n");
+ R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
+ i->Src[j].Register.Dimension,
+ i->Src[j].Register.Absolute);
return -EINVAL;
}
}
for (j = 0; j < i->Instruction.NumDstRegs; j++) {
- if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
- R600_ERR("unsupported dst (indirect|dimension)\n");
+ if (i->Dst[j].Register.Dimension) {
+ R600_ERR("unsupported dst (dimension)\n");
return -EINVAL;
}
}
@@ -333,6 +351,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_ADDRESS:
break;
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
@@ -341,6 +360,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
return 0;
}
+static int r600_get_temp(struct r600_shader_ctx *ctx)
+{
+ return ctx->temp_reg + ctx->max_driver_temp_used++;
+}
+
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
@@ -362,9 +386,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
shader->processor_type = ctx.type;
/* register allocations */
- /* Values [0,127] correspond to GPR[0..127].
- * Values [256,511] correspond to cfile constants c[0..255].
+ /* Values [0,127] correspond to GPR[0..127].
+ * Values [128,159] correspond to constant buffer bank 0
+ * Values [160,191] correspond to constant buffer bank 1
+ * Values [256,511] correspond to cfile constants c[0..255].
* Other special values are shown in the list below.
+ * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
+ * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
+ * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
+ * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
* 248 SQ_ALU_SRC_0: special constant 0.0.
* 249 SQ_ALU_SRC_1: special constant 1.0 float.
* 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
@@ -389,15 +419,24 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
+ ctx.nliterals = 0;
+ ctx.literals = NULL;
+
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
immediate = &ctx.parse.FullToken.FullImmediate;
- ctx.value[0] = immediate->u[0].Uint;
- ctx.value[1] = immediate->u[1].Uint;
- ctx.value[2] = immediate->u[2].Uint;
- ctx.value[3] = immediate->u[3].Uint;
+ ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
+ if(ctx.literals == NULL) {
+ r = -ENOMEM;
+ goto out_err;
+ }
+ ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
+ ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
+ ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
+ ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
+ ctx.nliterals++;
break;
case TGSI_TOKEN_TYPE_DECLARATION:
r = tgsi_declaration(&ctx);
@@ -408,6 +447,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = tgsi_is_supported(&ctx);
if (r)
goto out_err;
+ ctx.max_driver_temp_used = 0;
+ /* reserve first tmp for everyone */
+ r600_get_temp(&ctx);
opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
r = ctx.inst_info->process(&ctx);
@@ -458,6 +500,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
output[i].array_base = 61;
+ output[i].swizzle_x = 2;
+ output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
@@ -504,7 +548,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
output[0].barrier = 1;
- output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
noutput++;
@@ -525,9 +569,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
if (r)
goto out_err;
}
+ free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
out_err:
+ free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return r;
}
@@ -547,11 +593,19 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
const struct tgsi_full_src_register *tgsi_src,
struct r600_bc_alu_src *r600_src)
{
+ int index;
memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
r600_src->sel = tgsi_src->Register.Index;
if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
r600_src->sel = 0;
+ index = tgsi_src->Register.Index;
+ ctx->value[0] = ctx->literals[index * 4 + 0];
+ ctx->value[1] = ctx->literals[index * 4 + 1];
+ ctx->value[2] = ctx->literals[index * 4 + 2];
+ ctx->value[3] = ctx->literals[index * 4 + 3];
}
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
r600_src->neg = tgsi_src->Register.Negate;
r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
return 0;
@@ -568,6 +622,8 @@ static int tgsi_dst(struct r600_shader_ctx *ctx,
r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
r600_dst->chan = swizzle;
r600_dst->write = 1;
+ if (tgsi_dst->Register.Indirect)
+ r600_dst->rel = V_SQ_REL_RELATIVE;
if (inst->Instruction.Saturate) {
r600_dst->clamp = 1;
}
@@ -607,12 +663,13 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
}
for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
+ int treg = r600_get_temp(ctx);
for (k = 0; k < 4; k++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = r600_src[0].sel;
+ alu.src[0].sel = r600_src[j].sel;
alu.src[0].chan = k;
- alu.dst.sel = ctx->temp_reg + j;
+ alu.dst.sel = treg;
alu.dst.chan = k;
alu.dst.write = 1;
if (k == 3)
@@ -621,37 +678,90 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
if (r)
return r;
}
- r600_src[0].sel = ctx->temp_reg + j;
+ r600_src[j].sel = treg;
j--;
}
}
return 0;
}
-static int tgsi_op2(struct r600_shader_ctx *ctx)
+/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nliteral, r;
+
+ for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+ nliteral++;
+ }
+ }
+ for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = r600_src[j].sel;
+ alu.src[0].chan = k;
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ r600_src[j].sel = treg;
+ j++;
+ }
+ }
+ return 0;
+}
+
+static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ lasti = i;
+ }
+ }
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
- if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = i;
- } else {
- alu.inst = ctx->inst_info->r600_opcode;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
+ alu.inst = ctx->inst_info->r600_opcode;
+ if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
alu.src[j] = r600_src[j];
alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
}
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ } else {
+ alu.src[0] = r600_src[1];
+ alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+
+ alu.src[1] = r600_src[0];
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
@@ -664,7 +774,7 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
default:
break;
}
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -674,24 +784,154 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_kill(struct r600_shader_ctx *ctx)
+static int tgsi_op2(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 0);
+}
+
+static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 1);
+}
+
+/*
+ * r600 - trunc to -PI..PI range
+ * r700 - normalize by dividing by 2PI
+ * see fdo bug 27901
+ */
+static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
+ struct r600_bc_alu_src r600_src[3])
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int r;
+ uint32_t lit_vals[4];
+ struct r600_bc_alu alu;
+
+ memset(lit_vals, 0, 4*4);
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
+ lit_vals[1] = fui(0.5f);
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0] = r600_src[0];
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].chan = 0;
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, lit_vals);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
+
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ if (ctx->bc->chiprev == 0) {
+ lit_vals[0] = fui(3.1415926535897f * 2.0f);
+ lit_vals[1] = fui(-3.1415926535897f);
+ } else {
+ lit_vals[0] = fui(1.0f);
+ lit_vals[1] = fui(-0.5f);
+ }
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].chan = 0;
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, lit_vals);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
+ int lasti = 0;
+
+ r = tgsi_setup_trig(ctx, r600_src);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = ctx->inst_info->r600_opcode;
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ /* replicate result */
for (i = 0; i < 4; i++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i))
+ lasti = i;
+ }
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = ctx->inst_info->r600_opcode;
- alu.dst.chan = i;
- alu.src[0].sel = 248;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+
+ alu.src[0].sel = ctx->temp_reg;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (r)
return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
- if (i == 3) {
+ if (i == lasti)
alu.last = 1;
- }
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
@@ -699,30 +939,70 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_slt(struct r600_shader_ctx *ctx)
+static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- int i, r;
+ int r;
- r = tgsi_split_constant(ctx, r600_src);
+ r = tgsi_setup_trig(ctx, r600_src);
+ if (r)
+ return r;
+
+
+ /* dst.x = COS */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS;
+ r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
if (r)
return r;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* dst.y = SIN */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN;
+ r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+ if (r)
+ return r;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int tgsi_kill(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, r;
+
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
- if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = i;
+ alu.inst = ctx->inst_info->r600_opcode;
+
+ alu.dst.chan = i;
+
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+
+ if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[1].neg = 1;
} else {
- alu.inst = ctx->inst_info->r600_opcode;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
if (r)
return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
}
if (i == 3) {
alu.last = 1;
@@ -731,6 +1011,13 @@ static int tgsi_slt(struct r600_shader_ctx *ctx)
if (r)
return r;
}
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
+ /* kill must be last in ALU */
+ ctx->bc->force_add_cf = 1;
+ ctx->shader->uses_kill = TRUE;
return 0;
}
@@ -738,12 +1025,20 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
+ struct r600_bc_alu_src r600_src[3];
int r;
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
+
/* dst.x, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = 249; /*1.0*/
+ alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
alu.src[0].chan = 0;
r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
if (r)
@@ -756,11 +1051,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.y = max(src.x, 0.0) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[1].sel = 248; /*0.0*/
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.src[0] = r600_src[0];
+ alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
+ alu.src[1].chan = 0;
r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
if (r)
return r;
@@ -769,18 +1062,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- /* dst.z = NOP - fill Z slot */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = 2;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
-
/* dst.w, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = 249;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
if (r)
@@ -791,6 +1076,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
@@ -799,9 +1088,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.z = log(src.y) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ alu.src[0] = r600_src[0];
alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
if (r)
@@ -811,21 +1098,22 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
chan = alu.dst.chan;
sel = alu.dst.sel;
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ alu.src[0] = r600_src[0];
alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
alu.src[1].sel = sel;
alu.src[1].chan = chan;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
- if (r)
- return r;
+
+ alu.src[2] = r600_src[0];
alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -836,6 +1124,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
/* dst.z = exp(tmp.x) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
@@ -880,19 +1171,43 @@ static int tgsi_trans(struct r600_shader_ctx *ctx)
return 0;
}
+static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, r;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.src[0].sel = ctx->temp_reg;
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.dst.chan = i;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- int i, j, r;
+ int i, r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
- for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
if (r)
return r;
- alu.src[j].chan = tgsi_chan(&inst->Src[j], 0);
+ alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -900,16 +1215,124 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
/* replicate result */
+ return tgsi_helper_tempx_replicate(ctx);
+}
+
+static int tgsi_pow(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+
+ /* LOG2(a) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc,ctx->value);
+ if (r)
+ return r;
+ /* b * LOG2(a) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
+ r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
+ alu.src[1].sel = ctx->temp_reg;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc,ctx->value);
+ if (r)
+ return r;
+ /* POW(a,b) = EXP2(b * LOG2(a))*/
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc,ctx->value);
+ if (r)
+ return r;
+ return tgsi_helper_tempx_replicate(ctx);
+}
+
+static int tgsi_ssg(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ struct r600_bc_alu_src r600_src[3];
+ int i, r;
+
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ /* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.src[0].sel = ctx->temp_reg;
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
+ alu.is_op3 = 1;
+
+ alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
+
+ alu.src[0] = r600_src[0];
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+
+ alu.src[2] = r600_src[0];
+ alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
+ /* dst = (-tmp > 0 ? -1 : tmp) */
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
+ alu.is_op3 = 1;
r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (r)
return r;
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = i;
+ alu.src[0].neg = 1;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[1].neg = 1;
+
+ alu.src[2].sel = ctx->temp_reg;
+ alu.src[2].chan = i;
+
if (i == 3)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1006,16 +1429,23 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
switch (ctx->inst_info->tgsi_opcode) {
case TGSI_OPCODE_DP2:
if (i > 1) {
- alu.src[0].sel = alu.src[1].sel = 248;
+ alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = alu.src[1].chan = 0;
}
break;
case TGSI_OPCODE_DP3:
if (i > 2) {
- alu.src[0].sel = alu.src[1].sel = 248;
+ alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = alu.src[1].chan = 0;
}
break;
+ case TGSI_OPCODE_DPH:
+ if (i == 3) {
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+ alu.src[0].neg = 0;
+ }
+ break;
default:
break;
}
@@ -1035,75 +1465,197 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
- int r;
+ int r, i;
+ int opcode;
+ boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
+ uint32_t lit_vals[4];
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
- /* Add perspective divide */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
- alu.src[0].sel = src_gpr;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 3;
- alu.last = 1;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+ /* Add perspective divide */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 3;
- alu.src[1].sel = src_gpr;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 0;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 3;
- alu.src[1].sel = src_gpr;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 1);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 1;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 3;
- alu.src[1].sel = src_gpr;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 2);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 2;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = 249;
- alu.src[0].chan = 0;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 3;
- alu.last = 1;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- src_gpr = ctx->temp_reg;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 3;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+ if (r)
+ return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ src_not_temp = false;
+ src_gpr = ctx->temp_reg;
+ }
+
+ if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
+ int src_chan, src2_chan;
+
+ /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ switch (i) {
+ case 0:
+ src_chan = 2;
+ src2_chan = 1;
+ break;
+ case 1:
+ src_chan = 2;
+ src2_chan = 0;
+ break;
+ case 2:
+ src_chan = 0;
+ src2_chan = 2;
+ break;
+ case 3:
+ src_chan = 1;
+ src2_chan = 2;
+ break;
+ }
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+ if (r)
+ return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* tmp1.z = RCP_e(|tmp1.z|) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 2;
+ alu.src[0].abs = 1;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
+ * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
+ * muladd has no writemask, have to use another temp
+ */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.src[1].sel = ctx->temp_reg;
+ alu.src[1].chan = 2;
+
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = ctx->temp_reg;
+ alu.src[1].chan = 2;
+
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ lit_vals[0] = fui(1.5f);
+
+ r = r600_bc_add_literal(ctx->bc, lit_vals);
+ if (r)
+ return r;
+ src_not_temp = false;
+ src_gpr = ctx->temp_reg;
+ }
+
+ if (src_not_temp) {
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = i;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ src_gpr = ctx->temp_reg;
+ }
+
+ opcode = ctx->inst_info->r600_opcode;
+ if (opcode == SQ_TEX_INST_SAMPLE &&
+ (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
+ opcode = SQ_TEX_INST_SAMPLE_C;
- /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
memset(&tex, 0, sizeof(struct r600_bc_tex));
- tex.inst = ctx->inst_info->r600_opcode;
+ tex.inst = opcode;
tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
tex.sampler_id = tex.resource_id;
tex.src_gpr = src_gpr;
@@ -1117,13 +1669,30 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.src_sel_z = 2;
tex.src_sel_w = 3;
+ if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
+ tex.src_sel_x = 1;
+ tex.src_sel_y = 0;
+ tex.src_sel_z = 3;
+ tex.src_sel_w = 1;
+ }
+
if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
tex.coord_type_x = 1;
tex.coord_type_y = 1;
tex.coord_type_z = 1;
tex.coord_type_w = 1;
}
- return r600_bc_add_tex(ctx->bc, &tex);
+
+ if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
+ tex.src_sel_w = 2;
+
+ r = r600_bc_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+
+ /* add shadow ambient support - gallium doesn't do it yet */
+ return 0;
+
}
static int tgsi_lrp(struct r600_shader_ctx *ctx)
@@ -1141,7 +1710,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
- alu.src[0].sel = 249;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
alu.src[1] = r600_src[0];
alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
@@ -1205,23 +1774,654 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
return tgsi_helper_copy(ctx, inst);
}
+static int tgsi_cmp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ int use_temp = 0;
+ int i, r;
+
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ if (inst->Dst[0].Register.WriteMask != 0xf)
+ use_temp = 1;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
+ alu.src[0] = r600_src[0];
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+
+ alu.src[1] = r600_src[2];
+ alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+
+ alu.src[2] = r600_src[1];
+ alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
+
+ if (use_temp)
+ alu.dst.sel = ctx->temp_reg;
+ else {
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+ }
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.is_op3 = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ if (use_temp)
+ return tgsi_helper_copy(ctx, inst);
+ return 0;
+}
+
+static int tgsi_xpd(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ uint32_t use_temp = 0;
+ int i, r;
+
+ if (inst->Dst[0].Register.WriteMask != 0xf)
+ use_temp = 1;
+
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+
+ alu.src[0] = r600_src[0];
+ switch (i) {
+ case 0:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ break;
+ case 1:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ break;
+ case 2:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ break;
+ case 3:
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ alu.src[0].chan = i;
+ }
+
+ alu.src[1] = r600_src[1];
+ switch (i) {
+ case 0:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ break;
+ case 1:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ break;
+ case 2:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ break;
+ case 3:
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.src[1].chan = i;
+ }
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+
+ alu.src[0] = r600_src[0];
+ switch (i) {
+ case 0:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ break;
+ case 1:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ break;
+ case 2:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ break;
+ case 3:
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ alu.src[0].chan = i;
+ }
+
+ alu.src[1] = r600_src[1];
+ switch (i) {
+ case 0:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ break;
+ case 1:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ break;
+ case 2:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ break;
+ case 3:
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.src[1].chan = i;
+ }
+
+ alu.src[2].sel = ctx->temp_reg;
+ alu.src[2].neg = 1;
+ alu.src[2].chan = i;
+
+ if (use_temp)
+ alu.dst.sel = ctx->temp_reg;
+ else {
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+ }
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.is_op3 = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ if (use_temp)
+ return tgsi_helper_copy(ctx, inst);
+ return 0;
+}
+
+static int tgsi_exp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ int r;
+
+ /* result.x = 2^floor(src); */
+ if (inst->Dst[0].Register.WriteMask & 1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+
+ /* result.y = tmp - floor(tmp); */
+ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
+ alu.src[0] = r600_src[0];
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+// if (r)
+// return r;
+ alu.dst.write = 1;
+ alu.dst.chan = 1;
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+
+ /* result.z = RoughApprox2ToX(tmp);*/
+ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 2;
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+
+ /* result.w = 1.0;*/
+ if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+ return tgsi_helper_copy(ctx, inst);
+}
+
+static int tgsi_arl(struct r600_shader_ctx *ctx)
+{
+ /* TODO from r600c, ar values don't persist between clauses */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int tgsi_opdst(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, r = 0;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
+ if (i == 0 || i == 3) {
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ } else {
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ }
+
+ if (i == 0 || i == 2) {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ } else {
+ r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
+ if (r)
+ return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ }
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = opcode;
+ alu.predicate = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 0;
+
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.src[1].chan = 0;
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int pops(struct r600_shader_ctx *ctx, int pops)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
+ ctx->bc->cf_last->pop_count = pops;
+ return 0;
+}
+
+static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
+{
+ switch(reason) {
+ case FC_PUSH_VPM:
+ ctx->bc->callstack[ctx->bc->call_sp].current--;
+ break;
+ case FC_PUSH_WQM:
+ case FC_LOOP:
+ ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+ break;
+ case FC_REP:
+ /* TOODO : for 16 vp asic should -= 2; */
+ ctx->bc->callstack[ctx->bc->call_sp].current --;
+ break;
+ }
+}
+
+static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+{
+ if (check_max_only) {
+ int diff;
+ switch (reason) {
+ case FC_PUSH_VPM:
+ diff = 1;
+ break;
+ case FC_PUSH_WQM:
+ diff = 4;
+ break;
+ }
+ if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
+ ctx->bc->callstack[ctx->bc->call_sp].max) {
+ ctx->bc->callstack[ctx->bc->call_sp].max =
+ ctx->bc->callstack[ctx->bc->call_sp].current + diff;
+ }
+ return;
+ }
+ switch (reason) {
+ case FC_PUSH_VPM:
+ ctx->bc->callstack[ctx->bc->call_sp].current++;
+ break;
+ case FC_PUSH_WQM:
+ case FC_LOOP:
+ ctx->bc->callstack[ctx->bc->call_sp].current += 4;
+ break;
+ case FC_REP:
+ ctx->bc->callstack[ctx->bc->call_sp].current++;
+ break;
+ }
+
+ if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
+ ctx->bc->callstack[ctx->bc->call_sp].max) {
+ ctx->bc->callstack[ctx->bc->call_sp].max =
+ ctx->bc->callstack[ctx->bc->call_sp].current;
+ }
+}
+
+static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
+{
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
+
+ sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
+ sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
+ sp->mid[sp->num_mid] = ctx->bc->cf_last;
+ sp->num_mid++;
+}
+
+static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
+{
+ ctx->bc->fc_sp++;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+}
+
+static void fc_poplevel(struct r600_shader_ctx *ctx)
+{
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+ if (sp->mid) {
+ free(sp->mid);
+ sp->mid = NULL;
+ }
+ sp->num_mid = 0;
+ sp->start = NULL;
+ sp->type = 0;
+ ctx->bc->fc_sp--;
+}
+
+#if 0
+static int emit_return(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+ return 0;
+}
+
+static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
+{
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+ ctx->bc->cf_last->pop_count = pops;
+ /* TODO work out offset */
+ return 0;
+}
+
+static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
+{
+ return 0;
+}
+
+static void emit_testflag(struct r600_shader_ctx *ctx)
+{
+
+}
+
+static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
+{
+ emit_testflag(ctx);
+ emit_jump_to_offset(ctx, 1, 4);
+ emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
+ pops(ctx, ifidx + 1);
+ emit_return(ctx);
+}
+
+static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
+{
+ emit_testflag(ctx);
+
+ r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, fc_sp);
+
+ pops(ctx, 1);
+}
+#endif
+
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+ emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+
+ fc_pushlevel(ctx, FC_IF);
+
+ callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+ return 0;
+}
+
+static int tgsi_else(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, ctx->bc->fc_sp);
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
+ return 0;
+}
+
+static int tgsi_endif(struct r600_shader_ctx *ctx)
+{
+ pops(ctx, 1);
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
+ R600_ERR("if/endif unbalanced in shader\n");
+ return -1;
+ }
+
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
+ } else {
+ ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
+ }
+ fc_poplevel(ctx);
+
+ callstack_decrease_current(ctx, FC_PUSH_VPM);
+ return 0;
+}
+
+static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
+
+ fc_pushlevel(ctx, FC_LOOP);
+
+ /* check stack depth */
+ callstack_check_depth(ctx, FC_LOOP, 0);
+ return 0;
+}
+
+static int tgsi_endloop(struct r600_shader_ctx *ctx)
+{
+ int i;
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
+
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+ R600_ERR("loop/endloop in shader code are not paired.\n");
+ return -EINVAL;
+ }
+
+ /* fixup loop pointers - from r600isa
+ LOOP END points to CF after LOOP START,
+ LOOP START point to CF after LOOP END
+ BRK/CONT point to LOOP END CF
+ */
+ ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+
+ for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+ }
+ /* TODO add LOOPRET support */
+ fc_poplevel(ctx);
+ callstack_decrease_current(ctx, FC_LOOP);
+ return 0;
+}
+
+static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
+{
+ unsigned int fscp;
+
+ for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+ {
+ if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ break;
+ }
+
+ if (fscp == 0) {
+ R600_ERR("Break not inside loop/endloop pair\n");
+ return -EINVAL;
+ }
+
+ r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, fscp);
+
+ pops(ctx, 1);
+ callstack_check_depth(ctx, FC_PUSH_VPM, 1);
+ return 0;
+}
+
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
{TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
{TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
{TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
{TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
{TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
- {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
{TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
{TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
- {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
- {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
+ {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
{TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
@@ -1232,38 +2432,38 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
/* gap */
{22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
{TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
{TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
+ {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
/* gap */
{32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
+ {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
+ {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
+ {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
+ {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
{TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
{TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
+ {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
+ {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
+ {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex},
+ {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex},
+ {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1274,21 +2474,21 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
- {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
+ {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
+ {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
+ {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
{TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
+ {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
/* gap */
{75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
+ {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
/* gap */
{79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1297,7 +2497,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1308,12 +2508,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
{TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
{TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 2ee7780ead..7c722c07cb 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -42,6 +42,7 @@ struct r600_shader {
struct r600_shader_io input[32];
struct r600_shader_io output[32];
enum radeon_family family;
+ boolean uses_kill;
};
#endif
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 002660c654..fa7a31742a 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -206,6 +206,26 @@
#define S_SQ_ALU_WORD0_SRC0_SEL(x) (((x) & 0x1FF) << 0)
#define G_SQ_ALU_WORD0_SRC0_SEL(x) (((x) >> 0) & 0x1FF)
#define C_SQ_ALU_WORD0_SRC0_SEL 0xFFFFFE00
+/*
+ * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
+ * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
+ * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
+ * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
+ * 248 SQ_ALU_SRC_0: special constant 0.0.
+ * 249 SQ_ALU_SRC_1: special constant 1.0 float.
+ * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
+ * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
+ * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
+ * 253 SQ_ALU_SRC_LITERAL: literal constant.
+ * 254 SQ_ALU_SRC_PV: previous vector result.
+ * 255 SQ_ALU_SRC_PS: previous scalar result.
+ */
+#define V_SQ_ALU_SRC_0 0x000000F8
+#define V_SQ_ALU_SRC_1 0x000000F9
+#define V_SQ_ALU_SRC_1_INT 0x000000FA
+#define V_SQ_ALU_SRC_M_1_INT 0x000000FB
+#define V_SQ_ALU_SRC_0_5 0x000000FC
+#define V_SQ_ALU_SRC_LITERAL 0x000000FD
#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9)
#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1)
#define C_SQ_ALU_WORD0_SRC0_REL 0xFFFFFDFF
@@ -583,4 +603,11 @@
#define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7)
#define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF
+#define V_SQ_CF_COND_ACTIVE 0x00
+#define V_SQ_CF_COND_FALSE 0x01
+#define V_SQ_CF_COND_BOOL 0x02
+#define V_SQ_CF_COND_NOT_BOOL 0x03
+
+#define V_SQ_REL_ABSOLUTE 0
+#define V_SQ_REL_RELATIVE 1
#endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3efd409ae0..66cab7d7a6 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -34,6 +34,17 @@
#include "r600d.h"
#include "r600_state_inlines.h"
+static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state);
+static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state);
+static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_clip_state *state);
+static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_sampler_state *state, unsigned id);
+static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, const struct pipe_sampler_view *view, unsigned id);
+static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state, int cb);
+static void r600_db(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state);
+
+
static void *r600_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
@@ -81,11 +92,12 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
struct r600_context *rctx = r600_context(ctx);
struct r600_context_state *rstate;
- rstate = r600_context_state(rctx, pipe_sampler_type, state);
+ rstate = r600_context_state(rctx, pipe_sampler_view_type, state);
pipe_reference(NULL, &texture->reference);
rstate->state.sampler_view.texture = texture;
rstate->state.sampler_view.reference.count = 1;
rstate->state.sampler_view.context = ctx;
+ r600_resource(ctx, &rstate->rstate[0], &rstate->state.sampler_view, 0);
return &rstate->state.sampler_view;
}
@@ -223,12 +235,24 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->ps_nsampler; i++) {
- rctx->ps_sampler[i] = r600_context_state_decref(rctx->ps_sampler[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)states[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)states[i];
- rctx->ps_sampler[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, R600_SHADER_PS);
+ rctx->ps_sampler[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->ps_nsampler = count;
}
@@ -240,12 +264,24 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->vs_nsampler; i++) {
- rctx->vs_sampler[i] = r600_context_state_decref(rctx->vs_sampler[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)states[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)states[i];
- rctx->vs_sampler[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, R600_SHADER_VS);
+ rctx->vs_sampler[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->vs_nsampler = count;
}
@@ -268,6 +304,13 @@ static void r600_set_blend_color(struct pipe_context *ctx,
static void r600_set_clip_state(struct pipe_context *ctx,
const struct pipe_clip_state *state)
{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_context_state *rstate;
+
+ rstate = r600_context_state(rctx, pipe_clip_type, state);
+ r600_bind_state(ctx, rstate);
+ /* refcount is taken care of this */
+ r600_delete_state(ctx, rstate);
}
static void r600_set_constant_buffer(struct pipe_context *ctx,
@@ -276,19 +319,21 @@ static void r600_set_constant_buffer(struct pipe_context *ctx,
{
struct r600_screen *rscreen = r600_screen(ctx->screen);
struct r600_context *rctx = r600_context(ctx);
- unsigned nconstant = 0, i, type, id;
- struct radeon_state *rstate;
+ unsigned nconstant = 0, i, type, shader_class;
+ struct radeon_state *rstate, *rstates;
struct pipe_transfer *transfer;
u32 *ptr;
+ type = R600_STATE_CONSTANT;
+
switch (shader) {
case PIPE_SHADER_VERTEX:
- id = R600_VS_CONSTANT;
- type = R600_VS_CONSTANT_TYPE;
+ shader_class = R600_SHADER_VS;
+ rstates = rctx->vs_constant;
break;
case PIPE_SHADER_FRAGMENT:
- id = R600_PS_CONSTANT;
- type = R600_PS_CONSTANT_TYPE;
+ shader_class = R600_SHADER_PS;
+ rstates = rctx->ps_constant;
break;
default:
R600_ERR("unsupported %d\n", shader);
@@ -300,17 +345,15 @@ static void r600_set_constant_buffer(struct pipe_context *ctx,
if (ptr == NULL)
return;
for (i = 0; i < nconstant; i++) {
- rstate = radeon_state(rscreen->rw, type, id + i);
- if (rstate == NULL)
- return;
+ rstate = &rstates[i];
+ radeon_state_init(rstate, rscreen->rw, type, i, shader_class);
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0];
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1];
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2];
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3];
if (radeon_state_pm4(rstate))
return;
- if (radeon_draw_set_new(rctx->draw, rstate))
- return;
+ radeon_draw_bind(&rctx->draw, rstate);
}
pipe_buffer_unmap(ctx, buffer, transfer);
}
@@ -324,12 +367,24 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->ps_nsampler_view; i++) {
- rctx->ps_sampler_view[i] = r600_context_state_decref(rctx->ps_sampler_view[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)views[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)views[i];
- rctx->ps_sampler_view[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, R600_SHADER_PS);
+ rctx->ps_sampler_view[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->ps_nsampler_view = count;
}
@@ -342,12 +397,24 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->vs_nsampler_view; i++) {
- rctx->vs_sampler_view[i] = r600_context_state_decref(rctx->vs_sampler_view[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)views[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)views[i];
- rctx->vs_sampler_view[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, R600_SHADER_VS);
+ rctx->vs_sampler_view[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->vs_nsampler_view = count;
}
@@ -360,6 +427,12 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_framebuffer_type, state);
r600_bind_state(ctx, rstate);
+ for (int i = 0; i < state->nr_cbufs; i++) {
+ r600_cb(rctx, &rstate->rstate[i+1], state, i);
+ }
+ if (state->zsbuf) {
+ r600_db(rctx, &rstate->rstate[0], state);
+ }
}
static void r600_set_polygon_stipple(struct pipe_context *ctx,
@@ -525,7 +598,7 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state *
R600_ERR("invalid type %d\n", rstate->type);
return NULL;
}
- radeon_state_decref(rstate->rstate);
+ radeon_state_fini(&rstate->rstate[0]);
FREE(rstate);
return NULL;
}
@@ -558,6 +631,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_viewport_type:
rstate->state.viewport = (*states).viewport;
+ r600_viewport(rctx, &rstate->rstate[0], &rstate->state.viewport);
break;
case pipe_depth_type:
rstate->state.depth = (*states).depth;
@@ -573,6 +647,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_clip_type:
rstate->state.clip = (*states).clip;
+ r600_ucp(rctx, &rstate->rstate[0], &rstate->state.clip);
break;
case pipe_stencil_type:
rstate->state.stencil = (*states).stencil;
@@ -585,6 +660,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_blend_type:
rstate->state.blend = (*states).blend;
+ r600_blend(rctx, &rstate->rstate[0], &rstate->state.blend);
break;
case pipe_stencil_ref_type:
rstate->state.stencil_ref = (*states).stencil_ref;
@@ -599,6 +675,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_sampler_type:
rstate->state.sampler = (*states).sampler;
+ r600_sampler(rctx, &rstate->rstate[0], &rstate->state.sampler, 0);
break;
default:
R600_ERR("invalid type %d\n", rstate->type);
@@ -608,16 +685,12 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
return rstate;
}
-static struct radeon_state *r600_blend(struct r600_context *rctx)
+static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state)
{
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
- const struct pipe_blend_state *state = &rctx->blend->state.blend;
int i;
- rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0);
rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]);
rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]);
rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]);
@@ -661,29 +734,38 @@ static struct radeon_state *r600_blend(struct r600_context *rctx)
rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc;
}
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
+ radeon_state_pm4(rstate);
+}
+
+static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_clip_state *state)
+{
+ struct r600_screen *rscreen = rctx->screen;
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0);
+
+ for (int i = 0; i < state->nr; i++) {
+ rstate->states[i * 4 + 0] = fui(state->ucp[i][0]);
+ rstate->states[i * 4 + 1] = fui(state->ucp[i][1]);
+ rstate->states[i * 4 + 2] = fui(state->ucp[i][2]);
+ rstate->states[i * 4 + 3] = fui(state->ucp[i][3]);
}
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
+static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state, int cb)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
- struct radeon_state *rstate;
- const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
unsigned level = state->cbufs[cb]->level;
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype;
const struct util_format_description *desc;
- rstate = radeon_state(rscreen->rw, R600_CB0_TYPE + cb, R600_CB0 + cb);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0);
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
rbuffer = &rtex->resource;
rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
@@ -710,7 +792,7 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
S_0280A0_SOURCE_FORMAT(1) |
S_0280A0_NUMBER_TYPE(ntype);
- rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000;
+ rstate->states[R600_CB0__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8;
rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info;
rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
S_028060_SLICE_TILE_MAX(slice);
@@ -718,32 +800,29 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000;
rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000;
rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_db(struct r600_context *rctx)
+static void r600_db(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
- struct radeon_state *rstate;
- const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
unsigned level;
unsigned pitch, slice, format;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0);
if (state->zsbuf == NULL)
- return NULL;
-
- rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB);
- if (rstate == NULL)
- return NULL;
+ return;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
+ rtex->tilled = 1;
+ rtex->array_mode = 2;
+ rtex->tile_type = 1;
+ rtex->depth = 1;
rbuffer = &rtex->resource;
+
rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
rstate->nbo = 1;
rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
@@ -751,31 +830,30 @@ static struct radeon_state *r600_db(struct r600_context *rctx)
pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
- rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000;
- rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 |
+ rstate->states[R600_DB__DB_DEPTH_BASE] = state->zsbuf->offset >> 8;
+ rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) |
S_028010_FORMAT(format);
rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1;
rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
S_028000_SLICE_TILE_MAX(slice);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
+static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate)
{
const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer;
const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
+ const struct pipe_clip_state *clip = NULL;
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
float offset_units = 0, offset_scale = 0;
char depth = 0;
unsigned offset_db_fmt_cntl = 0;
unsigned tmp;
unsigned prov_vtx = 1;
+
+ if (rctx->clip)
+ clip = &rctx->clip->state.clip;
if (fb->zsbuf) {
offset_units = state->offset_units;
offset_scale = state->offset_scale * 12.0f;
@@ -796,7 +874,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
break;
default:
R600_ERR("unsupported %d\n", fb->zsbuf->texture->format);
- return NULL;
+ return;
}
}
offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
@@ -805,9 +883,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
prov_vtx = 0;
rctx->flat_shade = state->flatshade;
- rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0);
rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001;
if (state->sprite_coord_enable) {
rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
@@ -821,7 +897,12 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
S_0286D4_PNT_SPRITE_TOP_1(1);
}
}
- rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000;
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0;
+ if (clip) {
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1);
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp);
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp);
+ }
rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] =
S_028814_PROVOKING_VTX_LAST(prov_vtx) |
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
@@ -835,7 +916,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000;
/* point size 12.4 fixed point */
- tmp = (unsigned)(state->point_size * 8.0 / 2.0);
+ tmp = (unsigned)(state->point_size * 8.0);
rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp);
rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000;
rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008;
@@ -852,19 +933,14 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units);
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale);
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_scissor(struct r600_context *rctx)
+static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate)
{
const struct pipe_scissor_state *state = &rctx->scissor->state.scissor;
const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
unsigned minx, maxx, miny, maxy;
u32 tl, br;
@@ -881,9 +957,7 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx)
}
tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy);
- rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0);
rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl;
rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br;
rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000;
@@ -903,22 +977,14 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx)
rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br;
rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl;
rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_viewport(struct r600_context *rctx)
+static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state)
{
- const struct pipe_viewport_state *state = &rctx->viewport->state.viewport;
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
- rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0);
rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000;
rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000;
rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]);
@@ -928,29 +994,28 @@ static struct radeon_state *r600_viewport(struct r600_context *rctx)
rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]);
rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]);
rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_dsa(struct r600_context *rctx)
+static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate)
{
const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa;
const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref;
struct r600_screen *rscreen = rctx->screen;
unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
unsigned stencil_ref_mask, stencil_ref_mask_bf;
- struct r600_shader *rshader = &rctx->ps_shader->shader;
- struct radeon_state *rstate;
+ struct r600_shader *rshader;
int i;
- rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA);
- if (rstate == NULL)
- return NULL;
+ if (rctx->ps_shader == NULL) {
+ return;
+ }
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0);
db_shader_control = 0x210;
+ rshader = &rctx->ps_shader->shader;
+ if (rshader->uses_kill)
+ db_shader_control |= (1 << 6);
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
db_shader_control |= 1;
@@ -1008,11 +1073,7 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx)
rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000;
rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000;
rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
static inline unsigned r600_tex_wrap(unsigned wrap)
@@ -1090,16 +1151,12 @@ static INLINE u32 S_FIXED(float value, u32 frac_bits)
return value * (1 << frac_bits);
}
-static struct radeon_state *r600_sampler(struct r600_context *rctx,
- const struct pipe_sampler_state *state,
- unsigned id)
+static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_sampler_state *state, unsigned id)
{
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
- rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS);
rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] =
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
@@ -1114,11 +1171,7 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx,
S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6));
rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
static inline unsigned r600_tex_swizzle(unsigned swizzle)
@@ -1160,6 +1213,7 @@ static inline unsigned r600_tex_dim(unsigned dim)
case PIPE_TEXTURE_1D:
return V_038000_SQ_TEX_DIM_1D;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
return V_038000_SQ_TEX_DIM_2D;
case PIPE_TEXTURE_3D:
return V_038000_SQ_TEX_DIM_3D;
@@ -1168,19 +1222,20 @@ static inline unsigned r600_tex_dim(unsigned dim)
}
}
-static struct radeon_state *r600_resource(struct r600_context *rctx,
- const struct pipe_sampler_view *view,
- unsigned id)
+static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate,
+ const struct pipe_sampler_view *view, unsigned id)
{
+ struct r600_context *rctx = r600_context(ctx);
struct r600_screen *rscreen = rctx->screen;
const struct util_format_description *desc;
struct r600_resource_texture *tmp;
struct r600_resource *rbuffer;
- struct radeon_state *rstate;
unsigned format;
- uint32_t word4 = 0, yuv_format = 0;
- unsigned char swizzle[4];
+ uint32_t word4 = 0, yuv_format = 0, pitch = 0;
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ int r;
+ rstate->cpm4 = 0;
swizzle[0] = view->swizzle_r;
swizzle[1] = view->swizzle_g;
swizzle[2] = view->swizzle_b;
@@ -1188,37 +1243,49 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
format = r600_translate_texformat(view->texture->format,
swizzle,
&word4, &yuv_format);
- if (format == ~0)
- return NULL;
+ if (format == ~0) {
+ return;
+ }
desc = util_format_description(view->texture->format);
if (desc == NULL) {
R600_ERR("unknow format %d\n", view->texture->format);
- return NULL;
- }
- rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id);
- if (rstate == NULL) {
- return NULL;
+ return;
}
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS);
tmp = (struct r600_resource_texture*)view->texture;
rbuffer = &tmp->resource;
- rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
- rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ if (tmp->depth) {
+ r = r600_texture_from_depth(ctx, tmp, view->first_level);
+ if (r) {
+ return;
+ }
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, tmp->uncompressed);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, tmp->uncompressed);
+ } else {
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ }
rstate->nbo = 2;
rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[1] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[3] = RADEON_GEM_DOMAIN_GTT;
+ pitch = (tmp->pitch[0] / tmp->bpt);
+ pitch = (pitch + 0x7) & ~0x7;
+
/* FIXME properly handle first level != 0 */
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] =
S_038000_DIM(r600_tex_dim(view->texture->target)) |
- S_038000_PITCH(((tmp->pitch[0] / tmp->bpt) / 8) - 1) |
+ S_038000_TILE_MODE(array_mode) |
+ S_038000_TILE_TYPE(tile_type) |
+ S_038000_PITCH((pitch / 8) - 1) |
S_038000_TEX_WIDTH(view->texture->width0 - 1);
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] =
S_038004_TEX_HEIGHT(view->texture->height0 - 1) |
S_038004_TEX_DEPTH(view->texture->depth0 - 1) |
S_038004_DATA_FORMAT(format);
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0;
+ rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] =
word4 |
@@ -1232,17 +1299,12 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
S_038014_LAST_ARRAY(0);
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] =
S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
+static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate)
{
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
const struct pipe_blend_state *pbs = &rctx->blend->state.blend;
int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
uint32_t color_control, target_mask, shader_mask;
@@ -1257,7 +1319,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
}
if (pbs->logicop_enable) {
- color_control |= (pbs->logicop_func) << 16;
+ color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20);
} else {
color_control |= (0xcc << 16);
}
@@ -1277,7 +1339,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
target_mask |= (pbs->rt[0].colormask << (4 * i));
}
}
- rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL);
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0);
rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask;
rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask;
rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control;
@@ -1289,115 +1351,51 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF;
rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF;
rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-int r600_context_hw_states(struct r600_context *rctx)
+int r600_context_hw_states(struct pipe_context *ctx)
{
+ struct r600_context *rctx = r600_context(ctx);
unsigned i;
- int r;
- int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
- /* free previous TODO determine what need to be updated, what
- * doesn't
- */
- //radeon_state_decref(rctx->hw_states.config);
- rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl);
- rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db);
- rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer);
- rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor);
- rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa);
- rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend);
- rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport);
- for (i = 0; i < 8; i++) {
- rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]);
+ /* build new states */
+ r600_rasterizer(rctx, &rctx->hw_states.rasterizer);
+ r600_scissor(rctx, &rctx->hw_states.scissor);
+ r600_dsa(rctx, &rctx->hw_states.dsa);
+ r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl);
+
+ /* bind states */
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.rasterizer);
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.scissor);
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.dsa);
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_cntl);
+
+ radeon_draw_bind(&rctx->draw, &rctx->config);
+
+ if (rctx->viewport) {
+ radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]);
}
- for (i = 0; i < rctx->hw_states.ps_nresource; i++) {
- radeon_state_decref(rctx->hw_states.ps_resource[i]);
- rctx->hw_states.ps_resource[i] = NULL;
+ if (rctx->blend) {
+ radeon_draw_bind(&rctx->draw, &rctx->blend->rstate[0]);
}
- rctx->hw_states.ps_nresource = 0;
- for (i = 0; i < rctx->hw_states.ps_nsampler; i++) {
- radeon_state_decref(rctx->hw_states.ps_sampler[i]);
- rctx->hw_states.ps_sampler[i] = NULL;
+ if (rctx->clip) {
+ radeon_draw_bind(&rctx->draw, &rctx->clip->rstate[0]);
}
- rctx->hw_states.ps_nsampler = 0;
-
- /* build new states */
- rctx->hw_states.rasterizer = r600_rasterizer(rctx);
- rctx->hw_states.scissor = r600_scissor(rctx);
- rctx->hw_states.dsa = r600_dsa(rctx);
- rctx->hw_states.blend = r600_blend(rctx);
- rctx->hw_states.viewport = r600_viewport(rctx);
- for (i = 0; i < nr_cbufs; i++) {
- rctx->hw_states.cb[i] = r600_cb(rctx, i);
+ for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) {
+ radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[i+1]);
+ }
+ if (rctx->framebuffer->state.framebuffer.zsbuf) {
+ radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[0]);
}
- rctx->hw_states.db = r600_db(rctx);
- rctx->hw_states.cb_cntl = r600_cb_cntl(rctx);
-
for (i = 0; i < rctx->ps_nsampler; i++) {
if (rctx->ps_sampler[i]) {
- rctx->hw_states.ps_sampler[i] = r600_sampler(rctx,
- &rctx->ps_sampler[i]->state.sampler,
- R600_PS_SAMPLER + i);
+ radeon_draw_bind(&rctx->draw, rctx->ps_sampler[i]);
}
}
- rctx->hw_states.ps_nsampler = rctx->ps_nsampler;
for (i = 0; i < rctx->ps_nsampler_view; i++) {
if (rctx->ps_sampler_view[i]) {
- rctx->hw_states.ps_resource[i] = r600_resource(rctx,
- &rctx->ps_sampler_view[i]->state.sampler_view,
- R600_PS_RESOURCE + i);
- }
- }
- rctx->hw_states.ps_nresource = rctx->ps_nsampler_view;
-
- /* bind states */
- r = radeon_draw_set(rctx->draw, rctx->hw_states.db);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.blend);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport);
- if (r)
- return r;
- for (i = 0; i < nr_cbufs; i++) {
- r = radeon_draw_set(rctx->draw, rctx->hw_states.cb[i]);
- if (r)
- return r;
- }
- r = radeon_draw_set(rctx->draw, rctx->hw_states.config);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl);
- if (r)
- return r;
- for (i = 0; i < rctx->hw_states.ps_nresource; i++) {
- if (rctx->hw_states.ps_resource[i]) {
- r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]);
- if (r)
- return r;
- }
- }
- for (i = 0; i < rctx->hw_states.ps_nsampler; i++) {
- if (rctx->hw_states.ps_sampler[i]) {
- r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]);
- if (r)
- return r;
+ radeon_draw_bind(&rctx->draw, rctx->ps_sampler_view[i]);
}
}
return 0;
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index f93c20da35..84866825aa 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -252,6 +252,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
return V_0280A0_COLOR_8_8_8_8;
case PIPE_FORMAT_R10G10B10A2_UNORM:
@@ -262,7 +263,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return V_0280A0_COLOR_24_8;
+ return V_0280A0_COLOR_8_24;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return V_0280A0_COLOR_32_FLOAT;
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
@@ -275,6 +279,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32_FLOAT:
+ return V_0280A0_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return V_0280A0_COLOR_32_32_32_32_FLOAT;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 30d79ebdd6..b6698e3885 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -24,6 +24,7 @@
* Jerome Glisse
* Corbin Simpson
*/
+#include <errno.h>
#include <pipe/p_screen.h>
#include <util/u_format.h>
#include <util/u_math.h>
@@ -33,10 +34,26 @@
#include "r600_screen.h"
#include "r600_context.h"
#include "r600_resource.h"
+#include "r600_state_inlines.h"
#include "r600d.h"
extern struct u_resource_vtbl r600_texture_vtbl;
+/* Copy from a tiled texture to a detiled one. */
+static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+ struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
+ struct pipe_resource *texture = transfer->resource;
+ struct pipe_subresource subdst;
+
+ subdst.face = 0;
+ subdst.level = 0;
+ ctx->resource_copy_region(ctx, rtransfer->linear_texture,
+ subdst, 0, 0, 0, texture, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ transfer->box.width, transfer->box.height);
+}
+
static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex,
unsigned level, unsigned zslice,
unsigned face)
@@ -65,7 +82,9 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource
for (i = 0, offset = 0; i <= ptex->last_level; i++) {
w = u_minify(ptex->width0, i);
h = u_minify(ptex->height0, i);
+ h = util_next_power_of_two(h);
pitch = util_format_get_stride(ptex->format, align(w, 64));
+ pitch = align(pitch, 256);
layer_size = pitch * h;
if (ptex->target == PIPE_TEXTURE_CUBE)
size = layer_size * 6;
@@ -74,6 +93,8 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
rtex->pitch[i] = pitch;
+ rtex->width[i] = w;
+ rtex->height[i] = h;
offset += size;
}
rtex->size = offset;
@@ -104,10 +125,22 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
FREE(rtex);
return NULL;
}
-
return &resource->base.b;
}
+static void r600_texture_destroy_state(struct pipe_resource *ptexture)
+{
+ struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture;
+
+ for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) {
+ radeon_state_fini(&rtexture->scissor[i]);
+ radeon_state_fini(&rtexture->db[i]);
+ for (int j = 0; j < 8; j++) {
+ radeon_state_fini(&rtexture->cb[j][i]);
+ }
+ }
+}
+
static void r600_texture_destroy(struct pipe_screen *screen,
struct pipe_resource *ptex)
{
@@ -118,6 +151,10 @@ static void r600_texture_destroy(struct pipe_screen *screen,
if (resource->bo) {
radeon_bo_decref(rscreen->rw, resource->bo);
}
+ if (rtex->uncompressed) {
+ radeon_bo_decref(rscreen->rw, rtex->uncompressed);
+ }
+ r600_texture_destroy_state(ptex);
FREE(rtex);
}
@@ -168,7 +205,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
}
/* Support only 2D textures without mipmaps */
- if (templ->target != PIPE_TEXTURE_2D || templ->depth0 != 1 || templ->last_level != 0)
+ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
+ templ->depth0 != 1 || templ->last_level != 0)
return NULL;
rtex = CALLOC_STRUCT(r600_resource_texture);
@@ -181,9 +219,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
pipe_reference_init(&resource->base.b.reference, 1);
resource->base.b.screen = screen;
resource->bo = bo;
+ rtex->depth = 0;
rtex->pitch_override = whandle->stride;
rtex->bpt = util_format_get_blocksize(templ->format);
rtex->pitch[0] = whandle->stride;
+ rtex->width[0] = templ->width0;
+ rtex->height[0] = templ->height0;
rtex->offset[0] = 0;
rtex->size = align(rtex->pitch[0] * templ->height0, 64);
@@ -205,6 +246,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
const struct pipe_box *box)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
+ struct pipe_resource resource;
struct r600_transfer *trans;
trans = CALLOC_STRUCT(r600_transfer);
@@ -216,48 +258,117 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
trans->transfer.box = *box;
trans->transfer.stride = rtex->pitch[sr.level];
trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
+ if (rtex->tilled && !rtex->depth) {
+ resource.target = PIPE_TEXTURE_2D;
+ resource.format = texture->format;
+ resource.width0 = box->width;
+ resource.height0 = box->height;
+ resource.depth0 = 0;
+ resource.last_level = 0;
+ resource.nr_samples = 0;
+ resource.usage = PIPE_USAGE_DYNAMIC;
+ resource.bind = 0;
+ resource.flags = 0;
+ /* For texture reading, the temporary (detiled) texture is used as
+ * a render target when blitting from a tiled texture. */
+ if (usage & PIPE_TRANSFER_READ) {
+ resource.bind |= PIPE_BIND_RENDER_TARGET;
+ }
+ /* For texture writing, the temporary texture is used as a sampler
+ * when blitting into a tiled texture. */
+ if (usage & PIPE_TRANSFER_WRITE) {
+ resource.bind |= PIPE_BIND_SAMPLER_VIEW;
+ }
+ /* Create the temporary texture. */
+ trans->linear_texture = ctx->screen->resource_create(ctx->screen, &resource);
+ if (trans->linear_texture == NULL) {
+ R600_ERR("failed to create temporary texture to hold untiled copy\n");
+ pipe_resource_reference(&trans->transfer.resource, NULL);
+ FREE(trans);
+ return NULL;
+ }
+ if (usage & PIPE_TRANSFER_READ) {
+ /* We cannot map a tiled texture directly because the data is
+ * in a different order, therefore we do detiling using a blit. */
+ r600_copy_from_tiled_texture(ctx, trans);
+ /* Always referenced in the blit. */
+ ctx->flush(ctx, 0, NULL);
+ }
+ }
return &trans->transfer;
}
void r600_texture_transfer_destroy(struct pipe_context *ctx,
- struct pipe_transfer *trans)
+ struct pipe_transfer *transfer)
{
- pipe_resource_reference(&trans->resource, NULL);
- FREE(trans);
+ struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+
+ if (rtransfer->linear_texture) {
+ pipe_resource_reference(&rtransfer->linear_texture, NULL);
+ }
+ pipe_resource_reference(&transfer->resource, NULL);
+ FREE(transfer);
}
void* r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct r600_resource *resource;
+ struct radeon_bo *bo;
enum pipe_format format = transfer->resource->format;
struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_resource_texture *rtex;
+ unsigned long offset = 0;
char *map;
+ int r;
r600_flush(ctx, 0, NULL);
-
- resource = (struct r600_resource *)transfer->resource;
- if (radeon_bo_map(rscreen->rw, resource->bo)) {
+ if (rtransfer->linear_texture) {
+ bo = ((struct r600_resource *)rtransfer->linear_texture)->bo;
+ } else {
+ rtex = (struct r600_resource_texture*)transfer->resource;
+ if (rtex->depth) {
+ r = r600_texture_from_depth(ctx, rtex, transfer->sr.level);
+ if (r) {
+ return NULL;
+ }
+ r600_flush(ctx, 0, NULL);
+ bo = rtex->uncompressed;
+ } else {
+ bo = ((struct r600_resource *)transfer->resource)->bo;
+ }
+ offset = rtransfer->offset +
+ transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
return NULL;
}
- radeon_bo_wait(rscreen->rw, resource->bo);
-
- map = resource->bo->data;
+ radeon_bo_wait(rscreen->rw, bo);
- return map + rtransfer->offset +
- transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
- transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ map = bo->data;
+ return map + offset;
}
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
+ struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_resource *resource;
-
- resource = (struct r600_resource *)transfer->resource;
- radeon_bo_unmap(rscreen->rw, resource->bo);
+ struct r600_resource_texture *rtex;
+ struct radeon_bo *bo;
+
+ if (rtransfer->linear_texture) {
+ bo = ((struct r600_resource *)rtransfer->linear_texture)->bo;
+ } else {
+ rtex = (struct r600_resource_texture*)transfer->resource;
+ if (rtex->depth) {
+ bo = rtex->uncompressed;
+ } else {
+ bo = ((struct r600_resource *)transfer->resource)->bo;
+ }
+ }
+ radeon_bo_unmap(rscreen->rw, bo);
}
struct u_resource_vtbl r600_texture_vtbl =
@@ -280,51 +391,51 @@ void r600_init_screen_texture_functions(struct pipe_screen *screen)
}
static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
- const unsigned char *swizzle_view)
+ const unsigned char *swizzle_view)
{
- unsigned i;
- unsigned char swizzle[4];
- unsigned result = 0;
- const uint32_t swizzle_shift[4] = {
- 16, 19, 22, 25,
- };
- const uint32_t swizzle_bit[4] = {
- 0, 1, 2, 3,
- };
-
- if (swizzle_view) {
- /* Combine two sets of swizzles. */
- for (i = 0; i < 4; i++) {
- swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
- swizzle_format[swizzle_view[i]] : swizzle_view[i];
- }
- } else {
- memcpy(swizzle, swizzle_format, 4);
- }
-
- /* Get swizzle. */
- for (i = 0; i < 4; i++) {
- switch (swizzle[i]) {
- case UTIL_FORMAT_SWIZZLE_Y:
- result |= swizzle_bit[1] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_Z:
- result |= swizzle_bit[2] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_W:
- result |= swizzle_bit[3] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_0:
- result |= V_038010_SQ_SEL_0 << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_1:
- result |= V_038010_SQ_SEL_1 << swizzle_shift[i];
- break;
- default: /* UTIL_FORMAT_SWIZZLE_X */
- result |= swizzle_bit[0] << swizzle_shift[i];
- }
- }
- return result;
+ unsigned i;
+ unsigned char swizzle[4];
+ unsigned result = 0;
+ const uint32_t swizzle_shift[4] = {
+ 16, 19, 22, 25,
+ };
+ const uint32_t swizzle_bit[4] = {
+ 0, 1, 2, 3,
+ };
+
+ if (swizzle_view) {
+ /* Combine two sets of swizzles. */
+ for (i = 0; i < 4; i++) {
+ swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
+ swizzle_format[swizzle_view[i]] : swizzle_view[i];
+ }
+ } else {
+ memcpy(swizzle, swizzle_format, 4);
+ }
+
+ /* Get swizzle. */
+ for (i = 0; i < 4; i++) {
+ switch (swizzle[i]) {
+ case UTIL_FORMAT_SWIZZLE_Y:
+ result |= swizzle_bit[1] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_Z:
+ result |= swizzle_bit[2] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_W:
+ result |= swizzle_bit[3] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ result |= V_038010_SQ_SEL_0 << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ result |= V_038010_SQ_SEL_1 << swizzle_shift[i];
+ break;
+ default: /* UTIL_FORMAT_SWIZZLE_X */
+ result |= swizzle_bit[0] << swizzle_shift[i];
+ }
+ }
+ return result;
}
/* texture format translate */
@@ -344,19 +455,21 @@ uint32_t r600_translate_texformat(enum pipe_format format,
};
desc = util_format_description(format);
+ word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
+
/* Colorspace (return non-RGB formats directly). */
switch (desc->colorspace) {
/* Depth stencil formats */
case UTIL_FORMAT_COLORSPACE_ZS:
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
- result = V_028010_DEPTH_16;
+ result = V_0280A0_COLOR_16;
goto out_word4;
case PIPE_FORMAT_Z24X8_UNORM:
- result = V_028010_DEPTH_X8_24;
+ result = V_0280A0_COLOR_8_24;
goto out_word4;
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- result = V_028010_DEPTH_8_24;
+ result = V_0280A0_COLOR_8_24;
goto out_word4;
default:
goto out_unknown;
@@ -382,8 +495,6 @@ uint32_t r600_translate_texformat(enum pipe_format format,
break;
}
- word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
-
/* S3TC formats. TODO */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
goto out_unknown;
@@ -519,9 +630,221 @@ out_word4:
*word4_p = word4;
if (yuv_format_p)
*yuv_format_p = yuv_format;
-// fprintf(stderr,"returning %08x %08x %08x\n", result, word4, yuv_format);
return result;
out_unknown:
// R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format));
return ~0;
}
+
+int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ int r;
+
+ if (!rtexture->depth) {
+ /* This shouldn't happen maybe print a warning */
+ return 0;
+ }
+ if (rtexture->uncompressed && !rtexture->dirty) {
+ /* Uncompressed bo already in good state */
+ return 0;
+ }
+
+ /* allocate uncompressed texture */
+ if (rtexture->uncompressed == NULL) {
+ rtexture->uncompressed = radeon_bo(rscreen->rw, 0, rtexture->size, 4096, NULL);
+ if (rtexture->uncompressed == NULL) {
+ return -ENOMEM;
+ }
+ }
+
+ /* render a rectangle covering whole buffer to uncompress depth */
+ r = r600_blit_uncompress_depth(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+
+ rtexture->dirty = 0;
+ return 0;
+}
+
+static void r600_texture_state_scissor(struct r600_screen *rscreen,
+ struct r600_resource_texture *rtexture,
+ unsigned level)
+{
+ struct radeon_state *rstate = &rtexture->scissor[level];
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0);
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF;
+ rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA;
+ rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = 0x80000000;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned cb, unsigned level)
+{
+ struct radeon_state *rstate;
+ struct r600_resource *rbuffer;
+ unsigned pitch, slice;
+ unsigned color_info;
+ unsigned format, swap, ntype;
+ const struct util_format_description *desc;
+
+ rstate = &rtexture->cb[cb][level];
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0);
+ rbuffer = &rtexture->resource;
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1;
+ slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1;
+ ntype = 0;
+ desc = util_format_description(rbuffer->base.b.format);
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ ntype = V_0280A0_NUMBER_SRGB;
+ format = r600_translate_colorformat(rtexture->resource.base.b.format);
+ swap = r600_translate_colorswap(rtexture->resource.base.b.format);
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed);
+ rstate->bo[2] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed);
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[4] = RADEON_GEM_DOMAIN_GTT;
+ rstate->nbo = 3;
+ color_info = 0;
+ } else {
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[4] = RADEON_GEM_DOMAIN_GTT;
+ rstate->nbo = 3;
+ color_info = S_0280A0_SOURCE_FORMAT(1);
+ }
+ color_info |= S_0280A0_FORMAT(format) |
+ S_0280A0_COMP_SWAP(swap) |
+ S_0280A0_BLEND_CLAMP(1) |
+ S_0280A0_NUMBER_TYPE(ntype);
+ rstate->states[R600_CB0__CB_COLOR0_BASE] = rtexture->offset[level] >> 8;
+ rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info;
+ rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
+ S_028060_SLICE_TILE_MAX(slice);
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_texture_state_db(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct radeon_state *rstate = &rtexture->db[level];
+ struct r600_resource *rbuffer;
+ unsigned pitch, slice, format;
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0);
+ rbuffer = &rtexture->resource;
+ rtexture->tilled = 1;
+ rtexture->array_mode = 2;
+ rtexture->tile_type = 1;
+ rtexture->depth = 1;
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1;
+ slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1;
+ format = r600_translate_dbformat(rbuffer->base.b.format);
+ rstate->states[R600_DB__DB_DEPTH_BASE] = rtexture->offset[level] >> 8;
+ rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtexture->array_mode) |
+ S_028010_FORMAT(format);
+ rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
+ rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (rtexture->height[level] / 8) -1;
+ rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
+ S_028000_SLICE_TILE_MAX(slice);
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->nbo = 1;
+
+ radeon_state_pm4(rstate);
+}
+
+int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->scissor[level].cpm4) {
+ r600_texture_state_scissor(rscreen, rtexture, level);
+ }
+ return 0;
+}
+
+static void r600_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct radeon_state *rstate = &rtexture->viewport[level];
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0);
+
+ /* set states (most default value are 0 and struct already
+ * initialized to 0, thus avoid resetting them)
+ */
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000;
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000;
+ rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F;
+ rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000;
+
+ radeon_state_pm4(rstate);
+}
+
+int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->cb[cb][level].cpm4) {
+ r600_texture_state_cb(rscreen, rtexture, cb, level);
+ }
+ return 0;
+}
+
+int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->db[level].cpm4) {
+ r600_texture_state_db(rscreen, rtexture, level);
+ }
+ return 0;
+}
+
+int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->viewport[level].cpm4) {
+ r600_texture_state_viewport(rscreen, rtexture, level);
+ }
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 53388f822e..7b9a983d53 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -199,6 +199,7 @@
#define V_0280A0_COLOR_16_16_16_16_FLOAT 0x00000020
#define V_0280A0_COLOR_32_32_32_32 0x00000022
#define V_0280A0_COLOR_32_32_32_32_FLOAT 0x00000023
+#define V_0280A0_COLOR_32_32_32_FLOAT 0x00000030
#define S_0280A0_ARRAY_MODE(x) (((x) & 0xF) << 8)
#define G_0280A0_ARRAY_MODE(x) (((x) >> 8) & 0xF)
#define C_0280A0_ARRAY_MODE 0xFFFFF0FF
@@ -1316,4 +1317,11 @@
#define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1)
#define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF
+#define SQ_TEX_INST_LD 0x03
+#define SQ_TEX_INST_GET_GRADIENTS_H 0x7
+#define SQ_TEX_INST_GET_GRADIENTS_V 0x8
+
+#define SQ_TEX_INST_SAMPLE 0x10
+#define SQ_TEX_INST_SAMPLE_L 0x11
+#define SQ_TEX_INST_SAMPLE_C 0x18
#endif
diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h
index 8f00a4895a..aaac8de528 100644
--- a/src/gallium/drivers/r600/radeon.h
+++ b/src/gallium/drivers/r600/radeon.h
@@ -77,6 +77,14 @@ enum radeon_family {
CHIP_LAST,
};
+enum {
+ R600_SHADER_PS = 1,
+ R600_SHADER_VS,
+ R600_SHADER_GS,
+ R600_SHADER_FS,
+ R600_SHADER_MAX = R600_SHADER_FS,
+};
+
enum radeon_family radeon_get_family(struct radeon *rw);
/*
@@ -98,22 +106,23 @@ struct radeon_bo *radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo);
struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo);
int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
+struct radeon_stype_info;
/*
* states functions
*/
struct radeon_state {
struct radeon *radeon;
unsigned refcount;
- unsigned type;
+ struct radeon_stype_info *stype;
+ unsigned state_id;
unsigned id;
+ unsigned shader_index;
unsigned nstates;
- u32 *states;
+ u32 states[64];
unsigned npm4;
unsigned cpm4;
u32 pm4_crc;
- u32 *pm4;
- u32 nimmd;
- u32 *immd;
+ u32 pm4[128];
unsigned nbo;
struct radeon_bo *bo[4];
unsigned nreloc;
@@ -123,38 +132,22 @@ struct radeon_state {
unsigned bo_dirty[4];
};
-struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id);
-struct radeon_state *radeon_state_incref(struct radeon_state *state);
-struct radeon_state *radeon_state_decref(struct radeon_state *state);
+int radeon_state_init(struct radeon_state *rstate, struct radeon *radeon, u32 type, u32 id, u32 shader_class);
+void radeon_state_fini(struct radeon_state *state);
int radeon_state_pm4(struct radeon_state *state);
+int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type);
/*
* draw functions
*/
struct radeon_draw {
- unsigned refcount;
struct radeon *radeon;
- unsigned nstate;
struct radeon_state **state;
- unsigned cpm4;
};
-struct radeon_draw *radeon_draw(struct radeon *radeon);
-struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw);
-struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw);
-struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw);
-int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state);
-int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state);
-int radeon_draw_check(struct radeon_draw *draw);
-
-struct radeon_ctx *radeon_ctx(struct radeon *radeon);
-struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx);
-struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx);
-int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw);
-int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw);
-int radeon_ctx_pm4(struct radeon_ctx *ctx);
-int radeon_ctx_submit(struct radeon_ctx *ctx);
-void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
+int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon);
+void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state);
+void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state);
/*
* radeon context functions
@@ -169,95 +162,57 @@ struct radeon_cs_reloc {
#pragma pack()
struct radeon_ctx {
- int refcount;
struct radeon *radeon;
u32 *pm4;
- u32 cpm4;
- u32 draw_cpm4;
- unsigned id;
- unsigned next_id;
+ int cdwords;
+ int ndwords;
unsigned nreloc;
struct radeon_cs_reloc *reloc;
unsigned nbo;
struct radeon_bo **bo;
- unsigned ndraw;
- struct radeon_draw *cdraw;
- struct radeon_draw **draw;
- unsigned nstate;
- struct radeon_state **state;
};
+int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon);
+void radeon_ctx_fini(struct radeon_ctx *ctx);
+void radeon_ctx_clear(struct radeon_ctx *ctx);
+int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw);
+int radeon_ctx_submit(struct radeon_ctx *ctx);
+void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
+int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state);
+
/*
* R600/R700
*/
-#define R600_NSTATE 1280
-#define R600_NTYPE 32
+enum r600_stype {
+ R600_STATE_CONFIG,
+ R600_STATE_CB_CNTL,
+ R600_STATE_RASTERIZER,
+ R600_STATE_VIEWPORT,
+ R600_STATE_SCISSOR,
+ R600_STATE_BLEND,
+ R600_STATE_DSA,
+ R600_STATE_SHADER, /* has PS,VS,GS,FS variants */
+ R600_STATE_CONSTANT, /* has PS,VS,GS,FS variants */
+ R600_STATE_RESOURCE, /* has PS,VS,GS,FS variants */
+ R600_STATE_SAMPLER, /* has PS,VS,GS,FS variants */
+ R600_STATE_SAMPLER_BORDER, /* has PS,VS,GS,FS variants */
+ R600_STATE_CB0,
+ R600_STATE_CB1,
+ R600_STATE_CB2,
+ R600_STATE_CB3,
+ R600_STATE_CB4,
+ R600_STATE_CB5,
+ R600_STATE_CB6,
+ R600_STATE_CB7,
+ R600_STATE_DB,
+ R600_STATE_QUERY_BEGIN,
+ R600_STATE_QUERY_END,
+ R600_STATE_UCP,
+ R600_STATE_VGT,
+ R600_STATE_DRAW,
+};
-#define R600_CONFIG 0
-#define R600_CONFIG_TYPE 0
-#define R600_CB_CNTL 1
-#define R600_CB_CNTL_TYPE 1
-#define R600_RASTERIZER 2
-#define R600_RASTERIZER_TYPE 2
-#define R600_VIEWPORT 3
-#define R600_VIEWPORT_TYPE 3
-#define R600_SCISSOR 4
-#define R600_SCISSOR_TYPE 4
-#define R600_BLEND 5
-#define R600_BLEND_TYPE 5
-#define R600_DSA 6
-#define R600_DSA_TYPE 6
-#define R600_VS_SHADER 7
-#define R600_VS_SHADER_TYPE 7
-#define R600_PS_SHADER 8
-#define R600_PS_SHADER_TYPE 8
-#define R600_PS_CONSTANT 9
-#define R600_PS_CONSTANT_TYPE 9
-#define R600_VS_CONSTANT 265
-#define R600_VS_CONSTANT_TYPE 10
-#define R600_PS_RESOURCE 521
-#define R600_PS_RESOURCE_TYPE 11
-#define R600_VS_RESOURCE 681
-#define R600_VS_RESOURCE_TYPE 12
-#define R600_FS_RESOURCE 841
-#define R600_FS_RESOURCE_TYPE 13
-#define R600_GS_RESOURCE 1001
-#define R600_GS_RESOURCE_TYPE 14
-#define R600_PS_SAMPLER 1161
-#define R600_PS_SAMPLER_TYPE 15
-#define R600_VS_SAMPLER 1179
-#define R600_VS_SAMPLER_TYPE 16
-#define R600_GS_SAMPLER 1197
-#define R600_GS_SAMPLER_TYPE 17
-#define R600_PS_SAMPLER_BORDER 1215
-#define R600_PS_SAMPLER_BORDER_TYPE 18
-#define R600_VS_SAMPLER_BORDER 1233
-#define R600_VS_SAMPLER_BORDER_TYPE 19
-#define R600_GS_SAMPLER_BORDER 1251
-#define R600_GS_SAMPLER_BORDER_TYPE 20
-#define R600_CB0 1269
-#define R600_CB0_TYPE 21
-#define R600_CB1 1270
-#define R600_CB1_TYPE 22
-#define R600_CB2 1271
-#define R600_CB2_TYPE 23
-#define R600_CB3 1272
-#define R600_CB3_TYPE 24
-#define R600_CB4 1273
-#define R600_CB4_TYPE 25
-#define R600_CB5 1274
-#define R600_CB5_TYPE 26
-#define R600_CB6 1275
-#define R600_CB6_TYPE 27
-#define R600_CB7 1276
-#define R600_CB7_TYPE 28
-#define R600_DB 1277
-#define R600_DB_TYPE 29
-#define R600_VGT 1278
-#define R600_VGT_TYPE 30
-#define R600_DRAW 1279
-#define R600_DRAW_TYPE 31
/* R600_CONFIG */
#define R600_CONFIG__SQ_CONFIG 0
#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1
@@ -639,9 +594,40 @@ struct radeon_ctx {
/* R600_DRAW */
#define R600_DRAW__VGT_NUM_INDICES 0
#define R600_DRAW__VGT_DMA_BASE_HI 1
-#define R600_DRAW__VGT_DMA_BASE 2
+#define R600_DRAW__VGT_DMA_BASE 2
#define R600_DRAW__VGT_DRAW_INITIATOR 3
-#define R600_DRAW_SIZE 4
-#define R600_DRAW_PM4 128
+#define R600_DRAW_SIZE 4
+#define R600_DRAW_PM4 128
+/* R600_CLIP */
+#define R600_CLIP__PA_CL_UCP_X_0 0
+#define R600_CLIP__PA_CL_UCP_Y_0 1
+#define R600_CLIP__PA_CL_UCP_Z_0 2
+#define R600_CLIP__PA_CL_UCP_W_0 3
+#define R600_CLIP__PA_CL_UCP_X_1 4
+#define R600_CLIP__PA_CL_UCP_Y_1 5
+#define R600_CLIP__PA_CL_UCP_Z_1 6
+#define R600_CLIP__PA_CL_UCP_W_1 7
+#define R600_CLIP__PA_CL_UCP_X_2 8
+#define R600_CLIP__PA_CL_UCP_Y_2 9
+#define R600_CLIP__PA_CL_UCP_Z_2 10
+#define R600_CLIP__PA_CL_UCP_W_2 11
+#define R600_CLIP__PA_CL_UCP_X_3 12
+#define R600_CLIP__PA_CL_UCP_Y_3 13
+#define R600_CLIP__PA_CL_UCP_Z_3 14
+#define R600_CLIP__PA_CL_UCP_W_3 15
+#define R600_CLIP__PA_CL_UCP_X_4 16
+#define R600_CLIP__PA_CL_UCP_Y_4 17
+#define R600_CLIP__PA_CL_UCP_Z_4 18
+#define R600_CLIP__PA_CL_UCP_W_4 19
+#define R600_CLIP__PA_CL_UCP_X_5 20
+#define R600_CLIP__PA_CL_UCP_Y_5 21
+#define R600_CLIP__PA_CL_UCP_Z_5 22
+#define R600_CLIP__PA_CL_UCP_W_5 23
+#define R600_CLIP_SIZE 24
+#define R600_CLIP_PM4 128
+/* R600 QUERY BEGIN/END */
+#define R600_QUERY__OFFSET 0
+#define R600_QUERY_SIZE 1
+#define R600_QUERY_PM4 128
#endif
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 386c8acb8c..01b4ca985d 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -75,14 +75,10 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode)
buf = (void*)((int32_t*)buf + offset);
draw_set_mapped_vertex_buffer(draw, 0, buf);
- draw_set_mapped_element_buffer_range(draw,
- 0, 0,
- start,
- start + count - 1,
- NULL);
+ draw_set_mapped_index_buffer(draw, NULL);
/* draw! */
- draw_arrays_instanced(draw, mode, start, count, 0, 1);
+ draw_arrays(draw, mode, start, count);
/* unmap vertex/index buffers - will cause draw module to flush */
draw_set_mapped_vertex_buffer(draw, 0, NULL);
@@ -138,28 +134,20 @@ softpipe_draw_vbo(struct pipe_context *pipe,
}
/* Map index buffer, if present */
- if (info->indexed && sp->index_buffer.buffer) {
- char *indices = (char *) softpipe_resource(sp->index_buffer.buffer)->data;
- mapped_indices = (void *) (indices + sp->index_buffer.offset);
- }
+ if (info->indexed && sp->index_buffer.buffer)
+ mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data;
- draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
- sp->index_buffer.index_size : 0,
- info->index_bias,
- info->min_index,
- info->max_index,
- mapped_indices);
+ draw_set_mapped_index_buffer(draw, mapped_indices);
/* draw! */
- draw_arrays_instanced(draw, info->mode, info->start, info->count,
- info->start_instance, info->instance_count);
+ draw_vbo(draw, info);
/* unmap vertex/index buffers - will cause draw module to flush */
for (i = 0; i < sp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
if (mapped_indices) {
- draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+ draw_set_mapped_index_buffer(draw, NULL);
}
/*
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index 4a53ef048f..1071011db0 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -31,6 +31,7 @@
#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
#include "draw/draw_context.h"
#include "sp_flush.h"
#include "sp_context.h"
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 93af6ee5b0..73ae2dea56 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -199,6 +199,7 @@ softpipe_is_format_supported( struct pipe_screen *screen,
assert(target == PIPE_BUFFER ||
target == PIPE_TEXTURE_1D ||
target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_RECT ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index 880a7c7cd2..b650fcaea5 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -100,5 +100,5 @@ softpipe_set_index_buffer(struct pipe_context *pipe,
else
memset(&softpipe->index_buffer, 0, sizeof(softpipe->index_buffer));
- /* TODO make this more like a state */
+ draw_set_index_buffer(softpipe->draw, ib);
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index cf7ab81405..e654bb77c2 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1785,6 +1785,7 @@ get_lambda_func(const union sp_sampler_key key)
case PIPE_TEXTURE_1D:
return compute_lambda_1d;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return compute_lambda_2d;
case PIPE_TEXTURE_3D:
@@ -1809,6 +1810,7 @@ get_img_filter(const union sp_sampler_key key,
return img_filter_1d_linear;
break;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
/* Try for fast path:
*/
if (key.bits.is_pot &&
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index 7b2dfe2549..e975f3b02f 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -67,7 +67,7 @@ void surface_to_surfaceid(struct svga_winsys_context *swc, // IN
id->mipmap = s->real_level;
}
else {
- id->sid = SVGA3D_INVALID_ID;
+ swc->surface_relocation(swc, &id->sid, NULL, flags);
id->face = 0;
id->mipmap = 0;
}
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index 3b30b9e341..cd3f6b8982 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -214,6 +214,11 @@ void svga_context_flush( struct svga_context *svga,
svga_screen_cache_flush(svgascreen, fence);
+ /* To force the reemission of rendertargets and texture bindings at
+ * the beginning of every command buffer.
+ */
+ svga->dirty |= SVGA_NEW_COMMAND_BUFFER;
+
if (SVGA_DEBUG & DEBUG_SYNC) {
if (fence)
svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0);
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 67a7614c8a..1fb5a04887 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -382,6 +382,7 @@ struct svga_context
#define SVGA_NEW_ZERO_STRIDE 0x2000000
#define SVGA_NEW_TEXTURE_FLAGS 0x4000000
#define SVGA_NEW_STENCIL_REF 0x8000000
+#define SVGA_NEW_COMMAND_BUFFER 0x10000000
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index de08bc5e56..001ec3616c 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -146,23 +146,15 @@ retry:
}
-
-
-
static void
-svga_draw_range_elements( struct pipe_context *pipe,
- struct pipe_resource *index_buffer,
- unsigned index_size,
- int index_bias,
- unsigned min_index,
- unsigned max_index,
- unsigned prim, unsigned start, unsigned count)
+svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct svga_context *svga = svga_context( pipe );
- unsigned reduced_prim = u_reduced_prim(prim);
+ unsigned reduced_prim = u_reduced_prim( info->mode );
+ unsigned count = info->count;
enum pipe_error ret = 0;
- if (!u_trim_pipe_prim( prim, &count ))
+ if (!u_trim_pipe_prim( info->mode, &count ))
return;
/*
@@ -187,34 +179,32 @@ svga_draw_range_elements( struct pipe_context *pipe,
return;
#endif
- if (svga->state.sw.need_swtnl)
- {
- ret = svga_swtnl_draw_range_elements( svga,
- index_buffer,
- index_size,
- index_bias,
- min_index, max_index,
- prim,
- start, count );
+ if (svga->state.sw.need_swtnl) {
+ ret = svga_swtnl_draw_vbo( svga, info );
}
else {
- if (index_buffer) {
+ if (info->indexed && svga->curr.ib.buffer) {
+ unsigned offset;
+
+ assert(svga->curr.ib.offset % svga->curr.ib.index_size == 0);
+ offset = svga->curr.ib.offset / svga->curr.ib.index_size;
+
ret = retry_draw_range_elements( svga,
- index_buffer,
- index_size,
- index_bias,
- min_index,
- max_index,
- prim,
- start,
- count,
+ svga->curr.ib.buffer,
+ svga->curr.ib.index_size,
+ info->index_bias,
+ info->min_index,
+ info->max_index,
+ info->mode,
+ info->start + offset,
+ info->count,
TRUE );
}
else {
- ret = retry_draw_arrays( svga,
- prim,
- start,
- count,
+ ret = retry_draw_arrays( svga,
+ info->mode,
+ info->start,
+ info->count,
TRUE );
}
}
@@ -226,30 +216,6 @@ svga_draw_range_elements( struct pipe_context *pipe,
}
-static void
-svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
-{
- struct svga_context *svga = svga_context(pipe);
-
- if (info->indexed && svga->curr.ib.buffer) {
- unsigned offset;
-
- assert(svga->curr.ib.offset % svga->curr.ib.index_size == 0);
- offset = svga->curr.ib.offset / svga->curr.ib.index_size;
-
- svga_draw_range_elements(pipe, svga->curr.ib.buffer,
- svga->curr.ib.index_size, info->index_bias,
- info->min_index, info->max_index,
- info->mode, info->start + offset, info->count);
- }
- else {
- svga_draw_range_elements(pipe, NULL, 0, 0,
- info->min_index, info->max_index,
- info->mode, info->start, info->count);
- }
-}
-
-
void svga_init_draw_functions( struct svga_context *svga )
{
svga->pipe.draw_vbo = svga_draw_vbo;
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index ff83c750aa..26eb03a895 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -583,7 +583,8 @@ svga_texture_from_handle(struct pipe_screen *screen,
assert(screen);
/* Only supports one type */
- if (template->target != PIPE_TEXTURE_2D ||
+ if ((template->target != PIPE_TEXTURE_2D &&
+ template->target != PIPE_TEXTURE_RECT) ||
template->last_level != 0 ||
template->depth0 != 1) {
return NULL;
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
index bd92f00343..fcbb35e797 100644
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -43,15 +43,18 @@ static int emit_framebuffer( struct svga_context *svga,
{
const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+ boolean reemit = !!(dirty & SVGA_NEW_COMMAND_BUFFER);
unsigned i;
enum pipe_error ret;
- /* XXX: Need shadow state in svga->hw to eliminate redundant
- * uploads, especially of NULL buffers.
+ /*
+ * We need to reemit non-null surface bindings, even when they are not
+ * dirty, to ensure that the resources are paged in.
*/
for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
- if (curr->cbufs[i] != hw->cbufs[i]) {
+ if (curr->cbufs[i] != hw->cbufs[i] ||
+ (reemit && hw->cbufs[i])) {
if (svga->curr.nr_fbs++ > 8)
return PIPE_ERROR_OUT_OF_MEMORY;
@@ -64,7 +67,8 @@ static int emit_framebuffer( struct svga_context *svga,
}
- if (curr->zsbuf != hw->zsbuf) {
+ if (curr->zsbuf != hw->zsbuf ||
+ (reemit && hw->zsbuf)) {
ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
if (ret != PIPE_OK)
return ret;
@@ -92,7 +96,8 @@ static int emit_framebuffer( struct svga_context *svga,
struct svga_tracked_state svga_hw_framebuffer =
{
"hw framebuffer state",
- SVGA_NEW_FRAME_BUFFER,
+ SVGA_NEW_FRAME_BUFFER |
+ SVGA_NEW_COMMAND_BUFFER,
emit_framebuffer
};
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index 76a2dae143..4a50b19474 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -56,6 +56,7 @@ static int
update_tss_binding(struct svga_context *svga,
unsigned dirty )
{
+ boolean reemit = !!(dirty & SVGA_NEW_COMMAND_BUFFER);
unsigned i;
unsigned count = MAX2( svga->curr.num_sampler_views,
svga->state.hw_draw.num_views );
@@ -107,12 +108,18 @@ update_tss_binding(struct svga_context *svga,
max_lod);
}
- if (view->dirty) {
+ /*
+ * We need to reemit non-null texture bindings, even when they are not
+ * dirty, to ensure that the resources are paged in.
+ */
+
+ if (view->dirty ||
+ (reemit && view->v)) {
queue.bind[queue.bind_count].unit = i;
queue.bind[queue.bind_count].view = view;
queue.bind_count++;
}
- else if (view->v) {
+ if (!view->dirty && view->v) {
svga_validate_sampler_view(svga, view->v);
}
}
@@ -128,18 +135,21 @@ update_tss_binding(struct svga_context *svga,
goto fail;
for (i = 0; i < queue.bind_count; i++) {
+ struct svga_winsys_surface *handle;
+
ts[i].stage = queue.bind[i].unit;
ts[i].name = SVGA3D_TS_BIND_TEXTURE;
if (queue.bind[i].view->v) {
- svga->swc->surface_relocation(svga->swc,
- &ts[i].value,
- queue.bind[i].view->v->handle,
- SVGA_RELOC_READ);
+ handle = queue.bind[i].view->v->handle;
}
else {
- ts[i].value = SVGA3D_INVALID_ID;
+ handle = NULL;
}
+ svga->swc->surface_relocation(svga->swc,
+ &ts[i].value,
+ handle,
+ SVGA_RELOC_READ);
queue.bind[i].view->dirty = FALSE;
}
@@ -157,7 +167,8 @@ fail:
struct svga_tracked_state svga_hw_tss_binding = {
"texture binding emit",
SVGA_NEW_TEXTURE_BINDING |
- SVGA_NEW_SAMPLER,
+ SVGA_NEW_SAMPLER |
+ SVGA_NEW_COMMAND_BUFFER,
update_tss_binding
};
diff --git a/src/gallium/drivers/svga/svga_swtnl.h b/src/gallium/drivers/svga/svga_swtnl.h
index 65c675f99c..fc094e5142 100644
--- a/src/gallium/drivers/svga/svga_swtnl.h
+++ b/src/gallium/drivers/svga/svga_swtnl.h
@@ -38,15 +38,8 @@ void svga_destroy_swtnl( struct svga_context *svga );
enum pipe_error
-svga_swtnl_draw_range_elements(struct svga_context *svga,
- struct pipe_resource *indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned min_index,
- unsigned max_index,
- unsigned prim,
- unsigned start,
- unsigned count);
+svga_swtnl_draw_vbo(struct svga_context *svga,
+ const struct pipe_draw_info *info);
#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index eb71c23195..814e8edd70 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -36,13 +36,8 @@
enum pipe_error
-svga_swtnl_draw_range_elements(struct svga_context *svga,
- struct pipe_resource *indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned min_index,
- unsigned max_index,
- unsigned prim, unsigned start, unsigned count)
+svga_swtnl_draw_vbo(struct svga_context *svga,
+ const struct pipe_draw_info *info)
{
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
struct pipe_transfer *ib_transfer = NULL;
@@ -76,19 +71,18 @@ svga_swtnl_draw_range_elements(struct svga_context *svga,
draw_set_mapped_vertex_buffer(draw, i, map);
}
+ /* TODO move this to update_swtnl_draw */
+ draw_set_index_buffer(draw, &svga->curr.ib);
+
/* Map index buffer, if present */
- if (indexBuffer) {
- map = pipe_buffer_map(&svga->pipe, indexBuffer,
+ map = NULL;
+ if (info->indexed && svga->curr.ib.buffer) {
+ map = pipe_buffer_map(&svga->pipe, svga->curr.ib.buffer,
PIPE_TRANSFER_READ,
- &ib_transfer);
-
- draw_set_mapped_element_buffer_range(draw,
- indexSize, indexBias,
- min_index,
- max_index,
- map);
+ &ib_transfer);
}
-
+ draw_set_mapped_index_buffer(draw, map);
+
if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
map = pipe_buffer_map(&svga->pipe,
svga->curr.cb[PIPE_SHADER_VERTEX],
@@ -101,7 +95,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga,
svga->curr.cb[PIPE_SHADER_VERTEX]->width0);
}
- draw_arrays(svga->swtnl.draw, prim, start, count);
+ draw_vbo(draw, info);
draw_flush(svga->swtnl.draw);
@@ -117,9 +111,9 @@ svga_swtnl_draw_range_elements(struct svga_context *svga,
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
- if (indexBuffer) {
- pipe_buffer_unmap(&svga->pipe, indexBuffer, ib_transfer);
- draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+ if (ib_transfer) {
+ pipe_buffer_unmap(&svga->pipe, svga->curr.ib.buffer, ib_transfer);
+ draw_set_mapped_index_buffer(draw, NULL);
}
if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
@@ -157,7 +151,8 @@ boolean svga_init_swtnl( struct svga_context *svga )
draw_install_aapoint_stage(svga->swtnl.draw, &svga->pipe);
draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
- draw_set_driver_clipping(svga->swtnl.draw, debug_get_bool_option("SVGA_SWTNL_FSE", FALSE));
+ if (debug_get_bool_option("SVGA_SWTNL_FSE", FALSE))
+ draw_set_driver_clipping(svga->swtnl.draw, TRUE, TRUE);
return TRUE;
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 48eced2ece..b4e90a957d 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -353,6 +353,7 @@ static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit,
case PIPE_TEXTURE_1D:
return SVGA3DSAMP_2D;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
return SVGA3DSAMP_2D;
case PIPE_TEXTURE_3D:
return SVGA3DSAMP_VOLUME;
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 67e1f22a70..72dccdf150 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -806,6 +806,20 @@ static boolean emit_cmp(struct svga_shader_emitter *emit,
const struct src_register src2 = translate_src_register(
emit, &insn->Src[2] );
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ SVGA3dShaderDestToken temp = get_temp(emit);
+ struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
+
+ /* Since vertex shaders don't support the CMP instruction,
+ * simulate it with SLT and LRP instructions.
+ * SLT TMP, SRC0, 0.0
+ * LRP DST, TMP, SRC1, SRC2
+ */
+ if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
+ return FALSE;
+ return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2);
+ }
+
/* CMP DST, SRC0, SRC2, SRC1 */
return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
}
@@ -2682,6 +2696,11 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
return TRUE;
}
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
+ return TRUE;
+ }
+
if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 84e5a6a824..271cd4aff5 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -885,7 +885,7 @@ trace_sampler_view_destroy(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, view);
- pipe->sampler_view_destroy(pipe, view);
+ pipe_sampler_view_reference(&tr_view->sampler_view, NULL);
trace_dump_call_end();
@@ -1002,7 +1002,7 @@ trace_context_set_index_buffer(struct pipe_context *_pipe,
trace_dump_call_begin("pipe_context", "set_index_buffer");
trace_dump_arg(ptr, pipe);
- trace_dump_arg(index_buffer, ib);
+ trace_dump_arg(index_buffer, _ib);
pipe->set_index_buffer(pipe, ib);
@@ -1063,7 +1063,10 @@ trace_context_clear(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(uint, buffers);
- trace_dump_arg_array(float, rgba, 4);
+ if (rgba)
+ trace_dump_arg_array(float, rgba, 4);
+ else
+ trace_dump_null();
trace_dump_arg(float, depth);
trace_dump_arg(uint, stencil);
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 1fa3ec8300..0a5be43f6b 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -79,6 +79,14 @@ typedef unsigned char boolean;
#define FALSE false
#endif
+#ifndef va_copy
+#ifdef __va_copy
+#define va_copy(dest, src) __va_copy((dest), (src))
+#else
+#define va_copy(dest, src) (dest) = (src)
+#endif
+#endif
+
/* Function inlining */
#ifndef INLINE
# ifdef __cplusplus
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 0579962ec6..0e53aef6d2 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -28,19 +28,37 @@
#ifndef PIPE_CONTEXT_H
#define PIPE_CONTEXT_H
-#include "p_state.h"
-
+#include "p_compiler.h"
#ifdef __cplusplus
extern "C" {
#endif
-
-struct pipe_screen;
+
+struct pipe_blend_color;
+struct pipe_blend_state;
+struct pipe_box;
+struct pipe_clip_state;
+struct pipe_depth_stencil_alpha_state;
+struct pipe_draw_info;
struct pipe_fence_handle;
-struct pipe_state_cache;
+struct pipe_framebuffer_state;
+struct pipe_index_buffer;
struct pipe_query;
-struct pipe_winsys;
+struct pipe_poly_stipple;
+struct pipe_rasterizer_state;
+struct pipe_resource;
+struct pipe_sampler_state;
+struct pipe_sampler_view;
+struct pipe_scissor_state;
+struct pipe_shader_state;
+struct pipe_stencil_ref;
+struct pipe_stream_output_state;
+struct pipe_subresource;
+struct pipe_surface;
+struct pipe_vertex_buffer;
+struct pipe_vertex_element;
+struct pipe_viewport_state;
/**
* Gallium rendering context. Basically:
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 00aa2076ed..627b5ae538 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -28,7 +28,7 @@
#ifndef PIPE_DEFINES_H
#define PIPE_DEFINES_H
-#include "p_format.h"
+#include "p_compiler.h"
#ifdef __cplusplus
extern "C" {
@@ -135,13 +135,15 @@ enum pipe_error {
#define PIPE_STENCIL_OP_DECR_WRAP 6
#define PIPE_STENCIL_OP_INVERT 7
-/** Texture types */
+/** Texture types.
+ * See the documentation for info on PIPE_TEXTURE_RECT vs PIPE_TEXTURE_2D */
enum pipe_texture_target {
PIPE_BUFFER = 0,
PIPE_TEXTURE_1D = 1,
PIPE_TEXTURE_2D = 2,
PIPE_TEXTURE_3D = 3,
PIPE_TEXTURE_CUBE = 4,
+ PIPE_TEXTURE_RECT = 5,
PIPE_MAX_TEXTURE_TYPES
};
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 436c3f627a..06412f4894 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -29,8 +29,6 @@
#ifndef PIPE_FORMAT_H
#define PIPE_FORMAT_H
-#include "p_compiler.h"
-
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 9df20ea858..c4bd17e92b 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -33,8 +33,6 @@
extern "C" {
#endif
-#include "p_compiler.h"
-
struct tgsi_header
{
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 0f1a44cde4..9a2b31da50 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -43,7 +43,6 @@
#include "p_compiler.h"
#include "p_defines.h"
#include "p_format.h"
-#include "p_screen.h"
#ifdef __cplusplus
diff --git a/src/gallium/include/state_tracker/graw.h b/src/gallium/include/state_tracker/graw.h
index 59b0e337c9..6a99b234aa 100644
--- a/src/gallium/include/state_tracker/graw.h
+++ b/src/gallium/include/state_tracker/graw.h
@@ -1,3 +1,30 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef GALLIUM_RAW_H
#define GALLIUM_RAW_H
@@ -14,6 +41,7 @@
* those for parsing text representations of TGSI shaders.
*/
+#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
struct pipe_screen;
diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h
index 692c49d7cd..35b870a8a3 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.h
+++ b/src/gallium/state_trackers/dri/common/dri_context.h
@@ -34,7 +34,6 @@
#include "pipe/p_compiler.h"
#include "dri_wrapper.h"
-#include "main/mtypes.h"
struct pipe_context;
struct pipe_fence;
diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c
index 6ad2c7da4d..0ab4dd1893 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -383,6 +383,11 @@ dri_init_screen_helper(struct dri_screen *screen,
if (!screen->st_api)
return NULL;
+ if(pscreen->get_param(pscreen, PIPE_CAP_NPOT_TEXTURES))
+ screen->target = PIPE_TEXTURE_2D;
+ else
+ screen->target = PIPE_TEXTURE_RECT;
+
driParseOptionInfo(&screen->optionCache,
__driConfigOptions, __driNConfigOptions);
diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h
index 53ccce145b..849f399b2f 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.h
+++ b/src/gallium/state_trackers/dri/common/dri_screen.h
@@ -68,6 +68,7 @@ struct dri_screen
boolean d_depth_bits_last;
boolean sd_depth_bits_last;
boolean auto_fake_front;
+ enum pipe_texture_target target;
};
/** cast wrapper */
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index 47005c17e2..93f910a26d 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -195,7 +195,7 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable,
pipe_resource_reference(&drawable->textures[i], NULL);
memset(&templ, 0, sizeof(templ));
- templ.target = PIPE_TEXTURE_2D;
+ templ.target = screen->target;
templ.last_level = 0;
templ.width0 = dri_drawable->w;
templ.height0 = dri_drawable->h;
@@ -342,7 +342,7 @@ dri2_create_image_from_name(__DRIcontext *context,
memset(&templ, 0, sizeof(templ));
templ.bind = tex_usage;
templ.format = pf;
- templ.target = PIPE_TEXTURE_2D;
+ templ.target = screen->target;
templ.last_level = 0;
templ.width0 = width;
templ.height0 = height;
diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c
index 249ccd7fcf..04bba631ae 100644
--- a/src/gallium/state_trackers/dri/sw/drisw.c
+++ b/src/gallium/state_trackers/dri/sw/drisw.c
@@ -216,7 +216,7 @@ drisw_allocate_textures(struct dri_drawable *drawable,
}
memset(&templ, 0, sizeof(templ));
- templ.target = PIPE_TEXTURE_2D;
+ templ.target = screen->target;
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile
index 9e9e479e7e..4199d7c6ba 100644
--- a/src/gallium/state_trackers/egl/Makefile
+++ b/src/gallium/state_trackers/egl/Makefile
@@ -24,7 +24,7 @@ x11_SOURCES = $(wildcard x11/*.c) \
x11_OBJECTS = $(x11_SOURCES:.c=.o)
-kms_INCLUDES = $(shell pkg-config --cflags-only-I libdrm)
+kms_INCLUDES = -I$(TOP)/src/gallium/winsys $(shell pkg-config --cflags-only-I libdrm)
kms_SOURCES = $(wildcard kms/*.c)
kms_OBJECTS = $(kms_SOURCES:.c=.o)
diff --git a/src/gallium/state_trackers/egl/SConscript b/src/gallium/state_trackers/egl/SConscript
index e71aec35b7..efcce25e31 100644
--- a/src/gallium/state_trackers/egl/SConscript
+++ b/src/gallium/state_trackers/egl/SConscript
@@ -21,6 +21,7 @@ if 'egl' in env['statetrackers']:
'common/egl_g3d_api.c',
'common/egl_g3d_image.c',
'common/egl_g3d_st.c',
+ 'common/egl_g3d_sync.c',
'common/native_helper.c',
]
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 56d575ffe0..4e653bdf3b 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -530,6 +530,18 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
if (gdpy->native->get_param(gdpy->native, NATIVE_PARAM_USE_NATIVE_BUFFER))
dpy->Extensions.KHR_image_pixmap = EGL_TRUE;
+ dpy->Extensions.KHR_reusable_sync = EGL_TRUE;
+ dpy->Extensions.KHR_fence_sync = EGL_TRUE;
+
+ dpy->Extensions.KHR_surfaceless_gles1 = EGL_TRUE;
+ dpy->Extensions.KHR_surfaceless_gles2 = EGL_TRUE;
+ dpy->Extensions.KHR_surfaceless_opengl = EGL_TRUE;
+
+ if (dpy->Platform == _EGL_PLATFORM_DRM) {
+ dpy->Extensions.MESA_drm_display = EGL_TRUE;
+ dpy->Extensions.MESA_drm_image = EGL_TRUE;
+ }
+
if (egl_g3d_add_configs(drv, dpy, 1) == 1) {
_eglError(EGL_NOT_INITIALIZED, "eglInitialize(unable to add configs)");
goto fail;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h
index f33dc91cf9..be450bbede 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.h
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.h
@@ -30,12 +30,14 @@
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_format.h"
+#include "os/os_thread.h"
#include "egldriver.h"
#include "egldisplay.h"
#include "eglcontext.h"
#include "eglsurface.h"
#include "eglconfig.h"
#include "eglimage.h"
+#include "eglsync.h"
#include "eglscreen.h"
#include "eglmode.h"
@@ -99,6 +101,24 @@ struct egl_g3d_image {
_EGL_DRIVER_STANDARD_TYPECASTS(egl_g3d)
_EGL_DRIVER_TYPECAST(egl_g3d_image, _EGLImage, obj)
+#ifdef EGL_KHR_reusable_sync
+
+struct egl_g3d_sync {
+ _EGLSync base;
+
+ int refs;
+
+ /* the mutex protects only the condvar, not the struct */
+ pipe_mutex mutex;
+ pipe_condvar condvar;
+
+ /* for fence sync */
+ struct pipe_fence_handle *fence;
+};
+_EGL_DRIVER_TYPECAST(egl_g3d_sync, _EGLSync, obj)
+
+#endif /* EGL_KHR_reusable_sync */
+
#ifdef EGL_MESA_screen_surface
struct egl_g3d_screen {
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
index edac72a822..3ec53653f4 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
@@ -34,6 +34,7 @@
#include "egl_g3d.h"
#include "egl_g3d_api.h"
#include "egl_g3d_image.h"
+#include "egl_g3d_sync.h"
#include "egl_g3d_st.h"
#include "egl_g3d_loader.h"
#include "native.h"
@@ -103,7 +104,7 @@ egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf,
}
gctx->stctxi = gctx->stapi->create_context(gctx->stapi, gdpy->smapi,
- &gconf->stvis, (gshare) ? gshare->stctxi : NULL);
+ (gconf) ? &gconf->stvis : NULL, (gshare) ? gshare->stctxi : NULL);
if (!gctx->stctxi) {
FREE(gctx);
return NULL;
@@ -437,16 +438,19 @@ egl_g3d_make_current(_EGLDriver *drv, _EGLDisplay *dpy,
ok = gctx->stapi->make_current(gctx->stapi, gctx->stctxi,
(gdraw) ? gdraw->stfbi : NULL, (gread) ? gread->stfbi : NULL);
if (ok) {
- gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi, gdraw->stfbi);
- if (gread != gdraw) {
+ if (gdraw) {
gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi,
- gread->stfbi);
- }
+ gdraw->stfbi);
- if (gdraw->base.Type == EGL_WINDOW_BIT) {
- gctx->base.WindowRenderBuffer =
- (gdraw->stvis.render_buffer == ST_ATTACHMENT_FRONT_LEFT) ?
- EGL_SINGLE_BUFFER : EGL_BACK_BUFFER;
+ if (gdraw->base.Type == EGL_WINDOW_BIT) {
+ gctx->base.WindowRenderBuffer =
+ (gdraw->stvis.render_buffer == ST_ATTACHMENT_FRONT_LEFT) ?
+ EGL_SINGLE_BUFFER : EGL_BACK_BUFFER;
+ }
+ }
+ if (gread && gread != gdraw) {
+ gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi,
+ gread->stfbi);
}
}
}
@@ -805,6 +809,17 @@ egl_g3d_init_driver_api(_EGLDriver *drv)
drv->API.CreateImageKHR = egl_g3d_create_image;
drv->API.DestroyImageKHR = egl_g3d_destroy_image;
+#ifdef EGL_MESA_drm_image
+ drv->API.CreateDRMImageMESA = egl_g3d_create_drm_image;
+ drv->API.ExportDRMImageMESA = egl_g3d_export_drm_image;
+#endif
+
+#ifdef EGL_KHR_reusable_sync
+ drv->API.CreateSyncKHR = egl_g3d_create_sync;
+ drv->API.DestroySyncKHR = egl_g3d_destroy_sync;
+ drv->API.ClientWaitSyncKHR = egl_g3d_client_wait_sync;
+ drv->API.SignalSyncKHR = egl_g3d_signal_sync;
+#endif
#ifdef EGL_MESA_screen_surface
drv->API.CreateScreenSurfaceMESA = egl_g3d_create_screen_surface;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
index 1e13cfcf7e..558638e72f 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
@@ -31,12 +31,16 @@
#include "util/u_rect.h"
#include "util/u_inlines.h"
#include "eglcurrent.h"
+#include "egllog.h"
#include "native.h"
#include "egl_g3d.h"
#include "egl_g3d_api.h"
#include "egl_g3d_image.h"
+/* move this to native display? */
+#include "state_tracker/drm_driver.h"
+
/**
* Reference and return the front left buffer of the native pixmap.
*/
@@ -67,6 +71,165 @@ egl_g3d_reference_native_pixmap(_EGLDisplay *dpy, EGLNativePixmapType pix)
return textures[natt];
}
+#ifdef EGL_MESA_drm_image
+
+static struct pipe_resource *
+egl_g3d_create_drm_buffer(_EGLDisplay *dpy, const EGLint *attribs)
+{
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ struct pipe_screen *screen = gdpy->native->screen;
+ struct pipe_resource templ;
+ EGLint width = 0, height = 0, format = 0, use = 0;
+ EGLint valid_use;
+ EGLint i, err = EGL_SUCCESS;
+
+ for (i = 0; attribs[i] != EGL_NONE; i++) {
+ EGLint attr = attribs[i++];
+ EGLint val = attribs[i];
+
+ switch (attr) {
+ case EGL_WIDTH:
+ width = val;
+ break;
+ case EGL_HEIGHT:
+ height = val;
+ break;
+ case EGL_DRM_BUFFER_FORMAT_MESA:
+ format = val;
+ break;
+ case EGL_DRM_BUFFER_USE_MESA:
+ use = val;
+ break;
+ default:
+ err = EGL_BAD_ATTRIBUTE;
+ break;
+ }
+
+ if (err != EGL_SUCCESS) {
+ _eglLog(_EGL_DEBUG, "bad image attribute 0x%04x", attr);
+ return NULL;
+ }
+ }
+
+ if (width <= 0 || height <= 0) {
+ _eglLog(_EGL_DEBUG, "bad width or height (%dx%d)", width, height);
+ return NULL;
+ }
+
+ switch (format) {
+ case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
+ format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ break;
+ default:
+ _eglLog(_EGL_DEBUG, "bad image format value 0x%04x", format);
+ return NULL;
+ break;
+ }
+
+ valid_use = EGL_DRM_BUFFER_USE_SCANOUT_MESA |
+ EGL_DRM_BUFFER_USE_SHARE_MESA;
+ if (use & ~valid_use) {
+ _eglLog(_EGL_DEBUG, "bad image use bit 0x%04x", use);
+ return NULL;
+ }
+
+ memset(&templ, 0, sizeof(templ));
+ templ.target = PIPE_TEXTURE_2D;
+ templ.format = format;
+ templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ templ.width0 = width;
+ templ.height0 = height;
+ templ.depth0 = 1;
+
+ /*
+ * XXX fix apps (e.g. wayland) and pipe drivers (e.g. i915) and remove the
+ * size check
+ */
+ if ((use & EGL_DRM_BUFFER_USE_SCANOUT_MESA) &&
+ width >= 640 && height >= 480)
+ templ.bind |= PIPE_BIND_SCANOUT;
+ if (use & EGL_DRM_BUFFER_USE_SHARE_MESA)
+ templ.bind |= PIPE_BIND_SHARED;
+
+ return screen->resource_create(screen, &templ);
+}
+
+static struct pipe_resource *
+egl_g3d_reference_drm_buffer(_EGLDisplay *dpy, EGLint name,
+ const EGLint *attribs)
+{
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ struct pipe_screen *screen = gdpy->native->screen;
+ struct pipe_resource templ;
+ struct winsys_handle wsh;
+ EGLint width = 0, height = 0, format = 0, stride = 0;
+ EGLint i, err = EGL_SUCCESS;
+
+ /* winsys_handle is in theory platform-specific */
+ if (dpy->Platform != _EGL_PLATFORM_DRM)
+ return NULL;
+
+ for (i = 0; attribs[i] != EGL_NONE; i++) {
+ EGLint attr = attribs[i++];
+ EGLint val = attribs[i];
+
+ switch (attr) {
+ case EGL_WIDTH:
+ width = val;
+ break;
+ case EGL_HEIGHT:
+ height = val;
+ break;
+ case EGL_DRM_BUFFER_FORMAT_MESA:
+ format = val;
+ break;
+ case EGL_DRM_BUFFER_STRIDE_MESA:
+ stride = val;
+ break;
+ default:
+ err = EGL_BAD_ATTRIBUTE;
+ break;
+ }
+
+ if (err != EGL_SUCCESS) {
+ _eglLog(_EGL_DEBUG, "bad image attribute 0x%04x", attr);
+ return NULL;
+ }
+ }
+
+ if (width <= 0 || height <= 0 || stride <= 0) {
+ _eglLog(_EGL_DEBUG, "bad width, height, or stride (%dx%dx%d)",
+ width, height, stride);
+ return NULL;
+ }
+
+ switch (format) {
+ case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
+ format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ break;
+ default:
+ _eglLog(_EGL_DEBUG, "bad image format value 0x%04x", format);
+ return NULL;
+ break;
+ }
+
+ memset(&templ, 0, sizeof(templ));
+ templ.target = PIPE_TEXTURE_2D;
+ templ.format = format;
+ templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ templ.width0 = width;
+ templ.height0 = height;
+ templ.depth0 = 1;
+
+ memset(&wsh, 0, sizeof(wsh));
+ wsh.handle = (unsigned) name;
+ wsh.stride = stride;
+
+ return screen->resource_from_handle(screen, &templ, &wsh);
+}
+
+#endif /* EGL_MESA_drm_image */
+
_EGLImage *
egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx,
EGLenum target, EGLClientBuffer buffer,
@@ -92,6 +255,11 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx,
ptex = egl_g3d_reference_native_pixmap(dpy,
(EGLNativePixmapType) buffer);
break;
+#ifdef EGL_MESA_drm_image
+ case EGL_DRM_BUFFER_MESA:
+ ptex = egl_g3d_reference_drm_buffer(dpy, (EGLint) buffer, attribs);
+ break;
+#endif
default:
ptex = NULL;
break;
@@ -134,3 +302,80 @@ egl_g3d_destroy_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *img)
return EGL_TRUE;
}
+
+_EGLImage *
+egl_g3d_create_drm_image(_EGLDriver *drv, _EGLDisplay *dpy,
+ const EGLint *attribs)
+{
+ struct egl_g3d_image *gimg;
+ struct pipe_resource *ptex;
+
+ gimg = CALLOC_STRUCT(egl_g3d_image);
+ if (!gimg) {
+ _eglError(EGL_BAD_ALLOC, "eglCreateDRMImageKHR");
+ return NULL;
+ }
+
+ if (!_eglInitImage(&gimg->base, dpy, attribs)) {
+ FREE(gimg);
+ return NULL;
+ }
+
+#ifdef EGL_MESA_drm_image
+ ptex = egl_g3d_create_drm_buffer(dpy, attribs);
+#else
+ ptex = NULL;
+#endif
+ if (!ptex) {
+ FREE(gimg);
+ return NULL;
+ }
+
+ /* transfer the ownership to the image */
+ gimg->texture = ptex;
+ gimg->face = 0;
+ gimg->level = 0;
+ gimg->zslice = 0;
+
+ return &gimg->base;
+}
+
+EGLBoolean
+egl_g3d_export_drm_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *img,
+ EGLint *name, EGLint *handle, EGLint *stride)
+{
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ struct egl_g3d_image *gimg = egl_g3d_image(img);
+ struct pipe_screen *screen = gdpy->native->screen;
+ struct winsys_handle wsh;
+
+ /* winsys_handle is in theory platform-specific */
+ if (dpy->Platform != _EGL_PLATFORM_DRM)
+ return EGL_FALSE;
+
+ /* get shared handle */
+ if (name) {
+ memset(&handle, 0, sizeof(handle));
+ wsh.type = DRM_API_HANDLE_TYPE_SHARED;
+ if (!screen->resource_get_handle(screen, gimg->texture, &wsh)) {
+ return EGL_FALSE;
+ }
+
+ *name = wsh.handle;
+ }
+
+ /* get KMS handle */
+ if (handle || stride) {
+ memset(&wsh, 0, sizeof(wsh));
+ wsh.type = DRM_API_HANDLE_TYPE_KMS;
+ if (!screen->resource_get_handle(screen, gimg->texture, &wsh))
+ return EGL_FALSE;
+
+ if (handle)
+ *handle = wsh.handle;
+ if (stride)
+ *stride = wsh.stride;
+ }
+
+ return EGL_TRUE;
+}
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.h b/src/gallium/state_trackers/egl/common/egl_g3d_image.h
index adda933371..f051da8283 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_image.h
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.h
@@ -39,4 +39,12 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx,
EGLBoolean
egl_g3d_destroy_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *image);
+_EGLImage *
+egl_g3d_create_drm_image(_EGLDriver *drv, _EGLDisplay *dpy,
+ const EGLint *attribs);
+
+EGLBoolean
+egl_g3d_export_drm_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *img,
+ EGLint *name, EGLint *handle, EGLint *stride);
+
#endif /* _EGL_G3D_IMAGE_H_ */
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_sync.c b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c
new file mode 100644
index 0000000000..ec74e9eb94
--- /dev/null
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c
@@ -0,0 +1,284 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_atomic.h"
+#include "os/os_thread.h"
+#include "eglsync.h"
+#include "eglcurrent.h"
+
+#include "egl_g3d.h"
+#include "egl_g3d_sync.h"
+
+#ifdef EGL_KHR_reusable_sync
+
+/**
+ * Wait for the conditional variable.
+ */
+static EGLint
+egl_g3d_wait_sync_condvar(struct egl_g3d_sync *gsync, EGLTimeKHR timeout)
+{
+ _EGLDisplay *dpy = gsync->base.Resource.Display;
+
+ pipe_mutex_lock(gsync->mutex);
+
+ /* unlock display lock just before waiting */
+ _eglUnlockMutex(&dpy->Mutex);
+
+ /* No timed wait. Always treat timeout as EGL_FOREVER_KHR */
+ pipe_condvar_wait(gsync->condvar, gsync->mutex);
+
+ _eglLockMutex(&dpy->Mutex);
+
+ pipe_mutex_unlock(gsync->mutex);
+
+ return EGL_CONDITION_SATISFIED_KHR;
+}
+
+/**
+ * Signal the conditional variable.
+ */
+static void
+egl_g3d_signal_sync_condvar(struct egl_g3d_sync *gsync)
+{
+ pipe_mutex_lock(gsync->mutex);
+ pipe_condvar_broadcast(gsync->condvar);
+ pipe_mutex_unlock(gsync->mutex);
+}
+
+/**
+ * Insert a fence command to the command stream of the current context.
+ */
+static EGLint
+egl_g3d_insert_fence_sync(struct egl_g3d_sync *gsync)
+{
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct egl_g3d_context *gctx = egl_g3d_context(ctx);
+
+ /* already checked in egl_g3d_create_sync */
+ assert(gctx);
+
+ /* insert the fence command */
+ gctx->stctxi->flush(gctx->stctxi, 0x0, &gsync->fence);
+ if (!gsync->fence)
+ gsync->base.SyncStatus = EGL_SIGNALED_KHR;
+
+ return EGL_SUCCESS;
+}
+
+/**
+ * Wait for the fence sync to be signaled.
+ */
+static EGLint
+egl_g3d_wait_fence_sync(struct egl_g3d_sync *gsync, EGLTimeKHR timeout)
+{
+ EGLint ret;
+
+ if (gsync->fence) {
+ _EGLDisplay *dpy = gsync->base.Resource.Display;
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ struct pipe_screen *screen = gdpy->native->screen;
+ struct pipe_fence_handle *fence = gsync->fence;
+
+ gsync->fence = NULL;
+
+ _eglUnlockMutex(&dpy->Mutex);
+ /* no timed finish? */
+ screen->fence_finish(screen, fence, 0x0);
+ ret = EGL_CONDITION_SATISFIED_KHR;
+ _eglLockMutex(&dpy->Mutex);
+
+ gsync->base.SyncStatus = EGL_SIGNALED_KHR;
+
+ screen->fence_reference(screen, &fence, NULL);
+ egl_g3d_signal_sync_condvar(gsync);
+ }
+ else {
+ ret = egl_g3d_wait_sync_condvar(gsync, timeout);
+ }
+
+ return ret;
+}
+
+static INLINE void
+egl_g3d_ref_sync(struct egl_g3d_sync *gsync)
+{
+ p_atomic_inc(&gsync->refs);
+}
+
+static INLINE void
+egl_g3d_unref_sync(struct egl_g3d_sync *gsync)
+{
+ if (p_atomic_dec_zero(&gsync->refs)) {
+ pipe_condvar_destroy(gsync->condvar);
+ pipe_mutex_destroy(gsync->mutex);
+
+ if (gsync->fence) {
+ struct egl_g3d_display *gdpy =
+ egl_g3d_display(gsync->base.Resource.Display);
+ struct pipe_screen *screen = gdpy->native->screen;
+
+ screen->fence_reference(screen, &gsync->fence, NULL);
+ }
+
+ FREE(gsync);
+ }
+}
+
+_EGLSync *
+egl_g3d_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
+ EGLenum type, const EGLint *attrib_list)
+{
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct egl_g3d_sync *gsync;
+ EGLint err;
+
+ if (!ctx || ctx->Resource.Display != dpy) {
+ _eglError(EGL_BAD_MATCH, "eglCreateSyncKHR");
+ return NULL;
+ }
+
+ gsync = CALLOC_STRUCT(egl_g3d_sync);
+ if (!gsync) {
+ _eglError(EGL_BAD_ALLOC, "eglCreateSyncKHR");
+ return NULL;
+ }
+
+ if (!_eglInitSync(&gsync->base, dpy, type, attrib_list)) {
+ FREE(gsync);
+ return NULL;
+ }
+
+ switch (type) {
+ case EGL_SYNC_REUSABLE_KHR:
+ err = EGL_SUCCESS;
+ break;
+ case EGL_SYNC_FENCE_KHR:
+ err = egl_g3d_insert_fence_sync(gsync);
+ break;
+ default:
+ err = EGL_BAD_ATTRIBUTE;
+ break;
+ }
+
+ if (err != EGL_SUCCESS) {
+ _eglError(err, "eglCreateSyncKHR");
+ FREE(gsync);
+ return NULL;
+ }
+
+ pipe_mutex_init(gsync->mutex);
+ pipe_condvar_init(gsync->condvar);
+ p_atomic_set(&gsync->refs, 1);
+
+ return &gsync->base;
+}
+
+EGLBoolean
+egl_g3d_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync)
+{
+ struct egl_g3d_sync *gsync = egl_g3d_sync(sync);
+
+ switch (gsync->base.Type) {
+ case EGL_SYNC_REUSABLE_KHR:
+ /* signal the waiters */
+ if (gsync->base.SyncStatus != EGL_SIGNALED_KHR) {
+ gsync->base.SyncStatus = EGL_SIGNALED_KHR;
+ egl_g3d_signal_sync_condvar(gsync);
+ }
+ break;
+ default:
+ break;
+ }
+
+ egl_g3d_unref_sync(gsync);
+
+ return EGL_TRUE;
+}
+
+EGLint
+egl_g3d_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLint flags, EGLTimeKHR timeout)
+{
+ struct egl_g3d_sync *gsync = egl_g3d_sync(sync);
+ EGLint ret = EGL_CONDITION_SATISFIED_KHR;
+
+ if (gsync->base.SyncStatus != EGL_SIGNALED_KHR) {
+ /* flush if there is a current context */
+ if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR) {
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct egl_g3d_context *gctx = egl_g3d_context(ctx);
+
+ if (gctx)
+ gctx->stctxi->flush(gctx->stctxi, PIPE_FLUSH_RENDER_CACHE , NULL);
+ }
+
+ if (timeout) {
+ /* reference the sync object in case it is destroyed while waiting */
+ egl_g3d_ref_sync(gsync);
+
+ switch (gsync->base.Type) {
+ case EGL_SYNC_REUSABLE_KHR:
+ ret = egl_g3d_wait_sync_condvar(gsync, timeout);
+ break;
+ case EGL_SYNC_FENCE_KHR:
+ ret = egl_g3d_wait_fence_sync(gsync, timeout);
+ default:
+ break;
+ }
+
+ egl_g3d_unref_sync(gsync);
+ }
+ else {
+ ret = EGL_TIMEOUT_EXPIRED_KHR;
+ }
+ }
+
+ return ret;
+}
+
+EGLBoolean
+egl_g3d_signal_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLenum mode)
+{
+ struct egl_g3d_sync *gsync = egl_g3d_sync(sync);
+
+ /* only for reusable sync */
+ if (sync->Type != EGL_SYNC_REUSABLE_KHR)
+ return _eglError(EGL_BAD_MATCH, "eglSignalSyncKHR");
+
+ if (gsync->base.SyncStatus != mode) {
+ gsync->base.SyncStatus = mode;
+ if (mode == EGL_SIGNALED_KHR)
+ egl_g3d_signal_sync_condvar(gsync);
+ }
+
+ return EGL_TRUE;
+}
+
+#endif /* EGL_KHR_reusable_sync */
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_sync.h b/src/gallium/state_trackers/egl/common/egl_g3d_sync.h
new file mode 100644
index 0000000000..3179ca04e1
--- /dev/null
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_sync.h
@@ -0,0 +1,53 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef _EGL_G3D_SYNC_H_
+#define _EGL_G3D_SYNC_H_
+
+#include "egl_g3d.h"
+
+#ifdef EGL_KHR_reusable_sync
+
+_EGLSync *
+egl_g3d_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
+ EGLenum type, const EGLint *attrib_list);
+
+EGLBoolean
+egl_g3d_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync);
+
+EGLint
+egl_g3d_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLint flags, EGLTimeKHR timeout);
+
+EGLBoolean
+egl_g3d_signal_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLenum mode);
+
+#endif /* EGL_KHR_reusable_sync */
+
+#endif /* _EGL_G3D_SYNC_H_ */
diff --git a/src/gallium/state_trackers/egl/kms/native_kms.c b/src/gallium/state_trackers/egl/kms/native_kms.c
index d4e8fbc913..208f73306c 100644
--- a/src/gallium/state_trackers/egl/kms/native_kms.c
+++ b/src/gallium/state_trackers/egl/kms/native_kms.c
@@ -38,6 +38,10 @@
#include "native_kms.h"
+/* see get_drm_screen_name */
+#include <radeon_drm.h>
+#include "radeon/drm/radeon_drm.h"
+
static boolean
kms_surface_validate(struct native_surface *nsurf, uint attachment_mask,
unsigned int *seq_num, struct pipe_resource **textures,
@@ -584,7 +588,9 @@ kms_display_get_configs(struct native_display *ndpy, int *num_configs)
nconf->color_format = format;
- nconf->scanout_bit = TRUE;
+ /* support KMS */
+ if (kdpy->resources)
+ nconf->scanout_bit = TRUE;
}
configs = MALLOC(sizeof(*configs));
@@ -664,6 +670,27 @@ kms_display_destroy(struct native_display *ndpy)
FREE(kdpy);
}
+static const char *
+get_drm_screen_name(int fd, drmVersionPtr version)
+{
+ const char *name = version->name;
+
+ if (name && !strcmp(name, "radeon")) {
+ int chip_id;
+ struct drm_radeon_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.request = RADEON_INFO_DEVICE_ID;
+ info.value = pointer_to_intptr(&chip_id);
+ if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
+ return NULL;
+
+ name = is_r3xx(chip_id) ? "r300" : "r600";
+ }
+
+ return name;
+}
+
/**
* Initialize KMS and pipe screen.
*/
@@ -672,6 +699,7 @@ kms_display_init_screen(struct native_display *ndpy)
{
struct kms_display *kdpy = kms_display(ndpy);
drmVersionPtr version;
+ const char *name;
version = drmGetVersion(kdpy->fd);
if (!version) {
@@ -679,8 +707,11 @@ kms_display_init_screen(struct native_display *ndpy)
return FALSE;
}
- kdpy->base.screen = kdpy->event_handler->new_drm_screen(&kdpy->base,
- version->name, kdpy->fd);;
+ name = get_drm_screen_name(kdpy->fd, version);
+ if (name) {
+ kdpy->base.screen =
+ kdpy->event_handler->new_drm_screen(&kdpy->base, name, kdpy->fd);
+ }
drmFreeVersion(version);
if (!kdpy->base.screen) {
@@ -717,32 +748,32 @@ kms_create_display(int fd, struct native_event_handler *event_handler,
return NULL;
}
+ kdpy->base.destroy = kms_display_destroy;
+ kdpy->base.get_param = kms_display_get_param;
+ kdpy->base.get_configs = kms_display_get_configs;
+
/* resources are fixed, unlike crtc, connector, or encoder */
kdpy->resources = drmModeGetResources(kdpy->fd);
- if (!kdpy->resources) {
- kms_display_destroy(&kdpy->base);
- return NULL;
- }
+ if (kdpy->resources) {
+ kdpy->saved_crtcs =
+ CALLOC(kdpy->resources->count_crtcs, sizeof(*kdpy->saved_crtcs));
+ if (!kdpy->saved_crtcs) {
+ kms_display_destroy(&kdpy->base);
+ return NULL;
+ }
- kdpy->saved_crtcs =
- CALLOC(kdpy->resources->count_crtcs, sizeof(*kdpy->saved_crtcs));
- if (!kdpy->saved_crtcs) {
- kms_display_destroy(&kdpy->base);
- return NULL;
- }
+ kdpy->shown_surfaces =
+ CALLOC(kdpy->resources->count_crtcs, sizeof(*kdpy->shown_surfaces));
+ if (!kdpy->shown_surfaces) {
+ kms_display_destroy(&kdpy->base);
+ return NULL;
+ }
- kdpy->shown_surfaces =
- CALLOC(kdpy->resources->count_crtcs, sizeof(*kdpy->shown_surfaces));
- if (!kdpy->shown_surfaces) {
- kms_display_destroy(&kdpy->base);
- return NULL;
+ kdpy->base.modeset = &kms_display_modeset;
+ }
+ else {
+ _eglLog(_EGL_DEBUG, "Failed to get KMS resources. Disable modeset.");
}
-
- kdpy->base.destroy = kms_display_destroy;
- kdpy->base.get_param = kms_display_get_param;
- kdpy->base.get_configs = kms_display_get_configs;
-
- kdpy->base.modeset = &kms_display_modeset;
return &kdpy->base;
}
diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c
index eb8d6a1933..dcd50e19d7 100644
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@@ -34,10 +34,6 @@
#include "GL/glx.h"
#include "xm_api.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/imports.h"
-#include "main/version.h"
/* This indicates the client-side GLX API and GLX encoder version. */
@@ -603,8 +599,8 @@ destroy_visuals_on_display(Display *dpy)
static int
close_display_callback(Display *dpy, XExtCodes *codes)
{
- destroy_visuals_on_display(dpy);
xmesa_destroy_buffers_on_display(dpy);
+ destroy_visuals_on_display(dpy);
return 0;
}
@@ -1299,7 +1295,7 @@ glXCopyContext( Display *dpy, GLXContext src, GLXContext dst,
XMesaContext xm_dst = dst->xmesaContext;
(void) dpy;
if (MakeCurrent_PrevContext == src) {
- _mesa_Flush();
+ glFlush();
}
XMesaCopyContext(xm_src, xm_dst, mask);
}
diff --git a/src/gallium/state_trackers/glx/xlib/glx_usefont.c b/src/gallium/state_trackers/glx/xlib/glx_usefont.c
index 8903b0e6cb..0aa37e150b 100644
--- a/src/gallium/state_trackers/glx/xlib/glx_usefont.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_usefont.c
@@ -30,8 +30,7 @@
*/
-#include "main/context.h"
-#include "main/imports.h"
+#include "main/core.h"
#include <GL/glx.h>
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index c0c418306f..eb4ce74266 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -56,7 +56,6 @@
#include "xm_api.h"
#include "xm_st.h"
-#include "main/context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
@@ -72,10 +71,35 @@
static struct xm_driver driver;
static struct st_api *stapi;
+/* Default strict invalidate to false. This means we will not call
+ * XGetGeometry after every swapbuffers, which allows swapbuffers to
+ * remain asynchronous. For apps running at 100fps with synchronous
+ * swapping, a 10% boost is typical. For gears, I see closer to 20%
+ * speedup.
+ *
+ * Note that the work of copying data on swapbuffers doesn't disappear
+ * - this change just allows the X server to execute the PutImage
+ * asynchronously without us effectively blocked until its completion.
+ *
+ * This speeds up even llvmpipe's threaded rasterization as the
+ * swapbuffers operation was a large part of the serial component of
+ * an llvmpipe frame.
+ *
+ * The downside of this is correctness - applications which don't call
+ * glViewport on window resizes will get incorrect rendering. A
+ * better solution would be to have per-frame but asynchronous
+ * invalidation. Xcb almost looks as if it could provide this, but
+ * the API doesn't seem to quite be there.
+ */
+boolean xmesa_strict_invalidate = FALSE;
+
void xmesa_set_driver( const struct xm_driver *templ )
{
driver = *templ;
stapi = driver.create_st_api();
+
+ xmesa_strict_invalidate =
+ debug_get_bool_option("XMESA_STRICT_INVALIDATE", FALSE);
}
@@ -91,7 +115,12 @@ static int
xmesa_get_param(struct st_manager *smapi,
enum st_manager_param param)
{
- return 0;
+ switch(param) {
+ case ST_MANAGER_BROKEN_INVALIDATE:
+ return !xmesa_strict_invalidate;
+ default:
+ return 0;
+ }
}
static XMesaDisplay
@@ -263,7 +292,6 @@ xmesa_get_window_size(Display *dpy, XMesaBuffer b,
Status stat;
pipe_mutex_lock(xmdpy->mutex);
- XSync(b->xm_visual->display, 0); /* added for Chromium */
stat = get_drawable_size(dpy, b->ws.drawable, width, height);
pipe_mutex_unlock(xmdpy->mutex);
@@ -726,15 +754,39 @@ XMesaVisual XMesaCreateVisual( Display *display,
alpha_bits = v->mesa_visual.alphaBits;
}
- _mesa_initialize_visual( &v->mesa_visual,
- db_flag, stereo_flag,
- red_bits, green_bits,
- blue_bits, alpha_bits,
- depth_size,
- stencil_size,
- accum_red_size, accum_green_size,
- accum_blue_size, accum_alpha_size,
- 0 );
+ /* initialize visual */
+ {
+ __GLcontextModes *vis = &v->mesa_visual;
+
+ vis->rgbMode = GL_TRUE;
+ vis->doubleBufferMode = db_flag;
+ vis->stereoMode = stereo_flag;
+
+ vis->redBits = red_bits;
+ vis->greenBits = green_bits;
+ vis->blueBits = blue_bits;
+ vis->alphaBits = alpha_bits;
+ vis->rgbBits = red_bits + green_bits + blue_bits;
+
+ vis->indexBits = 0;
+ vis->depthBits = depth_size;
+ vis->stencilBits = stencil_size;
+
+ vis->accumRedBits = accum_red_size;
+ vis->accumGreenBits = accum_green_size;
+ vis->accumBlueBits = accum_blue_size;
+ vis->accumAlphaBits = accum_alpha_size;
+
+ vis->haveAccumBuffer = accum_red_size > 0;
+ vis->haveDepthBuffer = depth_size > 0;
+ vis->haveStencilBuffer = stencil_size > 0;
+
+ vis->numAuxBuffers = 0;
+ vis->level = 0;
+ vis->pixmapMode = 0;
+ vis->sampleBuffers = 0;
+ vis->samples = 0;
+ }
v->stvis.buffer_mask = ST_ATTACHMENT_FRONT_LEFT_MASK;
if (db_flag)
@@ -1154,7 +1206,7 @@ void XMesaFlush( XMesaContext c )
xmdpy->screen->fence_finish(xmdpy->screen, fence, 0);
xmdpy->screen->fence_reference(xmdpy->screen, &fence, NULL);
}
- XSync( c->xm_visual->display, False );
+ XFlush( c->xm_visual->display );
}
}
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h
index 4f2c8a6e6a..f209b14ea1 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.h
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.h
@@ -57,7 +57,7 @@ and create a window, you must do the following to use the X/Mesa interface:
#define XMESA_H
-#include "main/mtypes.h"
+#include "main/core.h" /* for GLvisual and MESA_VERSION_STRING */
#include "state_tracker/st_api.h"
#include "os/os_thread.h"
@@ -378,6 +378,6 @@ xmesa_buffer_height(XMesaBuffer b)
return b->height;
}
-
+extern boolean xmesa_strict_invalidate;
#endif
diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c
index c62eb8bfbd..0f74b3f7aa 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_st.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_st.c
@@ -26,18 +26,18 @@
* Chia-I Wu <olv@lunarg.com>
*/
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-
#include "xm_api.h"
#include "xm_st.h"
+#include "util/u_inlines.h"
+
struct xmesa_st_framebuffer {
XMesaDisplay display;
XMesaBuffer buffer;
struct pipe_screen *screen;
struct st_visual stvis;
+ enum pipe_texture_target target;
unsigned texture_width, texture_height, texture_mask;
struct pipe_resource *textures[ST_ATTACHMENT_COUNT];
@@ -139,7 +139,7 @@ xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi,
}
memset(&templ, 0, sizeof(templ));
- templ.target = PIPE_TEXTURE_2D;
+ templ.target = xstfb->target;
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
@@ -210,6 +210,12 @@ xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi,
/* record newly allocated textures */
new_mask = statt_mask & ~xstfb->texture_mask;
+ /* If xmesa_strict_invalidate is not set, we will not yet have
+ * called XGetGeometry(). Do so here:
+ */
+ if (!xmesa_strict_invalidate)
+ xmesa_check_buffer_size(xstfb->buffer);
+
resized = (xstfb->buffer->width != xstfb->texture_width ||
xstfb->buffer->height != xstfb->texture_height);
@@ -251,7 +257,8 @@ xmesa_st_framebuffer_flush_front(struct st_framebuffer_iface *stfbi,
boolean ret;
ret = xmesa_st_framebuffer_display(stfbi, statt);
- if (ret)
+
+ if (ret && xmesa_strict_invalidate)
xmesa_check_buffer_size(xstfb->buffer);
return ret;
@@ -279,6 +286,10 @@ xmesa_create_st_framebuffer(XMesaDisplay xmdpy, XMesaBuffer b)
xstfb->buffer = b;
xstfb->screen = xmdpy->screen;
xstfb->stvis = b->xm_visual->stvis;
+ if(xstfb->screen->get_param(xstfb->screen, PIPE_CAP_NPOT_TEXTURES))
+ xstfb->target = PIPE_TEXTURE_2D;
+ else
+ xstfb->target = PIPE_TEXTURE_RECT;
stfbi->visual = &xstfb->stvis;
stfbi->flush_front = xmesa_st_framebuffer_flush_front;
@@ -322,7 +333,8 @@ xmesa_swap_st_framebuffer(struct st_framebuffer_iface *stfbi)
*back = tmp;
}
- xmesa_check_buffer_size(xstfb->buffer);
+ if (xmesa_strict_invalidate)
+ xmesa_check_buffer_size(xstfb->buffer);
}
}
diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index 0fb7cd8306..a0e14b9601 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -33,7 +33,7 @@
/* for _mesa_share_state */
#include "state_tracker/st_context.h"
-#include "main/context.h"
+#include "main/core.h"
#include "stw_icd.h"
#include "stw_device.h"
diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c
index a107c71bda..37809d084c 100644
--- a/src/gallium/state_trackers/wgl/stw_device.c
+++ b/src/gallium/state_trackers/wgl/stw_device.c
@@ -27,7 +27,7 @@
#include <windows.h>
-#include "glapi/glthread.h"
+#include "glapi/glapi.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c
index e606477e97..18ac482369 100644
--- a/src/gallium/state_trackers/wgl/stw_pixelformat.c
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c
@@ -25,15 +25,13 @@
*
**************************************************************************/
-#include "main/mtypes.h"
-#include "main/context.h"
-
#include "pipe/p_format.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "util/u_format.h"
#include "util/u_debug.h"
+#include "util/u_memory.h"
#include "stw_icd.h"
#include "stw_device.h"
diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.h b/src/gallium/state_trackers/wgl/stw_pixelformat.h
index d405172773..282c9f643c 100644
--- a/src/gallium/state_trackers/wgl/stw_pixelformat.h
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.h
@@ -34,8 +34,6 @@
#define PFD_SUPPORT_COMPOSITION 0x00008000
#endif
-#include "main/mtypes.h"
-
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
#include "state_tracker/st_api.h"
diff --git a/src/gallium/targets/Makefile.dri b/src/gallium/targets/Makefile.dri
index de05f96d23..59961e982a 100644
--- a/src/gallium/targets/Makefile.dri
+++ b/src/gallium/targets/Makefile.dri
@@ -1,11 +1,12 @@
# -*-makefile-*-
+
ifeq ($(MESA_LLVM),1)
PIPE_DRIVERS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
LDFLAGS += $(LLVM_LDFLAGS)
-LD = g++
DRIVER_EXTRAS = $(LLVM_LIBS)
-USE_CXX=1
+else
+LDFLAGS += -lstdc++
endif
MESA_MODULES = \
@@ -75,15 +76,11 @@ default: depend symlinks $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING)
$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) Makefile \
$(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o
- $(MKLIB) -o $@.tmp -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+ $(MKLIB) -o $@.tmp -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
$(OBJECTS) $(PIPE_DRIVERS) \
-Wl,--start-group $(MESA_MODULES) -Wl,--end-group \
$(DRI_LIB_DEPS) $(DRIVER_EXTRAS)
- if [ "x${USE_CXX}" == "x" ]; then \
- $(CC) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS); \
- else \
- $(CXX) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS); \
- fi
+ $(CXX) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS);
@rm -f $@.test
mv -f $@.tmp $@
diff --git a/src/gallium/targets/SConscript b/src/gallium/targets/SConscript
index f8276b1555..e447d09361 100644
--- a/src/gallium/targets/SConscript
+++ b/src/gallium/targets/SConscript
@@ -1,18 +1,13 @@
import os
Import('*')
-
+
# Compatibility with old build scripts:
#
if 'mesa' in env['statetrackers']:
- if 'xlib' in env['winsys']:
- SConscript([
- 'libgl-xlib/SConscript',
- ])
-
- if 'gdi' in env['winsys']:
- SConscript([
- 'libgl-gdi/SConscript',
- ])
+ if 'xlib' in env['winsys'] and 'libgl-xlib' not in env['targets']:
+ env['targets'].append('libgl-xlib')
+ if 'gdi' in env['winsys'] and 'libgl-gdi' not in env['targets']:
+ env['targets'].append('libgl-gdi')
if not 'graw-xlib' in env['targets'] and not 'graw-null' in env['targets'] and not env['msvc']:
# XXX: disable until MSVC can link correctly
diff --git a/src/gallium/targets/dri-radeong/Makefile b/src/gallium/targets/dri-r300/Makefile
index 3f9ec36166..9afbb13276 100644
--- a/src/gallium/targets/dri-radeong/Makefile
+++ b/src/gallium/targets/dri-r300/Makefile
@@ -1,7 +1,7 @@
TOP = ../../../..
include $(TOP)/configs/current
-LIBNAME = radeong_dri.so
+LIBNAME = r300_dri.so
PIPE_DRIVERS = \
$(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \
diff --git a/src/gallium/targets/dri-radeong/SConscript b/src/gallium/targets/dri-r300/SConscript
index 1402c3bd12..33a458f2e6 100644
--- a/src/gallium/targets/dri-radeong/SConscript
+++ b/src/gallium/targets/dri-r300/SConscript
@@ -1,7 +1,7 @@
Import('*')
if not 'r300' in env['drivers']:
- print 'warning: r300 pipe driver not built skipping radeong_dri.so'
+ print 'warning: r300 pipe driver not built skipping r300_dri.so'
Return()
env = drienv.Clone()
@@ -24,7 +24,7 @@ env.Prepend(LIBS = [
])
env.SharedLibrary(
- target ='radeon_dri.so',
+ target ='r300_dri.so',
source = 'target.c',
SHLIBPREFIX = '',
)
diff --git a/src/gallium/targets/dri-radeong/target.c b/src/gallium/targets/dri-r300/target.c
index 5a0a8dc573..2ecf3457a7 100644
--- a/src/gallium/targets/dri-radeong/target.c
+++ b/src/gallium/targets/dri-r300/target.c
@@ -23,4 +23,4 @@ create_screen(int fd)
return screen;
}
-DRM_DRIVER_DESCRIPTOR("radeon", "radeon", create_screen)
+DRM_DRIVER_DESCRIPTOR("r300", "radeon", create_screen)
diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile
index 932303d194..661283de6a 100644
--- a/src/gallium/targets/dri-r600/Makefile
+++ b/src/gallium/targets/dri-r600/Makefile
@@ -4,12 +4,12 @@ include $(TOP)/configs/current
LIBNAME = r600_dri.so
PIPE_DRIVERS = \
+ $(TOP)/src/gallium/drivers/r600/libr600.a \
$(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \
$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
$(TOP)/src/gallium/drivers/trace/libtrace.a \
- $(TOP)/src/gallium/drivers/rbug/librbug.a \
- $(TOP)/src/gallium/drivers/r600/libr600.a
+ $(TOP)/src/gallium/drivers/rbug/librbug.a
C_SOURCES = \
target.c \
@@ -21,6 +21,6 @@ DRIVER_DEFINES = \
include ../Makefile.dri
-DRI_LIB_DEPS += -ldrm_radeon
+DRI_LIB_DEPS +=
symlinks:
diff --git a/src/gallium/targets/egl-gdi/egl-static.c b/src/gallium/targets/egl-gdi/egl-static.c
index ec2f865c31..4655d79117 100644
--- a/src/gallium/targets/egl-gdi/egl-static.c
+++ b/src/gallium/targets/egl-gdi/egl-static.c
@@ -33,6 +33,8 @@
#include "target-helpers/inline_debug_helper.h"
#include "egldriver.h"
+static struct st_api *stapis[ST_API_COUNT];
+
static uint
get_api_mask(void)
{
@@ -57,7 +59,11 @@ get_api_mask(void)
static struct st_api *
get_st_api(enum st_api_type api)
{
- struct st_api *stapi = NULL;
+ struct st_api *stapi;
+
+ stapi = stapis[api];
+ if (stapi)
+ return stapi;
switch (api) {
#if FEATURE_GL
@@ -84,13 +90,33 @@ get_st_api(enum st_api_type api)
break;
}
+ stapis[api] = stapi;
+
return stapi;
}
static struct st_api *
guess_gl_api(void)
{
- return NULL;
+ struct st_api *stapi = NULL;
+
+#if FEATURE_GL
+ stapi = get_st_api(ST_API_OPENGL);
+ if (stapi)
+ return stapi;
+#endif
+#if FEATURE_ES1
+ stapi = get_st_api(ST_API_OPENGL_ES1);
+ if (stapi)
+ return stapi;
+#endif
+#if FEATURE_ES2
+ stapi = get_st_api(ST_API_OPENGL_ES2);
+ if (stapi)
+ return stapi;
+#endif
+
+ return stapi;
}
static struct pipe_screen *
@@ -127,7 +153,16 @@ init_loader(struct egl_g3d_loader *loader)
static void
egl_g3d_unload(_EGLDriver *drv)
{
+ int i;
+
egl_g3d_destroy_driver(drv);
+
+ for (i = 0; i < ST_API_COUNT; i++) {
+ if (stapis[i]) {
+ stapis[i]->destroy(stapis[i]);
+ stapis[i] = NULL;
+ }
+ }
}
static struct egl_g3d_loader loader;
diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile
index 1e4bb4d94c..2784fd0d10 100644
--- a/src/gallium/targets/egl/Makefile
+++ b/src/gallium/targets/egl/Makefile
@@ -90,13 +90,20 @@ nouveau_LIBS := \
$(TOP)/src/gallium/drivers/nv50/libnv50.a \
$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
-# radeon pipe driver
-radeon_CPPFLAGS :=
-radeon_SYS := -ldrm -ldrm_radeon
-radeon_LIBS := \
+# r300 pipe driver
+r300_CPPFLAGS :=
+r300_SYS := -ldrm -ldrm_radeon
+r300_LIBS := \
$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
$(TOP)/src/gallium/drivers/r300/libr300.a
+# r600 pipe driver
+r600_CPPFLAGS :=
+r600_SYS := -ldrm -ldrm_radeon
+r600_LIBS := \
+ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+ $(TOP)/src/gallium/drivers/r600/libr600.a
+
# vmwgfx pipe driver
vmwgfx_CPPFLAGS :=
vmwgfx_SYS :=
@@ -119,17 +126,17 @@ endif
# OpenGL state tracker
GL_CPPFLAGS := -I$(TOP)/src/mesa $(API_DEFINES)
-GL_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GL_LIB)
+GL_SYS := $(DRI_LIB_DEPS) -L$(TOP)/$(LIB_DIR) -l$(GL_LIB)
GL_LIBS := $(TOP)/src/mesa/libmesagallium.a
# OpenGL ES 1.x state tracker
GLESv1_CM_CPPFLAGS := -I$(TOP)/src/mesa
-GLESv1_CM_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GLESv1_CM_LIB)
+GLESv1_CM_SYS := $(DRI_LIB_DEPS) -L$(TOP)/$(LIB_DIR) -l$(GLESv1_CM_LIB)
GLESv1_CM_LIBS := $(TOP)/src/mesa/libes1gallium.a
# OpenGL ES 2.x state tracker
GLESv2_CPPFLAGS := -I$(TOP)/src/mesa
-GLESv2_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GLESv2_LIB)
+GLESv2_SYS := $(DRI_LIB_DEPS) -L$(TOP)/$(LIB_DIR) -l$(GLESv2_LIB)
GLESv2_LIBS := $(TOP)/src/mesa/libes2gallium.a
# OpenVG state tracker
@@ -151,7 +158,10 @@ ifneq ($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),)
OUTPUTS += nouveau
endif
ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
-OUTPUTS += radeon
+OUTPUTS += r300
+endif
+ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),)
+OUTPUTS += r600
endif
ifneq ($(findstring svga/drm,$(GALLIUM_WINSYS_DIRS)),)
OUTPUTS += vmwgfx
@@ -188,8 +198,11 @@ $(OUTPUT_PATH)/$(PIPE_PREFIX)i965.so: pipe_i965.o $(i965_LIBS)
$(OUTPUT_PATH)/$(PIPE_PREFIX)nouveau.so: pipe_nouveau.o $(nouveau_LIBS)
$(call mklib,nouveau)
-$(OUTPUT_PATH)/$(PIPE_PREFIX)radeon.so: pipe_radeon.o $(radeon_LIBS)
- $(call mklib,radeon)
+$(OUTPUT_PATH)/$(PIPE_PREFIX)r300.so: pipe_r300.o $(r300_LIBS)
+ $(call mklib,r300)
+
+$(OUTPUT_PATH)/$(PIPE_PREFIX)r600.so: pipe_r600.o $(r600_LIBS)
+ $(call mklib,r600)
$(OUTPUT_PATH)/$(PIPE_PREFIX)vmwgfx.so: pipe_vmwgfx.o $(vmwgfx_LIBS)
$(call mklib,vmwgfx)
diff --git a/src/gallium/targets/egl/egl.c b/src/gallium/targets/egl/egl.c
index d9d89485c3..a573b21217 100644
--- a/src/gallium/targets/egl/egl.c
+++ b/src/gallium/targets/egl/egl.c
@@ -155,24 +155,23 @@ load_pipe_module(struct pipe_module *pmod, const char *name)
if (!pmod->name)
return FALSE;
+ _eglLog(_EGL_DEBUG, "searching for pipe module %s", pmod->name);
_eglSearchPathForEach(dlopen_pipe_module_cb, (void *) pmod);
if (pmod->lib) {
pmod->drmdd = (const struct drm_driver_descriptor *)
util_dl_get_proc_address(pmod->lib, "driver_descriptor");
- if (pmod->drmdd) {
- if (pmod->drmdd->driver_name) {
- /* driver name mismatch */
- if (strcmp(pmod->drmdd->driver_name, pmod->name) != 0)
- pmod->drmdd = NULL;
- }
- else {
- /* swrast */
- pmod->swrast_create_screen =
- (struct pipe_screen *(*)(struct sw_winsys *))
- util_dl_get_proc_address(pmod->lib, "swrast_create_screen");
- if (!pmod->swrast_create_screen)
- pmod->drmdd = NULL;
- }
+
+ /* sanity check on the name */
+ if (pmod->drmdd && strcmp(pmod->drmdd->name, pmod->name) != 0)
+ pmod->drmdd = NULL;
+
+ /* swrast */
+ if (pmod->drmdd && !pmod->drmdd->driver_name) {
+ pmod->swrast_create_screen =
+ (struct pipe_screen *(*)(struct sw_winsys *))
+ util_dl_get_proc_address(pmod->lib, "swrast_create_screen");
+ if (!pmod->swrast_create_screen)
+ pmod->drmdd = NULL;
}
if (!pmod->drmdd) {
diff --git a/src/gallium/targets/egl/pipe_radeon.c b/src/gallium/targets/egl/pipe_r300.c
index 35550bcb26..d84bb92539 100644
--- a/src/gallium/targets/egl/pipe_radeon.c
+++ b/src/gallium/targets/egl/pipe_r300.c
@@ -24,4 +24,4 @@ create_screen(int fd)
}
PUBLIC
-DRM_DRIVER_DESCRIPTOR("radeon", "radeon", create_screen)
+DRM_DRIVER_DESCRIPTOR("r300", "radeon", create_screen)
diff --git a/src/gallium/targets/egl/pipe_r600.c b/src/gallium/targets/egl/pipe_r600.c
new file mode 100644
index 0000000000..486a659258
--- /dev/null
+++ b/src/gallium/targets/egl/pipe_r600.c
@@ -0,0 +1,27 @@
+
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "r600/drm/r600_drm_public.h"
+#include "r600/r600_public.h"
+
+static struct pipe_screen *
+create_screen(int fd)
+{
+ struct radeon *rw;
+ struct pipe_screen *screen;
+
+ rw = r600_drm_winsys_create(fd);
+ if (!rw)
+ return NULL;
+
+ screen = r600_screen_create(rw);
+ if (!screen)
+ return NULL;
+
+ screen = debug_screen_wrap(screen);
+
+ return screen;
+}
+
+PUBLIC
+DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen)
diff --git a/src/gallium/targets/graw-xlib/graw_util.c b/src/gallium/targets/graw-xlib/graw_util.c
index 47aca4464d..fc7c9ae6f9 100644
--- a/src/gallium/targets/graw-xlib/graw_util.c
+++ b/src/gallium/targets/graw-xlib/graw_util.c
@@ -1,6 +1,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "tgsi/tgsi_text.h"
#include "util/u_memory.h"
#include "state_tracker/graw.h"
diff --git a/src/gallium/targets/graw-xlib/graw_xlib.c b/src/gallium/targets/graw-xlib/graw_xlib.c
index 41120ba3c7..8b64a0b819 100644
--- a/src/gallium/targets/graw-xlib/graw_xlib.c
+++ b/src/gallium/targets/graw-xlib/graw_xlib.c
@@ -1,5 +1,6 @@
#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "target-helpers/wrap_screen.h"
diff --git a/src/gallium/targets/libgl-gdi/SConscript b/src/gallium/targets/libgl-gdi/SConscript
index 144084f74f..12fe403f62 100644
--- a/src/gallium/targets/libgl-gdi/SConscript
+++ b/src/gallium/targets/libgl-gdi/SConscript
@@ -17,6 +17,7 @@ if env['platform'] == 'windows':
'user32',
'kernel32',
'ws2_32',
+ talloc,
])
sources = []
diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile
index e745023ba5..fe0541543a 100644
--- a/src/gallium/targets/libgl-xlib/Makefile
+++ b/src/gallium/targets/libgl-xlib/Makefile
@@ -68,8 +68,9 @@ $(TOP)/$(LIB_DIR)/gallium:
# Make the libGL.so library
$(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME): $(XLIB_TARGET_OBJECTS) $(LIBS) Makefile
$(TOP)/bin/mklib -o $(GL_LIB) \
- -linker "$(CC)" \
+ -linker "$(CXX)" \
-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
+ -cplusplus \
-install $(TOP)/$(LIB_DIR)/gallium \
$(MKLIB_OPTIONS) $(XLIB_TARGET_OBJECTS) \
-Wl,--start-group $(LIBS) -Wl,--end-group $(GL_LIB_DEPS)
diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript
index 78703fd096..88e216a65b 100644
--- a/src/gallium/targets/libgl-xlib/SConscript
+++ b/src/gallium/targets/libgl-xlib/SConscript
@@ -35,6 +35,7 @@ env.Prepend(LIBS = [
mesa,
glsl,
gallium,
+ 'talloc'
])
sources = [
diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c
index 69b4ddd33f..5a9c80c856 100644
--- a/src/gallium/targets/libgl-xlib/xlib.c
+++ b/src/gallium/targets/libgl-xlib/xlib.c
@@ -36,6 +36,7 @@
#include "state_tracker/xlib_sw_winsys.h"
#include "xm_public.h"
+#include "state_tracker/st_api.h"
#include "state_tracker/st_gl_api.h"
/* piggy back on this libGL for OpenGL support in EGL */
diff --git a/src/gallium/tests/python/tests/texture_blit.py b/src/gallium/tests/python/tests/texture_blit.py
index 58706dab93..089d05c623 100755
--- a/src/gallium/tests/python/tests/texture_blit.py
+++ b/src/gallium/tests/python/tests/texture_blit.py
@@ -55,7 +55,7 @@ def tex_coords(texture, face, level, zslice):
[0.0, 1.0],
]
- if texture.target == PIPE_TEXTURE_2D:
+ if texture.target == PIPE_TEXTURE_2D or texture.target == PIPE_TEXTURE_RECT:
return [[s, t, 0.0] for s, t in st]
elif texture.target == PIPE_TEXTURE_3D:
depth = texture.get_depth(level)
diff --git a/src/gallium/tools/addr2line.sh b/src/gallium/tools/addr2line.sh
new file mode 100755
index 0000000000..34dec14271
--- /dev/null
+++ b/src/gallium/tools/addr2line.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# This script processes symbols output by Gallium using glibc to human-readable function names
+
+lastbin=
+i=-1
+dir="$(mktemp -d)"
+input="$1"
+
+# Gather all unique addresses for each binary
+sed -nre 's|([^ ]*/[^ ]*)\(\+0x([^)]*).*|\1 \2|p' "$input"|sort|uniq|while read bin addr; do
+ if test "$lastbin" != "$bin"; then
+ ((++i))
+ lastbin="$bin"
+ echo "$bin" > "$dir/$i.addrs.bin"
+ fi
+ echo "$addr" >> "$dir/$i.addrs"
+done
+
+# Construct a sed script to convert hex address to human readable form, and apply it
+for i in "$dir"/*.addrs; do
+ bin="$(<"$i.bin")"
+ addr2line -p -e "$bin" -a -f < "$i"|sed -nre 's@^0x0*([^:]*): ([^?]*)$@s|'"$bin"'(+0x\1)|\2|g@gp'
+ rm -f "$i" "$i.bin"
+done|sed -f - "$input"
+
+rmdir "$dir"
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 660dbd0c33..d4bf124ce6 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -19,7 +19,8 @@ nouveau_drm_destroy_winsys(struct pipe_winsys *s)
{
struct nouveau_winsys *nv_winsys = nouveau_winsys(s);
struct nouveau_screen *nv_screen= nouveau_screen(nv_winsys->pscreen);
- nouveau_device_close(&nv_screen->device);
+ if (nv_screen)
+ nouveau_device_close(&nv_screen->device);
FREE(nv_winsys);
}
diff --git a/src/gallium/winsys/r600/drm/r600_state.c b/src/gallium/winsys/r600/drm/r600_state.c
index d17d6e7954..71d65f0fea 100644
--- a/src/gallium/winsys/r600/drm/r600_state.c
+++ b/src/gallium/winsys/r600/drm/r600_state.c
@@ -30,6 +30,8 @@
#include "radeon_priv.h"
#include "r600d.h"
+#include "util/u_memory.h"
+
static int r600_state_pm4_resource(struct radeon_state *state);
static int r600_state_pm4_cb0(struct radeon_state *state);
static int r600_state_pm4_vgt(struct radeon_state *state);
@@ -38,24 +40,69 @@ static int r600_state_pm4_shader(struct radeon_state *state);
static int r600_state_pm4_draw(struct radeon_state *state);
static int r600_state_pm4_config(struct radeon_state *state);
static int r600_state_pm4_generic(struct radeon_state *state);
+static int r600_state_pm4_query_begin(struct radeon_state *state);
+static int r600_state_pm4_query_end(struct radeon_state *state);
static int r700_state_pm4_config(struct radeon_state *state);
static int r700_state_pm4_cb0(struct radeon_state *state);
static int r700_state_pm4_db(struct radeon_state *state);
#include "r600_states.h"
+
+#define SUB_NONE(param) { { 0, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) } }
+#define SUB_PS(param) { R600_SHADER_PS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
+#define SUB_VS(param) { R600_SHADER_VS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
+#define SUB_GS(param) { R600_SHADER_GS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
+#define SUB_FS(param) { R600_SHADER_FS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
+
+/* some of these are overriden at runtime for R700 */
+struct radeon_stype_info r600_stypes[] = {
+ { R600_STATE_CONFIG, 1, 0, r600_state_pm4_config, SUB_NONE(CONFIG), },
+ { R600_STATE_CB_CNTL, 1, 0, r600_state_pm4_generic, SUB_NONE(CB_CNTL) },
+ { R600_STATE_RASTERIZER, 1, 0, r600_state_pm4_generic, SUB_NONE(RASTERIZER) },
+ { R600_STATE_VIEWPORT, 1, 0, r600_state_pm4_generic, SUB_NONE(VIEWPORT) },
+ { R600_STATE_SCISSOR, 1, 0, r600_state_pm4_generic, SUB_NONE(SCISSOR) },
+ { R600_STATE_BLEND, 1, 0, r600_state_pm4_generic, SUB_NONE(BLEND), },
+ { R600_STATE_DSA, 1, 0, r600_state_pm4_generic, SUB_NONE(DSA), },
+ { R600_STATE_SHADER, 1, 0, r600_state_pm4_shader, { SUB_PS(PS_SHADER), SUB_VS(VS_SHADER) } },
+ { R600_STATE_CONSTANT, 256, 0x10, r600_state_pm4_generic, { SUB_PS(PS_CONSTANT), SUB_VS(VS_CONSTANT) } },
+ { R600_STATE_RESOURCE, 160, 0x1c, r600_state_pm4_resource, { SUB_PS(PS_RESOURCE), SUB_VS(VS_RESOURCE), SUB_GS(GS_RESOURCE), SUB_FS(FS_RESOURCE)} },
+ { R600_STATE_SAMPLER, 18, 0xc, r600_state_pm4_generic, { SUB_PS(PS_SAMPLER), SUB_VS(VS_SAMPLER), SUB_GS(GS_SAMPLER) } },
+ { R600_STATE_SAMPLER_BORDER, 18, 0x10, r600_state_pm4_generic, { SUB_PS(PS_SAMPLER_BORDER), SUB_VS(VS_SAMPLER_BORDER), SUB_GS(GS_SAMPLER_BORDER) } },
+ { R600_STATE_CB0, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB0) },
+ { R600_STATE_CB1, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB1) },
+ { R600_STATE_CB2, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB2) },
+ { R600_STATE_CB3, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB3) },
+ { R600_STATE_CB4, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB4) },
+ { R600_STATE_CB5, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB5) },
+ { R600_STATE_CB6, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB6) },
+ { R600_STATE_CB7, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB7) },
+ { R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, SUB_NONE(VGT_EVENT) },
+ { R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, SUB_NONE(VGT_EVENT) },
+ { R600_STATE_DB, 1, 0, r600_state_pm4_db, SUB_NONE(DB) },
+ { R600_STATE_UCP, 1, 0, r600_state_pm4_generic, SUB_NONE(UCP) },
+ { R600_STATE_VGT, 1, 0, r600_state_pm4_vgt, SUB_NONE(VGT) },
+ { R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, SUB_NONE(DRAW) },
+};
+#define STYPES_SIZE Elements(r600_stypes)
+
+static const struct radeon_register *get_regs(struct radeon_state *state)
+{
+ return state->stype->reginfo[state->shader_index].regs;
+}
+
/*
* r600/r700 state functions
*/
static int r600_state_pm4_bytecode(struct radeon_state *state, unsigned offset, unsigned id, unsigned nreg)
{
- const struct radeon_register *regs = state->radeon->type[state->type].regs;
+ const struct radeon_register *regs = get_regs(state);
unsigned i;
int r;
if (!offset) {
fprintf(stderr, "%s invalid register for state %d %d\n",
- __func__, state->type, id);
+ __func__, state->stype->stype, id);
return -EINVAL;
}
if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) {
@@ -114,19 +161,18 @@ static int r600_state_pm4_bytecode(struct radeon_state *state, unsigned offset,
static int r600_state_pm4_generic(struct radeon_state *state)
{
- struct radeon *radeon = state->radeon;
- unsigned i, offset, nreg, type, coffset, loffset, soffset;
+ const struct radeon_register *regs = get_regs(state);
+ unsigned i, offset, nreg, coffset, loffset, soffset;
unsigned start;
int r;
if (!state->nstates)
return 0;
- type = state->type;
- soffset = (state->id - radeon->type[type].id) * radeon->type[type].stride;
- offset = loffset = radeon->type[type].regs[0].offset + soffset;
+ soffset = state->id * state->stype->stride;
+ offset = loffset = regs[0].offset + soffset;
start = 0;
for (i = 1, nreg = 1; i < state->nstates; i++) {
- coffset = radeon->type[type].regs[i].offset + soffset;
+ coffset = regs[i].offset + soffset;
if (coffset == (loffset + 4)) {
nreg++;
loffset = coffset;
@@ -233,20 +279,54 @@ static int r600_state_pm4_config(struct radeon_state *state)
state->pm4[state->cpm4++] = 0x80000000;
state->pm4[state->cpm4++] = 0x80000000;
state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0);
- state->pm4[state->cpm4++] = 0x00000016;
+ state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, 1);
state->pm4[state->cpm4++] = 0x00000010;
state->pm4[state->cpm4++] = 0x00028000;
return r600_state_pm4_generic(state);
}
+static int r600_state_pm4_query_begin(struct radeon_state *state)
+{
+ int r;
+
+ state->cpm4 = 0;
+ state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 2);
+ state->pm4[state->cpm4++] = EVENT_TYPE_ZPASS_DONE;
+ state->pm4[state->cpm4++] = state->states[0];
+ state->pm4[state->cpm4++] = 0x0;
+ state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
+ r = radeon_state_reloc(state, state->cpm4, 0);
+ if (r)
+ return r;
+ state->pm4[state->cpm4++] = state->bo[0]->handle;
+ return 0;
+}
+
+static int r600_state_pm4_query_end(struct radeon_state *state)
+{
+ int r;
+
+ state->cpm4 = 0;
+ state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 2);
+ state->pm4[state->cpm4++] = EVENT_TYPE_ZPASS_DONE;
+ state->pm4[state->cpm4++] = state->states[0];
+ state->pm4[state->cpm4++] = 0x0;
+ state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
+ r = radeon_state_reloc(state, state->cpm4, 0);
+ if (r)
+ return r;
+ state->pm4[state->cpm4++] = state->bo[0]->handle;
+ return 0;
+}
+
static int r700_state_pm4_config(struct radeon_state *state)
{
state->pm4[state->cpm4++] = PKT3(PKT3_CONTEXT_CONTROL, 1);
state->pm4[state->cpm4++] = 0x80000000;
state->pm4[state->cpm4++] = 0x80000000;
state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0);
- state->pm4[state->cpm4++] = 0x00000016;
+ state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, 1);
state->pm4[state->cpm4++] = 0x00000010;
state->pm4[state->cpm4++] = 0x00028000;
@@ -287,7 +367,6 @@ static int r600_state_pm4_vgt(struct radeon_state *state)
static int r600_state_pm4_draw(struct radeon_state *state)
{
- unsigned i;
int r;
if (state->nbo) {
@@ -301,20 +380,13 @@ static int r600_state_pm4_draw(struct radeon_state *state)
if (r)
return r;
state->pm4[state->cpm4++] = state->bo[0]->handle;
- } else if (state->nimmd) {
- state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_IMMD, state->nimmd + 1);
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR];
- for (i = 0; i < state->nimmd; i++) {
- state->pm4[state->cpm4++] = state->immd[i];
- }
} else {
state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];
state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR];
}
state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0);
- state->pm4[state->cpm4++] = 0x00000016;
+ state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
return 0;
}
@@ -322,8 +394,9 @@ static int r600_state_pm4_resource(struct radeon_state *state)
{
u32 flags, type, nbo, offset, soffset;
int r;
+ const struct radeon_register *regs = get_regs(state);
- soffset = (state->id - state->radeon->type[state->type].id) * state->radeon->type[state->type].stride;
+ soffset = state->id * state->stype->stride;
type = G_038018_TYPE(state->states[6]);
switch (type) {
case 2:
@@ -342,7 +415,7 @@ static int r600_state_pm4_resource(struct radeon_state *state)
return -EINVAL;
}
r600_state_pm4_with_flush(state, flags);
- offset = state->radeon->type[state->type].regs[0].offset + soffset;
+ offset = regs[0].offset + soffset;
state->pm4[state->cpm4++] = PKT3(PKT3_SET_RESOURCE, 7);
state->pm4[state->cpm4++] = (offset - R_038000_SQ_TEX_RESOURCE_WORD0_0) >> 2;
state->pm4[state->cpm4++] = state->states[0];
@@ -367,33 +440,62 @@ static int r600_state_pm4_resource(struct radeon_state *state)
return 0;
}
-int r600_init(struct radeon *radeon)
+
+static void r600_modify_type_array(struct radeon *radeon)
{
+ int i;
switch (radeon->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- radeon->ntype = R600_NTYPE;
- radeon->nstate = R600_NSTATE;
- radeon->type = R600_types;
- break;
case CHIP_RV770:
case CHIP_RV730:
case CHIP_RV710:
case CHIP_RV740:
- radeon->ntype = R600_NTYPE;
- radeon->nstate = R600_NSTATE;
- radeon->type = R700_types;
break;
default:
- fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
- __func__, radeon->device);
- return -EINVAL;
+ return;
+ }
+
+ /* r700 needs some mods */
+ for (i = 0; i < radeon->nstype; i++) {
+ struct radeon_stype_info *info = &radeon->stype[i];
+
+ switch(info->stype) {
+ case R600_STATE_CONFIG:
+ info->pm4 = r700_state_pm4_config;
+ break;
+ case R600_STATE_CB0:
+ info->pm4 = r700_state_pm4_cb0;
+ break;
+ case R600_STATE_DB:
+ info->pm4 = r700_state_pm4_db;
+ };
}
+}
+
+static void r600_build_types_array(struct radeon *radeon)
+{
+ int i, j;
+ int id = 0;
+
+ for (i = 0; i < STYPES_SIZE; i++) {
+ r600_stypes[i].base_id = id;
+ r600_stypes[i].npm4 = 128;
+ if (r600_stypes[i].reginfo[0].shader_type == 0) {
+ id += r600_stypes[i].num;
+ } else {
+ for (j = 0; j < R600_SHADER_MAX; j++) {
+ if (r600_stypes[i].reginfo[j].shader_type)
+ id += r600_stypes[i].num;
+ }
+ }
+ }
+ radeon->stype = r600_stypes;
+ radeon->nstype = STYPES_SIZE;
+
+ r600_modify_type_array(radeon);
+}
+
+int r600_init(struct radeon *radeon)
+{
+ r600_build_types_array(radeon);
return 0;
}
diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h
index e40c77d8f6..09d79d498d 100644
--- a/src/gallium/winsys/r600/drm/r600_states.h
+++ b/src/gallium/winsys/r600/drm/r600_states.h
@@ -17,7 +17,7 @@
#ifndef R600_STATES_H
#define R600_STATES_H
-static const struct radeon_register R600_CONFIG_names[] = {
+static const struct radeon_register R600_names_CONFIG[] = {
{0x00008C00, 0, 0, "SQ_CONFIG"},
{0x00008C04, 0, 0, "SQ_GPR_RESOURCE_MGMT_1"},
{0x00008C08, 0, 0, "SQ_GPR_RESOURCE_MGMT_2"},
@@ -61,7 +61,7 @@ static const struct radeon_register R600_CONFIG_names[] = {
{0x00028B20, 0, 0, "VGT_STRMOUT_BUFFER_EN"},
};
-static const struct radeon_register R600_CB_CNTL_names[] = {
+static const struct radeon_register R600_names_CB_CNTL[] = {
{0x00028120, 0, 0, "CB_CLEAR_RED"},
{0x00028124, 0, 0, "CB_CLEAR_GREEN"},
{0x00028128, 0, 0, "CB_CLEAR_BLUE"},
@@ -82,7 +82,7 @@ static const struct radeon_register R600_CB_CNTL_names[] = {
{0x00028C48, 0, 0, "PA_SC_AA_MASK"},
};
-static const struct radeon_register R600_RASTERIZER_names[] = {
+static const struct radeon_register R600_names_RASTERIZER[] = {
{0x000286D4, 0, 0, "SPI_INTERP_CONTROL_0"},
{0x00028810, 0, 0, "PA_CL_CLIP_CNTL"},
{0x00028814, 0, 0, "PA_SU_SC_MODE_CNTL"},
@@ -106,7 +106,7 @@ static const struct radeon_register R600_RASTERIZER_names[] = {
{0x00028E0C, 0, 0, "PA_SU_POLY_OFFSET_BACK_OFFSET"},
};
-static const struct radeon_register R600_VIEWPORT_names[] = {
+static const struct radeon_register R600_names_VIEWPORT[] = {
{0x000282D0, 0, 0, "PA_SC_VPORT_ZMIN_0"},
{0x000282D4, 0, 0, "PA_SC_VPORT_ZMAX_0"},
{0x0002843C, 0, 0, "PA_CL_VPORT_XSCALE_0"},
@@ -118,7 +118,7 @@ static const struct radeon_register R600_VIEWPORT_names[] = {
{0x00028818, 0, 0, "PA_CL_VTE_CNTL"},
};
-static const struct radeon_register R600_SCISSOR_names[] = {
+static const struct radeon_register R600_names_SCISSOR[] = {
{0x00028030, 0, 0, "PA_SC_SCREEN_SCISSOR_TL"},
{0x00028034, 0, 0, "PA_SC_SCREEN_SCISSOR_BR"},
{0x00028200, 0, 0, "PA_SC_WINDOW_OFFSET"},
@@ -140,7 +140,7 @@ static const struct radeon_register R600_SCISSOR_names[] = {
{0x00028254, 0, 0, "PA_SC_VPORT_SCISSOR_0_BR"},
};
-static const struct radeon_register R600_BLEND_names[] = {
+static const struct radeon_register R600_names_BLEND[] = {
{0x00028414, 0, 0, "CB_BLEND_RED"},
{0x00028418, 0, 0, "CB_BLEND_GREEN"},
{0x0002841C, 0, 0, "CB_BLEND_BLUE"},
@@ -156,7 +156,7 @@ static const struct radeon_register R600_BLEND_names[] = {
{0x00028804, 0, 0, "CB_BLEND_CONTROL"},
};
-static const struct radeon_register R600_DSA_names[] = {
+static const struct radeon_register R600_names_DSA[] = {
{0x00028028, 0, 0, "DB_STENCIL_CLEAR"},
{0x0002802C, 0, 0, "DB_DEPTH_CLEAR"},
{0x00028410, 0, 0, "SX_ALPHA_TEST_CONTROL"},
@@ -175,7 +175,7 @@ static const struct radeon_register R600_DSA_names[] = {
{0x00028D44, 0, 0, "DB_ALPHA_TO_MASK"},
};
-static const struct radeon_register R600_VS_SHADER_names[] = {
+static const struct radeon_register R600_names_VS_SHADER[] = {
{0x00028380, 0, 0, "SQ_VTX_SEMANTIC_0"},
{0x00028384, 0, 0, "SQ_VTX_SEMANTIC_1"},
{0x00028388, 0, 0, "SQ_VTX_SEMANTIC_2"},
@@ -227,7 +227,7 @@ static const struct radeon_register R600_VS_SHADER_names[] = {
{0x000288DC, 0, 0, "SQ_PGM_CF_OFFSET_FS"},
};
-static const struct radeon_register R600_PS_SHADER_names[] = {
+static const struct radeon_register R600_names_PS_SHADER[] = {
{0x00028644, 0, 0, "SPI_PS_INPUT_CNTL_0"},
{0x00028648, 0, 0, "SPI_PS_INPUT_CNTL_1"},
{0x0002864C, 0, 0, "SPI_PS_INPUT_CNTL_2"},
@@ -269,21 +269,48 @@ static const struct radeon_register R600_PS_SHADER_names[] = {
{0x000288CC, 0, 0, "SQ_PGM_CF_OFFSET_PS"},
};
-static const struct radeon_register R600_PS_CONSTANT_names[] = {
+static const struct radeon_register R600_names_PS_CONSTANT[] = {
{0x00030000, 0, 0, "SQ_ALU_CONSTANT0_0"},
{0x00030004, 0, 0, "SQ_ALU_CONSTANT1_0"},
{0x00030008, 0, 0, "SQ_ALU_CONSTANT2_0"},
{0x0003000C, 0, 0, "SQ_ALU_CONSTANT3_0"},
};
-static const struct radeon_register R600_VS_CONSTANT_names[] = {
+static const struct radeon_register R600_names_VS_CONSTANT[] = {
{0x00031000, 0, 0, "SQ_ALU_CONSTANT0_256"},
{0x00031004, 0, 0, "SQ_ALU_CONSTANT1_256"},
{0x00031008, 0, 0, "SQ_ALU_CONSTANT2_256"},
{0x0003100C, 0, 0, "SQ_ALU_CONSTANT3_256"},
};
-static const struct radeon_register R600_PS_RESOURCE_names[] = {
+static const struct radeon_register R600_names_UCP[] = {
+ {0x00028E20, 0, 0, "PA_CL_UCP0_X"},
+ {0x00028E24, 0, 0, "PA_CL_UCP0_Y"},
+ {0x00028E28, 0, 0, "PA_CL_UCP0_Z"},
+ {0x00028E2C, 0, 0, "PA_CL_UCP0_W"},
+ {0x00028E30, 0, 0, "PA_CL_UCP1_X"},
+ {0x00028E34, 0, 0, "PA_CL_UCP1_Y"},
+ {0x00028E38, 0, 0, "PA_CL_UCP1_Z"},
+ {0x00028E3C, 0, 0, "PA_CL_UCP1_W"},
+ {0x00028E40, 0, 0, "PA_CL_UCP2_X"},
+ {0x00028E44, 0, 0, "PA_CL_UCP2_Y"},
+ {0x00028E48, 0, 0, "PA_CL_UCP2_Z"},
+ {0x00028E4C, 0, 0, "PA_CL_UCP2_W"},
+ {0x00028E50, 0, 0, "PA_CL_UCP3_X"},
+ {0x00028E54, 0, 0, "PA_CL_UCP3_Y"},
+ {0x00028E58, 0, 0, "PA_CL_UCP3_Z"},
+ {0x00028E5C, 0, 0, "PA_CL_UCP3_W"},
+ {0x00028E60, 0, 0, "PA_CL_UCP4_X"},
+ {0x00028E64, 0, 0, "PA_CL_UCP4_Y"},
+ {0x00028E68, 0, 0, "PA_CL_UCP4_Z"},
+ {0x00028E6C, 0, 0, "PA_CL_UCP4_W"},
+ {0x00028E70, 0, 0, "PA_CL_UCP5_X"},
+ {0x00028E74, 0, 0, "PA_CL_UCP5_Y"},
+ {0x00028E78, 0, 0, "PA_CL_UCP5_Z"},
+ {0x00028E7C, 0, 0, "PA_CL_UCP5_W"},
+};
+
+static const struct radeon_register R600_names_PS_RESOURCE[] = {
{0x00038000, 0, 0, "RESOURCE0_WORD0"},
{0x00038004, 0, 0, "RESOURCE0_WORD1"},
{0x00038008, 0, 0, "RESOURCE0_WORD2"},
@@ -293,7 +320,7 @@ static const struct radeon_register R600_PS_RESOURCE_names[] = {
{0x00038018, 0, 0, "RESOURCE0_WORD6"},
};
-static const struct radeon_register R600_VS_RESOURCE_names[] = {
+static const struct radeon_register R600_names_VS_RESOURCE[] = {
{0x00039180, 0, 0, "RESOURCE160_WORD0"},
{0x00039184, 0, 0, "RESOURCE160_WORD1"},
{0x00039188, 0, 0, "RESOURCE160_WORD2"},
@@ -303,7 +330,7 @@ static const struct radeon_register R600_VS_RESOURCE_names[] = {
{0x00039198, 0, 0, "RESOURCE160_WORD6"},
};
-static const struct radeon_register R600_FS_RESOURCE_names[] = {
+static const struct radeon_register R600_names_FS_RESOURCE[] = {
{0x0003A300, 0, 0, "RESOURCE320_WORD0"},
{0x0003A304, 0, 0, "RESOURCE320_WORD1"},
{0x0003A308, 0, 0, "RESOURCE320_WORD2"},
@@ -313,7 +340,7 @@ static const struct radeon_register R600_FS_RESOURCE_names[] = {
{0x0003A318, 0, 0, "RESOURCE320_WORD6"},
};
-static const struct radeon_register R600_GS_RESOURCE_names[] = {
+static const struct radeon_register R600_names_GS_RESOURCE[] = {
{0x0003A4C0, 0, 0, "RESOURCE336_WORD0"},
{0x0003A4C4, 0, 0, "RESOURCE336_WORD1"},
{0x0003A4C8, 0, 0, "RESOURCE336_WORD2"},
@@ -323,46 +350,46 @@ static const struct radeon_register R600_GS_RESOURCE_names[] = {
{0x0003A4D8, 0, 0, "RESOURCE336_WORD6"},
};
-static const struct radeon_register R600_PS_SAMPLER_names[] = {
+static const struct radeon_register R600_names_PS_SAMPLER[] = {
{0x0003C000, 0, 0, "SQ_TEX_SAMPLER_WORD0_0"},
{0x0003C004, 0, 0, "SQ_TEX_SAMPLER_WORD1_0"},
{0x0003C008, 0, 0, "SQ_TEX_SAMPLER_WORD2_0"},
};
-static const struct radeon_register R600_VS_SAMPLER_names[] = {
+static const struct radeon_register R600_names_VS_SAMPLER[] = {
{0x0003C0D8, 0, 0, "SQ_TEX_SAMPLER_WORD0_18"},
{0x0003C0DC, 0, 0, "SQ_TEX_SAMPLER_WORD1_18"},
{0x0003C0E0, 0, 0, "SQ_TEX_SAMPLER_WORD2_18"},
};
-static const struct radeon_register R600_GS_SAMPLER_names[] = {
+static const struct radeon_register R600_names_GS_SAMPLER[] = {
{0x0003C1B0, 0, 0, "SQ_TEX_SAMPLER_WORD0_36"},
{0x0003C1B4, 0, 0, "SQ_TEX_SAMPLER_WORD1_36"},
{0x0003C1B8, 0, 0, "SQ_TEX_SAMPLER_WORD2_36"},
};
-static const struct radeon_register R600_PS_SAMPLER_BORDER_names[] = {
+static const struct radeon_register R600_names_PS_SAMPLER_BORDER[] = {
{0x0000A400, 0, 0, "TD_PS_SAMPLER0_BORDER_RED"},
{0x0000A404, 0, 0, "TD_PS_SAMPLER0_BORDER_GREEN"},
{0x0000A408, 0, 0, "TD_PS_SAMPLER0_BORDER_BLUE"},
{0x0000A40C, 0, 0, "TD_PS_SAMPLER0_BORDER_ALPHA"},
};
-static const struct radeon_register R600_VS_SAMPLER_BORDER_names[] = {
+static const struct radeon_register R600_names_VS_SAMPLER_BORDER[] = {
{0x0000A600, 0, 0, "TD_VS_SAMPLER0_BORDER_RED"},
{0x0000A604, 0, 0, "TD_VS_SAMPLER0_BORDER_GREEN"},
{0x0000A608, 0, 0, "TD_VS_SAMPLER0_BORDER_BLUE"},
{0x0000A60C, 0, 0, "TD_VS_SAMPLER0_BORDER_ALPHA"},
};
-static const struct radeon_register R600_GS_SAMPLER_BORDER_names[] = {
+static const struct radeon_register R600_names_GS_SAMPLER_BORDER[] = {
{0x0000A800, 0, 0, "TD_GS_SAMPLER0_BORDER_RED"},
{0x0000A804, 0, 0, "TD_GS_SAMPLER0_BORDER_GREEN"},
{0x0000A808, 0, 0, "TD_GS_SAMPLER0_BORDER_BLUE"},
{0x0000A80C, 0, 0, "TD_GS_SAMPLER0_BORDER_ALPHA"},
};
-static const struct radeon_register R600_CB0_names[] = {
+static const struct radeon_register R600_names_CB0[] = {
{0x00028040, 1, 0, "CB_COLOR0_BASE"},
{0x000280A0, 0, 0, "CB_COLOR0_INFO"},
{0x00028060, 0, 0, "CB_COLOR0_SIZE"},
@@ -372,7 +399,7 @@ static const struct radeon_register R600_CB0_names[] = {
{0x00028100, 0, 0, "CB_COLOR0_MASK"},
};
-static const struct radeon_register R600_CB1_names[] = {
+static const struct radeon_register R600_names_CB1[] = {
{0x00028044, 1, 0, "CB_COLOR1_BASE"},
{0x000280A4, 0, 0, "CB_COLOR1_INFO"},
{0x00028064, 0, 0, "CB_COLOR1_SIZE"},
@@ -382,7 +409,7 @@ static const struct radeon_register R600_CB1_names[] = {
{0x00028104, 0, 0, "CB_COLOR1_MASK"},
};
-static const struct radeon_register R600_CB2_names[] = {
+static const struct radeon_register R600_names_CB2[] = {
{0x00028048, 1, 0, "CB_COLOR2_BASE"},
{0x000280A8, 0, 0, "CB_COLOR2_INFO"},
{0x00028068, 0, 0, "CB_COLOR2_SIZE"},
@@ -392,7 +419,7 @@ static const struct radeon_register R600_CB2_names[] = {
{0x00028108, 0, 0, "CB_COLOR2_MASK"},
};
-static const struct radeon_register R600_CB3_names[] = {
+static const struct radeon_register R600_names_CB3[] = {
{0x0002804C, 1, 0, "CB_COLOR3_BASE"},
{0x000280AC, 0, 0, "CB_COLOR3_INFO"},
{0x0002806C, 0, 0, "CB_COLOR3_SIZE"},
@@ -402,7 +429,7 @@ static const struct radeon_register R600_CB3_names[] = {
{0x0002810C, 0, 0, "CB_COLOR3_MASK"},
};
-static const struct radeon_register R600_CB4_names[] = {
+static const struct radeon_register R600_names_CB4[] = {
{0x00028050, 1, 0, "CB_COLOR4_BASE"},
{0x000280B0, 0, 0, "CB_COLOR4_INFO"},
{0x00028070, 0, 0, "CB_COLOR4_SIZE"},
@@ -412,7 +439,7 @@ static const struct radeon_register R600_CB4_names[] = {
{0x00028110, 0, 0, "CB_COLOR4_MASK"},
};
-static const struct radeon_register R600_CB5_names[] = {
+static const struct radeon_register R600_names_CB5[] = {
{0x00028054, 1, 0, "CB_COLOR5_BASE"},
{0x000280B4, 0, 0, "CB_COLOR5_INFO"},
{0x00028074, 0, 0, "CB_COLOR5_SIZE"},
@@ -422,7 +449,7 @@ static const struct radeon_register R600_CB5_names[] = {
{0x00028114, 0, 0, "CB_COLOR5_MASK"},
};
-static const struct radeon_register R600_CB6_names[] = {
+static const struct radeon_register R600_names_CB6[] = {
{0x00028058, 1, 0, "CB_COLOR6_BASE"},
{0x000280B8, 0, 0, "CB_COLOR6_INFO"},
{0x00028078, 0, 0, "CB_COLOR6_SIZE"},
@@ -432,7 +459,7 @@ static const struct radeon_register R600_CB6_names[] = {
{0x00028118, 0, 0, "CB_COLOR6_MASK"},
};
-static const struct radeon_register R600_CB7_names[] = {
+static const struct radeon_register R600_names_CB7[] = {
{0x0002805C, 1, 0, "CB_COLOR7_BASE"},
{0x000280BC, 0, 0, "CB_COLOR7_INFO"},
{0x0002807C, 0, 0, "CB_COLOR7_SIZE"},
@@ -442,7 +469,7 @@ static const struct radeon_register R600_CB7_names[] = {
{0x0002811C, 0, 0, "CB_COLOR7_MASK"},
};
-static const struct radeon_register R600_DB_names[] = {
+static const struct radeon_register R600_names_DB[] = {
{0x0002800C, 1, 0, "DB_DEPTH_BASE"},
{0x00028000, 0, 0, "DB_DEPTH_SIZE"},
{0x00028004, 0, 0, "DB_DEPTH_VIEW"},
@@ -451,7 +478,7 @@ static const struct radeon_register R600_DB_names[] = {
{0x00028D34, 0, 0, "DB_PREFETCH_LIMIT"},
};
-static const struct radeon_register R600_VGT_names[] = {
+static const struct radeon_register R600_names_VGT[] = {
{0x00008958, 0, 0, "VGT_PRIMITIVE_TYPE"},
{0x00028400, 0, 0, "VGT_MAX_VTX_INDX"},
{0x00028404, 0, 0, "VGT_MIN_VTX_INDX"},
@@ -465,81 +492,15 @@ static const struct radeon_register R600_VGT_names[] = {
{0x00028AA4, 0, 0, "VGT_INSTANCE_STEP_RATE_1"},
};
-static const struct radeon_register R600_DRAW_names[] = {
+static const struct radeon_register R600_names_DRAW[] = {
{0x00008970, 0, 0, "VGT_NUM_INDICES"},
{0x000287E4, 0, 0, "VGT_DMA_BASE_HI"},
{0x000287E8, 1, 0, "VGT_DMA_BASE"},
{0x000287F0, 0, 0, "VGT_DRAW_INITIATOR"},
};
-static struct radeon_type R600_types[] = {
- { 128, 0, 0x00000000, 0x00000000, 0x0000, 0, "R600_CONFIG", 41, r600_state_pm4_config, R600_CONFIG_names},
- { 128, 1, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB_CNTL", 18, r600_state_pm4_generic, R600_CB_CNTL_names},
- { 128, 2, 0x00000000, 0x00000000, 0x0000, 0, "R600_RASTERIZER", 21, r600_state_pm4_generic, R600_RASTERIZER_names},
- { 128, 3, 0x00000000, 0x00000000, 0x0000, 0, "R600_VIEWPORT", 9, r600_state_pm4_generic, R600_VIEWPORT_names},
- { 128, 4, 0x00000000, 0x00000000, 0x0000, 0, "R600_SCISSOR", 19, r600_state_pm4_generic, R600_SCISSOR_names},
- { 128, 5, 0x00000000, 0x00000000, 0x0000, 0, "R600_BLEND", 13, r600_state_pm4_generic, R600_BLEND_names},
- { 128, 6, 0x00000000, 0x00000000, 0x0000, 0, "R600_DSA", 16, r600_state_pm4_generic, R600_DSA_names},
- { 128, 7, 0x00000000, 0x00000000, 0x0000, 0, "R600_VS_SHADER", 49, r600_state_pm4_shader, R600_VS_SHADER_names},
- { 128, 8, 0x00000000, 0x00000000, 0x0000, 0, "R600_PS_SHADER", 39, r600_state_pm4_shader, R600_PS_SHADER_names},
- { 128, 9, 0x00030000, 0x00031000, 0x0010, 0, "R600_PS_CONSTANT", 4, r600_state_pm4_generic, R600_PS_CONSTANT_names},
- { 128, 265, 0x00031000, 0x00032000, 0x0010, 0, "R600_VS_CONSTANT", 4, r600_state_pm4_generic, R600_VS_CONSTANT_names},
- { 128, 521, 0x00038000, 0x00039180, 0x001C, 0, "R600_PS_RESOURCE", 7, r600_state_pm4_resource, R600_PS_RESOURCE_names},
- { 128, 681, 0x00039180, 0x0003A300, 0x001C, 0, "R600_VS_RESOURCE", 7, r600_state_pm4_resource, R600_VS_RESOURCE_names},
- { 128, 841, 0x00039180, 0x0003A300, 0x001C, 0, "R600_FS_RESOURCE", 7, r600_state_pm4_resource, R600_FS_RESOURCE_names},
- { 128, 1001, 0x00039180, 0x0003A300, 0x001C, 0, "R600_GS_RESOURCE", 7, r600_state_pm4_resource, R600_GS_RESOURCE_names},
- { 128, 1161, 0x0003C000, 0x0003C0D8, 0x000C, 0, "R600_PS_SAMPLER", 3, r600_state_pm4_generic, R600_PS_SAMPLER_names},
- { 128, 1179, 0x0003C0D8, 0x0003C1B0, 0x000C, 0, "R600_VS_SAMPLER", 3, r600_state_pm4_generic, R600_VS_SAMPLER_names},
- { 128, 1197, 0x0003C1B0, 0x0003C288, 0x000C, 0, "R600_GS_SAMPLER", 3, r600_state_pm4_generic, R600_GS_SAMPLER_names},
- { 128, 1215, 0x0000A400, 0x0000A520, 0x0010, 0, "R600_PS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_PS_SAMPLER_BORDER_names},
- { 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names},
- { 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names},
- { 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r600_state_pm4_cb0, R600_CB0_names},
- { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names},
- { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, R600_CB2_names},
- { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names},
- { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, r600_state_pm4_cb0, R600_CB4_names},
- { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names},
- { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names},
- { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names},
- { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r600_state_pm4_db, R600_DB_names},
- { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names},
- { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names},
-};
-
-static struct radeon_type R700_types[] = {
- { 128, 0, 0x00000000, 0x00000000, 0x0000, 0, "R600_CONFIG", 41, r700_state_pm4_config, R600_CONFIG_names},
- { 128, 1, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB_CNTL", 18, r600_state_pm4_generic, R600_CB_CNTL_names},
- { 128, 2, 0x00000000, 0x00000000, 0x0000, 0, "R600_RASTERIZER", 21, r600_state_pm4_generic, R600_RASTERIZER_names},
- { 128, 3, 0x00000000, 0x00000000, 0x0000, 0, "R600_VIEWPORT", 9, r600_state_pm4_generic, R600_VIEWPORT_names},
- { 128, 4, 0x00000000, 0x00000000, 0x0000, 0, "R600_SCISSOR", 19, r600_state_pm4_generic, R600_SCISSOR_names},
- { 128, 5, 0x00000000, 0x00000000, 0x0000, 0, "R600_BLEND", 13, r600_state_pm4_generic, R600_BLEND_names},
- { 128, 6, 0x00000000, 0x00000000, 0x0000, 0, "R600_DSA", 16, r600_state_pm4_generic, R600_DSA_names},
- { 128, 7, 0x00000000, 0x00000000, 0x0000, 0, "R600_VS_SHADER", 49, r600_state_pm4_shader, R600_VS_SHADER_names},
- { 128, 8, 0x00000000, 0x00000000, 0x0000, 0, "R600_PS_SHADER", 39, r600_state_pm4_shader, R600_PS_SHADER_names},
- { 128, 9, 0x00030000, 0x00031000, 0x0010, 0, "R600_PS_CONSTANT", 4, r600_state_pm4_generic, R600_PS_CONSTANT_names},
- { 128, 265, 0x00031000, 0x00032000, 0x0010, 0, "R600_VS_CONSTANT", 4, r600_state_pm4_generic, R600_VS_CONSTANT_names},
- { 128, 521, 0x00038000, 0x00039180, 0x001C, 0, "R600_PS_RESOURCE", 7, r600_state_pm4_resource, R600_PS_RESOURCE_names},
- { 128, 681, 0x00039180, 0x0003A300, 0x001C, 0, "R600_VS_RESOURCE", 7, r600_state_pm4_resource, R600_VS_RESOURCE_names},
- { 128, 841, 0x00039180, 0x0003A300, 0x001C, 0, "R600_FS_RESOURCE", 7, r600_state_pm4_resource, R600_FS_RESOURCE_names},
- { 128, 1001, 0x00039180, 0x0003A300, 0x001C, 0, "R600_GS_RESOURCE", 7, r600_state_pm4_resource, R600_GS_RESOURCE_names},
- { 128, 1161, 0x0003C000, 0x0003C0D8, 0x000C, 0, "R600_PS_SAMPLER", 3, r600_state_pm4_generic, R600_PS_SAMPLER_names},
- { 128, 1179, 0x0003C0D8, 0x0003C1B0, 0x000C, 0, "R600_VS_SAMPLER", 3, r600_state_pm4_generic, R600_VS_SAMPLER_names},
- { 128, 1197, 0x0003C1B0, 0x0003C288, 0x000C, 0, "R600_GS_SAMPLER", 3, r600_state_pm4_generic, R600_GS_SAMPLER_names},
- { 128, 1215, 0x0000A400, 0x0000A520, 0x0010, 0, "R600_PS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_PS_SAMPLER_BORDER_names},
- { 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names},
- { 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names},
- { 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r700_state_pm4_cb0, R600_CB0_names},
- { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names},
- { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, R600_CB2_names},
- { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names},
- { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, r600_state_pm4_cb0, R600_CB4_names},
- { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names},
- { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names},
- { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names},
- { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r700_state_pm4_db, R600_DB_names},
- { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names},
- { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names},
+static const struct radeon_register R600_names_VGT_EVENT[] = {
+ {0x00028A90, 1, 0, "VGT_EVENT_INITIATOR"},
};
#endif
diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h
index 5d13378627..e8c2dc0651 100644
--- a/src/gallium/winsys/r600/drm/r600d.h
+++ b/src/gallium/winsys/r600/drm/r600d.h
@@ -82,6 +82,9 @@
#define PKT3_SET_CTL_CONST 0x6F
#define PKT3_SURFACE_BASE_UPDATE 0x73
+#define EVENT_TYPE_ZPASS_DONE 0x15
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
+
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C 0x3FFFFFFF
diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c
index 80b0a1d397..e2d813ebac 100644
--- a/src/gallium/winsys/r600/drm/radeon.c
+++ b/src/gallium/winsys/r600/drm/radeon.c
@@ -42,24 +42,13 @@ static int radeon_get_device(struct radeon *radeon)
return r;
}
-/* symbol missing drove me crazy hack to get symbol exported */
-static void fake(void)
-{
- struct radeon_ctx *ctx;
- struct radeon_draw *draw;
-
- ctx = radeon_ctx(NULL);
- draw = radeon_draw(NULL);
-}
-
struct radeon *radeon_new(int fd, unsigned device)
{
struct radeon *radeon;
- int r;
+ int r, i, id;
radeon = calloc(1, sizeof(*radeon));
if (radeon == NULL) {
- fake();
return NULL;
}
radeon->fd = fd;
@@ -131,6 +120,19 @@ struct radeon *radeon_new(int fd, unsigned device)
__func__, radeon->device);
break;
}
+ radeon->state_type_id = calloc(radeon->nstype, sizeof(unsigned));
+ if (radeon->state_type_id == NULL) {
+ return radeon_decref(radeon);
+ }
+ for (i = 0, id = 0; i < radeon->nstype; i++) {
+ radeon->state_type_id[i] = id;
+ for (int j = 0; j < radeon->nstype; j++) {
+ if (radeon->stype[j].stype != i)
+ continue;
+ id += radeon->stype[j].num;
+ }
+ }
+ radeon->nstate_per_shader = id;
return radeon;
}
@@ -153,47 +155,3 @@ struct radeon *radeon_decref(struct radeon *radeon)
free(radeon);
return NULL;
}
-
-int radeon_reg_id(struct radeon *radeon, unsigned offset, unsigned *typeid, unsigned *stateid, unsigned *id)
-{
- unsigned i, j;
-
- for (i = 0; i < radeon->ntype; i++) {
- if (radeon->type[i].range_start) {
- if (offset >= radeon->type[i].range_start && offset < radeon->type[i].range_end) {
- *typeid = i;
- j = offset - radeon->type[i].range_start;
- j /= radeon->type[i].stride;
- *stateid = radeon->type[i].id + j;
- *id = (offset - radeon->type[i].range_start - radeon->type[i].stride * j) / 4;
- return 0;
- }
- } else {
- for (j = 0; j < radeon->type[i].nstates; j++) {
- if (radeon->type[i].regs[j].offset == offset) {
- *typeid = i;
- *stateid = radeon->type[i].id;
- *id = j;
- return 0;
- }
- }
- }
- }
- fprintf(stderr, "%s unknown register 0x%08X\n", __func__, offset);
- return -EINVAL;
-}
-
-unsigned radeon_type_from_id(struct radeon *radeon, unsigned id)
-{
- unsigned i;
-
- for (i = 0; i < radeon->ntype - 1; i++) {
- if (radeon->type[i].id == id)
- return i;
- if (id > radeon->type[i].id && id < radeon->type[i + 1].id)
- return i;
- }
- if (radeon->type[i].id == id)
- return i;
- return -1;
-}
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index f259ae7fb5..a1306f6e9d 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -145,7 +145,9 @@ struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo)
return NULL;
}
- munmap(bo->data, bo->size);
+ if (bo->map_count) {
+ munmap(bo->data, bo->size);
+ }
memset(&args, 0, sizeof(args));
args.handle = bo->handle;
drmIoctl(radeon->fd, DRM_IOCTL_GEM_CLOSE, &args);
diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c
index 45b706bb0f..47fca76136 100644
--- a/src/gallium/winsys/r600/drm/radeon_ctx.c
+++ b/src/gallium/winsys/r600/drm/radeon_ctx.c
@@ -30,21 +30,16 @@
#include "radeon_drm.h"
#include "bof.h"
-int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo)
+static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo)
{
- void *ptr;
-
- ptr = realloc(ctx->bo, sizeof(struct radeon_bo) * (ctx->nbo + 1));
- if (ptr == NULL) {
- return -ENOMEM;
- }
- ctx->bo = ptr;
+ if (ctx->nbo >= RADEON_CTX_MAX_PM4)
+ return -EBUSY;
ctx->bo[ctx->nbo] = bo;
ctx->nbo++;
return 0;
}
-struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc)
+static struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc)
{
struct radeon_cs_reloc *greloc;
unsigned i;
@@ -59,7 +54,7 @@ struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc)
return NULL;
}
-void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement)
+static void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement)
{
struct radeon_cs_reloc *greloc;
unsigned i;
@@ -76,50 +71,57 @@ void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *place
}
}
-struct radeon_ctx *radeon_ctx(struct radeon *radeon)
+void radeon_ctx_clear(struct radeon_ctx *ctx)
{
- struct radeon_ctx *ctx;
-
- if (radeon == NULL)
- return NULL;
- ctx = calloc(1, sizeof(*ctx));
- if (ctx == NULL)
- return NULL;
- ctx->radeon = radeon_incref(radeon);
- return ctx;
+ for (int i = 0; i < ctx->nbo; i++) {
+ ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]);
+ }
+ ctx->ndwords = RADEON_CTX_MAX_PM4;
+ ctx->cdwords = 0;
+ ctx->nreloc = 0;
+ ctx->nbo = 0;
}
-struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx)
+int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon)
{
- ctx->refcount++;
- return ctx;
+ if (radeon == NULL)
+ return -EINVAL;
+ memset(ctx, 0, sizeof(struct radeon_ctx));
+ ctx->radeon = radeon_incref(radeon);
+ radeon_ctx_clear(ctx);
+ ctx->pm4 = malloc(RADEON_CTX_MAX_PM4 * 4);
+ if (ctx->pm4 == NULL) {
+ radeon_ctx_fini(ctx);
+ return -ENOMEM;
+ }
+ ctx->reloc = malloc(sizeof(struct radeon_cs_reloc) * RADEON_CTX_MAX_PM4);
+ if (ctx->reloc == NULL) {
+ radeon_ctx_fini(ctx);
+ return -ENOMEM;
+ }
+ ctx->bo = malloc(sizeof(void *) * RADEON_CTX_MAX_PM4);
+ if (ctx->bo == NULL) {
+ radeon_ctx_fini(ctx);
+ return -ENOMEM;
+ }
+ return 0;
}
-struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx)
+void radeon_ctx_fini(struct radeon_ctx *ctx)
{
unsigned i;
if (ctx == NULL)
- return NULL;
- if (--ctx->refcount > 0) {
- return NULL;
- }
+ return;
- for (i = 0; i < ctx->ndraw; i++) {
- ctx->draw[i] = radeon_draw_decref(ctx->draw[i]);
- }
for (i = 0; i < ctx->nbo; i++) {
ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]);
}
ctx->radeon = radeon_decref(ctx->radeon);
- free(ctx->state);
- free(ctx->draw);
free(ctx->bo);
free(ctx->pm4);
free(ctx->reloc);
- memset(ctx, 0, sizeof(*ctx));
- free(ctx);
- return NULL;
+ memset(ctx, 0, sizeof(struct radeon_ctx));
}
static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state)
@@ -152,17 +154,17 @@ int radeon_ctx_submit(struct radeon_ctx *ctx)
uint64_t chunk_array[2];
int r = 0;
- if (!ctx->cpm4)
+ if (!ctx->cdwords)
return 0;
#if 0
- for (r = 0; r < ctx->cpm4; r++) {
+ for (r = 0; r < ctx->cdwords; r++) {
fprintf(stderr, "0x%08X\n", ctx->pm4[r]);
}
#endif
drmib.num_chunks = 2;
drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
- chunks[0].length_dw = ctx->cpm4;
+ chunks[0].length_dw = ctx->cdwords;
chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
chunks[1].length_dw = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4;
@@ -180,7 +182,6 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo,
unsigned id, unsigned *placement)
{
unsigned i;
- struct radeon_cs_reloc *ptr;
for (i = 0; i < ctx->nreloc; i++) {
if (ctx->reloc[i].handle == bo->handle) {
@@ -188,14 +189,13 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo,
return 0;
}
}
- ptr = realloc(ctx->reloc, sizeof(struct radeon_cs_reloc) * (ctx->nreloc + 1));
- if (ptr == NULL)
- return -ENOMEM;
- ctx->reloc = ptr;
- ptr[ctx->nreloc].handle = bo->handle;
- ptr[ctx->nreloc].read_domain = placement[0] | placement [1];
- ptr[ctx->nreloc].write_domain = placement[0] | placement [1];
- ptr[ctx->nreloc].flags = 0;
+ if (ctx->nreloc >= RADEON_CTX_MAX_PM4) {
+ return -EBUSY;
+ }
+ ctx->reloc[ctx->nreloc].handle = bo->handle;
+ ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1];
+ ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1];
+ ctx->reloc[ctx->nreloc].flags = 0;
ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4;
ctx->nreloc++;
return 0;
@@ -208,97 +208,80 @@ static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state
if (state == NULL)
return 0;
- memcpy(&ctx->pm4[ctx->id], state->pm4, state->cpm4 * 4);
+ if (state->cpm4 > ctx->ndwords) {
+ return -EBUSY;
+ }
+ memcpy(&ctx->pm4[ctx->cdwords], state->pm4, state->cpm4 * 4);
for (i = 0; i < state->nreloc; i++) {
rid = state->reloc_pm4_id[i];
bid = state->reloc_bo_id[i];
- cid = ctx->id + rid;
+ cid = ctx->cdwords + rid;
r = radeon_ctx_reloc(ctx, state->bo[bid], cid,
&state->placement[bid * 2]);
if (r) {
- fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->type);
+ fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->stype->stype);
return r;
}
}
- ctx->id += state->cpm4;
+ ctx->cdwords += state->cpm4;
+ ctx->ndwords -= state->cpm4;
return 0;
}
-int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw)
+int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state)
{
- struct radeon_draw *pdraw = NULL;
- struct radeon_draw **ndraw;
- struct radeon_state *nstate, *ostate;
- unsigned cpm4, i, cstate;
- void *tmp;
int r = 0;
- ndraw = realloc(ctx->draw, sizeof(void*) * (ctx->ndraw + 1));
- if (ndraw == NULL)
- return -ENOMEM;
- ctx->draw = ndraw;
- for (i = 0; i < draw->nstate; i++) {
- r = radeon_ctx_state_bo(ctx, draw->state[i]);
- if (r)
- return r;
- }
- r = radeon_draw_check(draw);
+ /* !!! ONLY ACCEPT QUERY STATE HERE !!! */
+ r = radeon_state_pm4(state);
if (r)
return r;
- if (draw->cpm4 >= RADEON_CTX_MAX_PM4) {
- fprintf(stderr, "%s single draw too big %d, max %d\n",
- __func__, draw->cpm4, RADEON_CTX_MAX_PM4);
+ /* BEGIN/END query are balanced in the same cs so account for END
+ * END query when scheduling BEGIN query
+ */
+ switch (state->stype->stype) {
+ case R600_STATE_QUERY_BEGIN:
+ /* is there enough place for begin & end */
+ if ((state->cpm4 * 2) > ctx->ndwords)
+ return -EBUSY;
+ ctx->ndwords -= state->cpm4;
+ break;
+ case R600_STATE_QUERY_END:
+ ctx->ndwords += state->cpm4;
+ break;
+ default:
return -EINVAL;
}
- tmp = realloc(ctx->state, (ctx->nstate + draw->nstate) * sizeof(void*));
- if (tmp == NULL)
- return -ENOMEM;
- ctx->state = tmp;
- pdraw = ctx->cdraw;
- for (i = 0, cpm4 = 0, cstate = ctx->nstate; i < draw->nstate - 1; i++) {
- nstate = draw->state[i];
- if (nstate) {
- if (pdraw && pdraw->state[i]) {
- ostate = pdraw->state[i];
- if (ostate->pm4_crc != nstate->pm4_crc) {
- ctx->state[cstate++] = nstate;
- cpm4 += nstate->cpm4;
- }
- } else {
- ctx->state[cstate++] = nstate;
- cpm4 += nstate->cpm4;
- }
- }
- }
- /* The last state is the draw state always add it */
- if (draw->state[i] == NULL) {
- fprintf(stderr, "%s no draw command\n", __func__);
- return -EINVAL;
- }
- ctx->state[cstate++] = draw->state[i];
- cpm4 += draw->state[i]->cpm4;
- if ((ctx->draw_cpm4 + cpm4) > RADEON_CTX_MAX_PM4) {
- /* need to flush */
- return -EBUSY;
- }
- ctx->draw_cpm4 += cpm4;
- ctx->nstate = cstate;
- ctx->draw[ctx->ndraw++] = draw;
- ctx->cdraw = draw;
- return 0;
+ return radeon_ctx_state_schedule(ctx, state);
}
int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw)
{
- int r;
+ unsigned previous_cdwords;
+ int r = 0;
- radeon_draw_incref(draw);
- r = radeon_ctx_set_draw_new(ctx, draw);
- if (r)
- radeon_draw_decref(draw);
- return r;
+ for (int i = 0; i < (ctx->radeon->nstate_per_shader * R600_SHADER_MAX); i++) {
+ r = radeon_ctx_state_bo(ctx, draw->state[i]);
+ if (r)
+ return r;
+ }
+ previous_cdwords = ctx->cdwords;
+ for (int i = 0, id = 0; i < ctx->radeon->nstate_per_shader; i++) {
+ for (int j = 0; j < R600_SHADER_MAX; j++) {
+ id = j * ctx->radeon->nstate_per_shader + i;
+ if (draw->state[id]) {
+ r = radeon_ctx_state_schedule(ctx, draw->state[id]);
+ if (r) {
+ ctx->cdwords = previous_cdwords;
+ return r;
+ }
+ }
+ }
+ }
+ return 0;
}
+#if 0
int radeon_ctx_pm4(struct radeon_ctx *ctx)
{
unsigned i;
@@ -310,9 +293,6 @@ int radeon_ctx_pm4(struct radeon_ctx *ctx)
if (ctx->pm4 == NULL)
return -EINVAL;
for (i = 0, ctx->id = 0; i < ctx->nstate; i++) {
- r = radeon_ctx_state_schedule(ctx, ctx->state[i]);
- if (r)
- return r;
}
if (ctx->id != ctx->draw_cpm4) {
fprintf(stderr, "%s miss predicted pm4 size %d for %d\n",
@@ -322,6 +302,7 @@ int radeon_ctx_pm4(struct radeon_ctx *ctx)
ctx->cpm4 = ctx->draw_cpm4;
return 0;
}
+#endif
void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file)
{
@@ -349,8 +330,8 @@ printf("%d relocs\n", ctx->nreloc);
bof_decref(blob);
blob = NULL;
/* dump cs */
-printf("%d pm4\n", ctx->cpm4);
- blob = bof_blob(ctx->cpm4 * 4, ctx->pm4);
+printf("%d pm4\n", ctx->cdwords);
+ blob = bof_blob(ctx->cdwords * 4, ctx->pm4);
if (blob == NULL)
goto out_err;
if (bof_object_set(root, "pm4", blob))
@@ -366,7 +347,6 @@ printf("%d pm4\n", ctx->cpm4);
if (bo == NULL)
goto out_err;
size = bof_int32(ctx->bo[i]->size);
-printf("[%d] %d bo\n", i, size);
if (size == NULL)
goto out_err;
if (bof_object_set(bo, "size", size))
diff --git a/src/gallium/winsys/r600/drm/radeon_draw.c b/src/gallium/winsys/r600/drm/radeon_draw.c
index 4413ed79fb..b992c4a55d 100644
--- a/src/gallium/winsys/r600/drm/radeon_draw.c
+++ b/src/gallium/winsys/r600/drm/radeon_draw.c
@@ -31,111 +31,27 @@
/*
* draw functions
*/
-struct radeon_draw *radeon_draw(struct radeon *radeon)
+int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon)
{
- struct radeon_draw *draw;
-
- draw = calloc(1, sizeof(*draw));
- if (draw == NULL)
- return NULL;
- draw->nstate = radeon->nstate;
draw->radeon = radeon;
- draw->refcount = 1;
- draw->state = calloc(1, sizeof(void*) * draw->nstate);
- if (draw->state == NULL) {
- free(draw);
- return NULL;
- }
- return draw;
-}
-
-struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw)
-{
- draw->refcount++;
- return draw;
-}
-
-struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw)
-{
- unsigned i;
-
- if (draw == NULL)
- return NULL;
- if (--draw->refcount > 0)
- return NULL;
- for (i = 0; i < draw->nstate; i++) {
- draw->state[i] = radeon_state_decref(draw->state[i]);
- }
- free(draw->state);
- memset(draw, 0, sizeof(*draw));
- free(draw);
- return NULL;
-}
-
-int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state)
-{
- if (state == NULL)
- return 0;
- if (state->type >= draw->radeon->ntype)
- return -EINVAL;
- draw->state[state->id] = radeon_state_decref(draw->state[state->id]);
- draw->state[state->id] = state;
+ draw->state = calloc(radeon->nstate_per_shader * R600_SHADER_MAX, sizeof(void*));
+ if (draw->state == NULL)
+ return -ENOMEM;
return 0;
}
-int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state)
+void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state)
{
if (state == NULL)
- return 0;
- radeon_state_incref(state);
- return radeon_draw_set_new(draw, state);
+ return;
+ draw->state[state->state_id] = state;
}
-int radeon_draw_check(struct radeon_draw *draw)
+void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state)
{
- unsigned i;
- int r;
-
- r = radeon_draw_pm4(draw);
- if (r)
- return r;
- for (i = 0, draw->cpm4 = 0; i < draw->nstate; i++) {
- if (draw->state[i]) {
- draw->cpm4 += draw->state[i]->cpm4;
- }
- }
- return 0;
-}
-
-struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw)
-{
- struct radeon_draw *ndraw;
- unsigned i;
-
- if (draw == NULL)
- return NULL;
- ndraw = radeon_draw(draw->radeon);
- if (ndraw == NULL) {
- return NULL;
- }
- for (i = 0; i < draw->nstate; i++) {
- if (radeon_draw_set(ndraw, draw->state[i])) {
- radeon_draw_decref(ndraw);
- return NULL;
- }
- }
- return ndraw;
-}
-
-int radeon_draw_pm4(struct radeon_draw *draw)
-{
- unsigned i;
- int r;
-
- for (i = 0; i < draw->nstate; i++) {
- r = radeon_state_pm4(draw->state[i]);
- if (r)
- return r;
+ if (state == NULL)
+ return;
+ if (draw->state[state->state_id] == state) {
+ draw->state[state->state_id] = NULL;
}
- return 0;
}
diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h
index 96c0d060f7..84e552ba4d 100644
--- a/src/gallium/winsys/r600/drm/radeon_priv.h
+++ b/src/gallium/winsys/r600/drm/radeon_priv.h
@@ -37,17 +37,20 @@ struct radeon_register {
char name[64];
};
-struct radeon_type {
- unsigned npm4;
- unsigned id;
- unsigned range_start;
- unsigned range_end;
- unsigned stride;
- unsigned immediate;
- char name[64];
+struct radeon_sub_type {
+ int shader_type;
+ const struct radeon_register *regs;
unsigned nstates;
+};
+
+struct radeon_stype_info {
+ unsigned stype;
+ unsigned num;
+ unsigned stride;
radeon_state_pm4_t pm4;
- const struct radeon_register *regs;
+ struct radeon_sub_type reginfo[R600_SHADER_MAX];
+ unsigned base_id;
+ unsigned npm4;
};
struct radeon {
@@ -55,9 +58,10 @@ struct radeon {
int refcount;
unsigned device;
unsigned family;
- unsigned nstate;
- unsigned ntype;
- const struct radeon_type *type;
+ unsigned nstype;
+ unsigned nstate_per_shader;
+ unsigned *state_type_id;
+ struct radeon_stype_info *stype;
};
extern struct radeon *radeon_new(int fd, unsigned device);
@@ -65,15 +69,6 @@ extern struct radeon *radeon_incref(struct radeon *radeon);
extern struct radeon *radeon_decref(struct radeon *radeon);
extern unsigned radeon_family_from_device(unsigned device);
extern int radeon_is_family_compatible(unsigned family1, unsigned family2);
-extern int radeon_reg_id(struct radeon *radeon, unsigned offset, unsigned *typeid, unsigned *stateid, unsigned *id);
-extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id);
-
-
-int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo);
-struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc);
-void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement);
-int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw);
-int radeon_ctx_draw(struct radeon_ctx *ctx);
/*
* r600/r700 context functions
diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c
index 308288557a..ac60485b28 100644
--- a/src/gallium/winsys/r600/drm/radeon_state.c
+++ b/src/gallium/winsys/r600/drm/radeon_state.c
@@ -32,82 +32,116 @@
/*
* state core functions
*/
-struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id)
+int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 stype, u32 id, u32 shader_type)
{
- struct radeon_state *state;
+ struct radeon_stype_info *found = NULL;
+ int i, j, shader_index = -1;
- if (type > radeon->ntype) {
- fprintf(stderr, "%s invalid type %d\n", __func__, type);
- return NULL;
+ /* traverse the stype array */
+ for (i = 0; i < radeon->nstype; i++) {
+ /* if the type doesn't match, if the shader doesn't match */
+ if (stype != radeon->stype[i].stype)
+ continue;
+ if (shader_type) {
+ for (j = 0; j < 4; j++) {
+ if (radeon->stype[i].reginfo[j].shader_type == shader_type) {
+ shader_index = j;
+ break;
+ }
+ }
+ if (shader_index == -1)
+ continue;
+ } else {
+ if (radeon->stype[i].reginfo[0].shader_type)
+ continue;
+ else
+ shader_index = 0;
+ }
+ if (id > radeon->stype[i].num)
+ continue;
+
+ found = &radeon->stype[i];
+ break;
}
- if (id > radeon->nstate) {
- fprintf(stderr, "%s invalid state id %d\n", __func__, id);
- return NULL;
+
+ if (!found) {
+ fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type);
+ return -EINVAL;
}
- state = calloc(1, sizeof(*state));
- if (state == NULL)
- return NULL;
+
+ memset(state, 0, sizeof(struct radeon_state));
+ state->state_id = radeon->nstate_per_shader * shader_index + radeon->state_type_id[stype] + id;
+ state->stype = found;
state->radeon = radeon;
- state->type = type;
state->id = id;
+ state->shader_index = shader_index;
state->refcount = 1;
- state->npm4 = radeon->type[type].npm4;
- state->nstates = radeon->type[type].nstates;
- state->states = calloc(1, state->nstates * 4);
- state->pm4 = calloc(1, radeon->type[type].npm4 * 4);
- if (state->states == NULL || state->pm4 == NULL) {
- radeon_state_decref(state);
- return NULL;
- }
- return state;
+ state->npm4 = found->npm4;
+ state->nstates = found->reginfo[shader_index].nstates;
+ return 0;
}
-struct radeon_state *radeon_state_duplicate(struct radeon_state *state)
+int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type)
{
- struct radeon_state *nstate = radeon_state(state->radeon, state->type, state->id);
- unsigned i;
+ struct radeon_stype_info *found = NULL;
+ int i, j, shader_index = -1;
if (state == NULL)
- return NULL;
- nstate->cpm4 = state->cpm4;
- nstate->nbo = state->nbo;
- nstate->nreloc = state->nreloc;
- memcpy(nstate->states, state->states, state->nstates * 4);
- memcpy(nstate->pm4, state->pm4, state->npm4 * 4);
- memcpy(nstate->placement, state->placement, 8 * 4);
- memcpy(nstate->reloc_pm4_id, state->reloc_pm4_id, 8 * 4);
- memcpy(nstate->reloc_bo_id, state->reloc_bo_id, 8 * 4);
- memcpy(nstate->bo_dirty, state->bo_dirty, 4 * 4);
- for (i = 0; i < state->nbo; i++) {
- nstate->bo[i] = radeon_bo_incref(state->radeon, state->bo[i]);
+ return 0;
+ /* traverse the stype array */
+ for (i = 0; i < state->radeon->nstype; i++) {
+ /* if the type doesn't match, if the shader doesn't match */
+ if (stype != state->radeon->stype[i].stype)
+ continue;
+ if (shader_type) {
+ for (j = 0; j < 4; j++) {
+ if (state->radeon->stype[i].reginfo[j].shader_type == shader_type) {
+ shader_index = j;
+ break;
+ }
+ }
+ if (shader_index == -1)
+ continue;
+ } else {
+ if (state->radeon->stype[i].reginfo[0].shader_type)
+ continue;
+ else
+ shader_index = 0;
+ }
+ if (id > state->radeon->stype[i].num)
+ continue;
+
+ found = &state->radeon->stype[i];
+ break;
}
- return nstate;
-}
-struct radeon_state *radeon_state_incref(struct radeon_state *state)
-{
- state->refcount++;
- return state;
+ if (!found) {
+ fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type);
+ return -EINVAL;
+ }
+
+ if (found->reginfo[shader_index].nstates != state->nstates) {
+ fprintf(stderr, "invalid type change from (%d %d %d) to (%d %d %d)\n",
+ state->stype->stype, state->id, state->shader_index, stype, id, shader_index);
+ }
+
+ state->stype = found;
+ state->id = id;
+ state->shader_index = shader_index;
+ state->state_id = state->radeon->nstate_per_shader * shader_index + state->radeon->state_type_id[stype] + id;
+ return radeon_state_pm4(state);
}
-struct radeon_state *radeon_state_decref(struct radeon_state *state)
+void radeon_state_fini(struct radeon_state *state)
{
unsigned i;
if (state == NULL)
return NULL;
- if (--state->refcount > 0) {
- return NULL;
- }
for (i = 0; i < state->nbo; i++) {
state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]);
}
- free(state->immd);
- free(state->states);
- free(state->pm4);
- memset(state, 0, sizeof(*state));
- free(state);
- return NULL;
+ memset(state, 0, sizeof(struct radeon_state));
}
int radeon_state_replace_always(struct radeon_state *ostate,
@@ -147,12 +181,13 @@ int radeon_state_pm4(struct radeon_state *state)
{
int r;
- if (state == NULL || state->cpm4)
+ if (state == NULL)
return 0;
- r = state->radeon->type[state->type].pm4(state);
+ state->cpm4 = 0;
+ r = state->stype->pm4(state);
if (r) {
fprintf(stderr, "%s failed to build PM4 for state(%d %d)\n",
- __func__, state->type, state->id);
+ __func__, state->stype->stype, state->id);
return r;
}
state->pm4_crc = crc32(state->pm4, state->cpm4 * 4);
diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
index b997abda9b..3a76098b65 100644
--- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
+++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
@@ -52,6 +52,7 @@ struct wrapper_sw_winsys
struct sw_winsys base;
struct pipe_screen *screen;
struct pipe_context *pipe;
+ enum pipe_texture_target target;
};
struct wrapper_sw_displaytarget
@@ -145,7 +146,7 @@ wsw_dt_create(struct sw_winsys *ws,
* XXX Why don't we just get the template.
*/
memset(&templ, 0, sizeof(templ));
- templ.target = PIPE_TEXTURE_2D;
+ templ.target = wsw->target;
templ.width0 = width;
templ.height0 = height;
templ.format = format;
@@ -291,6 +292,11 @@ wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen)
if (!wsw->pipe)
goto err_free;
+ if(screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES))
+ wsw->target = PIPE_TEXTURE_2D;
+ else
+ wsw->target = PIPE_TEXTURE_RECT;
+
return &wsw->base;
err_free: