summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/egl/drivers/glx/egl_glx.c2
-rw-r--r--src/egl/drivers/xdri/egl_xdri.c4
-rw-r--r--src/egl/main/eglapi.c3
-rw-r--r--src/egl/main/eglapi.h2
-rw-r--r--src/egl/main/eglcompiler.h3
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c32
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h4
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h8
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c9
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c38
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c29
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c45
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c14
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c10
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c282
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c7
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c16
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c49
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c116
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h49
-rw-r--r--src/gallium/auxiliary/translate/translate.h18
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c40
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c186
-rw-r--r--src/gallium/auxiliary/util/u_blit.c4
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c13
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c1
-rw-r--r--src/gallium/auxiliary/util/u_format.csv8
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h2
-rw-r--r--src/gallium/auxiliary/util/u_ringbuffer.c145
-rw-r--r--src/gallium/auxiliary/util/u_ringbuffer.h29
-rw-r--r--src/gallium/auxiliary/util/u_tile.c5
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.c22
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.h2
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c28
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h4
-rw-r--r--src/gallium/docs/source/context.rst66
-rw-r--r--src/gallium/docs/source/cso/rasterizer.rst101
-rw-r--r--src/gallium/docs/source/distro.rst29
-rw-r--r--src/gallium/docs/source/glossary.rst5
-rw-r--r--src/gallium/docs/source/screen.rst30
-rw-r--r--src/gallium/docs/source/tgsi.rst29
-rw-r--r--src/gallium/drivers/cell/common.h3
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h10
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c12
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c6
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c5
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c6
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.h4
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h22
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_fetch.c5
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.c15
-rw-r--r--src/gallium/drivers/failover/fo_context.h2
-rw-r--r--src/gallium/drivers/failover/fo_state.c2
-rw-r--r--src/gallium/drivers/i915/i915_context.h3
-rw-r--r--src/gallium/drivers/i915/i915_state.c12
-rw-r--r--src/gallium/drivers/i965/brw_pipe_shader.c6
-rw-r--r--src/gallium/drivers/identity/id_context.c12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_quad.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c2
-rw-r--r--src/gallium/drivers/nv04/nv04_state.c12
-rw-r--r--src/gallium/drivers/nv10/nv10_state.c12
-rw-r--r--src/gallium/drivers/nv20/nv20_state.c12
-rw-r--r--src/gallium/drivers/nv30/nv30_fragtex.c10
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c6
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c10
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h33
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c17
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c855
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h20
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c56
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h6
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c50
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c29
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c481
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c10
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h9
-rw-r--r--src/gallium/drivers/r300/r300_context.c42
-rw-r--r--src/gallium/drivers/r300/r300_context.h80
-rw-r--r--src/gallium/drivers/r300/r300_cs.h2
-rw-r--r--src/gallium/drivers/r300/r300_debug.c8
-rw-r--r--src/gallium/drivers/r300/r300_emit.c170
-rw-r--r--src/gallium/drivers/r300/r300_flush.c10
-rw-r--r--src/gallium/drivers/r300/r300_reg.h16
-rw-r--r--src/gallium/drivers/r300/r300_render.c109
-rw-r--r--src/gallium/drivers/r300/r300_screen.c70
-rw-r--r--src/gallium/drivers/r300/r300_screen.h43
-rw-r--r--src/gallium/drivers/r300/r300_state.c96
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c3
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h26
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c7
-rw-r--r--src/gallium/drivers/r300/r300_texture.c105
-rw-r--r--src/gallium/drivers/r300/r300_texture.h6
-rw-r--r--src/gallium/drivers/r300/r300_vs.c9
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h3
-rw-r--r--src/gallium/drivers/softpipe/Makefile3
-rw-r--r--src/gallium/drivers/softpipe/SConscript1
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c198
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h20
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_winsys.c245
-rw-r--r--src/gallium/drivers/softpipe/sp_winsys.h13
-rw-r--r--src/gallium/drivers/svga/svga_pipe_constants.c4
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c4
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c10
-rw-r--r--src/gallium/drivers/trace/tr_context.c11
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c4
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.h2
-rw-r--r--src/gallium/include/pipe/p_compiler.h45
-rw-r--r--src/gallium/include/pipe/p_context.h18
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h23
-rw-r--r--src/gallium/include/pipe/p_state.h18
-rw-r--r--src/gallium/state_trackers/dri/dri_drawable.c2
-rw-r--r--src/gallium/state_trackers/dri/dri_screen.c13
-rw-r--r--src/gallium/state_trackers/egl_g3d/common/egl_g3d.c240
-rw-r--r--src/gallium/state_trackers/egl_g3d/common/egl_g3d.h5
-rw-r--r--src/gallium/state_trackers/egl_g3d/common/native.h51
-rw-r--r--src/gallium/state_trackers/egl_g3d/kms/native_kms.c58
-rw-r--r--src/gallium/state_trackers/egl_g3d/kms/native_kms.h1
-rw-r--r--src/gallium/state_trackers/egl_g3d/x11/native_dri2.c142
-rw-r--r--src/gallium/state_trackers/egl_g3d/x11/native_x11.c7
-rw-r--r--src/gallium/state_trackers/egl_g3d/x11/native_x11.h8
-rw-r--r--src/gallium/state_trackers/egl_g3d/x11/native_ximage.c100
-rw-r--r--src/gallium/state_trackers/egl_g3d/x11/x11_screen.c36
-rw-r--r--src/gallium/state_trackers/egl_g3d/x11/x11_screen.h3
-rw-r--r--src/gallium/state_trackers/glx/xlib/glx_getproc.c2
-rw-r--r--src/gallium/state_trackers/python/gallium.i1
-rw-r--r--src/gallium/state_trackers/python/p_context.i5
-rwxr-xr-xsrc/gallium/state_trackers/python/retrace/interpreter.py8
-rw-r--r--src/gallium/state_trackers/python/st_softpipe_winsys.c219
-rw-r--r--src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore1
-rw-r--r--src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore1
-rw-r--r--src/gallium/state_trackers/vega/api_filters.c16
-rw-r--r--src/gallium/state_trackers/vega/api_masks.c5
-rw-r--r--src/gallium/state_trackers/vega/mask.c32
-rw-r--r--src/gallium/state_trackers/vega/paint.c3
-rw-r--r--src/gallium/state_trackers/vega/polygon.c1
-rw-r--r--src/gallium/state_trackers/vega/shader.c16
-rw-r--r--src/gallium/state_trackers/vega/vg_context.c24
-rw-r--r--src/gallium/state_trackers/vega/vg_context.h8
-rw-r--r--src/gallium/state_trackers/vega/vg_tracker.c11
-rw-r--r--src/gallium/state_trackers/xorg/xorg_renderer.c28
-rw-r--r--src/gallium/state_trackers/xorg/xorg_renderer.h4
-rw-r--r--src/gallium/state_trackers/xorg/xorg_xv.c9
-rw-r--r--src/gallium/winsys/drm/i965/xorg/Makefile59
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_buffer.c31
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_buffer.h5
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_drm.c49
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_drm.h10
-rw-r--r--src/gallium/winsys/drm/radeon/dri/Makefile2
-rw-r--r--src/gallium/winsys/egl_xlib/egl_xlib.c2
-rw-r--r--src/glsl/cl/sl_cl_parse.c2
-rw-r--r--src/glu/sgi/libnurbs/interface/bezierPatchMesh.h1
-rw-r--r--src/glu/sgi/libnurbs/interface/glsurfeval.h2
-rw-r--r--src/glu/sgi/libnurbs/internals/mesher.cc3
-rw-r--r--src/glu/sgi/libnurbs/internals/reader.cc2
-rw-r--r--src/glu/sgi/libnurbs/internals/renderhints.cc4
-rw-r--r--src/glu/sgi/libnurbs/internals/simplemath.h2
-rw-r--r--src/glu/sgi/libnurbs/internals/slicer.cc4
-rw-r--r--src/glx/x11/dri2_glx.c26
-rw-r--r--src/glx/x11/indirect.c2
-rw-r--r--src/glx/x11/indirect.h4
-rw-r--r--src/glx/x11/indirect_size.c4
-rw-r--r--src/glx/x11/indirect_size.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c15
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c25
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h30
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c244
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c31
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c180
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c12
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c20
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h1
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c21
-rw-r--r--src/mesa/drivers/dri/r200/Makefile5
-rw-r--r--src/mesa/drivers/dri/r200/r200_blit.c403
-rw-r--r--src/mesa/drivers/dri/r200/r200_blit.h54
-rw-r--r--src/mesa/drivers/dri/r200/r200_context.c9
-rw-r--r--src/mesa/drivers/dri/r200/r200_context.h2
-rw-r--r--src/mesa/drivers/dri/r200/r200_tex.c7
-rw-r--r--src/mesa/drivers/dri/r200/r200_tex.h2
l---------src/mesa/drivers/dri/r200/radeon_tex_copy.c1
-rw-r--r--src/mesa/drivers/dri/r300/Makefile4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h2
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.c96
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.h40
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c10
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h2
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.c7
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.h2
l---------src/mesa/drivers/dri/r300/radeon_tex_copy.c1
-rw-r--r--src/mesa/drivers/dri/r600/Makefile4
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit.c1660
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit.h21
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit_shaders.h28
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.h4
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c7
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c14
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h1
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c24
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c2
l---------src/mesa/drivers/dri/r600/radeon_tex_copy.c1
-rw-r--r--src/mesa/drivers/dri/radeon/Makefile4
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_blit.c403
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_blit.h54
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h20
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_context.c5
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_context.h1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex.c7
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex.h2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex_copy.c (renamed from src/mesa/drivers/dri/r300/r300_texcopy.c)53
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c21
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.h11
-rw-r--r--src/mesa/drivers/dri/radeon/server/radeon_reg.h25
-rw-r--r--src/mesa/glapi/gl_XML.py6
-rw-r--r--src/mesa/glapi/glapitemp.h2
-rw-r--r--src/mesa/main/compiler.h3
-rw-r--r--src/mesa/main/fbobject.c3
-rw-r--r--src/mesa/state_tracker/st_atom_constbuf.c12
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c23
-rw-r--r--src/mesa/state_tracker/st_context.c4
-rw-r--r--src/mesa/state_tracker/st_context.h2
-rw-r--r--src/mesa/state_tracker/st_draw.c4
-rw-r--r--src/mesa/state_tracker/st_draw_feedback.c7
-rw-r--r--src/mesa/state_tracker/st_extensions.c1
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c17
-rw-r--r--src/mesa/state_tracker/st_program.c1
-rw-r--r--src/mesa/state_tracker/st_texture.c1
-rw-r--r--src/mesa/swrast/s_accum.c1
-rw-r--r--src/mesa/swrast/s_atifragshader.c1
-rw-r--r--src/mesa/swrast/s_bitmap.c1
-rw-r--r--src/mesa/swrast/s_copypix.c2
-rw-r--r--src/mesa/swrast/s_depth.c1
-rw-r--r--src/mesa/swrast/s_drawpix.c1
-rw-r--r--src/mesa/swrast/s_feedback.c1
-rw-r--r--src/mesa/swrast/s_fragprog.c1
-rw-r--r--src/mesa/swrast/s_lines.c1
-rw-r--r--src/mesa/swrast/s_points.c1
-rw-r--r--src/mesa/swrast/s_readpix.c1
-rw-r--r--src/mesa/swrast/s_texcombine.c1
-rw-r--r--src/mesa/tnl/t_context.c1
-rw-r--r--src/mesa/tnl/t_draw.c4
-rw-r--r--src/mesa/tnl/t_pipeline.c1
-rw-r--r--src/mesa/tnl/t_rasterpos.c1
-rw-r--r--src/mesa/tnl/t_vb_program.c1
-rw-r--r--src/mesa/vbo/vbo_exec.c3
-rw-r--r--src/mesa/vbo/vbo_exec_array.c1
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c1
-rw-r--r--src/mesa/vbo/vbo_save.c2
-rw-r--r--src/mesa/vbo/vbo_save_loopback.c1
-rw-r--r--src/mesa/vbo/vbo_split_copy.c3
-rw-r--r--src/mesa/x86/x86_xform.c1
299 files changed, 7531 insertions, 2375 deletions
diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c
index 96292b0e9e..7c6e8637a1 100644
--- a/src/egl/drivers/glx/egl_glx.c
+++ b/src/egl/drivers/glx/egl_glx.c
@@ -899,7 +899,7 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
* Called from eglGetProcAddress() via drv->API.GetProcAddress().
*/
static _EGLProc
-GLX_eglGetProcAddress(const char *procname)
+GLX_eglGetProcAddress(_EGLDriver *drv, const char *procname)
{
return (_EGLProc) glXGetProcAddress((const GLubyte *) procname);
}
diff --git a/src/egl/drivers/xdri/egl_xdri.c b/src/egl/drivers/xdri/egl_xdri.c
index d2affc66dd..8425b3d11e 100644
--- a/src/egl/drivers/xdri/egl_xdri.c
+++ b/src/egl/drivers/xdri/egl_xdri.c
@@ -355,7 +355,7 @@ xdri_eglTerminate(_EGLDriver *drv, _EGLDisplay *dpy)
* Called from eglGetProcAddress() via drv->API.GetProcAddress().
*/
static _EGLProc
-xdri_eglGetProcAddress(const char *procname)
+xdri_eglGetProcAddress(_EGLDriver *drv, const char *procname)
{
/* the symbol is defined in libGL.so */
return (_EGLProc) _glapi_get_proc_address(procname);
@@ -562,7 +562,7 @@ xdri_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw)
struct xdri_egl_display *xdri_dpy = lookup_display(dpy);
struct xdri_egl_surface *xdri_surf = lookup_surface(draw);
- xdri_dpy->psc->driScreen->swapBuffers(xdri_surf->driDrawable);
+ xdri_dpy->psc->driScreen->swapBuffers(xdri_surf->driDrawable, 0, 0, 0);
return EGL_TRUE;
}
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 14cc5fa613..26e0602453 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -716,7 +716,8 @@ void (* EGLAPIENTRY eglGetProcAddress(const char *procname))()
/* now loop over drivers to query their procs */
for (i = 0; i < _eglGlobal.NumDrivers; i++) {
- _EGLProc p = _eglGlobal.Drivers[i]->API.GetProcAddress(procname);
+ _EGLDriver *drv = _eglGlobal.Drivers[i];
+ _EGLProc p = drv->API.GetProcAddress(drv, procname);
if (p)
return p;
}
diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h
index aa0abe3eb6..080f2155e3 100644
--- a/src/egl/main/eglapi.h
+++ b/src/egl/main/eglapi.h
@@ -44,7 +44,7 @@ typedef const char *(*QueryString_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLint n
typedef EGLBoolean (*WaitClient_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx);
typedef EGLBoolean (*WaitNative_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine);
-typedef _EGLProc (*GetProcAddress_t)(const char *procname);
+typedef _EGLProc (*GetProcAddress_t)(_EGLDriver *drv, const char *procname);
diff --git a/src/egl/main/eglcompiler.h b/src/egl/main/eglcompiler.h
index f7c93f14ce..5a3fb49ac2 100644
--- a/src/egl/main/eglcompiler.h
+++ b/src/egl/main/eglcompiler.h
@@ -64,7 +64,8 @@
/**
* Function visibility
*/
-#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303
+#if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \
+ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define PUBLIC __attribute__((visibility("default")))
#else
# define PUBLIC
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index e3af41c6e0..8f937e3b4e 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -111,6 +111,7 @@ C_SOURCES = \
util/u_math.c \
util/u_mm.c \
util/u_rect.c \
+ util/u_ringbuffer.c \
util/u_simple_shaders.c \
util/u_snprintf.c \
util/u_stream_stdc.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 782eb53386..f957090b5f 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -147,6 +147,7 @@ source = [
'util/u_math.c',
'util/u_mm.c',
'util/u_rect.c',
+ 'util/u_ringbuffer.c',
'util/u_simple_shaders.c',
'util/u_snprintf.c',
'util/u_stream_stdc.c',
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 2b16332e14..fdfb5faa59 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -539,6 +539,38 @@ void cso_restore_samplers(struct cso_context *ctx)
cso_single_sampler_done( ctx );
}
+/*
+ * If the function encouters any errors it will return the
+ * last one. Done to always try to set as many samplers
+ * as possible.
+ */
+enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx,
+ unsigned nr,
+ const struct pipe_sampler_state **templates)
+{
+ unsigned i;
+ enum pipe_error temp, error = PIPE_OK;
+
+ /* TODO: fastpath
+ */
+
+ for (i = 0; i < nr; i++) {
+ temp = cso_single_vertex_sampler( ctx, i, templates[i] );
+ if (temp != PIPE_OK)
+ error = temp;
+ }
+
+ for ( ; i < ctx->nr_samplers; i++) {
+ temp = cso_single_vertex_sampler( ctx, i, NULL );
+ if (temp != PIPE_OK)
+ error = temp;
+ }
+
+ cso_single_vertex_sampler_done( ctx );
+
+ return error;
+}
+
void
cso_save_vertex_samplers(struct cso_context *ctx)
{
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index b9e313e32d..d2089b1c88 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -84,6 +84,10 @@ enum pipe_error cso_single_sampler( struct cso_context *cso,
void cso_single_sampler_done( struct cso_context *cso );
+enum pipe_error cso_set_vertex_samplers(struct cso_context *cso,
+ unsigned count,
+ const struct pipe_sampler_state **states);
+
void
cso_save_vertex_samplers(struct cso_context *cso);
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 667aa46b20..e90dfc5aec 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -95,6 +95,7 @@ void draw_destroy( struct draw_context *draw )
draw_pipeline_destroy( draw );
draw_pt_destroy( draw );
draw_vs_destroy( draw );
+ draw_gs_destroy( draw );
FREE( draw );
}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index b716209df2..8a64c06efc 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -164,6 +164,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw,
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
+void
+draw_arrays_instanced(struct draw_context *draw,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
void draw_flush(struct draw_context *draw);
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 5db2e75542..daf8d071f1 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -59,6 +59,15 @@ draw_gs_init( struct draw_context *draw )
return TRUE;
}
+void draw_gs_destroy( struct draw_context *draw )
+{
+ if (!draw->gs.machine)
+ return;
+
+ align_free(draw->gs.machine->Primitives);
+
+ tgsi_exec_machine_destroy(draw->gs.machine);
+}
void draw_gs_set_constants( struct draw_context *draw,
const float (*constants)[4],
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 205cda5eab..51a6115ebf 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -55,7 +55,7 @@
-struct clipper {
+struct clip_stage {
struct draw_stage stage; /**< base class */
/* Basically duplicate some of the flatshading logic here:
@@ -70,9 +70,9 @@ struct clipper {
/* This is a bit confusing:
*/
-static INLINE struct clipper *clipper_stage( struct draw_stage *stage )
+static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
{
- return (struct clipper *)stage;
+ return (struct clip_stage *)stage;
}
@@ -92,11 +92,12 @@ static void interp_attr( float *fdst,
fdst[3] = LINTERP( t, fout[3], fin[3] );
}
+
static void copy_colors( struct draw_stage *stage,
struct vertex_header *dst,
const struct vertex_header *src )
{
- const struct clipper *clipper = clipper_stage(stage);
+ const struct clip_stage *clipper = clip_stage(stage);
uint i;
for (i = 0; i < clipper->num_color_attribs; i++) {
const uint attr = clipper->color_attribs[i];
@@ -108,7 +109,7 @@ static void copy_colors( struct draw_stage *stage,
/* Interpolate between two vertices to produce a third.
*/
-static void interp( const struct clipper *clip,
+static void interp( const struct clip_stage *clip,
struct vertex_header *dst,
float t,
const struct vertex_header *out,
@@ -179,7 +180,7 @@ static void emit_poly( struct draw_stage *stage,
header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
if (i == n-1)
- header.flags |= edge_last;
+ header.flags |= edge_last;
if (0) {
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
@@ -200,13 +201,14 @@ static void emit_poly( struct draw_stage *stage,
}
}
+
static INLINE float
dot4(const float *a, const float *b)
{
- return (a[0]*b[0] +
- a[1]*b[1] +
- a[2]*b[2] +
- a[3]*b[3]);
+ return (a[0] * b[0] +
+ a[1] * b[1] +
+ a[2] * b[2] +
+ a[3] * b[3]);
}
@@ -217,7 +219,7 @@ do_clip_tri( struct draw_stage *stage,
struct prim_header *header,
unsigned clipmask )
{
- struct clipper *clipper = clipper_stage( stage );
+ struct clip_stage *clipper = clip_stage( stage );
struct vertex_header *a[MAX_CLIPPED_VERTICES];
struct vertex_header *b[MAX_CLIPPED_VERTICES];
struct vertex_header **inlist = a;
@@ -280,6 +282,7 @@ do_clip_tri( struct draw_stage *stage,
dp_prev = dp;
}
+ /* swap in/out lists */
{
struct vertex_header **tmp = inlist;
inlist = outlist;
@@ -291,15 +294,11 @@ do_clip_tri( struct draw_stage *stage,
/* If flat-shading, copy color to new provoking vertex.
*/
if (clipper->flat && inlist[0] != header->v[2]) {
- if (1) {
- inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
- }
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
copy_colors(stage, inlist[0], header->v[2]);
}
-
-
/* Emit the polygon as triangles to the setup stage:
*/
if (n >= 3)
@@ -314,7 +313,7 @@ do_clip_line( struct draw_stage *stage,
struct prim_header *header,
unsigned clipmask )
{
- const struct clipper *clipper = clipper_stage( stage );
+ const struct clip_stage *clipper = clip_stage( stage );
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
const float *pos0 = v0->clip;
@@ -416,13 +415,14 @@ clip_tri( struct draw_stage *stage,
}
}
+
/* Update state. Could further delay this until we hit the first
* primitive that really requires clipping.
*/
static void
clip_init_state( struct draw_stage *stage )
{
- struct clipper *clipper = clipper_stage( stage );
+ struct clip_stage *clipper = clip_stage( stage );
clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
@@ -488,7 +488,7 @@ static void clip_destroy( struct draw_stage *stage )
*/
struct draw_stage *draw_clip_stage( struct draw_context *draw )
{
- struct clipper *clipper = CALLOC_STRUCT(clipper);
+ struct clip_stage *clipper = CALLOC_STRUCT(clip_stage);
if (clipper == NULL)
goto fail;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 1a5269c0de..d40c035240 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf,
/* Note: we really do want data[0] here, not data[pos]:
*/
vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
- vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
+ vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr);
if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
@@ -271,10 +271,12 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
emit_sz = 0;
break;
}
-
+
+ hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
hw_key.element[i].input_buffer = src_buffer;
hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].instance_divisor = 0;
hw_key.element[i].output_format = output_format;
hw_key.element[i].output_offset = dst_offset;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index e49041556b..ef49e57536 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -172,6 +172,8 @@ struct draw_context
boolean force_passthrough; /**< never clip or shade */
+ boolean dump_vs;
+
double mrd; /**< minimum resolvable depth value, for polygon offset */
/* pipe state that we need: */
@@ -239,6 +241,8 @@ struct draw_context
unsigned reduced_prim;
+ unsigned instance_id;
+
void *driver_private;
};
@@ -265,6 +269,7 @@ boolean draw_gs_init( struct draw_context *draw );
void draw_gs_set_constants( struct draw_context *,
const float (*constants)[4],
unsigned size );
+void draw_gs_destroy( struct draw_context *draw );
/*******************************************************************************
* Common shading code:
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 2801dbafe4..a5ddec5286 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -280,20 +280,33 @@ void
draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count)
{
- unsigned reduced_prim = u_reduced_prim(prim);
+ draw_arrays_instanced(draw, prim, start, count, 0, 1);
+}
+
+void
+draw_arrays_instanced(struct draw_context *draw,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ unsigned reduced_prim = u_reduced_prim(mode);
+ unsigned instance;
+
if (reduced_prim != draw->reduced_prim) {
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
draw->reduced_prim = reduced_prim;
}
if (0)
- draw_print_arrays(draw, prim, start, MIN2(count, 20));
+ draw_print_arrays(draw, mode, start, MIN2(count, 20));
#if 0
{
int i;
- debug_printf("draw_arrays(prim=%u start=%u count=%u):\n",
- prim, start, count);
+ debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
+ mode, start, count);
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
@@ -311,6 +324,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
}
#endif
- /* drawing done here: */
- draw_pt_arrays(draw, prim, start, count);
+ for (instance = 0; instance < instanceCount; instance++) {
+ draw->instance_id = instance + startInstance;
+ draw_pt_arrays(draw, mode, start, count);
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 20edf7a227..d5e0d92a60 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
struct pt_fetch;
void draw_pt_fetch_prepare( struct pt_fetch *fetch,
unsigned vertex_input_count,
- unsigned vertex_size );
+ unsigned vertex_size,
+ unsigned instance_id_index );
void draw_pt_fetch_run( struct pt_fetch *fetch,
const unsigned *elts,
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 064e16c295..4fb53276bb 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -121,10 +121,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
emit_sz = 0;
break;
}
-
+
+ hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
hw_key.element[i].input_buffer = src_buffer;
hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].instance_divisor = 0;
hw_key.element[i].output_format = output_format;
hw_key.element[i].output_offset = dst_offset;
@@ -204,6 +206,7 @@ void draw_pt_emit( struct pt_emit *emit,
translate->run( translate,
0,
vertex_count,
+ draw->instance_id,
hw_verts );
render->unmap_vertices( render,
@@ -263,6 +266,7 @@ void draw_pt_emit_linear(struct pt_emit *emit,
translate->run(translate,
0,
count,
+ draw->instance_id,
hw_verts);
if (0) {
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 305bfef435..55e7a7b81a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -58,12 +58,14 @@ struct pt_fetch {
*/
void draw_pt_fetch_prepare( struct pt_fetch *fetch,
unsigned vs_input_count,
- unsigned vertex_size )
+ unsigned vertex_size,
+ unsigned instance_id_index )
{
struct draw_context *draw = fetch->draw;
unsigned nr_inputs;
- unsigned i, nr = 0;
+ unsigned i, nr = 0, ei = 0;
unsigned dst_offset = 0;
+ unsigned num_extra_inputs = 0;
struct translate_key key;
fetch->vertex_size = vertex_size;
@@ -78,9 +80,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
{
/* Need to set header->vertex_id = 0xffff somehow.
*/
+ key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
key.element[nr].input_offset = 0;
+ key.element[nr].instance_divisor = 0;
key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
key.element[nr].output_offset = dst_offset;
dst_offset += 1 * sizeof(float);
@@ -91,19 +95,36 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
*/
dst_offset += 4 * sizeof(float);
}
-
- assert( draw->pt.nr_vertex_elements >= vs_input_count );
- nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements );
+ if (instance_id_index != ~0) {
+ num_extra_inputs++;
+ }
+
+ assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count);
+
+ nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs);
for (i = 0; i < nr_inputs; i++) {
- key.element[nr].input_format = draw->pt.vertex_element[i].src_format;
- key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index;
- key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset;
- key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- key.element[nr].output_offset = dst_offset;
+ if (i == instance_id_index) {
+ key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID;
+ key.element[nr].input_format = PIPE_FORMAT_R32_USCALED;
+ key.element[nr].output_format = PIPE_FORMAT_R32_USCALED;
+ key.element[nr].output_offset = dst_offset;
+
+ dst_offset += sizeof(uint);
+ } else {
+ key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
+ key.element[nr].input_format = draw->pt.vertex_element[ei].src_format;
+ key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index;
+ key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset;
+ key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor;
+ key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ key.element[nr].output_offset = dst_offset;
+
+ ei++;
+ dst_offset += 4 * sizeof(float);
+ }
- dst_offset += 4 * sizeof(float);
nr++;
}
@@ -158,6 +179,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
translate->run_elts( translate,
elts,
count,
+ draw->instance_id,
verts );
}
@@ -183,6 +205,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
translate->run( translate,
start,
count,
+ draw->instance_id,
verts );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index e7fe6b3b76..2a604470e9 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -166,9 +166,11 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
continue;
}
+ key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
key.element[i].input_format = input_format;
key.element[i].input_buffer = input_buffer;
key.element[i].input_offset = input_offset;
+ key.element[i].instance_divisor = src->instance_divisor;
key.element[i].output_format = output_format;
key.element[i].output_offset = dst_offset;
@@ -256,6 +258,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
feme->translate->run_elts( feme->translate,
fetch_elts,
fetch_count,
+ draw->instance_id,
hw_verts );
if (0) {
@@ -314,6 +317,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
feme->translate->run( feme->translate,
start,
count,
+ draw->instance_id,
hw_verts );
if (0) {
@@ -374,6 +378,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
feme->translate->run( feme->translate,
start,
count,
+ draw->instance_id,
hw_verts );
draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 1a9df4cac5..23da556f79 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -59,6 +59,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ unsigned i;
+ unsigned instance_id_index = ~0;
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
@@ -66,6 +68,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned nr = MAX2( vs->info.num_inputs,
vs->info.num_outputs + 1 );
+ /* Scan for instanceID system value.
+ */
+ for (i = 0; i < vs->info.num_inputs; i++) {
+ if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+ instance_id_index = i;
+ break;
+ }
+ }
+
fpme->prim = prim;
fpme->opt = opt;
@@ -79,7 +90,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
draw_pt_fetch_prepare( fpme->fetch,
vs->info.num_inputs,
- fpme->vertex_size );
+ fpme->vertex_size,
+ instance_id_index );
/* XXX: it's not really gl rasterization rules we care about here,
* but gl vs dx9 clip spaces.
*/
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 3553689532..e03ac8c229 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -43,11 +43,11 @@
#include "translate/translate.h"
#include "translate/translate_cache.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
-
void draw_vs_set_constants( struct draw_context *draw,
const float (*constants)[4],
unsigned size )
@@ -83,6 +83,10 @@ draw_create_vertex_shader(struct draw_context *draw,
{
struct draw_vertex_shader *vs;
+ if (draw->dump_vs) {
+ tgsi_dump(shader->tokens, 0);
+ }
+
vs = draw_create_vs_llvm( draw, shader );
if (!vs) {
vs = draw_create_vs_sse( draw, shader );
@@ -152,6 +156,8 @@ draw_delete_vertex_shader(struct draw_context *draw,
boolean
draw_vs_init( struct draw_context *draw )
{
+ draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE);
+
draw->vs.machine = tgsi_exec_machine_create();
if (!draw->vs.machine)
return FALSE;
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index e3b807ebd0..00036cfe68 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -43,6 +43,7 @@ struct draw_varient_input
enum pipe_format format;
unsigned buffer;
unsigned offset;
+ unsigned instance_divisor;
};
struct draw_varient_output
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index ad184bd696..da9f3e3d35 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
/* loop over verts */
for (i = 0; i < count; i += MAX_VERTICES) {
const uint max_vertices = MIN2(MAX_VERTICES, count - i);
- float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB;
- float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB;
- float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4];
+ PIPE_ALIGN_VAR(16) float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4];
+ PIPE_ALIGN_VAR(16) float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4];
uint attr;
/* convert (up to) four input verts to SoA format */
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index d16692584e..9f40030f39 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -142,6 +142,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->fetch->run_elts( vsvg->fetch,
elts,
count,
+ vsvg->draw->instance_id,
temp_buffer );
vsvg->base.vs->run_linear( vsvg->base.vs,
@@ -181,6 +182,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->emit->run( vsvg->emit,
0, count,
+ vsvg->draw->instance_id,
output_buffer );
FREE(temp_buffer);
@@ -203,6 +205,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->fetch->run( vsvg->fetch,
start,
count,
+ vsvg->draw->instance_id,
temp_buffer );
vsvg->base.vs->run_linear( vsvg->base.vs,
@@ -239,6 +242,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->emit->run( vsvg->emit,
0, count,
+ vsvg->draw->instance_id,
output_buffer );
FREE(temp_buffer);
@@ -281,9 +285,11 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
fetch.nr_elements = key->nr_inputs;
fetch.output_stride = vsvg->temp_vertex_stride;
for (i = 0; i < key->nr_inputs; i++) {
+ fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
fetch.element[i].input_format = key->element[i].in.format;
fetch.element[i].input_buffer = key->element[i].in.buffer;
fetch.element[i].input_offset = key->element[i].in.offset;
+ fetch.element[i].instance_divisor = 0;
fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
fetch.element[i].output_offset = i * 4 * sizeof(float);
assert(fetch.element[i].output_offset < fetch.output_stride);
@@ -295,17 +301,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
for (i = 0; i < key->nr_outputs; i++) {
if (key->element[i].out.format != EMIT_1F_PSIZE)
{
+ emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit.element[i].input_buffer = 0;
emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
+ emit.element[i].instance_divisor = 0;
emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
emit.element[i].output_offset = key->element[i].out.offset;
assert(emit.element[i].input_offset <= fetch.output_stride);
}
else {
+ emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].input_buffer = 1;
emit.element[i].input_offset = 0;
+ emit.element[i].instance_divisor = 0;
emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].output_offset = key->element[i].out.offset;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index ba6f7b15f9..a4b78f1494 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -80,27 +80,11 @@ struct fenced_buffer_list
*/
struct fenced_buffer
{
- /*
- * Immutable members.
- */
-
struct pb_buffer base;
+
struct pb_buffer *buffer;
- struct fenced_buffer_list *list;
-
- /**
- * Protected by fenced_buffer_list::mutex
- */
- struct list_head head;
- /**
- * Following members are mutable and protected by this mutex.
- *
- * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex
- * held, but in order to prevent deadlocks you must never lock
- * fenced_buffer_list::mutex with this mutex held.
- */
- pipe_mutex mutex;
+ /* FIXME: protect access with mutex */
/**
* A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current
@@ -112,6 +96,9 @@ struct fenced_buffer
struct pb_validate *vl;
unsigned validation_flags;
struct pipe_fence_handle *fence;
+
+ struct list_head head;
+ struct fenced_buffer_list *list;
};
@@ -123,24 +110,15 @@ fenced_buffer(struct pb_buffer *buf)
}
-/**
- * Add the buffer to the fenced list.
- *
- * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this
- * order, before calling this function.
- *
- * Reference count should be incremented before calling this function.
- */
static INLINE void
-fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list,
- struct fenced_buffer *fenced_buf)
+_fenced_buffer_add(struct fenced_buffer *fenced_buf)
{
+ struct fenced_buffer_list *fenced_list = fenced_buf->list;
+
assert(pipe_is_referenced(&fenced_buf->base.base.reference));
assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
assert(fenced_buf->fence);
- /* TODO: Move the reference count increment here */
-
#ifdef DEBUG
LIST_DEL(&fenced_buf->head);
assert(fenced_list->numUnfenced);
@@ -152,16 +130,32 @@ fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list,
/**
- * Remove the buffer from the fenced list.
- *
- * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this
- * order before calling this function.
- *
- * Reference count should be decremented after calling this function.
+ * Actually destroy the buffer.
*/
static INLINE void
-fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list,
- struct fenced_buffer *fenced_buf)
+_fenced_buffer_destroy(struct fenced_buffer *fenced_buf)
+{
+ struct fenced_buffer_list *fenced_list = fenced_buf->list;
+
+ assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
+ assert(!fenced_buf->fence);
+#ifdef DEBUG
+ assert(fenced_buf->head.prev);
+ assert(fenced_buf->head.next);
+ LIST_DEL(&fenced_buf->head);
+ assert(fenced_list->numUnfenced);
+ --fenced_list->numUnfenced;
+#else
+ (void)fenced_list;
+#endif
+ pb_reference(&fenced_buf->buffer, NULL);
+ FREE(fenced_buf);
+}
+
+
+static INLINE void
+_fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
+ struct fenced_buffer *fenced_buf)
{
struct pb_fence_ops *ops = fenced_list->ops;
@@ -183,56 +177,37 @@ fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list,
++fenced_list->numUnfenced;
#endif
- /* TODO: Move the reference count decrement and destruction here */
+ /**
+ * FIXME!!!
+ */
+
+ if(!pipe_is_referenced(&fenced_buf->base.base.reference))
+ _fenced_buffer_destroy(fenced_buf);
}
-/**
- * Wait for the fence to expire, and remove it from the fenced list.
- *
- * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be
- * held -- it will be acquired internally.
- */
static INLINE enum pipe_error
-fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list,
- struct fenced_buffer *fenced_buf)
+_fenced_buffer_finish(struct fenced_buffer *fenced_buf)
{
+ struct fenced_buffer_list *fenced_list = fenced_buf->list;
struct pb_fence_ops *ops = fenced_list->ops;
- enum pipe_error ret = PIPE_ERROR;
#if 0
debug_warning("waiting for GPU");
#endif
- assert(pipe_is_referenced(&fenced_buf->base.base.reference));
assert(fenced_buf->fence);
-
- /*
- * Acquire the global lock. Must release buffer mutex first to preserve
- * lock order.
- */
- pipe_mutex_unlock(fenced_buf->mutex);
- pipe_mutex_lock(fenced_list->mutex);
- pipe_mutex_lock(fenced_buf->mutex);
-
if(fenced_buf->fence) {
- if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) {
- /* Remove from the fenced list */
- /* TODO: remove consequents */
- fenced_buffer_remove_locked(fenced_list, fenced_buf);
-
- p_atomic_dec(&fenced_buf->base.base.reference.count);
- assert(pipe_is_referenced(&fenced_buf->base.base.reference));
-
- fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
-
- ret = PIPE_OK;
+ if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) {
+ return PIPE_ERROR;
}
+ /* Remove from the fenced list */
+ /* TODO: remove consequents */
+ _fenced_buffer_remove(fenced_list, fenced_buf);
}
- pipe_mutex_unlock(fenced_list->mutex);
-
- return ret;
+ fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+ return PIPE_OK;
}
@@ -240,8 +215,8 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list,
* Free as many fenced buffers from the list head as possible.
*/
static void
-fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list,
- int wait)
+_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
+ int wait)
{
struct pb_fence_ops *ops = fenced_list->ops;
struct list_head *curr, *next;
@@ -254,29 +229,21 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list,
while(curr != &fenced_list->delayed) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- pipe_mutex_lock(fenced_buf->mutex);
-
if(fenced_buf->fence != prev_fence) {
int signaled;
if (wait)
signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
else
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
- if (signaled != 0) {
- pipe_mutex_unlock(fenced_buf->mutex);
+ if (signaled != 0)
break;
- }
prev_fence = fenced_buf->fence;
}
else {
assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
}
- fenced_buffer_remove_locked(fenced_list, fenced_buf);
- pipe_mutex_unlock(fenced_buf->mutex);
-
- pb_buf = &fenced_buf->base;
- pb_reference(&pb_buf, NULL);
+ _fenced_buffer_remove(fenced_list, fenced_buf);
curr = next;
next = curr->next;
@@ -290,25 +257,30 @@ fenced_buffer_destroy(struct pb_buffer *buf)
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_buffer_list *fenced_list = fenced_buf->list;
- assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
- assert(!fenced_buf->fence);
-
-#ifdef DEBUG
pipe_mutex_lock(fenced_list->mutex);
- assert(fenced_buf->head.prev);
- assert(fenced_buf->head.next);
- LIST_DEL(&fenced_buf->head);
- assert(fenced_list->numUnfenced);
- --fenced_list->numUnfenced;
+ assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
+ if (fenced_buf->fence) {
+ struct pb_fence_ops *ops = fenced_list->ops;
+ if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
+ struct list_head *curr, *prev;
+ curr = &fenced_buf->head;
+ prev = curr->prev;
+ do {
+ fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
+ _fenced_buffer_remove(fenced_list, fenced_buf);
+ curr = prev;
+ prev = curr->prev;
+ } while (curr != &fenced_list->delayed);
+ }
+ else {
+ /* delay destruction */
+ }
+ }
+ else {
+ _fenced_buffer_destroy(fenced_buf);
+ }
pipe_mutex_unlock(fenced_list->mutex);
-#else
- (void)fenced_list;
-#endif
-
- pb_reference(&fenced_buf->buffer, NULL);
-
- pipe_mutex_destroy(fenced_buf->mutex);
- FREE(fenced_buf);
}
@@ -319,23 +291,24 @@ fenced_buffer_map(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_buffer_list *fenced_list = fenced_buf->list;
struct pb_fence_ops *ops = fenced_list->ops;
- void *map = NULL;
-
- pipe_mutex_lock(fenced_buf->mutex);
+ void *map;
assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE));
/* Serialize writes */
if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) ||
((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) {
- if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) &&
- ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
+ if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) {
/* Don't wait for the GPU to finish writing */
- goto done;
+ if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0)
+ _fenced_buffer_remove(fenced_list, fenced_buf);
+ else
+ return NULL;
+ }
+ else {
+ /* Wait for the GPU to finish writing */
+ _fenced_buffer_finish(fenced_buf);
}
-
- /* Wait for the GPU to finish writing */
- fenced_buffer_finish_locked(fenced_list, fenced_buf);
}
#if 0
@@ -352,9 +325,6 @@ fenced_buffer_map(struct pb_buffer *buf,
fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE;
}
-done:
- pipe_mutex_unlock(fenced_buf->mutex);
-
return map;
}
@@ -363,9 +333,6 @@ static void
fenced_buffer_unmap(struct pb_buffer *buf)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
-
- pipe_mutex_lock(fenced_buf->mutex);
-
assert(fenced_buf->mapcount);
if(fenced_buf->mapcount) {
pb_unmap(fenced_buf->buffer);
@@ -373,8 +340,6 @@ fenced_buffer_unmap(struct pb_buffer *buf)
if(!fenced_buf->mapcount)
fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE;
}
-
- pipe_mutex_unlock(fenced_buf->mutex);
}
@@ -386,14 +351,11 @@ fenced_buffer_validate(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
enum pipe_error ret;
- pipe_mutex_lock(fenced_buf->mutex);
-
if(!vl) {
/* invalidate */
fenced_buf->vl = NULL;
fenced_buf->validation_flags = 0;
- ret = PIPE_OK;
- goto done;
+ return PIPE_OK;
}
assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
@@ -401,17 +363,14 @@ fenced_buffer_validate(struct pb_buffer *buf,
flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
/* Buffer cannot be validated in two different lists */
- if(fenced_buf->vl && fenced_buf->vl != vl) {
- ret = PIPE_ERROR_RETRY;
- goto done;
- }
+ if(fenced_buf->vl && fenced_buf->vl != vl)
+ return PIPE_ERROR_RETRY;
#if 0
/* Do not validate if buffer is still mapped */
if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) {
/* TODO: wait for the thread that mapped the buffer to unmap it */
- ret = PIPE_ERROR_RETRY;
- goto done;
+ return PIPE_ERROR_RETRY;
}
/* Final sanity checking */
assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
@@ -421,21 +380,17 @@ fenced_buffer_validate(struct pb_buffer *buf,
if(fenced_buf->vl == vl &&
(fenced_buf->validation_flags & flags) == flags) {
/* Nothing to do -- buffer already validated */
- ret = PIPE_OK;
- goto done;
+ return PIPE_OK;
}
ret = pb_validate(fenced_buf->buffer, vl, flags);
if (ret != PIPE_OK)
- goto done;
+ return ret;
fenced_buf->vl = vl;
fenced_buf->validation_flags |= flags;
-done:
- pipe_mutex_unlock(fenced_buf->mutex);
-
- return ret;
+ return PIPE_OK;
}
@@ -450,36 +405,29 @@ fenced_buffer_fence(struct pb_buffer *buf,
fenced_buf = fenced_buffer(buf);
fenced_list = fenced_buf->list;
ops = fenced_list->ops;
-
- pipe_mutex_lock(fenced_list->mutex);
- pipe_mutex_lock(fenced_buf->mutex);
-
- assert(pipe_is_referenced(&fenced_buf->base.base.reference));
-
- if(fence != fenced_buf->fence) {
- assert(fenced_buf->vl);
- assert(fenced_buf->validation_flags);
-
- if (fenced_buf->fence) {
- fenced_buffer_remove_locked(fenced_list, fenced_buf);
- p_atomic_dec(&fenced_buf->base.base.reference.count);
- assert(pipe_is_referenced(&fenced_buf->base.base.reference));
- }
- if (fence) {
- ops->fence_reference(ops, &fenced_buf->fence, fence);
- fenced_buf->flags |= fenced_buf->validation_flags;
- p_atomic_inc(&fenced_buf->base.base.reference.count);
- fenced_buffer_add_locked(fenced_list, fenced_buf);
- }
-
- pb_fence(fenced_buf->buffer, fence);
- fenced_buf->vl = NULL;
- fenced_buf->validation_flags = 0;
+ if(fence == fenced_buf->fence) {
+ /* Nothing to do */
+ return;
}
- pipe_mutex_unlock(fenced_buf->mutex);
+ assert(fenced_buf->vl);
+ assert(fenced_buf->validation_flags);
+
+ pipe_mutex_lock(fenced_list->mutex);
+ if (fenced_buf->fence)
+ _fenced_buffer_remove(fenced_list, fenced_buf);
+ if (fence) {
+ ops->fence_reference(ops, &fenced_buf->fence, fence);
+ fenced_buf->flags |= fenced_buf->validation_flags;
+ _fenced_buffer_add(fenced_buf);
+ }
pipe_mutex_unlock(fenced_list->mutex);
+
+ pb_fence(fenced_buf->buffer, fence);
+
+ fenced_buf->vl = NULL;
+ fenced_buf->validation_flags = 0;
}
@@ -489,7 +437,6 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
pb_size *offset)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- /* NOTE: accesses immutable members only -- mutex not necessary */
pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
}
@@ -529,8 +476,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list,
buf->buffer = buffer;
buf->list = fenced_list;
- pipe_mutex_init(buf->mutex);
-
#ifdef DEBUG
pipe_mutex_lock(fenced_list->mutex);
LIST_ADDTAIL(&buf->head, &fenced_list->unfenced);
@@ -572,7 +517,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
int wait)
{
pipe_mutex_lock(fenced_list->mutex);
- fenced_buffer_list_check_free_locked(fenced_list, wait);
+ _fenced_buffer_list_check_free(fenced_list, wait);
pipe_mutex_unlock(fenced_list->mutex);
}
@@ -594,13 +539,11 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
next = curr->next;
while(curr != &fenced_list->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- pipe_mutex_lock(fenced_buf->mutex);
assert(!fenced_buf->fence);
debug_printf("%10p %7u %7u\n",
(void *) fenced_buf,
fenced_buf->base.base.size,
p_atomic_read(&fenced_buf->base.base.reference.count));
- pipe_mutex_unlock(fenced_buf->mutex);
curr = next;
next = curr->next;
}
@@ -610,7 +553,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
while(curr != &fenced_list->delayed) {
int signaled;
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- pipe_mutex_lock(fenced_buf->mutex);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
debug_printf("%10p %7u %7u %10p %s\n",
(void *) fenced_buf,
@@ -618,7 +560,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
p_atomic_read(&fenced_buf->base.base.reference.count),
(void *) fenced_buf->fence,
signaled == 0 ? "y" : "n");
- pipe_mutex_unlock(fenced_buf->mutex);
curr = next;
next = curr->next;
}
@@ -639,8 +580,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
sched_yield();
#endif
+ _fenced_buffer_list_check_free(fenced_list, 1);
pipe_mutex_lock(fenced_list->mutex);
- fenced_buffer_list_check_free_locked(fenced_list, 1);
}
#ifdef DEBUG
@@ -648,7 +589,6 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
#endif
pipe_mutex_unlock(fenced_list->mutex);
- pipe_mutex_destroy(fenced_list->mutex);
fenced_list->ops->destroy(fenced_list->ops);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 1acf3c373e..f675427d98 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -673,6 +673,13 @@ void x86_and( struct x86_function *p,
emit_op_modrm( p, 0x23, 0x21, dst, src );
}
+void x86_div( struct x86_function *p,
+ struct x86_reg src )
+{
+ assert(src.file == file_REG32 && src.mod == mod_REG);
+ emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
+}
+
/***********************************************************************
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 731a651796..f7612d416a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -244,6 +244,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
+void x86_div( struct x86_function *p, struct x86_reg src );
void x86_cdecl_caller_push_regs( struct x86_function *p );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e2e5394f86..c254a7274f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -123,7 +123,8 @@ static const char *semantic_names[] =
"NORMAL",
"FACE",
"EDGEFLAG",
- "PRIM_ID"
+ "PRIM_ID",
+ "INSTANCEID"
};
static const char *immediate_type_names[] =
@@ -218,8 +219,13 @@ _dump_register_src(
struct dump_ctx *ctx,
const struct tgsi_full_src_register *src )
{
+ ENM(src->Register.File, file_names);
+ if (src->Register.Dimension) {
+ CHR('[');
+ SID(src->Dimension.Index);
+ CHR(']');
+ }
if (src->Register.Indirect) {
- ENM( src->Register.File, file_names );
CHR( '[' );
ENM( src->Indirect.File, file_names );
CHR( '[' );
@@ -233,16 +239,10 @@ _dump_register_src(
}
CHR( ']' );
} else {
- ENM( src->Register.File, file_names );
CHR( '[' );
SID( src->Register.Index );
CHR( ']' );
}
- if (src->Register.Dimension) {
- CHR( '[' );
- SID( src->Dimension.Index );
- CHR( ']' );
- }
}
static void
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 2bcb33392a..83646b73c1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1041,11 +1041,19 @@ fetch_src_file_channel(
default:
assert( 0 );
+ chan->u[0] = 0;
+ chan->u[1] = 0;
+ chan->u[2] = 0;
+ chan->u[3] = 0;
}
break;
default:
assert( 0 );
+ chan->u[0] = 0;
+ chan->u[1] = 0;
+ chan->u[2] = 0;
+ chan->u[3] = 0;
}
}
@@ -1121,11 +1129,14 @@ fetch_source(const struct tgsi_exec_machine *mach,
* subscript to a register file. Effectively it means that
* the register file is actually a 2D array of registers.
*
- * file[1][3] == file[1*sizeof(file[1])+3],
+ * file[3][1] == file[3*sizeof(file[1])+1],
* where:
* [3] = Dimension.Index
*/
if (reg->Register.Dimension) {
+ int array_size;
+ union tgsi_exec_channel dim_index;
+
/* The size of the first-order array depends on the register file type.
* We need to multiply the index to the first array to get an effective,
* "flat" index that points to the beginning of the second-order array.
@@ -1133,32 +1144,27 @@ fetch_source(const struct tgsi_exec_machine *mach,
switch (reg->Register.File) {
case TGSI_FILE_INPUT:
case TGSI_FILE_SYSTEM_VALUE:
- index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
- index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
- index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
- index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ array_size = TGSI_EXEC_MAX_INPUT_ATTRIBS;
break;
case TGSI_FILE_CONSTANT:
- index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
- index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
- index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
- index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ array_size = TGSI_EXEC_MAX_CONST_BUFFER;
break;
default:
assert( 0 );
+ array_size = 0;
}
- index.i[0] += reg->Dimension.Index;
- index.i[1] += reg->Dimension.Index;
- index.i[2] += reg->Dimension.Index;
- index.i[3] += reg->Dimension.Index;
+ dim_index.i[0] =
+ dim_index.i[1] =
+ dim_index.i[2] =
+ dim_index.i[3] = reg->Dimension.Index;
/* Again, the second subscript index can be addressed indirectly
* identically to the first one.
* Nothing stops us from indirectly addressing the indirect register,
* but there is no need for that, so we won't exercise it.
*
- * file[1][ind[4].y+3],
+ * file[ind[4].y+3][1],
* where:
* ind = DimIndirect.File
* [4] = DimIndirect.Index
@@ -1183,20 +1189,25 @@ fetch_source(const struct tgsi_exec_machine *mach,
&index2,
&indir_index );
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
+ dim_index.i[0] += indir_index.i[0];
+ dim_index.i[1] += indir_index.i[1];
+ dim_index.i[2] += indir_index.i[2];
+ dim_index.i[3] += indir_index.i[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
*/
for (i = 0; i < QUAD_SIZE; i++) {
if ((execmask & (1 << i)) == 0)
- index.i[i] = 0;
+ dim_index.i[i] = 0;
}
}
+ index.i[0] += dim_index.i[0] * array_size;
+ index.i[1] += dim_index.i[1] * array_size;
+ index.i[2] += dim_index.i[2] * array_size;
+ index.i[3] += dim_index.i[3] * array_size;
+
/* If by any chance there was a need for a 3D array of register
* files, we would have to check whether Dimension is followed
* by a dimension register and continue the saga.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 138d2d095b..ad553c71a5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -51,7 +51,8 @@
* Since it's pretty much impossible to form PPC vector immediates, load
* them from memory here:
*/
-const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
+PIPE_ALIGN_VAR(16) const float
+ppc_builtin_constants[] = {
1.0f, -128.0f, 128.0, 0.0
};
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 7f1c8e5dd6..431c3ffb14 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -335,13 +335,9 @@ iter_instruction(
fill_scan_register1d(ind_reg,
inst->Src[i].Indirect.File,
inst->Src[i].Indirect.Index);
- if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) ||
- reg->indices[0] != 0) {
- report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]");
- }
check_register_usage(
ctx,
- reg,
+ ind_reg,
"indirect",
FALSE );
}
@@ -412,7 +408,7 @@ iter_declaration(
uint vert;
for (vert = 0; vert < ctx->implied_array_size; ++vert) {
scan_register *reg = MALLOC(sizeof(scan_register));
- fill_scan_register2d(reg, file, vert, i);
+ fill_scan_register2d(reg, file, i, vert);
check_and_declare(ctx, reg);
}
} else {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 9fcffeda36..7fe5dad5ff 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -933,7 +933,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
"NORMAL",
"FACE",
"EDGEFLAG",
- "PRIM_ID"
+ "PRIM_ID",
+ "INSTANCEID"
};
static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index e64e2b731d..ab557a23f9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -40,6 +40,8 @@ union tgsi_any_token {
struct tgsi_header header;
struct tgsi_processor processor;
struct tgsi_token token;
+ struct tgsi_property prop;
+ struct tgsi_property_data prop_data;
struct tgsi_declaration decl;
struct tgsi_declaration_range decl_range;
struct tgsi_declaration_semantic decl_semantic;
@@ -64,6 +66,7 @@ struct ureg_tokens {
};
#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
+#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
#define UREG_MAX_CONSTANT_RANGE 32
#define UREG_MAX_IMMEDIATE 32
@@ -95,6 +98,13 @@ struct ureg_program
unsigned nr_gs_inputs;
struct {
+ unsigned index;
+ unsigned semantic_name;
+ unsigned semantic_index;
+ } system_value[UREG_MAX_SYSTEM_VALUE];
+ unsigned nr_system_values;
+
+ struct {
unsigned semantic_name;
unsigned semantic_index;
} output[UREG_MAX_OUTPUT];
@@ -123,6 +133,8 @@ struct ureg_program
} constant_range[UREG_MAX_CONSTANT_RANGE];
unsigned nr_constant_ranges;
+ unsigned property_gs_input_prim;
+
unsigned nr_addrs;
unsigned nr_preds;
unsigned nr_loops;
@@ -234,19 +246,29 @@ ureg_src_register( unsigned file,
src.SwizzleY = TGSI_SWIZZLE_Y;
src.SwizzleZ = TGSI_SWIZZLE_Z;
src.SwizzleW = TGSI_SWIZZLE_W;
- src.Pad = 0;
src.Indirect = 0;
+ src.IndirectFile = TGSI_FILE_NULL;
src.IndirectIndex = 0;
src.IndirectSwizzle = 0;
src.Absolute = 0;
src.Index = index;
src.Negate = 0;
+ src.Dimension = 0;
+ src.DimensionIndex = 0;
return src;
}
+void
+ureg_property_gs_input_prim(struct ureg_program *ureg,
+ unsigned gs_input_prim)
+{
+ ureg->property_gs_input_prim = gs_input_prim;
+}
+
+
struct ureg_src
ureg_DECL_fs_input( struct ureg_program *ureg,
@@ -304,6 +326,25 @@ ureg_DECL_gs_input(struct ureg_program *ureg,
}
+struct ureg_src
+ureg_DECL_system_value(struct ureg_program *ureg,
+ unsigned index,
+ unsigned semantic_name,
+ unsigned semantic_index)
+{
+ if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
+ ureg->system_value[ureg->nr_system_values].index = index;
+ ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
+ ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
+ ureg->nr_system_values++;
+ } else {
+ set_bad(ureg);
+ }
+
+ return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index);
+}
+
+
struct ureg_dst
ureg_DECL_output( struct ureg_program *ureg,
unsigned name,
@@ -616,6 +657,35 @@ ureg_DECL_immediate_uint( struct ureg_program *ureg,
struct ureg_src
+ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
+ const unsigned *v,
+ unsigned nr )
+{
+ uint index;
+ uint i;
+
+ if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
+ set_bad(ureg);
+ return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
+ }
+
+ index = ureg->nr_immediates;
+ ureg->nr_immediates += (nr + 3) / 4;
+
+ for (i = index; i < ureg->nr_immediates; i++) {
+ ureg->immediate[i].type = TGSI_IMM_UINT32;
+ ureg->immediate[i].nr = nr > 4 ? 4 : nr;
+ memcpy(ureg->immediate[i].value.u,
+ &v[(i - index) * 4],
+ ureg->immediate[i].nr * sizeof(uint));
+ nr -= 4;
+ }
+
+ return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
+}
+
+
+struct ureg_src
ureg_DECL_immediate_int( struct ureg_program *ureg,
const int *v,
unsigned nr )
@@ -628,7 +698,7 @@ void
ureg_emit_src( struct ureg_program *ureg,
struct ureg_src src )
{
- unsigned size = 1 + (src.Indirect ? 1 : 0);
+ unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0);
union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
unsigned n = 0;
@@ -651,7 +721,7 @@ ureg_emit_src( struct ureg_program *ureg,
if (src.Indirect) {
out[0].src.Indirect = 1;
out[n].value = 0;
- out[n].src.File = TGSI_FILE_ADDRESS;
+ out[n].src.File = src.IndirectFile;
out[n].src.SwizzleX = src.IndirectSwizzle;
out[n].src.SwizzleY = src.IndirectSwizzle;
out[n].src.SwizzleZ = src.IndirectSwizzle;
@@ -660,6 +730,15 @@ ureg_emit_src( struct ureg_program *ureg,
n++;
}
+ if (src.Dimension) {
+ out[0].src.Dimension = 1;
+ out[n].dim.Indirect = 0;
+ out[n].dim.Dimension = 0;
+ out[n].dim.Padding = 0;
+ out[n].dim.Index = src.DimensionIndex;
+ n++;
+ }
+
assert(n == size);
}
@@ -1027,13 +1106,34 @@ emit_immediate( struct ureg_program *ureg,
out[4].imm_data.Uint = v[3];
}
+static void
+emit_property(struct ureg_program *ureg,
+ unsigned name,
+ unsigned data)
+{
+ union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
+
+ out[0].value = 0;
+ out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
+ out[0].prop.NrTokens = 2;
+ out[0].prop.PropertyName = name;
+ out[1].prop_data.Data = data;
+}
static void emit_decls( struct ureg_program *ureg )
{
unsigned i;
+ if (ureg->property_gs_input_prim != ~0) {
+ assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
+
+ emit_property(ureg,
+ TGSI_PROPERTY_GS_INPUT_PRIM,
+ ureg->property_gs_input_prim);
+ }
+
if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
for (i = 0; i < UREG_MAX_INPUT; i++) {
if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
@@ -1058,6 +1158,15 @@ static void emit_decls( struct ureg_program *ureg )
}
}
+ for (i = 0; i < ureg->nr_system_values; i++) {
+ emit_decl(ureg,
+ TGSI_FILE_SYSTEM_VALUE,
+ ureg->system_value[i].index,
+ ureg->system_value[i].semantic_name,
+ ureg->system_value[i].semantic_index,
+ TGSI_INTERPOLATE_CONSTANT);
+ }
+
for (i = 0; i < ureg->nr_outputs; i++) {
emit_decl( ureg,
TGSI_FILE_OUTPUT,
@@ -1234,6 +1343,7 @@ struct ureg_program *ureg_create( unsigned processor )
return NULL;
ureg->processor = processor;
+ ureg->property_gs_input_prim = ~0;
return ureg;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 6f11273320..8c8a6bbce6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -47,13 +47,15 @@ struct ureg_src
unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */
unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */
unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */
- unsigned Pad : 1; /* BOOL */
unsigned Indirect : 1; /* BOOL */
+ unsigned Dimension : 1; /* BOOL */
unsigned Absolute : 1; /* BOOL */
- int Index : 16; /* SINT */
unsigned Negate : 1; /* BOOL */
+ int Index : 16; /* SINT */
+ unsigned IndirectFile : 4; /* TGSI_FILE_ */
int IndirectIndex : 16; /* SINT */
- int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
+ unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
+ int DimensionIndex : 16; /* SINT */
};
/* Very similar to a tgsi_dst_register, removing unsupported fields
@@ -118,6 +120,14 @@ ureg_create_shader_and_destroy( struct ureg_program *p,
}
+/***********************************************************************
+ * Build shader properties:
+ */
+
+void
+ureg_property_gs_input_prim(struct ureg_program *ureg,
+ unsigned gs_input_prim);
+
/***********************************************************************
* Build shader declarations:
@@ -137,6 +147,12 @@ struct ureg_src
ureg_DECL_gs_input(struct ureg_program *,
unsigned index);
+struct ureg_src
+ureg_DECL_system_value(struct ureg_program *,
+ unsigned index,
+ unsigned semantic_name,
+ unsigned semantic_index);
+
struct ureg_dst
ureg_DECL_output( struct ureg_program *,
unsigned semantic_name,
@@ -153,6 +169,11 @@ ureg_DECL_immediate_uint( struct ureg_program *,
unsigned nr );
struct ureg_src
+ureg_DECL_immediate_block_uint( struct ureg_program *,
+ const unsigned *v,
+ unsigned nr );
+
+struct ureg_src
ureg_DECL_immediate_int( struct ureg_program *,
const int *v,
unsigned nr );
@@ -753,18 +774,30 @@ static INLINE struct ureg_src
ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
{
assert(reg.File != TGSI_FILE_NULL);
- assert(addr.File == TGSI_FILE_ADDRESS);
+ assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY);
reg.Indirect = 1;
+ reg.IndirectFile = addr.File;
reg.IndirectIndex = addr.Index;
reg.IndirectSwizzle = addr.SwizzleX;
return reg;
}
+static INLINE struct ureg_src
+ureg_src_dimension( struct ureg_src reg, int index )
+{
+ assert(reg.File != TGSI_FILE_NULL);
+ reg.Dimension = 1;
+ reg.DimensionIndex = index;
+ return reg;
+}
+
static INLINE struct ureg_dst
ureg_dst( struct ureg_src src )
{
struct ureg_dst dst;
+ assert(!src.Indirect || src.IndirectFile == TGSI_FILE_ADDRESS);
+
dst.File = src.File;
dst.WriteMask = TGSI_WRITEMASK_XYZW;
dst.Indirect = src.Indirect;
@@ -792,13 +825,15 @@ ureg_src( struct ureg_dst dst )
src.SwizzleY = TGSI_SWIZZLE_Y;
src.SwizzleZ = TGSI_SWIZZLE_Z;
src.SwizzleW = TGSI_SWIZZLE_W;
- src.Pad = 0;
src.Indirect = dst.Indirect;
+ src.IndirectFile = TGSI_FILE_ADDRESS;
src.IndirectIndex = dst.IndirectIndex;
src.IndirectSwizzle = dst.IndirectSwizzle;
src.Absolute = 0;
src.Index = dst.Index;
src.Negate = 0;
+ src.Dimension = 0;
+ src.DimensionIndex = 0;
return src;
}
@@ -837,13 +872,15 @@ ureg_src_undef( void )
src.SwizzleY = 0;
src.SwizzleZ = 0;
src.SwizzleW = 0;
- src.Pad = 0;
src.Indirect = 0;
+ src.IndirectFile = TGSI_FILE_NULL;
src.IndirectIndex = 0;
src.IndirectSwizzle = 0;
src.Absolute = 0;
src.Index = 0;
src.Negate = 0;
+ src.Dimension = 0;
+ src.DimensionIndex = 0;
return src;
}
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 34526eb061..54ed2c1a4b 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -44,12 +44,19 @@
#include "pipe/p_format.h"
#include "pipe/p_state.h"
+enum translate_element_type {
+ TRANSLATE_ELEMENT_NORMAL,
+ TRANSLATE_ELEMENT_INSTANCE_ID
+};
+
struct translate_element
{
+ enum translate_element_type type;
enum pipe_format input_format;
enum pipe_format output_format;
unsigned input_buffer:8;
unsigned input_offset:24;
+ unsigned instance_divisor;
unsigned output_offset;
};
@@ -74,11 +81,13 @@ struct translate {
void (PIPE_CDECL *run_elts)( struct translate *,
const unsigned *elts,
unsigned count,
+ unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
+ unsigned instance_id,
void *output_buffer);
};
@@ -103,8 +112,13 @@ static INLINE int translate_keysize( const struct translate_key *key )
static INLINE int translate_key_compare( const struct translate_key *a,
const struct translate_key *b )
{
- int keysize = translate_keysize(a);
- return memcmp(a, b, keysize);
+ int keysize_a = translate_keysize(a);
+ int keysize_b = translate_keysize(b);
+
+ if (keysize_a != keysize_b) {
+ return keysize_a - keysize_b;
+ }
+ return memcmp(a, b, keysize_a);
}
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 266e7ee81e..24727d4988 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -46,9 +46,12 @@ struct translate_generic {
struct translate translate;
struct {
+ enum translate_element_type type;
+
fetch_func fetch;
unsigned buffer;
unsigned input_offset;
+ unsigned instance_divisor;
emit_func emit;
unsigned output_offset;
@@ -568,6 +571,7 @@ static emit_func get_emit_func( enum pipe_format format )
static void PIPE_CDECL generic_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
@@ -583,13 +587,20 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
-
- const char *src = (tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt);
+ const char *src;
char *dst = (vert +
tg->attrib[attr].output_offset);
+ if (tg->attrib[attr].instance_divisor) {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride *
+ (instance_id / tg->attrib[attr].instance_divisor);
+ } else {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt;
+ }
+
tg->attrib[attr].fetch( src, data );
if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
@@ -607,6 +618,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
@@ -623,13 +635,25 @@ static void PIPE_CDECL generic_run( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
- const char *src = (tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt);
-
char *dst = (vert +
tg->attrib[attr].output_offset);
- tg->attrib[attr].fetch( src, data );
+ if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ const char *src;
+
+ if (tg->attrib[attr].instance_divisor) {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride *
+ (instance_id / tg->attrib[attr].instance_divisor);
+ } else {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt;
+ }
+
+ tg->attrib[attr].fetch( src, data );
+ } else {
+ data[0] = (float)instance_id;
+ }
if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
i, attr, data[0], data[1], data[2], data[3]);
@@ -683,10 +707,12 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
+ tg->attrib[i].type = key->element[i].type;
tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
tg->attrib[i].buffer = key->element[i].input_buffer;
tg->attrib[i].input_offset = key->element[i].input_offset;
+ tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index b62db8d8f3..c13e742738 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -49,19 +49,29 @@
typedef void (PIPE_CDECL *run_func)( struct translate *translate,
unsigned start,
unsigned count,
- void *output_buffer );
+ unsigned instance_id,
+ void *output_buffer);
typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
const unsigned *elts,
unsigned count,
- void *output_buffer );
+ unsigned instance_id,
+ void *output_buffer);
struct translate_buffer {
const void *base_ptr;
unsigned stride;
- void *ptr; /* updated per vertex */
};
+struct translate_buffer_varient {
+ unsigned buffer_index;
+ unsigned instance_divisor;
+ void *ptr; /* updated either per vertex or per instance */
+};
+
+
+#define ELEMENT_BUFFER_INSTANCE_ID 1001
+
struct translate_sse {
struct translate translate;
@@ -81,6 +91,16 @@ struct translate_sse {
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
unsigned nr_buffers;
+ /* Multiple buffer varients can map to a single buffer. */
+ struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
+ unsigned nr_buffer_varients;
+
+ /* Multiple elements can map to a single buffer varient. */
+ unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];
+
+ boolean use_instancing;
+ unsigned instance_id;
+
run_func gen_run;
run_elts_func gen_run_elts;
@@ -359,32 +379,61 @@ static boolean init_inputs( struct translate_sse *p,
boolean linear )
{
unsigned i;
- if (linear) {
- for (i = 0; i < p->nr_buffers; i++) {
+ struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
+ get_offset(p, &p->instance_id));
+
+ for (i = 0; i < p->nr_buffer_varients; i++) {
+ struct translate_buffer_varient *varient = &p->buffer_varient[i];
+ struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
+
+ if (linear || varient->instance_divisor) {
struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].stride));
+ get_offset(p, &buffer->stride));
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].ptr));
+ get_offset(p, &varient->ptr));
struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].base_ptr));
+ get_offset(p, &buffer->base_ptr));
struct x86_reg elt = p->idx_EBX;
- struct x86_reg tmp = p->tmp_EAX;
-
+ struct x86_reg tmp_EAX = p->tmp_EAX;
/* Calculate pointer to first attrib:
+ * base_ptr + stride * index, where index depends on instance divisor
*/
- x86_mov(p->func, tmp, buf_stride);
- x86_imul(p->func, tmp, elt);
- x86_add(p->func, tmp, buf_base_ptr);
+ if (varient->instance_divisor) {
+ /* Our index is instance ID divided by instance divisor.
+ */
+ x86_mov(p->func, tmp_EAX, instance_id);
+
+ if (varient->instance_divisor != 1) {
+ struct x86_reg tmp_EDX = p->machine_EDX;
+ struct x86_reg tmp_ECX = p->outbuf_ECX;
+
+ /* TODO: Add x86_shr() to rtasm and use it whenever
+ * instance divisor is power of two.
+ */
+
+ x86_push(p->func, tmp_EDX);
+ x86_push(p->func, tmp_ECX);
+ x86_xor(p->func, tmp_EDX, tmp_EDX);
+ x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
+ x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
+ x86_pop(p->func, tmp_ECX);
+ x86_pop(p->func, tmp_EDX);
+ }
+ } else {
+ x86_mov(p->func, tmp_EAX, elt);
+ }
+ x86_imul(p->func, tmp_EAX, buf_stride);
+ x86_add(p->func, tmp_EAX, buf_base_ptr);
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
- if (p->nr_buffers == 1)
- x86_mov( p->func, elt, tmp );
+ if (linear && p->nr_buffer_varients == 1)
+ x86_mov(p->func, elt, tmp_EAX);
else
- x86_mov( p->func, buf_ptr, tmp );
+ x86_mov(p->func, buf_ptr, tmp_EAX);
}
}
@@ -394,31 +443,36 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
boolean linear,
- unsigned buf_idx,
+ unsigned var_idx,
struct x86_reg elt )
{
- if (linear && p->nr_buffers == 1) {
+ if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
+ return x86_make_disp(p->machine_EDX,
+ get_offset(p, &p->instance_id));
+ }
+ if (linear && p->nr_buffer_varients == 1) {
return p->idx_EBX;
}
- else if (linear) {
+ else if (linear || p->buffer_varient[var_idx].instance_divisor) {
struct x86_reg ptr = p->tmp_EAX;
struct x86_reg buf_ptr =
x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[buf_idx].ptr));
+ get_offset(p, &p->buffer_varient[var_idx].ptr));
x86_mov(p->func, ptr, buf_ptr);
return ptr;
}
else {
struct x86_reg ptr = p->tmp_EAX;
+ const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
struct x86_reg buf_stride =
x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[buf_idx].stride));
+ get_offset(p, &p->buffer[varient->buffer_index].stride));
struct x86_reg buf_base_ptr =
x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[buf_idx].base_ptr));
+ get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
@@ -436,28 +490,33 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
boolean linear )
{
- if (linear && p->nr_buffers == 1) {
+ if (linear && p->nr_buffer_varients == 1) {
struct x86_reg stride = x86_make_disp(p->machine_EDX,
get_offset(p, &p->buffer[0].stride));
- x86_add(p->func, p->idx_EBX, stride);
- sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ if (p->buffer_varient[0].instance_divisor == 0) {
+ x86_add(p->func, p->idx_EBX, stride);
+ sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ }
}
else if (linear) {
unsigned i;
/* Is this worthwhile??
*/
- for (i = 0; i < p->nr_buffers; i++) {
+ for (i = 0; i < p->nr_buffer_varients; i++) {
+ struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].ptr));
+ get_offset(p, &varient->ptr));
struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].stride));
+ get_offset(p, &p->buffer[varient->buffer_index].stride));
- x86_mov(p->func, p->tmp_EAX, buf_ptr);
- x86_add(p->func, p->tmp_EAX, buf_stride);
- if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
- x86_mov(p->func, buf_ptr, p->tmp_EAX);
+ if (varient->instance_divisor == 0) {
+ x86_mov(p->func, p->tmp_EAX, buf_ptr);
+ x86_add(p->func, p->tmp_EAX, buf_stride);
+ if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
+ x86_mov(p->func, buf_ptr, p->tmp_EAX);
+ }
}
}
else {
@@ -514,7 +573,18 @@ static boolean build_vertex_emit( struct translate_sse *p,
x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
- x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4));
+ x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
+
+ /* Load instance ID.
+ */
+ if (p->use_instancing) {
+ x86_mov(p->func,
+ p->tmp_EAX,
+ x86_fn_arg(p->func, 4));
+ x86_mov(p->func,
+ x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
+ p->tmp_EAX);
+ }
/* Get vertex count, compare to zero
*/
@@ -531,17 +601,18 @@ static boolean build_vertex_emit( struct translate_sse *p,
label = x86_get_label(p->func);
{
struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
- int last_vb = -1;
+ int last_varient = -1;
struct x86_reg vb;
for (j = 0; j < p->translate.key.nr_elements; j++) {
const struct translate_element *a = &p->translate.key.element[j];
+ unsigned varient = p->element_to_buffer_varient[j];
/* Figure out source pointer address:
*/
- if (a->input_buffer != last_vb) {
- last_vb = a->input_buffer;
- vb = get_buffer_ptr(p, linear, a->input_buffer, elt);
+ if (varient != last_varient) {
+ last_varient = varient;
+ vb = get_buffer_ptr(p, linear, varient, elt);
}
if (!translate_attr( p, a,
@@ -624,6 +695,7 @@ static void translate_sse_release( struct translate *translate )
static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_sse *p = (struct translate_sse *)translate;
@@ -631,12 +703,14 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
p->gen_run_elts( translate,
elts,
count,
- output_buffer );
+ instance_id,
+ output_buffer);
}
static void PIPE_CDECL translate_sse_run( struct translate *translate,
unsigned start,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_sse *p = (struct translate_sse *)translate;
@@ -644,7 +718,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate,
p->gen_run( translate,
start,
count,
- output_buffer );
+ instance_id,
+ output_buffer);
}
@@ -666,8 +741,37 @@ struct translate *translate_sse2_create( const struct translate_key *key )
p->translate.run_elts = translate_sse_run_elts;
p->translate.run = translate_sse_run;
- for (i = 0; i < key->nr_elements; i++)
- p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 );
+ for (i = 0; i < key->nr_elements; i++) {
+ if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
+ unsigned j;
+
+ p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);
+
+ if (key->element[i].instance_divisor) {
+ p->use_instancing = TRUE;
+ }
+
+ /*
+ * Map vertex element to vertex buffer varient.
+ */
+ for (j = 0; j < p->nr_buffer_varients; j++) {
+ if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
+ p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
+ break;
+ }
+ }
+ if (j == p->nr_buffer_varients) {
+ p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
+ p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
+ p->nr_buffer_varients++;
+ }
+ p->element_to_buffer_varient[i] = j;
+ } else {
+ assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);
+
+ p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
+ }
+ }
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 3f74e2aa8b..9725890bd4 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -262,6 +262,10 @@ regions_overlap(int srcX0, int srcY0,
* Copy pixel block from src surface to dst surface.
* Overlapping regions are acceptable.
* Flipping and stretching are supported.
+ * \param filter one of PIPE_TEX_MIPFILTER_NEAREST/LINEAR
+ * \param writemask controls which channels in the dest surface are sourced
+ * from the src surface. Disabled channels are sourced
+ * from (0,0,0,1).
* XXX what about clipping???
* XXX need some control over blitting Z and/or stencil.
*/
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 46b4706b76..249a0375fc 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -379,9 +379,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
float t1 = y1 / (float)surf->height;
float s2 = x2 / (float)surf->width;
float t2 = y2 / (float)surf->height;
- const float st[4][2] = {
- {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2}
- };
+ float st[4][2];
+
+ st[0][0] = s1;
+ st[0][1] = t1;
+ st[1][0] = s2;
+ st[1][1] = t1;
+ st[2][0] = s2;
+ st[2][1] = t2;
+ st[3][0] = s1;
+ st[3][1] = t2;
util_map_texcoords2d_onto_cubemap(surf->face,
/* pointer, stride in floats */
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index 4110485fb1..e2e23c3cdd 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -61,6 +61,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
/* tell pipe about the vertex attributes */
for (i = 0; i < num_attribs; i++) {
velements[i].src_offset = i * 4 * sizeof(float);
+ velements[i].instance_divisor = 0;
velements[i].vertex_buffer_index = 0;
velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
velements[i].nr_components = 4;
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 9f16b42944..01f7931aed 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -62,10 +62,10 @@ PIPE_FORMAT_R16G16_SSCALED , array , 1, 1, s16 , s16 , , , xy01,
PIPE_FORMAT_R16G16B16_SSCALED , array , 1, 1, s16 , s16 , s16 , , xyz1, rgb
PIPE_FORMAT_R16G16B16A16_SSCALED , array , 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb
PIPE_FORMAT_R8_UNORM , array , 1, 1, un8 , , , , x001, rgb
-PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , xy01, rgb
-PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , xyz1, rgb
-PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb
-PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyz1, rgb
+PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , yx01, rgb
+PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , zyx1, rgb
+PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb
+PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb
PIPE_FORMAT_R8_USCALED , array , 1, 1, u8 , , , , x001, rgb
PIPE_FORMAT_R8G8_USCALED , array , 1, 1, u8 , u8 , , , xy01, rgb
PIPE_FORMAT_R8G8B8_USCALED , array , 1, 1, u8 , u8 , u8 , , xyz1, rgb
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 43eb0153ee..0ab53c75dd 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -425,6 +425,8 @@ util_pack_z(enum pipe_format format, double z)
if (z == 1.0)
return 0xffffffff;
return (uint) (z * 0xffffffff);
+ case PIPE_FORMAT_Z32_FLOAT:
+ return (uint)z;
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
if (z == 1.0)
diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c
new file mode 100644
index 0000000000..3f43a19e01
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_ringbuffer.c
@@ -0,0 +1,145 @@
+
+#include "pipe/p_thread.h"
+#include "pipe/p_defines.h"
+#include "util/u_ringbuffer.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+/* Generic ringbuffer:
+ */
+struct util_ringbuffer
+{
+ struct util_packet *buf;
+ unsigned mask;
+
+ /* Can this be done with atomic variables??
+ */
+ unsigned head;
+ unsigned tail;
+ pipe_condvar change;
+ pipe_mutex mutex;
+};
+
+
+struct util_ringbuffer *util_ringbuffer_create( unsigned dwords )
+{
+ struct util_ringbuffer *ring = CALLOC_STRUCT(util_ringbuffer);
+ if (ring == NULL)
+ return NULL;
+
+ assert(util_is_power_of_two(dwords));
+
+ ring->buf = MALLOC( dwords * sizeof(unsigned) );
+ if (ring->buf == NULL)
+ goto fail;
+
+ ring->mask = dwords - 1;
+
+ pipe_condvar_init(ring->change);
+ pipe_mutex_init(ring->mutex);
+ return ring;
+
+fail:
+ FREE(ring->buf);
+ FREE(ring);
+ return NULL;
+}
+
+void util_ringbuffer_destroy( struct util_ringbuffer *ring )
+{
+ pipe_condvar_destroy(ring->change);
+ pipe_mutex_destroy(ring->mutex);
+ FREE(ring->buf);
+ FREE(ring);
+}
+
+static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring )
+{
+ return (ring->tail - (ring->head + 1)) & ring->mask;
+}
+
+void util_ringbuffer_enqueue( struct util_ringbuffer *ring,
+ const struct util_packet *packet )
+{
+ unsigned i;
+
+ /* XXX: over-reliance on mutexes, etc:
+ */
+ pipe_mutex_lock(ring->mutex);
+
+ /* Wait for free space:
+ */
+ while (util_ringbuffer_space(ring) < packet->dwords)
+ pipe_condvar_wait(ring->change, ring->mutex);
+
+ /* Copy data to ring:
+ */
+ for (i = 0; i < packet->dwords; i++) {
+
+ /* Copy all dwords of the packet. Note we're abusing the
+ * typesystem a little - we're being passed a pointer to
+ * something, but probably not an array of packet structs:
+ */
+ ring->buf[ring->head] = packet[i];
+ ring->head++;
+ ring->head &= ring->mask;
+ }
+
+ /* Signal change:
+ */
+ pipe_condvar_signal(ring->change);
+ pipe_mutex_unlock(ring->mutex);
+}
+
+enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring,
+ struct util_packet *packet,
+ unsigned max_dwords,
+ boolean wait )
+{
+ const struct util_packet *ring_packet;
+ unsigned i;
+ int ret = PIPE_OK;
+
+ /* XXX: over-reliance on mutexes, etc:
+ */
+ pipe_mutex_lock(ring->mutex);
+
+ /* Wait for free space:
+ */
+ if (wait) {
+ while (util_ringbuffer_space(ring) == 0)
+ pipe_condvar_wait(ring->change, ring->mutex);
+ }
+ else {
+ if (util_ringbuffer_space(ring) == 0) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out;
+ }
+ }
+
+ ring_packet = &ring->buf[ring->tail];
+
+ /* Both of these are considered bugs. Raise an assert on debug builds.
+ */
+ if (ring_packet->dwords > ring->mask + 1 - util_ringbuffer_space(ring) ||
+ ring_packet->dwords > max_dwords) {
+ assert(0);
+ ret = PIPE_ERROR_BAD_INPUT;
+ goto out;
+ }
+
+ /* Copy data from ring:
+ */
+ for (i = 0; i < ring_packet->dwords; i++) {
+ packet[i] = ring->buf[ring->tail];
+ ring->tail++;
+ ring->tail &= ring->mask;
+ }
+
+out:
+ /* Signal change:
+ */
+ pipe_condvar_signal(ring->change);
+ pipe_mutex_unlock(ring->mutex);
+ return ret;
+}
diff --git a/src/gallium/auxiliary/util/u_ringbuffer.h b/src/gallium/auxiliary/util/u_ringbuffer.h
new file mode 100644
index 0000000000..85f0ad6c1f
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_ringbuffer.h
@@ -0,0 +1,29 @@
+
+#ifndef UTIL_RINGBUFFER_H
+#define UTIL_RINGBUFFER_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h" /* only for pipe_error! */
+
+/* Generic header
+ */
+struct util_packet {
+ unsigned dwords:8;
+ unsigned data24:24;
+};
+
+struct util_ringbuffer;
+
+struct util_ringbuffer *util_ringbuffer_create( unsigned dwords );
+
+void util_ringbuffer_destroy( struct util_ringbuffer *ring );
+
+void util_ringbuffer_enqueue( struct util_ringbuffer *ring,
+ const struct util_packet *packet );
+
+enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring,
+ struct util_packet *packet,
+ unsigned max_dwords,
+ boolean wait );
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 1ba82bb21f..f9936eb1cb 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -1357,7 +1357,10 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
/*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
break;
default:
- debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format));
+ util_format_write_4f(format,
+ p, src_stride * sizeof(float),
+ packed, util_format_get_stride(format, w),
+ 0, 0, w, h);
}
pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index fc2a1c59a6..a524e2fdfb 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -316,6 +316,7 @@ init_buffers(struct vl_compositor *c)
pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
c->vertex_elems[0].src_offset = 0;
+ c->vertex_elems[0].instance_divisor = 0;
c->vertex_elems[0].vertex_buffer_index = 0;
c->vertex_elems[0].nr_components = 2;
c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
@@ -345,6 +346,7 @@ init_buffers(struct vl_compositor *c)
pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
c->vertex_elems[1].src_offset = 0;
+ c->vertex_elems[1].instance_divisor = 0;
c->vertex_elems[1].vertex_buffer_index = 1;
c->vertex_elems[1].nr_components = 2;
c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
@@ -353,7 +355,7 @@ init_buffers(struct vl_compositor *c)
* Create our vertex shader's constant buffer
* Const buffer contains scaling and translation vectors
*/
- c->vs_const_buf.buffer = pipe_buffer_create
+ c->vs_const_buf = pipe_buffer_create
(
c->pipe->screen,
1,
@@ -365,7 +367,7 @@ init_buffers(struct vl_compositor *c)
* Create our fragment shader's constant buffer
* Const buffer contains the color conversion matrix and bias vectors
*/
- c->fs_const_buf.buffer = pipe_buffer_create
+ c->fs_const_buf = pipe_buffer_create
(
c->pipe->screen,
1,
@@ -390,8 +392,8 @@ cleanup_buffers(struct vl_compositor *c)
for (i = 0; i < 2; ++i)
pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL);
- pipe_buffer_reference(&c->vs_const_buf.buffer, NULL);
- pipe_buffer_reference(&c->fs_const_buf.buffer, NULL);
+ pipe_buffer_reference(&c->vs_const_buf, NULL);
+ pipe_buffer_reference(&c->fs_const_buf, NULL);
}
bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
@@ -483,13 +485,13 @@ void vl_compositor_render(struct vl_compositor *compositor,
compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
- compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf);
- compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf);
+ compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf);
+ compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
vs_consts = pipe_buffer_map
(
compositor->pipe->screen,
- compositor->vs_const_buf.buffer,
+ compositor->vs_const_buf,
PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
);
@@ -511,7 +513,7 @@ void vl_compositor_render(struct vl_compositor *compositor,
vs_consts->src_trans.z = 0;
vs_consts->src_trans.w = 0;
- pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer);
+ pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf);
compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
@@ -525,10 +527,10 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float
memcpy
(
- pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+ pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE),
mat,
sizeof(struct fragment_shader_consts)
);
- pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf.buffer);
+ pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf);
}
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index f441901a75..6a9a3fd7af 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -47,7 +47,7 @@ struct vl_compositor
struct pipe_scissor_state scissor;
struct pipe_vertex_buffer vertex_bufs[2];
struct pipe_vertex_element vertex_elems[2];
- struct pipe_constant_buffer vs_const_buf, fs_const_buf;
+ struct pipe_buffer *vs_const_buf, *fs_const_buf;
};
bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index caf581aca6..e43187545c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -891,53 +891,61 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
/* Position element */
r->vertex_elems[0].src_offset = 0;
+ r->vertex_elems[0].instance_divisor = 0;
r->vertex_elems[0].vertex_buffer_index = 0;
r->vertex_elems[0].nr_components = 2;
r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Luma, texcoord element */
r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
+ r->vertex_elems[1].instance_divisor = 0;
r->vertex_elems[1].vertex_buffer_index = 0;
r->vertex_elems[1].nr_components = 2;
r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Chroma Cr texcoord element */
r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+ r->vertex_elems[2].instance_divisor = 0;
r->vertex_elems[2].vertex_buffer_index = 0;
r->vertex_elems[2].nr_components = 2;
r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Chroma Cb texcoord element */
r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+ r->vertex_elems[3].instance_divisor = 0;
r->vertex_elems[3].vertex_buffer_index = 0;
r->vertex_elems[3].nr_components = 2;
r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* First ref surface top field texcoord element */
r->vertex_elems[4].src_offset = 0;
+ r->vertex_elems[4].instance_divisor = 0;
r->vertex_elems[4].vertex_buffer_index = 1;
r->vertex_elems[4].nr_components = 2;
r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* First ref surface bottom field texcoord element */
r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
+ r->vertex_elems[5].instance_divisor = 0;
r->vertex_elems[5].vertex_buffer_index = 1;
r->vertex_elems[5].nr_components = 2;
r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Second ref surface top field texcoord element */
r->vertex_elems[6].src_offset = 0;
+ r->vertex_elems[6].instance_divisor = 0;
r->vertex_elems[6].vertex_buffer_index = 2;
r->vertex_elems[6].nr_components = 2;
r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Second ref surface bottom field texcoord element */
r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
+ r->vertex_elems[7].instance_divisor = 0;
r->vertex_elems[7].vertex_buffer_index = 2;
r->vertex_elems[7].nr_components = 2;
r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
- r->vs_const_buf.buffer = pipe_buffer_create
+ r->vs_const_buf = pipe_buffer_create
(
r->pipe->screen,
DEFAULT_BUF_ALIGNMENT,
@@ -945,7 +953,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
sizeof(struct vertex_shader_consts)
);
- r->fs_const_buf.buffer = pipe_buffer_create
+ r->fs_const_buf = pipe_buffer_create
(
r->pipe->screen,
DEFAULT_BUF_ALIGNMENT,
@@ -954,11 +962,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
memcpy
(
- pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+ pipe_buffer_map(r->pipe->screen, r->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE),
&fs_consts, sizeof(struct fragment_shader_consts)
);
- pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
+ pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf);
return true;
}
@@ -970,8 +978,8 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
assert(r);
- pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
- pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
+ pipe_buffer_reference(&r->vs_const_buf, NULL);
+ pipe_buffer_reference(&r->fs_const_buf, NULL);
for (i = 0; i < 3; ++i)
pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
@@ -1284,19 +1292,19 @@ flush(struct vl_mpeg12_mc_renderer *r)
vs_consts = pipe_buffer_map
(
- r->pipe->screen, r->vs_const_buf.buffer,
+ r->pipe->screen, r->vs_const_buf,
PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
);
vs_consts->denorm.x = r->surface->width0;
vs_consts->denorm.y = r->surface->height0;
- pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
+ pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf);
r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
- &r->vs_const_buf);
+ r->vs_const_buf);
r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
- &r->fs_const_buf);
+ r->fs_const_buf);
if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 64184337a0..f00b8c7b8b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -63,8 +63,8 @@ struct vl_mpeg12_mc_renderer
struct pipe_viewport_state viewport;
struct pipe_scissor_state scissor;
- struct pipe_constant_buffer vs_const_buf;
- struct pipe_constant_buffer fs_const_buf;
+ struct pipe_buffer *vs_const_buf;
+ struct pipe_buffer *fs_const_buf;
struct pipe_framebuffer_state fb_state;
struct pipe_vertex_element vertex_elems[8];
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 21f5f9111a..d394f5b4f1 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -33,7 +33,11 @@ This state describes how resources in various flavours (textures,
buffers, surfaces) are bound to the driver.
-* ``set_constant_buffer``
+* ``set_constant_buffer`` sets a constant buffer to be used for a given shader
+ type. index is used to indicate which buffer to set (some apis may allow
+ multiple ones to be set, and binding a specific one later, though drivers
+ are mostly restricted to the first one right now).
+
* ``set_framebuffer_state``
* ``set_fragment_sampler_textures``
* ``set_vertex_sampler_textures``
@@ -47,7 +51,6 @@ These pieces of state are too small, variable, and/or trivial to have CSO
objects. They all follow simple, one-method binding calls, e.g.
``set_edgeflags``.
-* ``set_edgeflags``
* ``set_blend_color``
* ``set_clip_state``
* ``set_polygon_stipple``
@@ -72,12 +75,67 @@ stencil-only clears of packed depth-stencil buffers.
Drawing
^^^^^^^
-``draw_arrays``
+``draw_arrays`` draws a specified primitive.
+
+This command is equivalent to calling ``draw_arrays_instanced``
+with ``startInstance`` set to 0 and ``instanceCount`` set to 1.
+
+``draw_elements`` draws a specified primitive using an optional
+index buffer.
-``draw_elements``
+This command is equivalent to calling ``draw_elements_instanced``
+with ``startInstance`` set to 0 and ``instanceCount`` set to 1.
``draw_range_elements``
+XXX: this is (probably) a temporary entrypoint, as the range
+information should be available from the vertex_buffer state.
+Using this to quickly evaluate a specialized path in the draw
+module.
+
+``draw_arrays_instanced`` draws multiple instances of the same primitive.
+
+This command is equivalent to calling ``draw_elements_instanced``
+with ``indexBuffer`` set to NULL and ``indexSize`` set to 0.
+
+``draw_elements_instanced`` draws multiple instances of the same primitive
+using an optional index buffer.
+
+For instanceID in the range between ``startInstance``
+and ``startInstance``+``instanceCount``-1, inclusive, draw a primitive
+specified by ``mode`` and sequential numbers in the range between ``start``
+and ``start``+``count``-1, inclusive.
+
+If ``indexBuffer`` is not NULL, it specifies an index buffer with index
+byte size of ``indexSize``. The sequential numbers are used to lookup
+the index buffer and the resulting indices in turn are used to fetch
+vertex attributes.
+
+If ``indexBuffer`` is NULL, the sequential numbers are used directly
+as indices to fetch vertex attributes.
+
+If a given vertex element has ``instance_divisor`` set to 0, it is said
+it contains per-vertex data and effective vertex attribute address needs
+to be recalculated for every index.
+
+ attribAddr = ``stride`` * index + ``src_offset``
+
+If a given vertex element has ``instance_divisor`` set to non-zero,
+it is said it contains per-instance data and effective vertex attribute
+address needs to recalculated for every ``instance_divisor``-th instance.
+
+ attribAddr = ``stride`` * instanceID / ``instance_divisor`` + ``src_offset``
+
+In the above formulas, ``src_offset`` is taken from the given vertex element
+and ``stride`` is taken from a vertex buffer associated with the given
+vertex element.
+
+The calculated attribAddr is used as an offset into the vertex buffer to
+fetch the attribute data.
+
+The value of ``instanceID`` can be read in a vertex shader through a system
+value register declared with INSTANCEID semantic name.
+
Queries
^^^^^^^
diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst
index 4d8e1708e7..bfa4a1170a 100644
--- a/src/gallium/docs/source/cso/rasterizer.rst
+++ b/src/gallium/docs/source/cso/rasterizer.rst
@@ -7,32 +7,69 @@ The rasterizer state controls the rendering of points, lines and triangles.
Attributes include polygon culling state, line width, line stipple,
multisample state, scissoring and flat/smooth shading.
-
Members
-------
+bypass_vs_clip_and_viewport
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Whether the entire TCL pipeline should be bypassed. This implies that
+vertices are pre-transformed for the viewport, and will not be run
+through the vertex shader.
+
+.. note::
+
+ Implementations may still clip away vertices that are not in the viewport
+ when this is set.
+
flatshade
- If set, the provoking vertex of each polygon is used to determine the
- color of the entire polygon. If not set, fragment colors will be
- interpolated between the vertex colors.
- Note that this is separate from the fragment shader input attributes
- CONSTANT, LINEAR and PERSPECTIVE. We need the flatshade state at
+^^^^^^^^^
+
+If set, the provoking vertex of each polygon is used to determine the color
+of the entire polygon. If not set, fragment colors will be interpolated
+between the vertex colors.
+
+The actual interpolated shading algorithm is obviously
+implementation-dependent, but will usually be Gourard for most hardware.
+
+.. note::
+
+ This is separate from the fragment shader input attributes
+ CONSTANT, LINEAR and PERSPECTIVE. The flatshade state is needed at
clipping time to determine how to set the color of new vertices.
- Also note that the draw module can implement flat shading by copying
- the provoking vertex color to all the other vertices in the primitive.
+
+ :ref:`Draw` can implement flat shading by copying the provoking vertex
+ color to all the other vertices in the primitive.
flatshade_first
- Whether the first vertex should be the provoking vertex, for most
- primitives. If not set, the last vertex is the provoking vertex.
+^^^^^^^^^^^^^^^
+
+Whether the first vertex should be the provoking vertex, for most primitives.
+If not set, the last vertex is the provoking vertex.
+
+There are several important exceptions to the specification of this rule.
+
+* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first
+ vertex. If the caller wishes to change the provoking vertex, they merely
+ need to rotate the vertices themselves.
+* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no
+ effect; the provoking vertex is always the last vertex.
+* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the
+ second vertex, not the first. This permits each segment of the fan to have
+ a different color.
+
+Other Members
+^^^^^^^^^^^^^
light_twoside
- If set, there are per-vertex back-facing colors. The draw module
+ If set, there are per-vertex back-facing colors. :ref:`Draw`
uses this state along with the front/back information to set the
final vertex colors prior to rasterization.
front_winding
Indicates the window order of front-facing polygons, either
PIPE_WINDING_CW or PIPE_WINDING_CCW
+
cull_mode
Indicates which polygons to cull, either PIPE_WINDING_NONE (cull no
polygons), PIPE_WINDING_CW (cull clockwise-winding polygons),
@@ -68,7 +105,7 @@ line_stipple_enable
line_stipple_pattern
16-bit bitfield of on/off flags, used to pattern the line stipple.
line_stipple_factor
- When drawinga stippled line, each bit in the stipple pattern is
+ When drawing a stippled line, each bit in the stipple pattern is
repeated N times, where N = line_stipple_factor + 1.
line_last_pixel
Controls whether the last pixel in a line is drawn or not. OpenGL
@@ -96,7 +133,7 @@ sprite_coord_mode
lower left vertex will have coordinate (0,0,0,1).
For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have
coordinate (0,0,0,1).
- This state is needed by the 'draw' module because that's where each
+ This state is needed by :ref:`Draw` because that's where each
point vertex is converted into four quad vertices. There's no other
place to emit the new vertex texture coordinates which are required for
sprite rendering.
@@ -108,45 +145,9 @@ scissor
Whether the scissor test is enabled.
multisample
- Whether :ref:`MSAA` is enabled.
-
-bypass_vs_clip_and_viewport
- Whether the entire TCL pipeline should be bypassed. This implies that
- vertices are pre-transformed for the viewport, and will not be run
- through the vertex shader. Note that implementations may still clip away
- vertices that are not in the viewport.
+ Whether :term:`MSAA` is enabled.
gl_rasterization_rules
Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set,
the rasterizer will use (0, 0) for pixel centers.
-
-Notes
------
-
-flatshade
-^^^^^^^^^
-
-The actual interpolated shading algorithm is obviously
-implementation-dependent, but will usually be Gourard for most hardware.
-
-bypass_vs_clip_and_viewport
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When set, this implies that vertices are pre-transformed for the viewport, and
-will not be run through the vertex shader. Note that implementations may still
-clip away vertices that are not visible.
-
-flatshade_first
-^^^^^^^^^^^^^^^
-
-There are several important exceptions to the specification of this rule.
-
-* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first
- vertex. If the caller wishes to change the provoking vertex, they merely
- need to rotate the vertices themselves.
-* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no
- effect; the provoking vertex is always the last vertex.
-* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the
- second vertex, not the first. This permits each segment of the fan to have
- a different color.
diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst
index 33e846e33d..0ef9fe2645 100644
--- a/src/gallium/docs/source/distro.rst
+++ b/src/gallium/docs/source/distro.rst
@@ -61,10 +61,7 @@ VMWare SVGA
ATI r300
^^^^^^^^
-AMD/ATI r600
-^^^^^^^^^^^^
-
-Highly experimental.
+Testing-quality.
Softpipe
^^^^^^^^
@@ -109,17 +106,31 @@ Auxiliary
CSO Cache
^^^^^^^^^
+The CSO cache is used to accelerate preparation of state by saving
+driver-specific state structures for later use.
+
+.. _draw:
+
Draw
^^^^
+Draw is a software :term:`TCL` pipeline for hardware that lacks vertex shaders
+or other essential parts of pre-rasterization vertex preparation.
+
Gallivm
^^^^^^^
Indices
^^^^^^^
-Pipe Buffer Manager
-^^^^^^^^^^^^^^^^^^^
+Indices provides tools for translating or generating element indices for
+use with element-based rendering.
+
+Pipe Buffer Managers
+^^^^^^^^^^^^^^^^^^^^
+
+Each of these managers provides various services to drivers that are not
+fully utilizing a memory manager.
Remote Debugger
^^^^^^^^^^^^^^^
@@ -127,12 +138,12 @@ Remote Debugger
Runtime Assembly Emission
^^^^^^^^^^^^^^^^^^^^^^^^^
-Surface Context Tracker
-^^^^^^^^^^^^^^^^^^^^^^^
-
TGSI
^^^^
+The TGSI auxiliary module provides basic utilities for manipulating TGSI
+streams.
+
Translate
^^^^^^^^^
diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst
index 6a9110ce78..aec89f8b5c 100644
--- a/src/gallium/docs/source/glossary.rst
+++ b/src/gallium/docs/source/glossary.rst
@@ -8,3 +8,8 @@ Glossary
Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes
multiple samples of the depth buffer, and uses this information to
smooth the edges of polygons.
+
+ TCL
+ Transform, Clipping, & Lighting. The three stages of preparation in a
+ rasterizing pipeline prior to the actual rasterization of vertices into
+ fragments.
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 9631e6967e..72bb75a55d 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -3,6 +3,28 @@ Screen
A screen is an object representing the context-independent part of a device.
+Useful Flags
+------------
+
+.. _pipe_texture_usage:
+
+PIPE_TEXTURE_USAGE
+^^^^^^^^^^^^^^^^^^
+
+These flags determine the possible roles a texture may be used for during its
+lifetime. Texture usage flags are cumulative and may be combined to create a
+texture that can be used as multiple things.
+
+* ``RENDER_TARGET``: A colorbuffer or pixelbuffer.
+* ``DISPLAY_TARGET``: A sharable buffer that can be given to another process.
+* ``PRIMARY``: A frontbuffer or scanout buffer.
+* ``DEPTH_STENCIL``: A depthbuffer, stencilbuffer, or Z buffer. Gallium does
+ not explicitly provide for stencil-only buffers, so any stencilbuffer
+ validated here is implicitly also a depthbuffer.
+* ``SAMPLER``: A texture that may be sampled from in a fragment or vertex
+ shader.
+* ``DYNAMIC``: A texture that will be mapped frequently.
+
Methods
-------
@@ -33,6 +55,14 @@ is_format_supported
See if a format can be used in a specific manner.
+**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags.
+
+Returns TRUE if all usages can be satisfied.
+
+.. note::
+
+ ``PIPE_TEXTURE_USAGE_DYNAMIC`` is not a valid usage.
+
texture_create
^^^^^^^^^^^^^^
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index ebee4902b0..65a669d8cf 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -516,8 +516,11 @@ SEQ - Set On Equal
.. math::
dst.x = (src0.x == src1.x) ? 1 : 0
+
dst.y = (src0.y == src1.y) ? 1 : 0
+
dst.z = (src0.z == src1.z) ? 1 : 0
+
dst.w = (src0.w == src1.w) ? 1 : 0
@@ -526,8 +529,11 @@ SFL - Set On False
.. math::
dst.x = 0
+
dst.y = 0
+
dst.z = 0
+
dst.w = 0
Considered for removal.
@@ -537,8 +543,11 @@ SGT - Set On Greater Than
.. math::
dst.x = (src0.x > src1.x) ? 1 : 0
+
dst.y = (src0.y > src1.y) ? 1 : 0
+
dst.z = (src0.z > src1.z) ? 1 : 0
+
dst.w = (src0.w > src1.w) ? 1 : 0
@@ -560,8 +569,11 @@ SLE - Set On Less Equal Than
.. math::
dst.x = (src0.x <= src1.x) ? 1 : 0
+
dst.y = (src0.y <= src1.y) ? 1 : 0
+
dst.z = (src0.z <= src1.z) ? 1 : 0
+
dst.w = (src0.w <= src1.w) ? 1 : 0
@@ -570,8 +582,11 @@ SNE - Set On Not Equal
.. math::
dst.x = (src0.x != src1.x) ? 1 : 0
+
dst.y = (src0.y != src1.y) ? 1 : 0
+
dst.z = (src0.z != src1.z) ? 1 : 0
+
dst.w = (src0.w != src1.w) ? 1 : 0
@@ -580,8 +595,11 @@ STR - Set On True
.. math::
dst.x = 1
+
dst.y = 1
+
dst.z = 1
+
dst.w = 1
@@ -629,8 +647,11 @@ X2D - 2D Coordinate Transformation
.. math::
dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
+
dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
Considered for removal.
@@ -979,13 +1000,13 @@ XOR - Bitwise Xor
.. math::
- dst.x = src0.x ^ src1.x
+ dst.x = src0.x \oplus src1.x
- dst.y = src0.y ^ src1.y
+ dst.y = src0.y \oplus src1.y
- dst.z = src0.z ^ src1.z
+ dst.z = src0.z \oplus src1.z
- dst.w = src0.w ^ src1.w
+ dst.w = src0.w \oplus src1.w
SAD - Sum Of Absolute Differences
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index d5f5c7bbba..aa29dcb394 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -358,6 +358,7 @@ struct cell_spu_function_info
/** This is the object passed to spe_create_thread() */
+PIPE_ALIGN_TYPE(16,
struct cell_init_info
{
unsigned id;
@@ -370,7 +371,7 @@ struct cell_init_info
uint *buffer_status; /**< points at cell_context->buffer_status */
struct cell_spu_function_info *spu_functions;
-} ALIGN16_ATTRIB;
+});
#endif /* CELL_COMMON_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 5c3188e7f9..e402ed2922 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -89,7 +89,7 @@ struct cell_buffer_node;
*/
struct cell_buffer_list
{
- struct cell_fence fence ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) struct cell_fence fence;
struct cell_buffer_node *head;
};
@@ -115,7 +115,7 @@ struct cell_context
struct pipe_blend_color blend_color;
struct pipe_clip_state clip;
- struct pipe_constant_buffer constants[2];
+ struct pipe_buffer *constants[2];
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
@@ -150,18 +150,18 @@ struct cell_context
/** Mapped constant buffers */
void *mapped_constants[PIPE_SHADER_TYPES];
- struct cell_spu_function_info spu_functions ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions;
uint num_cells, num_spus;
/** Buffers for command batches, vertex/index data */
uint buffer_size[CELL_NUM_BUFFERS];
- ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE];
int cur_batch; /**< which buffer is being filled w/ commands */
/** [4] to ensure 16-byte alignment for each status word */
- uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4];
/** Associated with each command/batch buffer is a list of pipe_buffers
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 3fa8b975d3..0a4da8ecc8 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -51,17 +51,17 @@ cell_map_constant_buffers(struct cell_context *sp)
struct pipe_winsys *ws = sp->pipe.winsys;
uint i;
for (i = 0; i < 2; i++) {
- if (sp->constants[i].buffer && sp->constants[i].buffer->size) {
- sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
+ if (sp->constants[i] && sp->constants[i]->size) {
+ sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i],
PIPE_BUFFER_USAGE_CPU_READ);
cell_flush_buffer_range(sp, sp->mapped_constants[i],
- sp->constants[i].buffer->size);
+ sp->constants[i]->size);
}
}
draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
sp->mapped_constants[PIPE_SHADER_VERTEX],
- sp->constants[PIPE_SHADER_VERTEX].buffer->size);
+ sp->constants[PIPE_SHADER_VERTEX]->size);
}
static void
@@ -70,8 +70,8 @@ cell_unmap_constant_buffers(struct cell_context *sp)
struct pipe_winsys *ws = sp->pipe.winsys;
uint i;
for (i = 0; i < 2; i++) {
- if (sp->constants[i].buffer && sp->constants[i].buffer->size)
- ws->buffer_unmap(ws, sp->constants[i].buffer);
+ if (sp->constants[i] && sp->constants[i]->size)
+ ws->buffer_unmap(ws, sp->constants[i]);
sp->mapped_constants[i] = NULL;
}
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 5b87286d4c..f1e1dcb9eb 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -240,12 +240,12 @@ cell_emit_state(struct cell_context *cell)
if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) {
const uint shader = PIPE_SHADER_FRAGMENT;
- const uint num_const = cell->constants[shader].buffer->size / sizeof(float);
+ const uint num_const = cell->constants[shader]->size / sizeof(float);
uint i, j;
float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float)));
uint32_t *ibuf = (uint32_t *) buf;
const float *constants = pipe_buffer_map(cell->pipe.screen,
- cell->constants[shader].buffer,
+ cell->constants[shader],
PIPE_BUFFER_USAGE_CPU_READ);
ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS;
ibuf[4] = num_const;
@@ -253,7 +253,7 @@ cell_emit_state(struct cell_context *cell)
for (i = 0; i < num_const; i++) {
buf[j++] = constants[i];
}
- pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer);
+ pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader]);
}
if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index 6568c784fe..1b09cf7f7d 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -183,7 +183,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs)
static void
cell_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct cell_context *cell = cell_context(pipe);
@@ -193,7 +193,7 @@ cell_set_constant_buffer(struct pipe_context *pipe,
draw_flush(cell->draw);
/* note: reference counting */
- pipe_buffer_reference(&cell->constants[shader].buffer, buf->buffer);
+ pipe_buffer_reference(&cell->constants[shader], buf);
if (shader == PIPE_SHADER_VERTEX)
cell->dirty |= CELL_NEW_VS_CONSTANTS;
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index 12b855a3db..55bd85bde2 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -53,8 +53,7 @@ struct spu_vs_context draw;
/**
* Buffers containing dynamically generated SPU code:
*/
-static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
- ALIGN16_ATTRIB;
+PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS];
@@ -543,7 +542,7 @@ cmd_batch(uint opcode)
{
const uint buf = (opcode >> 8) & 0xff;
uint size = (opcode >> 16);
- qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16];
const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
uint pos;
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index d86d8e09a5..d2166a4901 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
/* execute declarations (interpolants) */
if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
for (i = 0; i < mach->NumDeclarations; i++) {
+ PIPE_ALIGN_VAR(16)
union {
struct tgsi_full_declaration decl;
qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
- } d ALIGN16_ATTRIB;
+ } d;
unsigned ea = (unsigned) (mach->Declarations + pc);
spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
@@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
/* execute instructions, until pc is set to -1 */
while (pc != -1) {
+ PIPE_ALIGN_VAR(16)
union {
struct tgsi_full_instruction inst;
qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
- } i ALIGN16_ATTRIB;
+ } i;
unsigned ea = (unsigned) (mach->Instructions + pc);
spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h
index 8605679940..0ca92af248 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.h
+++ b/src/gallium/drivers/cell/spu/spu_exec.h
@@ -98,9 +98,9 @@ struct spu_exec_machine
* 4 internal temporaries
* 1 address
*/
+ PIPE_ALIGN_VAR(16)
struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
- + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]
- ALIGN16_ATTRIB;
+ + TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
struct spu_exec_vector *Addrs;
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
index ff3d609d25..98919c43ff 100644
--- a/src/gallium/drivers/cell/spu/spu_funcs.c
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions,
void
return_function_info(void)
{
- struct cell_spu_function_info funcs ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs;
int tag = TAG_MISC;
ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 33767e7c51..b18f4c22ef 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
vector float *constants);
+PIPE_ALIGN_TYPE(16,
struct spu_framebuffer
{
void *color_start; /**< addr of color surface in main memory */
@@ -107,10 +108,11 @@ struct spu_framebuffer
uint zsize; /**< 0, 2 or 4 bytes per Z */
float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
-} ALIGN16_ATTRIB;
+});
/** per-texture level info */
+PIPE_ALIGN_TYPE(16,
struct spu_texture_level
{
void *start;
@@ -123,20 +125,22 @@ struct spu_texture_level
vector signed int mask_s, mask_t, mask_r;
/** texcoord clamp limits */
vector signed int max_s, max_t, max_r;
-} ALIGN16_ATTRIB;
+});
+PIPE_ALIGN_TYPE(16,
struct spu_texture
{
struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
uint max_level;
uint target; /**< PIPE_TEXTURE_x */
-} ALIGN16_ATTRIB;
+});
/**
* All SPU global/context state will be in a singleton object of this type:
*/
+PIPE_ALIGN_TYPE(16,
struct spu_global
{
/** One-time init/constant info */
@@ -155,8 +159,8 @@ struct spu_global
struct vertex_info vertex_info;
/** Current color and Z tiles */
- tile_t ctile ALIGN16_ATTRIB;
- tile_t ztile ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) tile_t ctile;
+ PIPE_ALIGN_VAR(16) tile_t ztile;
/** Read depth/stencil tiles? */
boolean read_depth_stencil;
@@ -165,8 +169,8 @@ struct spu_global
ubyte cur_ctile_status, cur_ztile_status;
/** Status of all tiles in framebuffer */
- ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
- ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
+ PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
/** Current fragment ops machine code, at 8-byte boundary */
uint *fragment_ops_code;
@@ -175,7 +179,7 @@ struct spu_global
spu_fragment_ops_func fragment_ops[2];
/** Current fragment program machine code, at 8-byte boundary */
- uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB;
+ PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
/** Current fragment ops function */
spu_fragment_program_func fragment_program;
@@ -187,7 +191,7 @@ struct spu_global
/** Fragment program constants */
vector float constants[4 * CELL_MAX_CONSTANTS];
-} ALIGN16_ATTRIB;
+});
extern struct spu_global spu;
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 5ffb7073ab..14987e3c3a 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -169,7 +169,7 @@ void
cmd_render(const struct cell_command_render *render, uint *pos_incr)
{
/* we'll DMA into these buffers */
- ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE];
const uint vertex_size = render->vertex_size; /* in bytes */
/*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
uint index_bytes;
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
index 03375d84a5..087963960d 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in,
const qword *shuffle_data);
-static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = {
+PIPE_ALIGN_VAR(16) static const qword
+fetch_shuffle_data[5] = {
/* Shuffle used by CVT_64_FLOAT
*/
{
@@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
unsigned idx;
const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
- qword in[2 * 4] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) qword in[2 * 4];
/* Fetch four attributes for four vertices.
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
index fbe5b34d39..3e9804bf8e 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
@@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw,
struct spu_exec_machine *machine = &draw->machine;
unsigned int j;
- ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS);
+ PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS];
+ PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS];
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
@@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw,
ASSERT_ALIGN16(draw->constants);
machine->Consts = (float (*)[4]) draw->constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- machine->Outputs = ALIGN16_ASSIGN(outputs);
+ machine->Inputs = inputs;
+ machine->Outputs = outputs;
spu_vertex_fetch( draw, machine, elts, count );
@@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw,
for (j = 0; j < count; j++) {
unsigned slot;
float x, y, z, w;
+ PIPE_ALIGN_VAR(16)
unsigned char buffer[sizeof(struct vertex_header)
- + MAX_VERTEX_SIZE] ALIGN16_ATTRIB;
+ + MAX_VERTEX_SIZE];
struct vertex_header *const tmpOut =
(struct vertex_header *) buffer;
const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
@@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw,
}
-unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]
- ALIGN16_ATTRIB;
+PIPE_ALIGN_VAR(16) unsigned char
+immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]);
void
diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h
index 149393712a..191a44c3df 100644
--- a/src/gallium/drivers/failover/fo_context.h
+++ b/src/gallium/drivers/failover/fo_context.h
@@ -125,7 +125,7 @@ failover_context( struct pipe_context *pipe )
void
failover_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf);
+ struct pipe_buffer *buf);
#endif /* FO_CONTEXT_H */
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index 3f5f556032..d6ec4d1313 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -495,7 +495,7 @@ failover_set_vertex_elements(struct pipe_context *pipe,
void
failover_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct failover_context *failover = failover_context(pipe);
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 234b441ce6..37cbd56036 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -233,7 +233,8 @@ struct i915_context
struct pipe_blend_color blend_color;
struct pipe_clip_state clip;
- struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
+ /* XXX unneded */
+ struct pipe_buffer *constants[PIPE_SHADER_TYPES];
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 5f5b6f8e18..0fab6e1bc3 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -518,7 +518,7 @@ static void i915_delete_vs_state(struct pipe_context *pipe, void *shader)
static void i915_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct i915_context *i915 = i915_context(pipe);
struct pipe_screen *screen = pipe->screen;
@@ -538,13 +538,13 @@ static void i915_set_constant_buffer(struct pipe_context *pipe,
*/
if (buf) {
void *mapped;
- if (buf->buffer && buf->buffer->size &&
- (mapped = pipe_buffer_map(screen, buf->buffer,
+ if (buf->size &&
+ (mapped = pipe_buffer_map(screen, buf,
PIPE_BUFFER_USAGE_CPU_READ))) {
- memcpy(i915->current.constants[shader], mapped, buf->buffer->size);
- pipe_buffer_unmap(screen, buf->buffer);
+ memcpy(i915->current.constants[shader], mapped, buf->size);
+ pipe_buffer_unmap(screen, buf);
i915->current.num_user_constants[shader]
- = buf->buffer->size / (4 * sizeof(float));
+ = buf->size / (4 * sizeof(float));
}
else {
i915->current.num_user_constants[shader] = 0;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index bb32d90e33..e389587f3e 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -262,7 +262,7 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog )
static void brw_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct brw_context *brw = brw_context(pipe);
@@ -270,13 +270,13 @@ static void brw_set_constant_buffer(struct pipe_context *pipe,
if (shader == PIPE_SHADER_FRAGMENT) {
pipe_buffer_reference( &brw->curr.fragment_constants,
- buf->buffer );
+ buf );
brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS;
}
else {
pipe_buffer_reference( &brw->curr.vertex_constants,
- buf->buffer );
+ buf );
brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS;
}
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 9f5b4e6323..f9063d90fb 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -404,17 +404,17 @@ static void
identity_set_constant_buffer(struct pipe_context *_pipe,
uint shader,
uint index,
- const struct pipe_constant_buffer *_buffer)
+ struct pipe_buffer *_buffer)
{
struct identity_context *id_pipe = identity_context(_pipe);
struct pipe_context *pipe = id_pipe->pipe;
- struct pipe_constant_buffer unwrapped_buffer;
- struct pipe_constant_buffer *buffer = NULL;
+ struct pipe_buffer *unwrapped_buffer;
+ struct pipe_buffer *buffer = NULL;
- /* unwrap the input state */
+ /* XXX hmm? unwrap the input state */
if (_buffer) {
- unwrapped_buffer.buffer = identity_buffer_unwrap(_buffer->buffer);
- buffer = &unwrapped_buffer;
+ unwrapped_buffer = identity_buffer_unwrap(_buffer);
+ buffer = unwrapped_buffer;
}
pipe->set_constant_buffer(pipe,
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 1cc3c9227c..aaa675aec7 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -124,8 +124,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
}
for (i = 0; i < Elements(llvmpipe->constants); i++) {
- if (llvmpipe->constants[i].buffer) {
- pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL);
+ if (llvmpipe->constants[i]) {
+ pipe_buffer_reference(&llvmpipe->constants[i], NULL);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 6411797cf5..426d6eb4a1 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -64,7 +64,7 @@ struct llvmpipe_context {
/** Other rendering state */
struct pipe_blend_color blend_color[4][16];
struct pipe_clip_state clip;
- struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
+ struct pipe_buffer *constants[PIPE_SHADER_TYPES];
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h
index 7eb05de77a..c3a48700a4 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad.h
@@ -31,6 +31,7 @@
#ifndef LP_QUAD_H
#define LP_QUAD_H
+#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_exec.h"
@@ -83,7 +84,7 @@ struct quad_header_inout
struct quad_header_output
{
/** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
- float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
+ PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
};
@@ -92,9 +93,9 @@ struct quad_header_output
*/
struct quad_interp_coef
{
- float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
- float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
- float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
};
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index b18f17c0cd..0b2d3a2801 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -117,7 +117,7 @@ struct setup_context {
/**
* Execute fragment shader for the four fragments in the quad.
*/
-ALIGN_STACK
+PIPE_ALIGN_STACK
static void
shade_quads(struct llvmpipe_context *llvmpipe,
struct quad_header *quads[],
@@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
uint8_t *tile;
uint8_t *color;
void *depth;
- uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS];
unsigned chan_index;
unsigned q;
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 7020da145f..e16793186b 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -154,7 +154,7 @@ void llvmpipe_set_clip_state( struct pipe_context *,
void llvmpipe_set_constant_buffer(struct pipe_context *,
uint shader, uint index,
- const struct pipe_constant_buffer *buf);
+ struct pipe_buffer *buf);
void *llvmpipe_create_fs_state(struct pipe_context *,
const struct pipe_shader_state *);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b73ca2d41e..9f4bbef73f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -714,12 +714,11 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
void
llvmpipe_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *constants)
+ struct pipe_buffer *constants)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- struct pipe_buffer *buffer = constants ? constants->buffer : NULL;
- unsigned size = buffer ? buffer->size : 0;
- const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL;
+ unsigned size = constants ? constants->size : 0;
+ const void *data = constants ? llvmpipe_buffer(constants)->data : NULL;
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
@@ -727,7 +726,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
draw_flush(llvmpipe->draw);
/* note: reference counting */
- pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
+ pipe_buffer_reference(&llvmpipe->constants[shader], constants);
if(shader == PIPE_SHADER_FRAGMENT) {
llvmpipe->jit_context.constants = data;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 29fff91981..6c29e8d8ac 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend,
}
-ALIGN_STACK
+PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -531,11 +531,11 @@ test_one(unsigned verbose,
success = TRUE;
for(i = 0; i < n && success; ++i) {
if(mode == AoS) {
- ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
@@ -596,11 +596,11 @@ test_one(unsigned verbose,
if(mode == SoA) {
const unsigned stride = type.length*type.width/8;
- ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index faddfb9677..c1abee424c 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module,
}
-ALIGN_STACK
+PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -230,8 +230,8 @@ test_one(unsigned verbose,
for(i = 0; i < n && success; ++i) {
unsigned src_stride = src_type.length*src_type.width/8;
unsigned dst_stride = dst_type.length*dst_type.width/8;
- ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
- ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
int64_t start_counter = 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 23ea9ebbe7..2b258f1052 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module,
}
-ALIGN_STACK
+PIPE_ALIGN_STACK
static boolean
test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
{
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
index e3dc4c5bf4..b67f1e16b1 100644
--- a/src/gallium/drivers/nv04/nv04_state.c
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -332,7 +332,7 @@ nv04_set_clip_state(struct pipe_context *pipe,
static void
nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv04_context *nv04 = nv04_context(pipe);
struct pipe_screen *pscreen = pipe->screen;
@@ -342,13 +342,13 @@ nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
if (buf) {
void *mapped;
- if (buf->buffer && buf->buffer->size &&
- (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)))
+ if (buf && buf->size &&
+ (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ)))
{
- memcpy(nv04->constbuf[shader], mapped, buf->buffer->size);
+ memcpy(nv04->constbuf[shader], mapped, buf->size);
nv04->constbuf_nr[shader] =
- buf->buffer->size / (4 * sizeof(float));
- pipe_buffer_unmap(pscreen, buf->buffer);
+ buf->size / (4 * sizeof(float));
+ pipe_buffer_unmap(pscreen, buf);
}
}
}
diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c
index ffc6be3c40..ad7def53b1 100644
--- a/src/gallium/drivers/nv10/nv10_state.c
+++ b/src/gallium/drivers/nv10/nv10_state.c
@@ -458,7 +458,7 @@ nv10_set_clip_state(struct pipe_context *pipe,
static void
nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv10_context *nv10 = nv10_context(pipe);
struct pipe_screen *pscreen = pipe->screen;
@@ -468,13 +468,13 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
if (buf) {
void *mapped;
- if (buf->buffer && buf->buffer->size &&
- (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)))
+ if (buf->size &&
+ (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ)))
{
- memcpy(nv10->constbuf[shader], mapped, buf->buffer->size);
+ memcpy(nv10->constbuf[shader], mapped, buf->size);
nv10->constbuf_nr[shader] =
- buf->buffer->size / (4 * sizeof(float));
- pipe_buffer_unmap(pscreen, buf->buffer);
+ buf->size / (4 * sizeof(float));
+ pipe_buffer_unmap(pscreen, buf);
}
}
}
diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c
index 3a82e63423..45697a60ef 100644
--- a/src/gallium/drivers/nv20/nv20_state.c
+++ b/src/gallium/drivers/nv20/nv20_state.c
@@ -451,7 +451,7 @@ nv20_set_clip_state(struct pipe_context *pipe,
static void
nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv20_context *nv20 = nv20_context(pipe);
struct pipe_screen *pscreen = pipe->screen;
@@ -461,13 +461,13 @@ nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
if (buf) {
void *mapped;
- if (buf->buffer && buf->buffer->size &&
- (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)))
+ if (buf->size &&
+ (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ)))
{
- memcpy(nv20->constbuf[shader], mapped, buf->buffer->size);
+ memcpy(nv20->constbuf[shader], mapped, buf->size);
nv20->constbuf_nr[shader] =
- buf->buffer->size / (4 * sizeof(float));
- pipe_buffer_unmap(pscreen, buf->buffer);
+ buf->size / (4 * sizeof(float));
+ pipe_buffer_unmap(pscreen, buf);
}
}
}
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index 9893567891..0cc3172dcd 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -43,7 +43,6 @@ static struct nv30_texture_format *
nv30_fragtex_format(uint pipe_format)
{
struct nv30_texture_format *tf = nv30_texture_formats;
- char fs[128];
while (tf->defined) {
if (tf->pipe == pipe_format)
@@ -65,7 +64,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer);
struct nv30_texture_format *tf;
struct nouveau_stateobj *so;
- uint32_t txf, txs , txp;
+ uint32_t txf, txs;
unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
tf = nv30_fragtex_format(pt->format);
@@ -97,13 +96,6 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
return NULL;
}
- if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
- txp = 0;
- } else {
- txp = nv30mt->level[0].pitch;
- txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */;
- }
-
txs = tf->swizzle;
so = so_new(1, 8, 2);
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index a80dfb0488..065c927a10 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -590,12 +590,12 @@ nv30_set_clip_state(struct pipe_context *pipe,
static void
nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv30_context *nv30 = nv30_context(pipe);
- nv30->constbuf[shader] = buf->buffer;
- nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float));
+ nv30->constbuf[shader] = buf;
+ nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
if (shader == PIPE_SHADER_VERTEX) {
nv30->dirty |= NV30_NEW_VERTPROG;
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index ed0ca9e02c..7d990f7d56 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -605,12 +605,12 @@ nv40_set_clip_state(struct pipe_context *pipe,
static void
nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv40_context *nv40 = nv40_context(pipe);
- nv40->constbuf[shader] = buf->buffer;
- nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float));
+ nv40->constbuf[shader] = buf;
+ nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
if (shader == PIPE_SHADER_VERTEX) {
nv40->dirty |= NV40_NEW_VERTPROG;
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 5997456e4c..1e69746322 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -67,8 +67,12 @@ nv50_destroy(struct pipe_context *pipe)
so_ref(NULL, &nv50->state.vertprog);
if (nv50->state.fragprog)
so_ref(NULL, &nv50->state.fragprog);
- if (nv50->state.programs)
- so_ref(NULL, &nv50->state.programs);
+ if (nv50->state.geomprog)
+ so_ref(NULL, &nv50->state.geomprog);
+ if (nv50->state.fp_linkage)
+ so_ref(NULL, &nv50->state.fp_linkage);
+ if (nv50->state.gp_linkage)
+ so_ref(NULL, &nv50->state.gp_linkage);
if (nv50->state.vtxfmt)
so_ref(NULL, &nv50->state.vtxfmt);
if (nv50->state.vtxbuf)
@@ -100,7 +104,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv50->pipe.destroy = nv50_destroy;
nv50->pipe.draw_arrays = nv50_draw_arrays;
+ nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced;
nv50->pipe.draw_elements = nv50_draw_elements;
+ nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced;
nv50->pipe.clear = nv50_clear;
nv50->pipe.flush = nv50_flush;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index cbd4c3ff86..bebcd95054 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -29,9 +29,7 @@
#define NV50_CB_PVP 1
#define NV50_CB_PFP 2
#define NV50_CB_PGP 3
-#define NV50_CB_TIC 4
-#define NV50_CB_TSC 5
-#define NV50_CB_PUPLOAD 6
+#define NV50_CB_AUX 4
#define NV50_NEW_BLEND (1 << 0)
#define NV50_NEW_ZSA (1 << 1)
@@ -45,9 +43,11 @@
#define NV50_NEW_VERTPROG_CB (1 << 9)
#define NV50_NEW_FRAGPROG (1 << 10)
#define NV50_NEW_FRAGPROG_CB (1 << 11)
-#define NV50_NEW_ARRAYS (1 << 12)
-#define NV50_NEW_SAMPLER (1 << 13)
-#define NV50_NEW_TEXTURE (1 << 14)
+#define NV50_NEW_GEOMPROG (1 << 12)
+#define NV50_NEW_GEOMPROG_CB (1 << 13)
+#define NV50_NEW_ARRAYS (1 << 14)
+#define NV50_NEW_SAMPLER (1 << 15)
+#define NV50_NEW_TEXTURE (1 << 16)
struct nv50_blend_stateobj {
struct pipe_blend_state pipe;
@@ -129,10 +129,13 @@ struct nv50_state {
unsigned miptree_nr[PIPE_SHADER_TYPES];
struct nouveau_stateobj *vertprog;
struct nouveau_stateobj *fragprog;
- struct nouveau_stateobj *programs;
+ struct nouveau_stateobj *geomprog;
+ struct nouveau_stateobj *fp_linkage;
+ struct nouveau_stateobj *gp_linkage;
struct nouveau_stateobj *vtxfmt;
struct nouveau_stateobj *vtxbuf;
struct nouveau_stateobj *vtxattr;
+ struct nouveau_stateobj *instbuf;
unsigned vtxelt_nr;
};
@@ -157,6 +160,7 @@ struct nv50_context {
struct pipe_framebuffer_state framebuffer;
struct nv50_program *vertprog;
struct nv50_program *fragprog;
+ struct nv50_program *geomprog;
struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
@@ -193,11 +197,22 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
/* nv50_vbo.c */
extern void nv50_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
+extern void nv50_draw_arrays_instanced(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
extern void nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
unsigned count);
+extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
extern void nv50_vbo_validate(struct nv50_context *nv50);
/* nv50_clear.c */
@@ -207,7 +222,9 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
/* nv50_program.c */
extern void nv50_vertprog_validate(struct nv50_context *nv50);
extern void nv50_fragprog_validate(struct nv50_context *nv50);
-extern void nv50_linkage_validate(struct nv50_context *nv50);
+extern void nv50_geomprog_validate(struct nv50_context *nv50);
+extern void nv50_fp_linkage_validate(struct nv50_context *nv50);
+extern void nv50_gp_linkage_validate(struct nv50_context *nv50);
extern void nv50_program_destroy(struct nv50_context *nv50,
struct nv50_program *p);
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 3f1edf0a13..dc8364ced7 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -92,12 +92,23 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
case PIPE_FORMAT_Z24S8_UNORM:
tile_flags = 0x1800;
break;
+ case PIPE_FORMAT_Z16_UNORM:
+ tile_flags = 0x6c00;
+ break;
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
tile_flags = 0x2800;
break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ tile_flags = 0x7400;
+ break;
default:
- tile_flags = 0x7000;
+ if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) &&
+ util_format_get_blocksizebits(pt->format) == 32)
+ tile_flags = 0x7a00;
+ else
+ tile_flags = 0x7000;
break;
}
@@ -145,7 +156,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
mt->level[0].tile_mode, tile_flags,
&mt->base.bo);
if (ret) {
- for (l = 0; l < pt->last_level; ++l)
+ for (l = 0; l <= pt->last_level; ++l)
FREE(mt->level[l].image_offset);
FREE(mt);
return NULL;
@@ -188,7 +199,7 @@ nv50_miptree_destroy(struct pipe_texture *pt)
struct nv50_miptree *mt = nv50_miptree(pt);
unsigned l;
- for (l = 0; l < pt->last_level; ++l)
+ for (l = 0; l <= pt->last_level; ++l)
FREE(mt->level[l].image_offset);
nouveau_bo_ref(NULL, &mt->base.bo);
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index e16fa479e5..20db51070f 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -92,6 +92,11 @@ struct nv50_reg {
int rhw; /* result hw for FP outputs, or interpolant index */
int acc; /* instruction where this reg is last read (first insn == 1) */
+
+ int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */
+ int indirect[2]; /* index into pc->addr, or -1 */
+
+ ubyte buf_index; /* c{0 .. 15}[] or g{0 .. 15}[] */
};
#define NV50_MOD_NEG 1
@@ -135,7 +140,8 @@ struct nv50_pc {
int immd_nr;
struct nv50_reg **addr;
int addr_nr;
- uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */
+ struct nv50_reg *sysval;
+ int sysval_nr;
struct nv50_reg *temp_temp[16];
struct nv50_program_exec *temp_temp_exec[16];
@@ -171,6 +177,8 @@ struct nv50_pc {
uint8_t edgeflag_out;
};
+static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *);
+
static INLINE void
ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
{
@@ -179,7 +187,10 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
reg->hw = hw;
reg->mod = 0;
reg->rhw = -1;
+ reg->vtx = -1;
reg->acc = 0;
+ reg->indirect[0] = reg->indirect[1] = -1;
+ reg->buf_index = (type == P_CONST) ? 1 : 0;
}
static INLINE unsigned
@@ -197,7 +208,8 @@ terminate_mbb(struct nv50_pc *pc)
/* remove records of temporary address register values */
for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
- pc->r_addr[i].rhw = -1;
+ if (pc->r_addr[i].index < 0)
+ pc->r_addr[i].acc = 0;
}
static void
@@ -260,6 +272,7 @@ reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
if (reg) {
alloc_reg(pc, reg);
*ri = *reg;
+ reg->indirect[0] = reg->indirect[1] = -1;
reg->mod = 0;
}
return ri;
@@ -464,6 +477,12 @@ is_join(struct nv50_program_exec *e)
return FALSE;
}
+static INLINE boolean
+is_control_flow(struct nv50_program_exec *e)
+{
+ return (e->inst[0] & 2);
+}
+
static INLINE void
set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
struct nv50_program_exec *e)
@@ -525,11 +544,33 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
static INLINE void
set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
{
+ assert(a->type == P_ADDR);
+
assert(!(e->inst[0] & 0x0c000000));
assert(!(e->inst[1] & 0x00000004));
e->inst[0] |= (a->hw & 3) << 26;
- e->inst[1] |= (a->hw >> 2) << 2;
+ e->inst[1] |= a->hw & 4;
+}
+
+static void
+emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t);
+
+static void
+emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int);
+
+static void
+emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[1] = 0x40000000;
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_addr(e, src);
+
+ emit(pc, e);
}
static void
@@ -548,72 +589,6 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
emit(pc, e);
}
-static struct nv50_reg *
-alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
-{
- struct nv50_reg *a_tgsi = NULL, *a = NULL;
- int i;
- uint8_t avail = ~pc->addr_alloc;
-
- if (!ref) {
- /* allocate for TGSI_FILE_ADDRESS */
- while (avail) {
- i = ffs(avail) - 1;
-
- if (pc->r_addr[i].rhw < 0 ||
- pc->r_addr[i].acc != pc->insn_cur) {
- pc->addr_alloc |= (1 << i);
-
- pc->r_addr[i].rhw = -1;
- pc->r_addr[i].index = i;
- return &pc->r_addr[i];
- }
- avail &= ~(1 << i);
- }
- assert(0);
- return NULL;
- }
-
- /* Allocate and set an address reg so we can access 'ref'.
- *
- * If and r_addr->index will be -1 or the hw index the value
- * value in rhw is relative to. If rhw < 0, the reg has not
- * been initialized or is in use for TGSI_FILE_ADDRESS.
- */
- while (avail) { /* only consider regs that are not TGSI */
- i = ffs(avail) - 1;
- avail &= ~(1 << i);
-
- if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) {
- /* prefer an usused reg with low hw index */
- a = &pc->r_addr[i];
- continue;
- }
- if (!a && pc->r_addr[i].acc != pc->insn_cur)
- a = &pc->r_addr[i];
-
- if (ref->hw - pc->r_addr[i].rhw >= 128)
- continue;
-
- if ((ref->acc >= 0 && pc->r_addr[i].index < 0) ||
- (ref->acc < 0 && pc->r_addr[i].index == ref->index)) {
- pc->r_addr[i].acc = pc->insn_cur;
- return &pc->r_addr[i];
- }
- }
- assert(a);
-
- if (ref->acc < 0)
- a_tgsi = pc->addr[ref->index];
-
- emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4);
-
- a->rhw = ref->hw & ~0x7f;
- a->acc = pc->insn_cur;
- a->index = a_tgsi ? ref->index : -1;
- return a;
-}
-
#define INTERP_LINEAR 0
#define INTERP_FLAT 1
#define INTERP_PERSPECTIVE 2
@@ -657,15 +632,15 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
e->param.shift = s;
e->param.mask = m << (s % 32);
- if (src->hw > 127)
- set_addr(e, alloc_addr(pc, src));
+ if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */
+ set_addr(e, get_address_reg(pc, src));
else
if (src->acc < 0) {
assert(src->type == P_CONST);
- set_addr(e, pc->addr[src->index]);
+ set_addr(e, pc->addr[src->indirect[0]]);
}
- e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
+ e->inst[1] |= (src->buf_index << 22);
}
/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
@@ -694,6 +669,12 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
if (src->type == P_ATTR) {
set_long(pc, e);
e->inst[1] |= 0x00200000;
+
+ if (src->vtx >= 0) {
+ /* indirect (vertex base + c) load from p[] */
+ e->inst[0] |= 0x01800000;
+ set_addr(e, get_address_reg(pc, src));
+ }
}
alloc_reg(pc, src);
@@ -808,6 +789,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
if (src->type == P_ATTR) {
set_long(pc, e);
e->inst[1] |= 0x00200000;
+
+ if (src->vtx >= 0) {
+ e->inst[0] |= 0x01800000; /* src from p[] */
+ set_addr(e, get_address_reg(pc, src));
+ }
} else
if (src->type == P_CONST || src->type == P_IMMD) {
struct nv50_reg *temp = temp_temp(pc, e);
@@ -832,13 +818,13 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
src = temp;
} else
if (src->type == P_CONST || src->type == P_IMMD) {
- assert(!(e->inst[0] & 0x00800000));
- if (e->inst[0] & 0x01000000) {
+ if (e->inst[0] & 0x01800000) {
struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
} else {
+ assert(!(e->inst[0] & 0x00800000));
set_data(pc, src, 0x7f, 16, e);
e->inst[0] |= 0x00800000;
}
@@ -862,13 +848,13 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
src = temp;
} else
if (src->type == P_CONST || src->type == P_IMMD) {
- assert(!(e->inst[0] & 0x01000000));
- if (e->inst[0] & 0x00800000) {
+ if (e->inst[0] & 0x01800000) {
struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
} else {
+ assert(!(e->inst[0] & 0x01000000));
set_data(pc, src, 0x7f, 32+14, e);
e->inst[0] |= 0x01000000;
}
@@ -997,11 +983,125 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
e->inst[0] |= dst->hw << 2;
e->inst[0] |= s << 16; /* shift left */
- set_src_0_restricted(pc, src, e);
+ set_src_0(pc, src, e);
emit(pc, e);
}
+static boolean
+address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r)
+{
+ if (!r)
+ return FALSE;
+
+ if (r->vtx != a->vtx)
+ return FALSE;
+ if (r->vtx >= 0)
+ return (r->indirect[1] == a->indirect[1]);
+
+ if (r->hw < a->rhw || (r->hw - a->rhw) >= 128)
+ return FALSE;
+
+ if (a->index >= 0)
+ return (a->index == r->indirect[0]);
+ return (a->indirect[0] == r->indirect[0]);
+}
+
+static void
+load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *a, int shift)
+{
+ struct nv50_reg mem, *temp;
+
+ ctor_reg(&mem, P_ATTR, -1, dst->vtx);
+
+ assert(dst->type == P_ADDR);
+ if (!a) {
+ emit_arl(pc, dst, &mem, 0);
+ return;
+ }
+ temp = alloc_temp(pc, NULL);
+
+ if (shift) {
+ emit_mov_from_addr(pc, temp, a);
+ if (shift < 0)
+ emit_shl_imm(pc, temp, temp, shift);
+ emit_arl(pc, dst, temp, MAX2(shift, 0));
+ }
+ emit_mov(pc, temp, &mem);
+ set_addr(pc->p->exec_tail, dst);
+
+ emit_arl(pc, dst, temp, 0);
+ free_temp(pc, temp);
+}
+
+/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS
+ * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX
+ * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX
+ * case (vtx < 0, acc >= 0): memory address too high to encode
+ * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS
+ */
+static struct nv50_reg *
+get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref)
+{
+ int i;
+ struct nv50_reg *a_ref, *a = NULL;
+
+ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
+ if (pc->r_addr[i].acc == 0)
+ a = &pc->r_addr[i]; /* an unused address reg */
+ else
+ if (address_reg_suitable(&pc->r_addr[i], ref)) {
+ pc->r_addr[i].acc = pc->insn_cur;
+ return &pc->r_addr[i];
+ } else
+ if (!a && pc->r_addr[i].index < 0 &&
+ pc->r_addr[i].acc < pc->insn_cur)
+ a = &pc->r_addr[i];
+ }
+ if (!a) {
+ /* We'll be able to spill address regs when this
+ * mess is replaced with a proper compiler ...
+ */
+ NOUVEAU_ERR("out of address regs\n");
+ abort();
+ return NULL;
+ }
+
+ /* initialize and reserve for this TGSI instruction */
+ a->rhw = 0;
+ a->index = a->indirect[0] = a->indirect[1] = -1;
+ a->acc = pc->insn_cur;
+
+ if (!ref) {
+ a->vtx = -1;
+ return a;
+ }
+ a->vtx = ref->vtx;
+
+ /* now put in the correct value ... */
+
+ if (ref->vtx >= 0) {
+ a->indirect[1] = ref->indirect[1];
+
+ /* For an indirect vertex index, we need to shift address right
+ * by 2, the address register will contain vtx * 16, we need to
+ * load from a[vtx * 4].
+ */
+ load_vertex_base(pc, a, (ref->acc < 0) ?
+ pc->addr[ref->indirect[1]] : NULL, -2);
+ } else {
+ assert(ref->acc < 0 || ref->indirect[0] < 0);
+
+ a->rhw = ref->hw & ~0x7f;
+ a->indirect[0] = ref->indirect[0];
+ a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL;
+
+ emit_add_addr_imm(pc, a, a_ref, a->rhw * 4);
+ }
+ return a;
+}
+
#define NV50_MAX_F32 0x880
#define NV50_MAX_S32 0x08c
#define NV50_MAX_U32 0x084
@@ -1629,6 +1729,18 @@ emit_ret(struct nv50_pc *pc, int pred, unsigned cc)
emit_control_flow(pc, 0x3, pred, cc);
}
+static void
+emit_prim_cmd(struct nv50_pc *pc, unsigned cmd)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xf0000000 | (cmd << 9);
+ e->inst[1] = 0xc0000000;
+ set_long(pc, e);
+
+ emit(pc, e);
+}
+
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
@@ -2171,14 +2283,19 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c];
if (!r) {
- r = alloc_addr(pc, NULL);
- pc->addr[dst->Register.Index * 4 + c] = r;
+ r = get_address_reg(pc, NULL);
+ r->index = dst->Register.Index * 4 + c;
+ pc->addr[r->index] = r;
}
assert(r);
return r;
}
case TGSI_FILE_NULL:
return NULL;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(pc->sysval[dst->Register.Index].type == P_RESULT);
+ assert(c == 0);
+ return &pc->sysval[dst->Register.Index];
default:
break;
}
@@ -2208,6 +2325,18 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
switch (src->Register.File) {
case TGSI_FILE_INPUT:
r = &pc->attr[src->Register.Index * 4 + c];
+
+ if (!src->Dimension.Dimension)
+ break;
+ r = reg_instance(pc, r);
+ r->vtx = src->Dimension.Index;
+
+ if (!src->Dimension.Indirect)
+ break;
+ swz = tgsi_util_get_src_register_swizzle(
+ &src->DimIndirect, 0);
+ r->acc = -1;
+ r->indirect[1] = src->DimIndirect.Index * 4 + swz;
break;
case TGSI_FILE_TEMPORARY:
r = &pc->temp[src->Register.Index * 4 + c];
@@ -2221,12 +2350,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
* use the index field to select the address reg.
*/
r = reg_instance(pc, NULL);
+ ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c);
+
swz = tgsi_util_get_src_register_swizzle(
- &src->Indirect, 0);
- ctor_reg(r, P_CONST,
- src->Indirect.Index * 4 + swz,
- src->Register.Index * 4 + c);
+ &src->Indirect, 0);
r->acc = -1;
+ r->indirect[0] = src->Indirect.Index * 4 + swz;
break;
case TGSI_FILE_IMMEDIATE:
r = &pc->immd[src->Register.Index * 4 + c];
@@ -2237,6 +2366,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
r = pc->addr[src->Register.Index * 4 + c];
assert(r);
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(c == 0);
+ r = &pc->sysval[src->Register.Index];
+ break;
default:
assert(0);
break;
@@ -2273,7 +2406,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
r->mod |= mod & NV50_MOD_I32;
assert(r);
- if (r->acc >= 0 && r != temp)
+ if (r->acc >= 0 && r->vtx < 0 && r != temp)
return reg_instance(pc, r); /* will clear r->mod */
return r;
}
@@ -2495,10 +2628,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
break;
case TGSI_OPCODE_ARL:
- assert(src[0][0]);
temp = temp_temp(pc, NULL);
- emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32);
- emit_arl(pc, dst[0], temp, 4);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, temp, src[0][c], -1,
+ CVT_FLOOR | CVT_S32_F32);
+ emit_arl(pc, dst[c], temp, 4);
+ }
break;
case TGSI_OPCODE_BGNLOOP:
pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc);
@@ -2605,6 +2742,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_EMIT:
+ emit_prim_cmd(pc, 1);
+ break;
case TGSI_OPCODE_ENDIF:
pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
@@ -2628,8 +2768,12 @@ nv50_program_tx_insn(struct nv50_pc *pc,
pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_ENDPRIM:
+ emit_prim_cmd(pc, 2);
+ break;
case TGSI_OPCODE_ENDSUB:
assert(pc->in_subroutine);
+ terminate_mbb(pc);
pc->in_subroutine = FALSE;
break;
case TGSI_OPCODE_EX2:
@@ -3028,10 +3172,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
if (!is_long(pc->p->exec_tail))
convert_to_long(pc, pc->p->exec_tail);
else
- if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail))
+ if (is_immd(pc->p->exec_tail) ||
+ is_join(pc->p->exec_tail) ||
+ is_control_flow(pc->p->exec_tail))
emit_nop(pc);
pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
+
+ terminate_mbb(pc);
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
@@ -3327,17 +3475,53 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
* value of 0 for back-facing, and 0xffffffff for front-facing.
*/
static void
-load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a)
+load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv)
+{
+ struct nv50_reg *temp = alloc_temp(pc, NULL);
+ int r_pred = 0;
+
+ temp->rhw = 255;
+ emit_interp(pc, temp, NULL, INTERP_FLAT);
+
+ emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32);
+
+ emit_not(pc, temp, temp);
+ set_pred(pc, 0x2, r_pred, pc->p->exec_tail);
+ emit_cvt(pc, sv, temp, -1, CVT_F32_S32);
+ set_pred(pc, 0x2, r_pred, pc->p->exec_tail);
+
+ free_temp(pc, temp);
+}
+
+static void
+load_instance_id(struct nv50_pc *pc, unsigned index)
{
- struct nv50_reg *one = alloc_immd(pc, 1.0f);
+ struct nv50_reg reg, mem;
- assert(a->rhw == -1);
- alloc_reg(pc, a); /* do this before rhw is set */
- a->rhw = 255;
- load_interpolant(pc, a);
- emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND);
+ ctor_reg(&reg, P_TEMP, -1, -1);
+ ctor_reg(&mem, P_CONST, -1, 24); /* startInstance */
+ mem.buf_index = 2;
- FREE(one);
+ emit_add_b32(pc, &reg, &pc->sysval[index], &mem);
+ pc->sysval[index] = reg;
+}
+
+static void
+copy_semantic_info(struct nv50_program *p)
+{
+ unsigned i, id;
+
+ for (i = 0; i < p->cfg.in_nr; ++i) {
+ id = p->cfg.in[i].id;
+ p->cfg.in[i].sn = p->info.input_semantic_name[id];
+ p->cfg.in[i].si = p->info.input_semantic_index[id];
+ }
+
+ for (i = 0; i < p->cfg.out_nr; ++i) {
+ id = p->cfg.out[i].id;
+ p->cfg.out[i].sn = p->info.output_semantic_name[id];
+ p->cfg.out[i].si = p->info.output_semantic_index[id];
+ }
}
static boolean
@@ -3346,7 +3530,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
struct tgsi_parse_context tp;
struct nv50_program *p = pc->p;
boolean ret = FALSE;
- unsigned i, c, flat_nr = 0;
+ unsigned i, c, instance_id, vertex_id, flat_nr = 0;
tgsi_parse_init(&tp, pc->p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&tp)) {
@@ -3386,13 +3570,13 @@ nv50_program_tx_prep(struct nv50_pc *pc)
switch (d->Semantic.Name) {
case TGSI_SEMANTIC_BCOLOR:
p->cfg.two_side[si].hw = first;
- if (p->cfg.io_nr > first)
- p->cfg.io_nr = first;
+ if (p->cfg.out_nr > first)
+ p->cfg.out_nr = first;
break;
case TGSI_SEMANTIC_PSIZE:
p->cfg.psiz = first;
- if (p->cfg.io_nr > first)
- p->cfg.io_nr = first;
+ if (p->cfg.out_nr > first)
+ p->cfg.out_nr = first;
break;
case TGSI_SEMANTIC_EDGEFLAG:
pc->edgeflag_out = first;
@@ -3432,6 +3616,37 @@ nv50_program_tx_prep(struct nv50_pc *pc)
pc->interp_mode[i] = mode;
}
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(d->Declaration.Semantic);
+ switch (d->Semantic.Name) {
+ case TGSI_SEMANTIC_FACE:
+ assert(p->type == PIPE_SHADER_FRAGMENT);
+ load_frontfacing(pc,
+ &pc->sysval[first]);
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ assert(p->type == PIPE_SHADER_VERTEX);
+ instance_id = first;
+ p->cfg.regs[0] |= (1 << 4);
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ assert(p->type != PIPE_SHADER_VERTEX);
+ p->cfg.prim_id = first;
+ break;
+ /*
+ case TGSI_SEMANTIC_PRIMIDIN:
+ assert(p->type == PIPE_SHADER_GEOMETRY);
+ pc->sysval[first].hw = 6;
+ p->cfg.regs[0] |= (1 << 8);
+ break;
+ case TGSI_SEMANTIC_VERTEXID:
+ assert(p->type == PIPE_SHADER_VERTEX);
+ vertex_id = first;
+ p->cfg.regs[0] |= (1 << 12) | (1 << 0);
+ break;
+ */
+ }
+ break;
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_SAMPLER:
@@ -3452,68 +3667,98 @@ nv50_program_tx_prep(struct nv50_pc *pc)
}
}
- if (p->type == PIPE_SHADER_VERTEX) {
+ if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) {
int rid = 0;
- for (i = 0; i < pc->attr_nr * 4; ++i) {
- if (pc->attr[i].acc) {
- pc->attr[i].hw = rid++;
- p->cfg.attr[i / 32] |= 1 << (i % 32);
+ if (p->type == PIPE_SHADER_GEOMETRY) {
+ for (i = 0; i < pc->attr_nr; ++i) {
+ p->cfg.in[i].hw = rid;
+ p->cfg.in[i].id = i;
+
+ for (c = 0; c < 4; ++c) {
+ int n = i * 4 + c;
+ if (!pc->attr[n].acc)
+ continue;
+ pc->attr[n].hw = rid++;
+ p->cfg.in[i].mask |= 1 << c;
+ }
+ }
+ } else {
+ for (i = 0; i < pc->attr_nr * 4; ++i) {
+ if (pc->attr[i].acc) {
+ pc->attr[i].hw = rid++;
+ p->cfg.attr[i / 32] |= 1 << (i % 32);
+ }
+ }
+ if (p->cfg.regs[0] & (1 << 0))
+ pc->sysval[vertex_id].hw = rid++;
+ if (p->cfg.regs[0] & (1 << 4)) {
+ pc->sysval[instance_id].hw = rid++;
+ load_instance_id(pc, instance_id);
}
}
for (i = 0, rid = 0; i < pc->result_nr; ++i) {
- p->cfg.io[i].hw = rid;
- p->cfg.io[i].id = i;
+ p->cfg.out[i].hw = rid;
+ p->cfg.out[i].id = i;
for (c = 0; c < 4; ++c) {
int n = i * 4 + c;
if (!pc->result[n].acc)
continue;
pc->result[n].hw = rid++;
- p->cfg.io[i].mask |= 1 << c;
+ p->cfg.out[i].mask |= 1 << c;
}
}
+ if (p->cfg.prim_id < 0x40) {
+ /* GP has to write to PrimitiveID */
+ ctor_reg(&pc->sysval[p->cfg.prim_id],
+ P_RESULT, p->cfg.prim_id, rid);
+ p->cfg.prim_id = rid++;
+ }
for (c = 0; c < 2; ++c)
if (p->cfg.two_side[c].hw < 0x40)
- p->cfg.two_side[c] = p->cfg.io[
+ p->cfg.two_side[c] = p->cfg.out[
p->cfg.two_side[c].hw];
if (p->cfg.psiz < 0x40)
- p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw;
+ p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw;
+
+ copy_semantic_info(p);
} else
if (p->type == PIPE_SHADER_FRAGMENT) {
- int rid, aid;
+ int rid, aid, base;
unsigned n = 0, m = pc->attr_nr - flat_nr;
pc->allow32 = TRUE;
- int base = (TGSI_SEMANTIC_POSITION ==
- p->info.input_semantic_name[0]) ? 0 : 1;
+ base = (TGSI_SEMANTIC_POSITION ==
+ p->info.input_semantic_name[0]) ? 0 : 1;
/* non-flat interpolants have to be mapped to
* the lower hardware IDs, so sort them:
*/
for (i = 0; i < pc->attr_nr; i++) {
if (pc->interp_mode[i] == INTERP_FLAT)
- p->cfg.io[m++].id = i;
+ p->cfg.in[m++].id = i;
else {
if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE))
- p->cfg.io[n].linear = TRUE;
- p->cfg.io[n++].id = i;
+ p->cfg.in[n].linear = TRUE;
+ p->cfg.in[n++].id = i;
}
}
+ copy_semantic_info(p);
if (!base) /* set w-coordinate mask from perspective interp */
- p->cfg.io[0].mask |= p->cfg.regs[1] >> 24;
+ p->cfg.in[0].mask |= p->cfg.regs[1] >> 24;
aid = popcnt4( /* if fcrd isn't contained in cfg.io */
- base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask);
+ base ? (p->cfg.regs[1] >> 24) : p->cfg.in[0].mask);
for (n = 0; n < pc->attr_nr; ++n) {
- p->cfg.io[n].hw = rid = aid;
- i = p->cfg.io[n].id;
+ p->cfg.in[n].hw = rid = aid;
+ i = p->cfg.in[n].id;
if (p->info.input_semantic_name[n] ==
TGSI_SEMANTIC_FACE) {
@@ -3525,15 +3770,15 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (!pc->attr[i * 4 + c].acc)
continue;
pc->attr[i * 4 + c].rhw = rid++;
- p->cfg.io[n].mask |= 1 << c;
+ p->cfg.in[n].mask |= 1 << c;
load_interpolant(pc, &pc->attr[i * 4 + c]);
}
- aid += popcnt4(p->cfg.io[n].mask);
+ aid += popcnt4(p->cfg.in[n].mask);
}
if (!base)
- p->cfg.regs[1] |= p->cfg.io[0].mask << 24;
+ p->cfg.regs[1] |= p->cfg.in[0].mask << 24;
m = popcnt4(p->cfg.regs[1] >> 24);
@@ -3543,32 +3788,33 @@ nv50_program_tx_prep(struct nv50_pc *pc)
p->cfg.regs[1] |= aid - m;
if (flat_nr) {
- i = p->cfg.io[pc->attr_nr - flat_nr].hw;
+ i = p->cfg.in[pc->attr_nr - flat_nr].hw;
p->cfg.regs[1] |= (i - m) << 16;
} else
p->cfg.regs[1] |= p->cfg.regs[1] << 16;
/* mark color semantic for light-twoside */
- n = 0x40;
- for (i = 0; i < pc->attr_nr; i++) {
- ubyte si, sn;
-
- sn = p->info.input_semantic_name[p->cfg.io[i].id];
- si = p->info.input_semantic_index[p->cfg.io[i].id];
-
- if (sn == TGSI_SEMANTIC_COLOR) {
- p->cfg.two_side[si] = p->cfg.io[i];
-
- /* increase colour count */
- p->cfg.regs[0] += popcnt4(
- p->cfg.two_side[si].mask) << 16;
-
- n = MIN2(n, p->cfg.io[i].hw - m);
+ n = 0x80;
+ for (i = 0; i < p->cfg.in_nr; i++) {
+ if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) {
+ n = MIN2(n, p->cfg.in[i].hw - m);
+ p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i];
+
+ p->cfg.regs[0] += /* increase colour count */
+ popcnt4(p->cfg.in[i].mask) << 16;
}
}
- if (n < 0x40)
+ if (n < 0x80)
p->cfg.regs[0] += n;
+ if (p->cfg.prim_id < 0x40) {
+ pc->sysval[p->cfg.prim_id].rhw = rid++;
+ emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL,
+ INTERP_FLAT);
+ /* increase FP_INTERPOLANT_CTRL_COUNT */
+ p->cfg.regs[1] += 1;
+ }
+
/* Initialize FP results:
* FragDepth is always first TGSI and last hw output
*/
@@ -3622,10 +3868,31 @@ free_nv50_pc(struct nv50_pc *pc)
FREE(pc->attr);
if (pc->temp)
FREE(pc->temp);
+ if (pc->sysval)
+ FREE(pc->sysval);
+ if (pc->insn_pos)
+ FREE(pc->insn_pos);
FREE(pc);
}
+static INLINE uint32_t
+nv50_map_gs_output_prim(unsigned pprim)
+{
+ switch (pprim) {
+ case PIPE_PRIM_POINTS:
+ return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
+ case PIPE_PRIM_LINE_STRIP:
+ return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
+ default:
+ NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim);
+ abort();
+ return 0;
+ }
+}
+
static boolean
ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
{
@@ -3639,25 +3906,55 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
assert(pc->addr_nr <= 2);
+ pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
p->cfg.high_temp = 4;
p->cfg.two_side[0].hw = 0x40;
p->cfg.two_side[1].hw = 0x40;
+ p->cfg.prim_id = 0x40;
p->cfg.edgeflag_in = pc->edgeflag_out = 0xff;
+ for (i = 0; i < p->info.num_properties; ++i) {
+ unsigned *data = &p->info.properties[i].data[0];
+
+ switch (p->info.properties[i].name) {
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ p->cfg.prim_type = nv50_map_gs_output_prim(data[0]);
+ break;
+ case TGSI_PROPERTY_GS_MAX_VERTICES:
+ p->cfg.vert_count = data[0];
+ break;
+ default:
+ break;
+ }
+ }
+
switch (p->type) {
case PIPE_SHADER_VERTEX:
p->cfg.psiz = 0x40;
p->cfg.clpd = 0x40;
- p->cfg.io_nr = pc->result_nr;
+ p->cfg.out_nr = pc->result_nr;
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ assert(p->cfg.prim_type);
+ assert(p->cfg.vert_count);
+
+ p->cfg.psiz = 0x80;
+ p->cfg.clpd = 0x80;
+ p->cfg.prim_id = 0x80;
+ p->cfg.out_nr = pc->result_nr;
+ p->cfg.in_nr = pc->attr_nr;
+
+ p->cfg.two_side[0].hw = 0x80;
+ p->cfg.two_side[1].hw = 0x80;
break;
case PIPE_SHADER_FRAGMENT:
rtype[0] = rtype[1] = P_TEMP;
p->cfg.regs[0] = 0x01000004;
- p->cfg.io_nr = pc->attr_nr;
+ p->cfg.in_nr = pc->attr_nr;
if (p->info.writes_z) {
p->cfg.regs[2] |= 0x00000100;
@@ -3715,7 +4012,16 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
return FALSE;
}
for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
- ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1);
+ ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
+
+ if (pc->sysval_nr) {
+ pc->sysval = CALLOC(pc->sysval_nr, sizeof(struct nv50_reg *));
+ if (!pc->sysval)
+ return FALSE;
+ /* will only ever use SYSTEM_VALUE[i].x (hopefully) */
+ for (i = 0; i < pc->sysval_nr; ++i)
+ ctor_reg(&pc->sysval[i], rtype[0], i, -1);
+ }
return TRUE;
}
@@ -3877,13 +4183,17 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
if (p->param_nr) {
unsigned cb;
- uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+ uint32_t *map = pipe_buffer_map(pscreen,
+ nv50->constbuf[p->type],
PIPE_BUFFER_USAGE_CPU_READ);
-
- if (p->type == PIPE_SHADER_VERTEX)
+ switch (p->type) {
+ case PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break;
+ case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break;
+ default:
cb = NV50_CB_PVP;
- else
- cb = NV50_CB_PFP;
+ assert(p->type == PIPE_SHADER_VERTEX);
+ break;
+ }
nv50_program_upload_data(nv50, map, 0, p->param_nr, cb);
pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]);
@@ -3977,19 +4287,18 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(5, 8, 2);
+ so = so_new(5, 7, 2);
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
+ NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
+ NOUVEAU_BO_LOW, 0, 0);
so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
so_data (so, p->cfg.attr[0]);
so_data (so, p->cfg.attr[1]);
so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2);
- so_data (so, p->cfg.high_result); //8);
+ so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_VP_START_ID, 1);
so_data (so, 0); /* program start offset */
@@ -4033,42 +4342,74 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_ref(NULL, &so);
}
+void
+nv50_geomprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *p = nv50->geomprog;
+ struct nouveau_stateobj *so;
+
+ if (!p->translated) {
+ nv50_program_validate(nv50, p);
+ if (!p->translated)
+ assert(0);
+ }
+
+ nv50_program_validate_data(nv50, p);
+ nv50_program_validate_code(nv50, p);
+
+ so = so_new(6, 7, 2);
+ so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
+ so_data (so, p->cfg.high_temp);
+ so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
+ so_data (so, p->cfg.high_result);
+ so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
+ so_data (so, p->cfg.prim_type);
+ so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
+ so_data (so, p->cfg.vert_count);
+ so_method(so, tesla, NV50TCL_GP_START_ID, 1);
+ so_data (so, 0);
+ so_ref(so, &nv50->state.geomprog);
+ so_ref(NULL, &so);
+}
+
static uint32_t
nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
{
+ struct nv50_program *vp;
struct nv50_program *fp = nv50->fragprog;
- struct nv50_program *vp = nv50->vertprog;
unsigned i, c, m = base;
uint32_t origin = 0x00000010;
+ vp = nv50->geomprog ? nv50->geomprog : nv50->vertprog;
+
/* XXX: this might not work correctly in all cases yet - we'll
* just assume that an FP generic input that is not written in
* the VP is PointCoord.
*/
memset(pntc, 0, 8 * sizeof(uint32_t));
- for (i = 0; i < fp->cfg.io_nr; i++) {
- uint8_t sn, si;
- uint8_t j, k = fp->cfg.io[i].id;
- unsigned n = popcnt4(fp->cfg.io[i].mask);
+ for (i = 0; i < fp->cfg.in_nr; i++) {
+ unsigned j, n = popcnt4(fp->cfg.in[i].mask);
- if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) {
+ if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) {
m += n;
continue;
}
- for (j = 0; j < vp->info.num_outputs; ++j) {
- sn = vp->info.output_semantic_name[j];
- si = vp->info.output_semantic_index[j];
-
- if (sn == fp->info.input_semantic_name[k] &&
- si == fp->info.input_semantic_index[k])
+ for (j = 0; j < vp->cfg.out_nr; ++j)
+ if (vp->cfg.out[j].sn == fp->cfg.in[i].sn &&
+ vp->cfg.out[j].si == fp->cfg.in[i].si)
break;
- }
- if (j < vp->info.num_outputs) {
- ubyte mode =
- nv50->rasterizer->pipe.sprite_coord_mode[si];
+ if (j < vp->cfg.out_nr) {
+ ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[
+ vp->cfg.out[j].si];
if (mode == PIPE_SPRITE_COORD_NONE) {
m += n;
@@ -4080,7 +4421,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
/* this is either PointCoord or replaced by sprite coords */
for (c = 0; c < 4; c++) {
- if (!(fp->cfg.io[i].mask & (1 << c)))
+ if (!(fp->cfg.in[i].mask & (1 << c)))
continue;
pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
++m;
@@ -4090,18 +4431,22 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
}
static int
-nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4],
- struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo)
+nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
+ struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo)
{
int c;
uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw;
- uint8_t *map = (uint8_t *)p_map;
+ uint8_t *map = (uint8_t *)map32;
for (c = 0; c < 4; ++c) {
if (mf & 1) {
if (fpi->linear == TRUE)
lin[mid / 32] |= 1 << (mid % 32);
- map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40);
+ if (mv & 1)
+ map[mid] = oid;
+ else
+ map[mid] = (c == 3) ? (zval + 1) : zval;
+ ++mid;
}
oid += mv & 1;
@@ -4113,34 +4458,42 @@ nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4],
}
void
-nv50_linkage_validate(struct nv50_context *nv50)
+nv50_fp_linkage_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program *vp = nv50->vertprog;
struct nv50_program *fp = nv50->fragprog;
struct nouveau_stateobj *so;
- struct nv50_sreg4 dummy, *vpo;
+ struct nv50_sreg4 dummy;
int i, n, c, m = 0;
- uint32_t map[16], lin[4], reg[5], pcrd[8];
+ uint32_t map[16], lin[4], reg[6], pcrd[8];
+ uint8_t zval = 0x40;
+ if (nv50->geomprog) {
+ vp = nv50->geomprog;
+ zval = 0x80;
+ }
memset(map, 0, sizeof(map));
memset(lin, 0, sizeof(lin));
reg[1] = 0x00000004; /* low and high clip distance map ids */
reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */
reg[3] = 0x00000000; /* point size map id & enable */
+ reg[5] = 0x00000000; /* primitive ID map slot */
reg[0] = fp->cfg.regs[0]; /* colour semantic reg */
reg[4] = fp->cfg.regs[1]; /* interpolant info */
dummy.linear = FALSE;
dummy.mask = 0xf; /* map all components of HPOS */
- m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]);
+ m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]);
dummy.mask = 0x0;
if (vp->cfg.clpd < 0x40) {
- for (c = 0; c < vp->cfg.clpd_nr; ++c)
- map[m++] = vp->cfg.clpd + c;
+ for (c = 0; c < vp->cfg.clpd_nr; ++c) {
+ map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8);
+ ++m;
+ }
reg[1] = (m << 8);
}
@@ -4148,35 +4501,37 @@ nv50_linkage_validate(struct nv50_context *nv50)
/* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
if (nv50->rasterizer->pipe.light_twoside) {
- vpo = &vp->cfg.two_side[0];
+ struct nv50_sreg4 *vpo = &vp->cfg.two_side[0];
+ struct nv50_sreg4 *fpi = &fp->cfg.two_side[0];
- m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]);
- m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]);
+ m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]);
+ m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]);
}
reg[0] += m - 4; /* adjust FFC0 id */
reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */
- for (i = 0; i < fp->cfg.io_nr; i++) {
- ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id];
- ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id];
-
- /* position must be mapped first */
- assert(i == 0 || sn != TGSI_SEMANTIC_POSITION);
-
+ for (i = 0; i < fp->cfg.in_nr; i++) {
/* maybe even remove these from cfg.io */
- if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE)
+ if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION ||
+ fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE)
continue;
- /* VP outputs and vp->cfg.io are in the same order */
- for (n = 0; n < vp->info.num_outputs; ++n) {
- if (vp->info.output_semantic_name[n] == sn &&
- vp->info.output_semantic_index[n] == si)
+ for (n = 0; n < vp->cfg.out_nr; ++n)
+ if (vp->cfg.out[n].sn == fp->cfg.in[i].sn &&
+ vp->cfg.out[n].si == fp->cfg.in[i].si)
break;
- }
- vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy;
- m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo);
+ m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i],
+ (n < vp->cfg.out_nr) ?
+ &vp->cfg.out[n] : &dummy);
+ }
+ /* PrimitiveID either is replaced by the system value, or
+ * written by the geometry shader into an output register
+ */
+ if (fp->cfg.prim_id < 0x40) {
+ map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8);
+ reg[5] = m++;
}
if (nv50->rasterizer->pipe.point_size_per_vertex) {
@@ -4184,14 +4539,28 @@ nv50_linkage_validate(struct nv50_context *nv50)
reg[3] = (m++ << 4) | 1;
}
- /* now fill the stateobj */
- so = so_new(7, 57, 0);
+ /* now fill the stateobj (at most 28 so_data) */
+ so = so_new(10, 54, 0);
n = (m + 3) / 4;
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
- so_datap (so, map, n);
+ assert(m <= 32);
+ if (vp->type == PIPE_SHADER_GEOMETRY) {
+ so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+ so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
+ so_datap (so, map, n);
+ } else {
+ so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
+ so_data (so, vp->cfg.regs[0]);
+
+ so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
+ so_data (so, reg[5]);
+
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
+ so_datap (so, map, n);
+ }
so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
so_datap (so, reg, 4);
@@ -4211,8 +4580,77 @@ nv50_linkage_validate(struct nv50_context *nv50)
so_datap (so, pcrd, 8);
}
- so_ref(so, &nv50->state.programs);
- so_ref(NULL, &so);
+ so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
+ so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
+
+ so_ref(so, &nv50->state.fp_linkage);
+ so_ref(NULL, &so);
+}
+
+static int
+construct_vp_gp_mapping(uint32_t *map32, int m,
+ struct nv50_program *vp, struct nv50_program *gp)
+{
+ uint8_t *map = (uint8_t *)map32;
+ int i, j, c;
+
+ for (i = 0; i < gp->cfg.in_nr; ++i) {
+ uint8_t oid, mv = 0, mg = gp->cfg.in[i].mask;
+
+ for (j = 0; j < vp->cfg.out_nr; ++j) {
+ if (vp->cfg.out[j].sn == gp->cfg.in[i].sn &&
+ vp->cfg.out[j].si == gp->cfg.in[i].si) {
+ mv = vp->cfg.out[j].mask;
+ oid = vp->cfg.out[j].hw;
+ break;
+ }
+ }
+
+ for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
+ if (mg & mv & 1)
+ map[m++] = oid;
+ else
+ if (mg & 1)
+ map[m++] = (c == 3) ? 0x41 : 0x40;
+ oid += mv & 1;
+ }
+ }
+ return m;
+}
+
+void
+nv50_gp_linkage_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ struct nv50_program *vp = nv50->vertprog;
+ struct nv50_program *gp = nv50->geomprog;
+ uint32_t map[16];
+ int m = 0;
+
+ if (!gp) {
+ so_ref(NULL, &nv50->state.gp_linkage);
+ return;
+ }
+ memset(map, 0, sizeof(map));
+
+ m = construct_vp_gp_mapping(map, m, vp, gp);
+
+ so = so_new(3, 24 - 3, 0);
+
+ so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
+ so_data (so, vp->cfg.regs[0] | gp->cfg.regs[0]);
+
+ assert(m <= 32);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+
+ m = (m + 3) / 4;
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
+ so_datap (so, map, m);
+
+ so_ref(so, &nv50->state.gp_linkage);
+ so_ref(NULL, &so);
}
void
@@ -4229,6 +4667,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
nouveau_bo_ref(NULL, &p->bo);
+ FREE(p->immd);
nouveau_resource_free(&p->data[0]);
p->translated = 0;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 461fec1d89..1e3ad6bff0 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -16,11 +16,13 @@ struct nv50_program_exec {
};
struct nv50_sreg4 {
- uint8_t hw;
- uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */
+ uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
+ uint8_t id; /* tgsi index */
uint8_t mask;
boolean linear;
+
+ ubyte sn, si; /* semantic name & index */
};
struct nv50_program {
@@ -49,16 +51,24 @@ struct nv50_program {
uint32_t regs[4];
/* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */
- unsigned io_nr;
- struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS];
+ unsigned in_nr, out_nr;
+ struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS];
+ struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS];
/* FP colour inputs, VP/GP back colour outputs */
struct nv50_sreg4 two_side[2];
- /* VP only */
+ /* GP only */
+ unsigned vert_count;
+ uint8_t prim_type;
+
+ /* VP & GP only */
uint8_t clpd, clpd_nr;
uint8_t psiz;
uint8_t edgeflag_in;
+
+ /* FP & GP only */
+ uint8_t prim_id;
} cfg;
};
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 28e2b35dea..9d58f3c965 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -167,7 +167,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
struct nv50_screen *screen = nv50_screen(pscreen);
unsigned i;
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < 3; i++) {
if (screen->constbuf_parm[i])
nouveau_bo_ref(NULL, &screen->constbuf_parm[i]);
}
@@ -329,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_ref(NULL, &so);
/* Static tesla init */
- so = so_new(40, 84, 20);
+ so = so_new(47, 95, 24);
so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
so_data (so, NV50TCL_COND_MODE_ALWAYS);
@@ -352,10 +352,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, 0xf);
/* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
- so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1);
- so_data (so, 0x54);
- so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1);
- so_data (so, 0x54);
+ for (i = 0; i < 3; ++i) {
+ so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1);
+ so_data (so, 0x54);
+ }
+
/* origin is top left (set to 1 for bottom left) */
so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
so_data (so, 0);
@@ -370,8 +371,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
return NULL;
}
- for (i = 0; i < 2; i++) {
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
+ for (i = 0; i < 3; i++) {
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4,
&screen->constbuf_parm[i]);
if (ret) {
nv50_screen_destroy(pscreen);
@@ -406,22 +407,45 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000001 | (NV50_CB_PMISC << 12));
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+ so_data (so, 0x00000021 | (NV50_CB_PMISC << 12));
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000031 | (NV50_CB_PMISC << 12));
+ /* bind auxiliary constbuf to immediate data bo */
so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_AUX << 16) | 0x00000200);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+ so_data (so, 0x00000201 | (NV50_CB_AUX << 12));
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+ so_data (so, 0x00000221 | (NV50_CB_AUX << 12));
+
+ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
+ so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PVP << 16) | 0x00000800);
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000101 | (NV50_CB_PVP << 12));
so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_PGP << 16) | 0x00000800);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+ so_data (so, 0x00000121 | (NV50_CB_PGP << 12));
+
+ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
+ so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PFP << 16) | 0x00000800);
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index a038a4e3c2..0d786b0f2e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -18,10 +18,12 @@ struct nv50_screen {
struct nouveau_notifier *sync;
struct nouveau_bo *constbuf_misc[1];
- struct nouveau_bo *constbuf_parm[2];
+ struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES];
struct nouveau_resource *immd_heap[1];
- struct nouveau_resource *parm_heap[2];
+ struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES];
+
+ struct pipe_buffer *strm_vbuf[16];
struct nouveau_bo *tic;
struct nouveau_bo *tsc;
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 1f67df814b..6ab33be663 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -531,7 +531,7 @@ nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso)
struct nv50_program *p = hwcso;
nv50_program_destroy(nv50, p);
- FREE((void*)p->pipe.tokens);
+ FREE((void *)p->pipe.tokens);
FREE(p);
}
@@ -563,7 +563,39 @@ nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso)
struct nv50_program *p = hwcso;
nv50_program_destroy(nv50, p);
- FREE((void*)p->pipe.tokens);
+ FREE((void *)p->pipe.tokens);
+ FREE(p);
+}
+
+static void *
+nv50_gp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+ p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ p->type = PIPE_SHADER_GEOMETRY;
+ tgsi_scan_shader(p->pipe.tokens, &p->info);
+ return (void *)p;
+}
+
+static void
+nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->fragprog = hwcso;
+ nv50->dirty |= NV50_NEW_GEOMPROG;
+}
+
+static void
+nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_program *p = hwcso;
+
+ nv50_program_destroy(nv50, p);
+ FREE((void *)p->pipe.tokens);
FREE(p);
}
@@ -585,17 +617,21 @@ nv50_set_clip_state(struct pipe_context *pipe,
static void
nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv50_context *nv50 = nv50_context(pipe);
if (shader == PIPE_SHADER_VERTEX) {
- nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer;
+ nv50->constbuf[PIPE_SHADER_VERTEX] = buf;
nv50->dirty |= NV50_NEW_VERTPROG_CB;
} else
if (shader == PIPE_SHADER_FRAGMENT) {
- nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer;
+ nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf;
nv50->dirty |= NV50_NEW_FRAGPROG_CB;
+ } else
+ if (shader == PIPE_SHADER_GEOMETRY) {
+ nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf;
+ nv50->dirty |= NV50_NEW_GEOMPROG_CB;
}
}
@@ -696,6 +732,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.bind_fs_state = nv50_fp_state_bind;
nv50->pipe.delete_fs_state = nv50_fp_state_delete;
+ nv50->pipe.create_gs_state = nv50_gp_state_create;
+ nv50->pipe.bind_gs_state = nv50_gp_state_bind;
+ nv50->pipe.delete_gs_state = nv50_gp_state_delete;
+
nv50->pipe.set_blend_color = nv50_set_blend_color;
nv50->pipe.set_clip_state = nv50_set_clip_state;
nv50->pipe.set_constant_buffer = nv50_set_constant_buffer;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index f83232f43c..956da9b304 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -199,6 +199,8 @@ nv50_state_emit(struct nv50_context *nv50)
nv50->state.dirty |= NV50_NEW_VERTPROG;
if (nv50->state.fragprog)
nv50->state.dirty |= NV50_NEW_FRAGPROG;
+ if (nv50->state.geomprog)
+ nv50->state.dirty |= NV50_NEW_GEOMPROG;
if (nv50->state.rast)
nv50->state.dirty |= NV50_NEW_RASTERIZER;
if (nv50->state.blend_colour)
@@ -228,9 +230,14 @@ nv50_state_emit(struct nv50_context *nv50)
so_emit(chan, nv50->state.vertprog);
if (nv50->state.dirty & NV50_NEW_FRAGPROG)
so_emit(chan, nv50->state.fragprog);
+ if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog)
+ so_emit(chan, nv50->state.geomprog);
if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_RASTERIZER))
- so_emit(chan, nv50->state.programs);
+ NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
+ so_emit(chan, nv50->state.fp_linkage);
+ if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG))
+ && nv50->state.gp_linkage)
+ so_emit(chan, nv50->state.gp_linkage);
if (nv50->state.dirty & NV50_NEW_RASTERIZER)
so_emit(chan, nv50->state.rast);
if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
@@ -267,6 +274,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan)
so_emit_reloc_markers(chan, nv50->state.fragprog);
so_emit_reloc_markers(chan, nv50->state.vtxbuf);
so_emit_reloc_markers(chan, nv50->screen->static_init);
+
+ if (nv50->state.instbuf)
+ so_emit_reloc_markers(chan, nv50->state.instbuf);
}
boolean
@@ -291,9 +301,15 @@ nv50_state_validate(struct nv50_context *nv50)
if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
nv50_fragprog_validate(nv50);
+ if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB))
+ nv50_geomprog_validate(nv50);
+
if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_RASTERIZER))
- nv50_linkage_validate(nv50);
+ NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
+ nv50_fp_linkage_validate(nv50);
+
+ if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG))
+ nv50_gp_linkage_validate(nv50);
if (nv50->dirty & NV50_NEW_RASTERIZER)
so_ref(nv50->rasterizer->so, &nv50->state.rast);
@@ -400,8 +416,9 @@ viewport_uptodate:
for (i = 0; i < PIPE_SHADER_TYPES; ++i)
nr += nv50->sampler_nr[i];
- so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES
- + nr * 8, PIPE_SHADER_TYPES * 2);
+ so = so_new(1 + 5 * PIPE_SHADER_TYPES,
+ 1 + 19 * PIPE_SHADER_TYPES + nr * 8,
+ PIPE_SHADER_TYPES * 2);
nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index bef548b728..871536dca9 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -155,7 +155,7 @@ static boolean
nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
unsigned p)
{
- static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 };
+ static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2, 1 };
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
struct nouveau_grobj *tesla = nv50->screen->tesla;
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f2e510fba6..bfb1b34d27 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -40,6 +40,8 @@ nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
static boolean
nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
+#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
+
static INLINE unsigned
nv50_prim(unsigned mode)
{
@@ -55,6 +57,14 @@ nv50_prim(unsigned mode)
case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+ case PIPE_PRIM_LINES_ADJACENCY:
+ return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
default:
break;
}
@@ -152,6 +162,309 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
return (hw_type | hw_size);
}
+/* For instanced drawing from user buffers, hitting the FIFO repeatedly
+ * with the same vertex data is probably worse than uploading all data.
+ */
+static boolean
+nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
+{
+ struct nv50_screen *nscreen = nv50->screen;
+ struct pipe_screen *pscreen = &nscreen->base.base;
+ struct pipe_buffer *buf = nscreen->strm_vbuf[i];
+ struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
+ uint8_t *src;
+ unsigned size = align(vb->buffer->size, 4096);
+
+ if (buf && buf->size < size)
+ pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
+
+ if (!nscreen->strm_vbuf[i]) {
+ nscreen->strm_vbuf[i] = pipe_buffer_create(
+ pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
+ buf = nscreen->strm_vbuf[i];
+ }
+
+ src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ if (!src)
+ return FALSE;
+ src += vb->buffer_offset;
+
+ size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
+ if (vb->buffer_offset + size > vb->buffer->size)
+ size = vb->buffer->size - vb->buffer_offset;
+
+ pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
+ pipe_buffer_unmap(pscreen, vb->buffer);
+
+ vb->buffer = buf; /* don't pipe_reference, this is a private copy */
+ return TRUE;
+}
+
+static void
+nv50_upload_user_vbufs(struct nv50_context *nv50)
+{
+ unsigned i;
+
+ if (nv50->vbo_fifo)
+ nv50->dirty |= NV50_NEW_ARRAYS;
+ if (!(nv50->dirty & NV50_NEW_ARRAYS))
+ return;
+
+ for (i = 0; i < nv50->vtxbuf_nr; ++i) {
+ if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
+ continue;
+ nv50_upload_vtxbuf(nv50, i);
+ }
+}
+
+static void
+nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ float v[4];
+
+ util_format_read_4f(nv50->vtxelt[i].src_format,
+ v, 0, data, 0, 0, 0, 1, 1);
+
+ switch (nv50->vtxelt[i].nr_components) {
+ case 4:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
+ OUT_RINGf (chan, v[0]);
+ OUT_RINGf (chan, v[1]);
+ OUT_RINGf (chan, v[2]);
+ OUT_RINGf (chan, v[3]);
+ break;
+ case 3:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
+ OUT_RINGf (chan, v[0]);
+ OUT_RINGf (chan, v[1]);
+ OUT_RINGf (chan, v[2]);
+ break;
+ case 2:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
+ OUT_RINGf (chan, v[0]);
+ OUT_RINGf (chan, v[1]);
+ break;
+ case 1:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
+ OUT_RINGf (chan, v[0]);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static unsigned
+init_per_instance_arrays_immd(struct nv50_context *nv50,
+ unsigned startInstance,
+ unsigned pos[16], unsigned step[16])
+{
+ struct nouveau_bo *bo;
+ unsigned i, b, count = 0;
+
+ for (i = 0; i < nv50->vtxelt_nr; ++i) {
+ if (!nv50->vtxelt[i].instance_divisor)
+ continue;
+ ++count;
+ b = nv50->vtxelt[i].vertex_buffer_index;
+
+ pos[i] = nv50->vtxelt[i].src_offset +
+ nv50->vtxbuf[b].buffer_offset +
+ startInstance * nv50->vtxbuf[b].stride;
+ step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
+
+ bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+ if (!bo->map)
+ nouveau_bo_map(bo, NOUVEAU_BO_RD);
+
+ nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+ }
+
+ return count;
+}
+
+static unsigned
+init_per_instance_arrays(struct nv50_context *nv50,
+ unsigned startInstance,
+ unsigned pos[16], unsigned step[16])
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ struct nouveau_bo *bo;
+ struct nouveau_stateobj *so;
+ unsigned i, b, count = 0;
+ const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ if (nv50->vbo_fifo)
+ return init_per_instance_arrays_immd(nv50, startInstance,
+ pos, step);
+
+ so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+
+ for (i = 0; i < nv50->vtxelt_nr; ++i) {
+ if (!nv50->vtxelt[i].instance_divisor)
+ continue;
+ ++count;
+ b = nv50->vtxelt[i].vertex_buffer_index;
+
+ pos[i] = nv50->vtxelt[i].src_offset +
+ nv50->vtxbuf[b].buffer_offset +
+ startInstance * nv50->vtxbuf[b].stride;
+
+ if (!startInstance) {
+ step[i] = 0;
+ continue;
+ }
+ step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
+
+ bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+
+ so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+ so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ if (count && startInstance) {
+ so_ref (so, &nv50->state.instbuf); /* for flush notify */
+ so_emit(chan, nv50->state.instbuf);
+ }
+ so_ref (NULL, &so);
+
+ return count;
+}
+
+static void
+step_per_instance_arrays_immd(struct nv50_context *nv50,
+ unsigned pos[16], unsigned step[16])
+{
+ struct nouveau_bo *bo;
+ unsigned i, b;
+
+ for (i = 0; i < nv50->vtxelt_nr; ++i) {
+ if (!nv50->vtxelt[i].instance_divisor)
+ continue;
+ if (++step[i] != nv50->vtxelt[i].instance_divisor)
+ continue;
+ b = nv50->vtxelt[i].vertex_buffer_index;
+ bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+
+ step[i] = 0;
+ pos[i] += nv50->vtxbuf[b].stride;
+
+ nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+ }
+}
+
+static void
+step_per_instance_arrays(struct nv50_context *nv50,
+ unsigned pos[16], unsigned step[16])
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ struct nouveau_bo *bo;
+ struct nouveau_stateobj *so;
+ unsigned i, b;
+ const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ if (nv50->vbo_fifo) {
+ step_per_instance_arrays_immd(nv50, pos, step);
+ return;
+ }
+
+ so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+
+ for (i = 0; i < nv50->vtxelt_nr; ++i) {
+ if (!nv50->vtxelt[i].instance_divisor)
+ continue;
+ b = nv50->vtxelt[i].vertex_buffer_index;
+
+ if (++step[i] == nv50->vtxelt[i].instance_divisor) {
+ step[i] = 0;
+ pos[i] += nv50->vtxbuf[b].stride;
+ }
+
+ bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+
+ so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+ so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ so_ref (so, &nv50->state.instbuf); /* for flush notify */
+ so_ref (NULL, &so);
+
+ so_emit(chan, nv50->state.instbuf);
+}
+
+static INLINE void
+nv50_unmap_vbufs(struct nv50_context *nv50)
+{
+ unsigned i;
+
+ for (i = 0; i < nv50->vtxbuf_nr; ++i)
+ if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
+ nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
+}
+
+void
+nv50_draw_arrays_instanced(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned startInstance, unsigned instanceCount)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned i, nz_divisors;
+ unsigned step[16], pos[16];
+
+ if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
+ nv50_upload_user_vbufs(nv50);
+
+ nv50_state_validate(nv50);
+
+ nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+ OUT_RING (chan, NV50_CB_AUX | (24 << 8));
+ OUT_RING (chan, startInstance);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, nv50_prim(mode));
+
+ if (nv50->vbo_fifo)
+ nv50_push_arrays(nv50, start, count);
+ else {
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ }
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+
+ for (i = 1; i < instanceCount; i++) {
+ if (nz_divisors) /* any non-zero array divisors ? */
+ step_per_instance_arrays(nv50, pos, step);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, nv50_prim(mode) | (1 << 28));
+
+ if (nv50->vbo_fifo)
+ nv50_push_arrays(nv50, start, count);
+ else {
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ }
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+ }
+ nv50_unmap_vbufs(nv50);
+
+ so_ref(NULL, &nv50->state.instbuf);
+}
+
void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
@@ -182,6 +495,8 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
+ nv50_unmap_vbufs(nv50);
+
/* XXX: not sure what to do if ret != TRUE: flush and retry?
*/
assert(ret);
@@ -210,7 +525,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -243,7 +558,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -268,7 +583,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
while (count) {
unsigned nr = count > 2047 ? 2047 : count;
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
OUT_RINGp (chan, map, nr);
count -= nr;
@@ -277,6 +592,77 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
return TRUE;
}
+static INLINE void
+nv50_draw_elements_inline(struct nv50_context *nv50,
+ void *map, unsigned indexSize,
+ unsigned start, unsigned count)
+{
+ switch (indexSize) {
+ case 1:
+ nv50_draw_elements_inline_u08(nv50, map, start, count);
+ break;
+ case 2:
+ nv50_draw_elements_inline_u16(nv50, map, start, count);
+ break;
+ case 4:
+ nv50_draw_elements_inline_u32(nv50, map, start, count);
+ break;
+ }
+}
+
+void
+nv50_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned startInstance, unsigned instanceCount)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ struct pipe_screen *pscreen = pipe->screen;
+ void *map;
+ unsigned i, nz_divisors;
+ unsigned step[16], pos[16];
+
+ map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+
+ if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
+ nv50_upload_user_vbufs(nv50);
+
+ nv50_state_validate(nv50);
+
+ nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+ OUT_RING (chan, NV50_CB_AUX | (24 << 8));
+ OUT_RING (chan, startInstance);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, nv50_prim(mode));
+
+ nv50_draw_elements_inline(nv50, map, indexSize, start, count);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+
+ for (i = 1; i < instanceCount; ++i) {
+ if (nz_divisors) /* any non-zero array divisors ? */
+ step_per_instance_arrays(nv50, pos, step);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, nv50_prim(mode) | (1 << 28));
+
+ nv50_draw_elements_inline(nv50, map, indexSize, start, count);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+ }
+ nv50_unmap_vbufs(nv50);
+
+ so_ref(NULL, &nv50->state.instbuf);
+}
+
void
nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
@@ -287,7 +673,6 @@ nv50_draw_elements(struct pipe_context *pipe,
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct pipe_screen *pscreen = pipe->screen;
void *map;
- boolean ret;
map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
@@ -300,29 +685,15 @@ nv50_draw_elements(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(mode));
- switch (indexSize) {
- case 1:
- ret = nv50_draw_elements_inline_u08(nv50, map, start, count);
- break;
- case 2:
- ret = nv50_draw_elements_inline_u16(nv50, map, start, count);
- break;
- case 4:
- ret = nv50_draw_elements_inline_u32(nv50, map, start, count);
- break;
- default:
- assert(0);
- ret = FALSE;
- break;
- }
+
+ nv50_draw_elements_inline(nv50, map, indexSize, start, count);
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
+ nv50_unmap_vbufs(nv50);
+
pipe_buffer_unmap(pscreen, indexBuffer);
-
- /* XXX: what to do if ret != TRUE? Flush and retry?
- */
- assert(ret);
}
static INLINE boolean
@@ -335,23 +706,16 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
struct nouveau_stateobj *so;
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nouveau_bo *bo = nouveau_bo(vb->buffer);
- float *v;
+ float v[4];
int ret;
- enum pipe_format pf = ve->src_format;
- const struct util_format_description *desc;
-
- desc = util_format_description(pf);
- assert(desc);
-
- if ((desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT) ||
- util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32)
- return FALSE;
ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
if (ret)
return FALSE;
- v = (float *)(bo->map + (vb->buffer_offset + ve->src_offset));
+ util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
+ (vb->buffer_offset + ve->src_offset), 0,
+ 0, 0, 1, 1);
so = *pso;
if (!so)
*pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
@@ -409,7 +773,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
!(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
nv50->vbo_fifo = 0xffff;
- if (nv50->vertprog->cfg.edgeflag_in < 16)
+ if (NV50_USING_LOATHED_EDGEFLAG(nv50))
nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
@@ -437,17 +801,20 @@ nv50_vbo_validate(struct nv50_context *nv50)
nv50->vbo_fifo &= ~(1 << i);
continue;
}
- so_data(vtxfmt, hw | i);
if (nv50->vbo_fifo) {
+ so_data (vtxfmt, hw |
+ (ve->instance_divisor ? (1 << 4) : i));
so_method(vtxbuf, tesla,
NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
so_data (vtxbuf, 0);
continue;
}
+ so_data(vtxfmt, hw | i);
so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
- so_data (vtxbuf, 0x20000000 | vb->stride);
+ so_data (vtxbuf, 0x20000000 |
+ (ve->instance_divisor ? 0 : vb->stride));
so_reloc (vtxbuf, bo, vb->buffer_offset +
ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
@@ -485,7 +852,7 @@ typedef void (*pfn_push)(struct nouveau_channel *, void *);
struct nv50_vbo_emitctx
{
pfn_push push[16];
- void *map[16];
+ uint8_t *map[16];
unsigned stride[16];
unsigned nr_ve;
unsigned vtx_dwords;
@@ -523,19 +890,18 @@ nv50_map_vbufs(struct nv50_context *nv50)
for (i = 0; i < nv50->vtxbuf_nr; ++i) {
struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
- unsigned size, delta;
+ unsigned size = vb->stride * (vb->max_index + 1) + 16;
if (nouveau_bo(vb->buffer)->map)
continue;
- size = vb->stride * (vb->max_index + 1);
- delta = vb->buffer_offset;
-
+ size = vb->stride * (vb->max_index + 1) + 16;
+ size = MIN2(size, vb->buffer->size);
if (!size)
- size = vb->buffer->size - vb->buffer_offset;
+ size = vb->buffer->size;
if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
- delta, size, NOUVEAU_BO_RD))
+ 0, size, NOUVEAU_BO_RD))
break;
}
@@ -546,16 +912,6 @@ nv50_map_vbufs(struct nv50_context *nv50)
return FALSE;
}
-static INLINE void
-nv50_unmap_vbufs(struct nv50_context *nv50)
-{
- unsigned i;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i)
- if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
- nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
-}
-
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
@@ -650,12 +1006,13 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
ve = &nv50->vtxelt[i];
vb = &nv50->vtxbuf[ve->vertex_buffer_index];
- if (!(nv50->vbo_fifo & (1 << i)))
+ if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
continue;
n = emit->nr_ve++;
emit->stride[n] = vb->stride;
- emit->map[n] = nouveau_bo(vb->buffer)->map +
+ emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
+ vb->buffer_offset +
(start * vb->stride + ve->src_offset);
desc = util_format_description(ve->src_format);
@@ -745,13 +1102,12 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx_next(chan, &emit);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}
@@ -772,13 +1128,12 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
set_edgeflag(chan, tesla, &emit, *map);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}
@@ -799,13 +1154,12 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
set_edgeflag(chan, tesla, &emit, *map);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}
@@ -826,13 +1180,12 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
set_edgeflag(chan, tesla, &emit, *map);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 51fdb82ff3..92de297ef1 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
/* Reasonable defaults */
caps->num_vert_fpus = 4;
caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
+ caps->is_r400 = FALSE;
caps->is_r500 = FALSE;
caps->high_second_pipe = FALSE;
@@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x4A54:
caps->family = CHIP_FAMILY_R420;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5548:
@@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5D57:
caps->family = CHIP_FAMILY_R423;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x554C:
@@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5D4A:
caps->family = CHIP_FAMILY_R430;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5D4C:
@@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5D52:
caps->family = CHIP_FAMILY_R480;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x4B48:
@@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x4B4C:
caps->family = CHIP_FAMILY_R481;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5E4C:
@@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5E4D:
caps->family = CHIP_FAMILY_RV410;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5954:
@@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x791F:
caps->family = CHIP_FAMILY_RS690;
caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
break;
case 0x793F:
@@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x7942:
caps->family = CHIP_FAMILY_RS600;
caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
break;
case 0x796C:
@@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x796F:
caps->family = CHIP_FAMILY_RS740;
caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
break;
case 0x7100:
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 0633a8b8a7..2808486492 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -40,11 +40,18 @@ struct r300_capabilities {
unsigned num_z_pipes;
/* Whether or not TCL is physically present */
boolean has_tcl;
+ /* Whether or not this is R400. The differences compared to their R3xx
+ * cousins are:
+ * - Extended fragment shader registers
+ * - Blend LTE/GTE thresholds */
+ boolean is_r400;
/* Whether or not this is an RV515 or newer; R500s have many differences
* that require extra consideration, compared to their R3xx cousins:
* - Extra bit of width and height on texture sizes
* - Blend color is split across two registers
- * - Universal Shader (US) block used for fragment shaders */
+ * - Blend LTE/GTE thresholds
+ * - Universal Shader (US) block used for fragment shaders
+ * - FP16 blending and multisampling */
boolean is_r500;
/* Whether or not the second pixel pipe is accessed with the high bit */
boolean high_second_pipe;
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index af95bbe789..94a9ab3ef3 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -87,7 +87,7 @@ r300_is_texture_referenced(struct pipe_context *pipe,
{
struct pipe_buffer* buf = 0;
- r300_get_texture_buffer(texture, &buf, NULL);
+ r300_get_texture_buffer(pipe->screen, texture, &buf, NULL);
return pipe->is_buffer_referenced(pipe, buf);
}
@@ -110,23 +110,33 @@ static void r300_flush_cb(void *data)
cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL);
}
-#define R300_INIT_ATOM(name) \
- r300->name##_state.state = NULL; \
- r300->name##_state.emit = r300_emit_##name##_state; \
- r300->name##_state.dirty = FALSE; \
- insert_at_tail(&r300->atom_list, &r300->name##_state);
+#define R300_INIT_ATOM(atomname, atomsize) \
+ r300->atomname##_state.name = #atomname; \
+ r300->atomname##_state.state = NULL; \
+ r300->atomname##_state.size = atomsize; \
+ r300->atomname##_state.emit = r300_emit_##atomname##_state; \
+ r300->atomname##_state.dirty = FALSE; \
+ insert_at_tail(&r300->atom_list, &r300->atomname##_state);
static void r300_setup_atoms(struct r300_context* r300)
{
+ /* Create the actual atom list.
+ *
+ * Each atom is examined and emitted in the order it appears here, which
+ * can affect performance and conformance if not handled with care.
+ *
+ * Some atoms never change size, others change every emit. This is just
+ * an upper bound on each atom, to keep the emission machinery from
+ * underallocating space. */
make_empty_list(&r300->atom_list);
- R300_INIT_ATOM(ztop);
- R300_INIT_ATOM(blend);
- R300_INIT_ATOM(blend_color);
- R300_INIT_ATOM(clip);
- R300_INIT_ATOM(dsa);
- R300_INIT_ATOM(rs);
- R300_INIT_ATOM(scissor);
- R300_INIT_ATOM(viewport);
+ R300_INIT_ATOM(ztop, 2);
+ R300_INIT_ATOM(blend, 8);
+ R300_INIT_ATOM(blend_color, 3);
+ R300_INIT_ATOM(clip, 29);
+ R300_INIT_ATOM(dsa, 8);
+ R300_INIT_ATOM(rs, 22);
+ R300_INIT_ATOM(scissor, 3);
+ R300_INIT_ATOM(viewport, 9);
}
struct pipe_context* r300_create_context(struct pipe_screen* screen,
@@ -143,8 +153,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.winsys = (struct pipe_winsys*)radeon_winsys;
r300->context.screen = screen;
- r300_init_debug(r300);
-
r300->context.destroy = r300_destroy_context;
r300->context.clear = r300_clear;
@@ -182,7 +190,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state);
r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state);
r300->rs_block = CALLOC_STRUCT(r300_rs_block);
- r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state);
+ r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
r300->vertex_info = CALLOC_STRUCT(r300_vertex_info);
r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 5937f0e2cc..5e33dc042a 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -30,16 +30,28 @@
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
+#include "r300_screen.h"
+
struct r300_context;
struct r300_fragment_shader;
struct r300_vertex_shader;
struct r300_atom {
+ /* List pointers. */
struct r300_atom *prev, *next;
+ /* Name, for debugging. */
+ const char* name;
+ /* Opaque state. */
void* state;
+ /* Emit the state to the context. */
void (*emit)(struct r300_context*, void*);
+ /* Upper bound on number of dwords to emit. */
+ unsigned size;
+ /* Whether this atom should be emitted. */
boolean dirty;
+ /* Another dirty flag that is never automatically cleared. */
+ boolean always_dirty;
};
struct r300_blend_state {
@@ -75,10 +87,10 @@ struct r300_rs_state {
uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */
uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */
- uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */
- uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */
- uint32_t depth_scale_back; /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */
- uint32_t depth_offset_back; /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */
+ float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */
+ /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */
+ float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */
+ /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */
uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */
uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
@@ -106,16 +118,6 @@ struct r300_sampler_state {
unsigned min_lod, max_lod;
};
-struct r300_scissor_regs {
- uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */
- uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
-};
-
-struct r300_scissor_state {
- struct r300_scissor_regs framebuffer;
- struct r300_scissor_regs scissor;
-};
-
struct r300_texture_state {
uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */
uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */
@@ -188,6 +190,12 @@ struct r300_query {
struct r300_query* next;
};
+enum r300_buffer_tiling {
+ R300_BUFFER_LINEAR = 0,
+ R300_BUFFER_TILED,
+ R300_BUFFER_SQUARETILED
+};
+
struct r300_texture {
/* Parent class */
struct pipe_texture tex;
@@ -224,6 +232,9 @@ struct r300_texture {
/* Registers carrying texture format data. */
struct r300_texture_state state;
+
+ /* Buffer tiling */
+ enum r300_buffer_tiling microtile, macrotile;
};
struct r300_vertex_info {
@@ -315,9 +326,10 @@ struct r300_context {
uint32_t dirty_hw;
/* Whether the TCL engine should be in bypass mode. */
boolean tcl_bypass;
-
- /** Combination of DBG_xxx flags */
- unsigned debug;
+ /* Whether polygon offset is enabled. */
+ boolean polygon_offset_enabled;
+ /* Z buffer bit depth. */
+ uint32_t zbuffer_bpp;
};
/* Convenience cast wrapper. */
@@ -331,35 +343,15 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300);
void r300_init_state_functions(struct r300_context* r300);
void r300_init_surface_functions(struct r300_context* r300);
-/* Debug functionality. */
-
-/**
- * Debug flags to disable/enable certain groups of debugging outputs.
- *
- * \note These may be rather coarse, and the grouping may be impractical.
- * If you find, while debugging the driver, that a different grouping
- * of these flags would be beneficial, just feel free to change them
- * but make sure to update the documentation in r300_debug.c to reflect
- * those changes.
- */
-/*@{*/
-#define DBG_HELP 0x0000001
-#define DBG_FP 0x0000002
-#define DBG_VP 0x0000004
-#define DBG_CS 0x0000008
-#define DBG_DRAW 0x0000010
-#define DBG_TEX 0x0000020
-#define DBG_FALL 0x0000040
-/*@}*/
-
-static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags)
+static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
{
- return (ctx->debug & flags) ? TRUE : FALSE;
+ return SCREEN_DBG_ON(r300_screen(ctx->context.screen), flags);
}
-static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...)
+static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags,
+ const char * fmt, ...)
{
- if (DBG_ON(ctx, flags)) {
+ if (CTX_DBG_ON(ctx, flags)) {
va_list va;
va_start(va, fmt);
debug_vprintf(fmt, va);
@@ -367,6 +359,8 @@ static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * f
}
}
-void r300_init_debug(struct r300_context * ctx);
+#define DBG_ON CTX_DBG_ON
+#define DBG CTX_DBG
#endif /* R300_CONTEXT_H */
+
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index d142fee050..151f72b0fe 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -52,7 +52,7 @@
#define CS_LOCALS(context) \
struct r300_context* const cs_context_copy = (context); \
struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \
- int cs_count = 0;
+ int cs_count = 0; (void) cs_count;
#define CHECK_CS(size) \
assert(cs_winsys->check_cs(cs_winsys, (size)))
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 2a6ed54ac9..00d4f31c2b 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -46,7 +46,7 @@ static struct debug_option debug_options[] = {
{ 0, 0, 0 }
};
-void r300_init_debug(struct r300_context * ctx)
+void r300_init_debug(struct r300_screen * screen)
{
const char * options = debug_get_option("RADEON_DEBUG", 0);
boolean printhint = FALSE;
@@ -64,7 +64,7 @@ void r300_init_debug(struct r300_context * ctx)
for(opt = debug_options; opt->name; ++opt) {
if (!strncmp(options, opt->name, length)) {
- ctx->debug |= opt->flag;
+ screen->debug |= opt->flag;
break;
}
}
@@ -77,11 +77,11 @@ void r300_init_debug(struct r300_context * ctx)
options += length;
}
- if (!ctx->debug)
+ if (!screen->debug)
printhint = TRUE;
}
- if (printhint || ctx->debug & DBG_HELP) {
+ if (printhint || screen->debug & DBG_HELP) {
debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n"
"to a comma-separated list of debug options. Available options are:\n");
for(opt = debug_options; opt->name; ++opt) {
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 0e5533c790..2ea9fab015 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -41,6 +41,7 @@ void r300_emit_blend_state(struct r300_context* r300, void* state)
{
struct r300_blend_state* blend = (struct r300_blend_state*)state;
CS_LOCALS(r300);
+
BEGIN_CS(8);
OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
@@ -419,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] |
- r300_translate_colorformat(tex->tex.format), 0,
- RADEON_GEM_DOMAIN_VRAM, 0);
+ r300_translate_colorformat(tex->tex.format) |
+ R300_COLOR_TILE(tex->macrotile) |
+ R300_COLOR_MICROTILE(tex->microtile),
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
r300_translate_out_fmt(surf->format));
@@ -443,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300,
OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format));
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0,
- RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] |
+ R300_DEPTHMACROTILE(tex->macrotile) |
+ R300_DEPTHMICROTILE(tex->microtile),
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
END_CS;
@@ -579,19 +584,37 @@ void r300_emit_query_end(struct r300_context* r300)
void r300_emit_rs_state(struct r300_context* r300, void* state)
{
struct r300_rs_state* rs = (struct r300_rs_state*)state;
+ float scale, offset;
CS_LOCALS(r300);
- BEGIN_CS(22);
+ BEGIN_CS(18 + (rs->polygon_offset_enable ? 5 : 0));
OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size);
OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2);
OUT_CS(rs->point_minmax);
OUT_CS(rs->line_control);
- OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6);
- OUT_CS(rs->depth_scale_front);
- OUT_CS(rs->depth_offset_front);
- OUT_CS(rs->depth_scale_back);
- OUT_CS(rs->depth_offset_back);
+
+ if (rs->polygon_offset_enable) {
+ scale = rs->depth_scale * 12;
+ offset = rs->depth_offset;
+
+ switch (r300->zbuffer_bpp) {
+ case 16:
+ offset *= 4;
+ break;
+ case 24:
+ offset *= 2;
+ break;
+ }
+
+ OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ OUT_CS_32F(scale);
+ OUT_CS_32F(offset);
+ OUT_CS_32F(scale);
+ OUT_CS_32F(offset);
+ }
+
+ OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
OUT_CS(rs->polygon_offset_enable);
OUT_CS(rs->cull_mode);
OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
@@ -641,27 +664,47 @@ void r300_emit_rs_block_state(struct r300_context* r300,
END_CS;
}
-static void r300_emit_scissor_regs(struct r300_context* r300,
- struct r300_scissor_regs* scissor)
+void r300_emit_scissor_state(struct r300_context* r300, void* state)
{
+ unsigned minx, miny, maxx, maxy;
+ uint32_t top_left, bottom_right;
+ struct r300_screen* r300screen = r300_screen(r300->context.screen);
+ struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state;
CS_LOCALS(r300);
- BEGIN_CS(3);
- OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
- OUT_CS(scissor->top_left);
- OUT_CS(scissor->bottom_right);
- END_CS;
-}
+ minx = miny = 0;
+ maxx = r300->framebuffer_state.width;
+ maxy = r300->framebuffer_state.height;
-void r300_emit_scissor_state(struct r300_context* r300, void* state)
-{
- struct r300_scissor_state* scissor = (struct r300_scissor_state*)state;
- /* XXX argfl! */
if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) {
- r300_emit_scissor_regs(r300, &scissor->scissor);
+ minx = MAX2(minx, scissor->minx);
+ miny = MAX2(miny, scissor->miny);
+ maxx = MIN2(maxx, scissor->maxx);
+ maxy = MIN2(maxy, scissor->maxy);
+ }
+
+ if (r300screen->caps->is_r500) {
+ top_left =
+ (minx << R300_SCISSORS_X_SHIFT) |
+ (miny << R300_SCISSORS_Y_SHIFT);
+ bottom_right =
+ ((maxx - 1) << R300_SCISSORS_X_SHIFT) |
+ ((maxy - 1) << R300_SCISSORS_Y_SHIFT);
} else {
- r300_emit_scissor_regs(r300, &scissor->framebuffer);
+ /* Offset of 1440 in non-R500 chipsets. */
+ top_left =
+ ((minx + 1440) << R300_SCISSORS_X_SHIFT) |
+ ((miny + 1440) << R300_SCISSORS_Y_SHIFT);
+ bottom_right =
+ (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
+ (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
}
+
+ BEGIN_CS(3);
+ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_CS(top_left);
+ OUT_CS(bottom_right);
+ END_CS;
}
void r300_emit_texture(struct r300_context* r300,
@@ -680,12 +723,18 @@ void r300_emit_texture(struct r300_context* r300,
filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
- /* determine min/max levels */
- /* the MAX_MIP level is the largest (finest) one */
- max_level = MIN2(sampler->max_lod, tex->tex.last_level);
- min_level = MIN2(sampler->min_lod, max_level);
- format0 |= R300_TX_NUM_LEVELS(max_level);
- filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
+ if (tex->is_npot) {
+ /* NPOT textures don't support mip filter, unfortunately.
+ * This prevents incorrect rendering. */
+ filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
+ } else {
+ /* determine min/max levels */
+ /* the MAX_MIP level is the largest (finest) one */
+ max_level = MIN2(sampler->max_lod, tex->tex.last_level);
+ min_level = MIN2(sampler->min_lod, max_level);
+ format0 |= R300_TX_NUM_LEVELS(max_level);
+ filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
+ }
BEGIN_CS(16);
OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 |
@@ -697,8 +746,10 @@ void r300_emit_texture(struct r300_context* r300,
OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1);
OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2);
OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1);
- OUT_CS_RELOC(tex->buffer, 0,
- RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ OUT_CS_RELOC(tex->buffer,
+ R300_TXO_MACRO_TILE(tex->macrotile) |
+ R300_TXO_MICRO_TILE(tex->microtile),
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0);
END_CS;
}
@@ -764,32 +815,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
END_CS;
}
-#if 0
-void r300_emit_draw_packet(struct r300_context* r300)
-{
- CS_LOCALS(r300);
-
- DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
- "vertex size %d\n", r300->vbo,
- r300->vertex_info->vinfo.size);
- /* Set the pointer to our vertex buffer. The emitted values are this:
- * PACKET3 [3D_LOAD_VBPNTR]
- * COUNT [1]
- * FORMAT [size | stride << 8]
- * OFFSET [offset into BO]
- * VBPNTR [relocated BO]
- */
- BEGIN_CS(7);
- OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
- OUT_CS(1);
- OUT_CS(r300->vertex_info->vinfo.size |
- (r300->vertex_info->vinfo.size << 8));
- OUT_CS(r300->vbo_offset);
- OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_CS;
-}
-#endif
-
void r300_emit_vertex_format_state(struct r300_context* r300)
{
int i;
@@ -992,14 +1017,23 @@ void r300_emit_dirty_state(struct r300_context* r300)
struct r300_screen* r300screen = r300_screen(r300->context.screen);
struct r300_texture* tex;
struct r300_atom* atom;
- int i, dirty_tex = 0;
+ unsigned i, dwords = 1024;
+ int dirty_tex = 0;
boolean invalid = FALSE;
- /* Check size of CS. */
- /* Make sure we have at least 8*1024 spare dwords. */
+ /* Check the required number of dwords against the space remaining in the
+ * current CS object. If we need more, then flush. */
+
+ foreach(atom, &r300->atom_list) {
+ if (atom->dirty || atom->always_dirty) {
+ dwords += atom->size;
+ }
+ }
+
+ /* Make sure we have at least 2*1024 spare dwords. */
/* XXX It would be nice to know the number of dwords we really need to
* XXX emit. */
- if (!r300->winsys->check_cs(r300->winsys, 8*1024)) {
+ if (!r300->winsys->check_cs(r300->winsys, dwords)) {
r300->context.flush(&r300->context, 0, NULL);
}
@@ -1039,10 +1073,12 @@ validate:
}
}
/* ...occlusion query buffer... */
- if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
- 0, RADEON_GEM_DOMAIN_GTT)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
+ if (r300->dirty_state & R300_NEW_QUERY) {
+ if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
+ 0, RADEON_GEM_DOMAIN_GTT)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
}
/* ...and vertex buffer. */
if (r300->vbo) {
@@ -1071,7 +1107,7 @@ validate:
}
foreach(atom, &r300->atom_list) {
- if (atom->dirty) {
+ if (atom->dirty || atom->always_dirty) {
atom->emit(r300, atom->state);
atom->dirty = FALSE;
}
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 14a08241fc..59819cb106 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -37,8 +37,10 @@ static void r300_flush(struct pipe_context* pipe,
{
struct r300_context *r300 = r300_context(pipe);
struct r300_query *query;
+ struct r300_atom *atom;
CS_LOCALS(r300);
+ (void) cs_count;
/* We probably need to flush Draw, but we may have been called from
* within Draw. This feels kludgy, but it might be the best thing.
*
@@ -54,7 +56,15 @@ static void r300_flush(struct pipe_context* pipe,
r300_emit_invariant_state(r300);
r300->dirty_state = R300_NEW_KITCHEN_SINK;
r300->dirty_hw = 0;
+
+ /* New kitchen sink, baby. */
+ foreach(atom, &r300->atom_list) {
+ if (atom->state) {
+ atom->dirty = TRUE;
+ }
+ }
}
+
/* reset flushed query */
foreach(query, &r300->query_list) {
query->flushed = TRUE;
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 034bfc15cf..361813891f 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_TX_OFFSET_5 0x4554
#define R300_TX_OFFSET_6 0x4558
#define R300_TX_OFFSET_7 0x455C
- /* BEGIN: Guess from R200 */
+
# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
-# define R300_TXO_MACRO_TILE (1 << 2)
+# define R300_TXO_MACRO_TILE_LINEAR (0 << 2)
+# define R300_TXO_MACRO_TILE_TILED (1 << 2)
+# define R300_TXO_MACRO_TILE(x) ((x) << 2)
# define R300_TXO_MICRO_TILE_LINEAR (0 << 3)
-# define R300_TXO_MICRO_TILE (1 << 3)
-# define R300_TXO_MICRO_TILE_SQUARE (2 << 3)
+# define R300_TXO_MICRO_TILE_TILED (1 << 3)
+# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3)
+# define R300_TXO_MICRO_TILE(x) ((x) << 3)
# define R300_TXO_OFFSET_MASK 0xffffffe0
# define R300_TXO_OFFSET_SHIFT 5
- /* END: Guess from R200 */
/* 32 bit chroma key */
#define R300_TX_CHROMA_KEY_0 0x4580
@@ -2283,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_COLORPITCH_MASK 0x00003FFE
# define R300_COLOR_TILE_DISABLE (0 << 16)
# define R300_COLOR_TILE_ENABLE (1 << 16)
+# define R300_COLOR_TILE(x) ((x) << 16)
# define R300_COLOR_MICROTILE_DISABLE (0 << 17)
# define R300_COLOR_MICROTILE_ENABLE (1 << 17)
# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
+# define R300_COLOR_MICROTILE(x) ((x) << 17)
# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19)
# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19)
# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19)
@@ -2544,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_DEPTHPITCH_MASK 0x00003FFC
# define R300_DEPTHMACROTILE_DISABLE (0 << 16)
# define R300_DEPTHMACROTILE_ENABLE (1 << 16)
+# define R300_DEPTHMACROTILE(x) ((x) << 16)
# define R300_DEPTHMICROTILE_LINEAR (0 << 17)
# define R300_DEPTHMICROTILE_TILED (1 << 17)
# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
+# define R300_DEPTHMICROTILE(x) ((x) << 17)
# define R300_DEPTHENDIAN_NO_SWAP (0 << 18)
# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18)
# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index ee43421cdb..90de062bcd 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -114,6 +114,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
return color_control;
}
+static void r300_emit_draw_immediate(struct r300_context *r300,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer;
+ unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float);
+ unsigned i;
+ uint32_t* map;
+ CS_LOCALS(r300);
+
+ map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo,
+ start * vertex_size, count * vertex_size,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ BEGIN_CS(10 + count * vertex_size);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
+ OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
+ r300_translate_primitive(mode));
+ //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size);
+ for (i = 0; i < count * vertex_size; i++) {
+ if (i % vertex_size == 0) {
+ //debug_printf("r300: -- vert --\n");
+ }
+ //debug_printf("r300: 0x%08x\n", *map);
+ OUT_CS(*map);
+ map++;
+ }
+ END_CS;
+
+ pipe_buffer_unmap(r300->context.screen, vbo);
+}
+
static void r300_emit_draw_arrays(struct r300_context *r300,
unsigned mode,
unsigned count)
@@ -207,17 +245,49 @@ validate:
return TRUE;
}
+static void r300_shorten_ubyte_elts(struct r300_context* r300,
+ struct pipe_buffer** elts,
+ unsigned count)
+{
+ struct pipe_screen* screen = r300->context.screen;
+ struct pipe_buffer* new_elts;
+ unsigned char *in_map;
+ unsigned short *out_map;
+ unsigned i;
+
+ new_elts = screen->buffer_create(screen, 32,
+ PIPE_BUFFER_USAGE_INDEX |
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ,
+ 2 * count);
+
+ in_map = pipe_buffer_map(screen, *elts, PIPE_BUFFER_USAGE_CPU_READ);
+ out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)*in_map;
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(screen, *elts);
+ pipe_buffer_unmap(screen, new_elts);
+
+ *elts = new_elts;
+}
+
/* This is the fast-path drawing & emission for HW TCL. */
void r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
+ struct pipe_buffer* orgIndexBuffer = indexBuffer;
if (!u_trim_pipe_prim(mode, &count)) {
return;
@@ -236,13 +306,18 @@ void r300_draw_range_elements(struct pipe_context* pipe,
return;
}
+ if (indexSize == 1) {
+ r300_shorten_ubyte_elts(r300, &indexBuffer, count);
+ indexSize = 2;
+ }
+
if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
RADEON_GEM_DOMAIN_GTT, 0)) {
- return;
+ goto cleanup;
}
if (!r300->winsys->validate(r300->winsys)) {
- return;
+ goto cleanup;
}
r300_emit_dirty_state(r300);
@@ -251,6 +326,11 @@ void r300_draw_range_elements(struct pipe_context* pipe,
r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex,
mode, start, count);
+
+cleanup:
+ if (indexBuffer != orgIndexBuffer) {
+ pipe->screen->buffer_destroy(indexBuffer);
+ }
}
/* Simple helpers for context setup. Should probably be moved to util. */
@@ -264,7 +344,7 @@ void r300_draw_elements(struct pipe_context* pipe,
}
void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count)
+ unsigned start, unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
@@ -287,9 +367,12 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
r300_emit_dirty_state(r300);
- r300_emit_aos(r300, start);
-
- r300_emit_draw_arrays(r300, mode, count);
+ if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) {
+ r300_emit_draw_immediate(r300, mode, start, count);
+ } else {
+ r300_emit_aos(r300, start);
+ r300_emit_draw_arrays(r300, mode, count);
+ }
}
/****************************************************************************
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 287664b1d2..67325c6b80 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -183,10 +183,20 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param)
}
}
-static boolean check_tex_format(enum pipe_format format, uint32_t usage,
- boolean is_r500)
+static boolean r300_is_format_supported(struct pipe_screen* screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned usage,
+ unsigned geom_flags)
{
uint32_t retval = 0;
+ boolean is_r500 = r300_screen(screen)->caps->is_r500;
+
+ if (target >= PIPE_MAX_TEXTURE_TYPES) {
+ debug_printf("r300: Implementation error: Received bogus texture "
+ "target %d in %s\n", target, __FUNCTION__);
+ return FALSE;
+ }
switch (format) {
/* Supported formats. */
@@ -247,28 +257,13 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage,
case PIPE_FORMAT_Z32_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
- debug_printf("r300: Note: Got unsupported format: %s in %s\n",
- pf_name(format), __FUNCTION__);
+ SCREEN_DBG(r300_screen(screen), DBG_TEX,
+ "r300: Note: Got unsupported format: %s in %s\n",
+ pf_name(format), __FUNCTION__);
return FALSE;
- /* XXX These don't even exist
- case PIPE_FORMAT_A32R32G32B32:
- case PIPE_FORMAT_A16R16G16B16: */
- /* XXX What the deuce is UV88? (r3xx accel page 14)
- debug_printf("r300: Warning: Got unimplemented format: %s in %s\n",
- pf_name(format), __FUNCTION__);
- return FALSE; */
-
- /* XXX Supported yet unimplemented r5xx formats: */
- /* XXX Again, what is UV1010 this time? (r5xx accel page 148) */
- /* XXX Even more that don't exist
- case PIPE_FORMAT_A10R10G10B10_UNORM:
- case PIPE_FORMAT_A2R10G10B10_UNORM:
- case PIPE_FORMAT_I10_UNORM:
- debug_printf(
- "r300: Warning: Got unimplemented r500 format: %s in %s\n",
- pf_name(format), __FUNCTION__);
- return FALSE; */
+ /* XXX Add all remaining gallium-supported formats,
+ * see util/u_format.csv. */
default:
/* Unknown format... */
@@ -286,30 +281,6 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage,
return (retval >= usage);
}
-static boolean r300_is_format_supported(struct pipe_screen* pscreen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned tex_usage,
- unsigned geom_flags)
-{
- switch (target) {
- case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_3D:
- case PIPE_TEXTURE_CUBE:
- return check_tex_format(format, tex_usage,
- r300_screen(pscreen)->caps->is_r500);
-
- default:
- debug_printf("r300: Fatal: This is not a format target: %d\n",
- target);
- assert(0);
- break;
- }
-
- return FALSE;
-}
-
static struct pipe_transfer*
r300_get_tex_transfer(struct pipe_screen *screen,
struct pipe_texture *texture,
@@ -319,6 +290,7 @@ r300_get_tex_transfer(struct pipe_screen *screen,
{
struct r300_texture *tex = (struct r300_texture *)texture;
struct r300_transfer *trans;
+ struct r300_screen *rscreen = r300_screen(screen);
unsigned offset;
offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */
@@ -330,11 +302,8 @@ r300_get_tex_transfer(struct pipe_screen *screen,
trans->transfer.y = y;
trans->transfer.width = w;
trans->transfer.height = h;
- trans->transfer.stride = r300_texture_get_stride(tex, level);
+ trans->transfer.stride = r300_texture_get_stride(rscreen, tex, level);
trans->transfer.usage = usage;
-
- /* XXX not sure whether it's required to set these two,
- the driver doesn't use them */
trans->transfer.zslice = zslice;
trans->transfer.face = face;
@@ -396,6 +365,7 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys)
caps->num_frag_pipes = radeon_winsys->gb_pipes;
caps->num_z_pipes = radeon_winsys->z_pipes;
+ r300_init_debug(r300screen);
r300_parse_chipset(caps);
r300screen->caps = caps;
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 2217988add..580fda3984 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -35,6 +35,9 @@ struct r300_screen {
/* Chipset capabilities */
struct r300_capabilities* caps;
+
+ /** Combination of DBG_xxx flags */
+ unsigned debug;
};
struct r300_transfer {
@@ -60,4 +63,44 @@ r300_transfer(struct pipe_transfer* transfer)
/* Creates a new r300 screen. */
struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys);
+/* Debug functionality. */
+
+/**
+ * Debug flags to disable/enable certain groups of debugging outputs.
+ *
+ * \note These may be rather coarse, and the grouping may be impractical.
+ * If you find, while debugging the driver, that a different grouping
+ * of these flags would be beneficial, just feel free to change them
+ * but make sure to update the documentation in r300_debug.c to reflect
+ * those changes.
+ */
+/*@{*/
+#define DBG_HELP 0x0000001
+#define DBG_FP 0x0000002
+#define DBG_VP 0x0000004
+#define DBG_CS 0x0000008
+#define DBG_DRAW 0x0000010
+#define DBG_TEX 0x0000020
+#define DBG_FALL 0x0000040
+/*@}*/
+
+static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
+{
+ return (screen->debug & flags) ? TRUE : FALSE;
+}
+
+static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
+ const char * fmt, ...)
+{
+ if (SCREEN_DBG_ON(screen, flags)) {
+ va_list va;
+ va_start(va, fmt);
+ debug_vprintf(fmt, va);
+ va_end(va);
+ }
+}
+
+void r300_init_debug(struct r300_screen* ctx);
+
#endif /* R300_SCREEN_H */
+
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 78764ddc98..e2ec0bc5bd 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -340,6 +340,7 @@ static void r300_set_blend_color(struct pipe_context* pipe,
const struct pipe_blend_color* color)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
struct r300_blend_color_state* state =
(struct r300_blend_color_state*)r300->blend_color_state.state;
union util_color uc;
@@ -355,6 +356,7 @@ static void r300_set_blend_color(struct pipe_context* pipe,
float_to_fixed10(color->color[2]) |
(float_to_fixed10(color->color[1]) << 16);
+ r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2;
r300->blend_color_state.dirty = TRUE;
}
@@ -365,11 +367,14 @@ static void r300_set_clip_state(struct pipe_context* pipe,
if (r300_screen(pipe->screen)->caps->has_tcl) {
memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state));
- r300->clip_state.dirty = TRUE;
+ r300->clip_state.size = 29;
} else {
draw_flush(r300->draw);
draw_set_clip_state(r300->draw, state);
+ r300->clip_state.size = 2;
}
+
+ r300->clip_state.dirty = TRUE;
}
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
@@ -462,8 +467,10 @@ static void r300_bind_dsa_state(struct pipe_context* pipe,
void* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
r300->dsa_state.state = state;
+ r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6;
r300->dsa_state.dirty = TRUE;
}
@@ -474,36 +481,12 @@ static void r300_delete_dsa_state(struct pipe_context* pipe,
FREE(state);
}
-static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
- struct r300_scissor_regs *scissor,
- boolean is_r500)
-{
- if (is_r500) {
- scissor->top_left =
- (state->minx << R300_SCISSORS_X_SHIFT) |
- (state->miny << R300_SCISSORS_Y_SHIFT);
- scissor->bottom_right =
- ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) |
- ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT);
- } else {
- /* Offset of 1440 in non-R500 chipsets. */
- scissor->top_left =
- ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) |
- ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT);
- scissor->bottom_right =
- (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
- (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
- }
-}
-
static void
r300_set_framebuffer_state(struct pipe_context* pipe,
const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_scissor_state* scissor =
- (struct r300_scissor_state*)r300->scissor_state.state;
- struct pipe_scissor_state pscissor;
+ uint32_t zbuffer_bpp = 0;
if (r300->draw) {
draw_flush(r300->draw);
@@ -511,19 +494,29 @@ static void
r300->framebuffer_state = *state;
- /* XXX Arg. This is silly. */
- pscissor.minx = pscissor.miny = 0;
- pscissor.maxx = state->width;
- pscissor.maxy = state->height;
- r300_set_scissor_regs(&pscissor, &scissor->framebuffer,
- r300_screen(r300->context.screen)->caps->is_r500);
-
/* Don't rely on the order of states being set for the first time. */
r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
r300->blend_state.dirty = TRUE;
r300->dsa_state.dirty = TRUE;
r300->scissor_state.dirty = TRUE;
+
+ /* Polyfon offset depends on the zbuffer bit depth. */
+ if (state->zsbuf && r300->polygon_offset_enabled) {
+ switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
+ case 2:
+ zbuffer_bpp = 16;
+ break;
+ case 4:
+ zbuffer_bpp = 24;
+ break;
+ }
+
+ if (r300->zbuffer_bpp != zbuffer_bpp) {
+ r300->zbuffer_bpp = zbuffer_bpp;
+ r300->rs_state.dirty = TRUE;
+ }
+ }
}
/* Create fragment shader state. */
@@ -627,9 +620,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
rs->line_control = pack_float_16_6x(state->line_width) |
R300_GA_LINE_CNTL_END_TYPE_COMP;
- /* XXX I think there is something wrong with the polygon mode,
- * XXX re-test when r300g is in a better shape */
-
/* Enable polygon mode */
if (state->fill_cw != PIPE_POLYGON_MODE_FILL ||
state->fill_ccw != PIPE_POLYGON_MODE_FILL) {
@@ -682,10 +672,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
}
if (rs->polygon_offset_enable) {
- rs->depth_offset_front = rs->depth_offset_back =
- fui(state->offset_units);
- rs->depth_scale_front = rs->depth_scale_back =
- fui(state->offset_scale);
+ rs->depth_offset = state->offset_units;
+ rs->depth_scale = state->offset_scale;
}
if (state->line_stipple_enable) {
@@ -717,7 +705,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
draw_set_rasterizer_state(r300->draw, &rs->rs);
}
- r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport;
+ if (rs) {
+ r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport;
+ r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw;
+ } else {
+ r300->tcl_bypass = FALSE;
+ r300->polygon_offset_enabled = FALSE;
+ }
r300->rs_state.state = rs;
r300->rs_state.dirty = TRUE;
@@ -864,11 +858,9 @@ static void r300_set_scissor_state(struct pipe_context* pipe,
const struct pipe_scissor_state* state)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_scissor_state* scissor =
- (struct r300_scissor_state*)r300->scissor_state.state;
- r300_set_scissor_regs(state, &scissor->scissor,
- r300_screen(r300->context.screen)->caps->is_r500);
+ memcpy(r300->scissor_state.state, state,
+ sizeof(struct pipe_scissor_state));
r300->scissor_state.dirty = TRUE;
}
@@ -1015,22 +1007,22 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
static void r300_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct r300_context* r300 = r300_context(pipe);
void *mapped;
- if (buf == NULL || buf->buffer->size == 0 ||
- (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL)
+ if (buf == NULL || buf->size == 0 ||
+ (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL)
{
r300->shader_constants[shader].count = 0;
return;
}
- assert((buf->buffer->size % 4 * sizeof(float)) == 0);
- memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size);
- r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float));
- pipe_buffer_unmap(pipe->screen, buf->buffer);
+ assert((buf->size % 4 * sizeof(float)) == 0);
+ memcpy(r300->shader_constants[shader].constants, mapped, buf->size);
+ r300->shader_constants[shader].count = buf->size / (4 * sizeof(float));
+ pipe_buffer_unmap(pipe->screen, buf);
if (shader == PIPE_SHADER_VERTEX)
r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 192846411b..99c2720897 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -350,7 +350,8 @@ static void r300_update_rs_block(struct r300_context* r300,
/* Rasterize colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
+ vs_outputs->color[1] != ATTR_UNUSED) {
/* Always rasterize if it's written by the VS,
* otherwise it locks up. */
rX00_rs_col(rs, col_count, i, FALSE);
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 35be00e1b0..e2180b33b7 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -537,6 +537,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
static INLINE uint16_t
r300_translate_vertex_data_swizzle(enum pipe_format format) {
const struct util_format_description *desc = util_format_description(format);
+ unsigned swizzle[4], i;
assert(format);
@@ -547,11 +548,26 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
return 0;
}
- return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
- (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
- (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
- (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) |
- (0xf << R300_WRITE_ENA_SHIFT));
+ /* Swizzles for 8bits formats are in the reversed order, not sure why. */
+ if (desc->channel[0].size == 8) {
+ for (i = 0; i < 4; i++) {
+ if (desc->swizzle[i] <= 3) {
+ swizzle[i] = 3 - desc->swizzle[i];
+ } else {
+ swizzle[i] = desc->swizzle[i];
+ }
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ swizzle[i] = desc->swizzle[i];
+ }
+ }
+
+ return ((swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) |
+ (0xf << R300_WRITE_ENA_SHIFT));
}
#endif /* R300_STATE_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index f25f3ca217..47d7e60a40 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -79,7 +79,8 @@ void r300_emit_invariant_state(struct r300_context* r300)
END_CS;
/* XXX unsorted stuff from surface_fill */
- BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0));
+ BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) +
+ (caps->family >= CHIP_FAMILY_RV350 ? 4 : 0));
if (caps->has_tcl) {
/*Flushing PVS is required before the VAP_GB registers can be changed*/
@@ -115,10 +116,12 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
- if (caps->is_r500) {
+
+ if (caps->family >= CHIP_FAMILY_RV350) {
OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
}
+
OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 9a96206a4d..1f73f74c26 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -30,10 +30,23 @@
#include "r300_texture.h"
#include "r300_screen.h"
-static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
+#define TILE_WIDTH 0
+#define TILE_HEIGHT 1
+
+static const unsigned microblock_table[5][3][2] = {
+ /*linear tiled square-tiled */
+ {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */
+ {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */
+ {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */
+ {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */
+ {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */
+};
+
+static void r300_setup_texture_state(struct r300_screen* screen, struct r300_texture* tex)
{
struct r300_texture_state* state = &tex->state;
struct pipe_texture *pt = &tex->tex;
+ boolean is_r500 = screen->caps->is_r500;
state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
@@ -67,8 +80,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
}
assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048));
- debug_printf("r300: Set texture state (%dx%d, %d levels)\n",
- pt->width0, pt->height0, pt->last_level);
+ SCREEN_DBG(screen, DBG_TEX, "r300: Set texture state (%dx%d, %d levels)\n",
+ pt->width0, pt->height0, pt->last_level);
}
unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
@@ -92,33 +105,78 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
}
/**
+ * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile
+ * of the given texture.
+ */
+static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim)
+{
+ unsigned pixsize, tile_size;
+
+ pixsize = util_format_get_blocksize(tex->tex.format);
+ tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] *
+ (tex->macrotile == R300_BUFFER_TILED ? 8 : 1);
+
+ assert(tile_size);
+ return tile_size;
+}
+
+/**
* Return the stride, in bytes, of the texture images of the given texture
* at the given level.
*/
-unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
+unsigned r300_texture_get_stride(struct r300_screen* screen,
+ struct r300_texture* tex, unsigned level)
{
+ unsigned tile_width, width;
+
if (tex->stride_override)
return tex->stride_override;
+ /* Check the level. */
if (level > tex->tex.last_level) {
- debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__,
- level, tex->tex.last_level);
+ SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
+ __FUNCTION__, level, tex->tex.last_level);
return 0;
}
- return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32);
+ width = u_minify(tex->tex.width0, level);
+
+ if (!util_format_is_compressed(tex->tex.format)) {
+ tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH);
+ width = align(width, tile_width);
+ return util_format_get_stride(tex->tex.format, width);
+ } else {
+ return align(util_format_get_stride(tex->tex.format, width), 32);
+ }
+}
+
+static unsigned r300_texture_get_nblocksy(struct r300_texture* tex,
+ unsigned level)
+{
+ unsigned height, tile_height;
+
+ height = u_minify(tex->tex.height0, level);
+
+ if (!util_format_is_compressed(tex->tex.format)) {
+ tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT);
+ height = align(height, tile_height);
+ }
+
+ return util_format_get_nblocksy(tex->tex.format, height);
}
-static void r300_setup_miptree(struct r300_texture* tex)
+static void r300_setup_miptree(struct r300_screen* screen,
+ struct r300_texture* tex)
{
struct pipe_texture* base = &tex->tex;
- int stride, size, layer_size;
- int i;
+ unsigned stride, size, layer_size, nblocksy, i;
- for (i = 0; i <= base->last_level; i++) {
- unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i));
+ SCREEN_DBG(screen, DBG_TEX, "r300: Making miptree for texture, format %s\n",
+ pf_name(base->format));
- stride = r300_texture_get_stride(tex, i);
+ for (i = 0; i <= base->last_level; i++) {
+ stride = r300_texture_get_stride(screen, tex, i);
+ nblocksy = r300_texture_get_nblocksy(tex, i);
layer_size = stride * nblocksy;
if (base->target == PIPE_TEXTURE_CUBE)
@@ -131,10 +189,10 @@ static void r300_setup_miptree(struct r300_texture* tex)
tex->layer_size[i] = layer_size;
tex->pitch[i] = stride / util_format_get_blocksize(base->format);
- debug_printf("r300: Texture miptree: Level %d "
- "(%dx%dx%d px, pitch %d bytes)\n",
+ SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d "
+ "(%dx%dx%d px, pitch %d bytes) %d bytes total\n",
i, u_minify(base->width0, i), u_minify(base->height0, i),
- u_minify(base->depth0, i), stride);
+ u_minify(base->depth0, i), stride, tex->size);
}
}
@@ -150,6 +208,7 @@ static struct pipe_texture*
const struct pipe_texture* template)
{
struct r300_texture* tex = CALLOC_STRUCT(r300_texture);
+ struct r300_screen* rscreen = r300_screen(screen);
if (!tex) {
return NULL;
@@ -160,10 +219,10 @@ static struct pipe_texture*
tex->tex.screen = screen;
r300_setup_flags(tex);
- r300_setup_miptree(tex);
- r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
+ r300_setup_miptree(rscreen, tex);
+ r300_setup_texture_state(rscreen, tex);
- tex->buffer = screen->buffer_create(screen, 1024,
+ tex->buffer = screen->buffer_create(screen, 2048,
PIPE_BUFFER_USAGE_PIXEL,
tex->size);
@@ -227,6 +286,7 @@ static struct pipe_texture*
struct pipe_buffer* buffer)
{
struct r300_texture* tex;
+ struct r300_screen* rscreen = r300_screen(screen);
/* Support only 2D textures without mipmaps */
if (base->target != PIPE_TEXTURE_2D ||
@@ -248,7 +308,7 @@ static struct pipe_texture*
tex->pitch[0] = *stride / util_format_get_blocksize(base->format);
r300_setup_flags(tex);
- r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
+ r300_setup_texture_state(rscreen, tex);
pipe_buffer_reference(&tex->buffer, buffer);
@@ -315,7 +375,8 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen)
screen->video_surface_destroy= r300_video_surface_destroy;
}
-boolean r300_get_texture_buffer(struct pipe_texture* texture,
+boolean r300_get_texture_buffer(struct pipe_screen* screen,
+ struct pipe_texture* texture,
struct pipe_buffer** buffer,
unsigned* stride)
{
@@ -327,7 +388,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture,
pipe_buffer_reference(buffer, tex->buffer);
if (stride) {
- *stride = r300_texture_get_stride(tex, 0);
+ *stride = r300_texture_get_stride(r300_screen(screen), tex, 0);
}
return TRUE;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 55ceb1a513..1be1e6843c 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -31,7 +31,8 @@ struct r300_texture;
void r300_init_screen_texture_functions(struct pipe_screen* screen);
-unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level);
+unsigned r300_texture_get_stride(struct r300_screen* screen,
+ struct r300_texture* tex, unsigned level);
unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
unsigned zslice, unsigned face);
@@ -115,7 +116,8 @@ r300_video_surface(struct pipe_video_surface *pvs)
#ifndef R300_WINSYS_H
-boolean r300_get_texture_buffer(struct pipe_texture* texture,
+boolean r300_get_texture_buffer(struct pipe_screen* screen,
+ struct pipe_texture* texture,
struct pipe_buffer** buffer,
unsigned* stride);
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 68aef70872..9fbb830047 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -124,7 +124,8 @@ static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs)
/* Colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
+ vs_outputs->color[1] != ATTR_UNUSED) {
hwfmt[1] |= R300_INPUT_CNTL_COLOR;
hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
}
@@ -182,7 +183,8 @@ static void r300_stream_locations_notcl(
/* Colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
+ vs_outputs->color[1] != ATTR_UNUSED) {
stream_loc[tabi++] = 2 + i;
}
}
@@ -259,7 +261,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (outputs->color[i] != ATTR_UNUSED) {
c->code->outputs[outputs->color[i]] = reg++;
- } else if (any_bcolor_used) {
+ } else if (any_bcolor_used ||
+ outputs->color[1] != ATTR_UNUSED) {
reg++;
}
}
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 1ae6de70fe..bdb8b54bab 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -40,7 +40,8 @@ extern "C" {
struct pipe_context* r300_create_context(struct pipe_screen* screen,
struct radeon_winsys* radeon_winsys);
-boolean r300_get_texture_buffer(struct pipe_texture* texture,
+boolean r300_get_texture_buffer(struct pipe_screen* screen,
+ struct pipe_texture* texture,
struct pipe_buffer** buffer,
unsigned* stride);
diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
index bcb887a0b2..e4ac49fa85 100644
--- a/src/gallium/drivers/softpipe/Makefile
+++ b/src/gallium/drivers/softpipe/Makefile
@@ -32,6 +32,7 @@ C_SOURCES = \
sp_tex_tile_cache.c \
sp_tile_cache.c \
sp_surface.c \
- sp_video_context.c
+ sp_video_context.c \
+ sp_winsys.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
index aac9edf44e..3042e556c6 100644
--- a/src/gallium/drivers/softpipe/SConscript
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -34,6 +34,7 @@ softpipe = env.ConvenienceLibrary(
'sp_texture.c',
'sp_tile_cache.c',
'sp_video_context.c',
+ 'sp_winsys.c'
])
Export('softpipe')
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index f3ac6760db..8e01793940 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -113,8 +113,8 @@ softpipe_destroy( struct pipe_context *pipe )
}
for (i = 0; i < Elements(softpipe->constants); i++) {
- if (softpipe->constants[i].buffer) {
- pipe_buffer_reference(&softpipe->constants[i].buffer, NULL);
+ if (softpipe->constants[i]) {
+ pipe_buffer_reference(&softpipe->constants[i], NULL);
}
}
@@ -256,6 +256,8 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
+ softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced;
+ softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced;
softpipe->pipe.clear = softpipe_clear;
softpipe->pipe.flush = softpipe_flush;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 73fa744f9d..da673c57ad 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -63,7 +63,7 @@ struct softpipe_context {
/** Other rendering state */
struct pipe_blend_color blend_color;
struct pipe_clip_state clip;
- struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
+ struct pipe_buffer *constants[PIPE_SHADER_TYPES];
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 03d35fb3cb..03b58d2fb7 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -52,18 +52,18 @@ softpipe_map_constant_buffers(struct softpipe_context *sp)
uint i, vssize, gssize;
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
- if (sp->constants[i].buffer && sp->constants[i].buffer->size)
- sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
+ if (sp->constants[i] && sp->constants[i]->size)
+ sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i],
PIPE_BUFFER_USAGE_CPU_READ);
}
- if (sp->constants[PIPE_SHADER_VERTEX].buffer)
- vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
+ if (sp->constants[PIPE_SHADER_VERTEX])
+ vssize = sp->constants[PIPE_SHADER_VERTEX]->size;
else
vssize = 0;
- if (sp->constants[PIPE_SHADER_GEOMETRY].buffer)
- gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size;
+ if (sp->constants[PIPE_SHADER_GEOMETRY])
+ gssize = sp->constants[PIPE_SHADER_GEOMETRY]->size;
else
gssize = 0;
@@ -91,26 +91,48 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0);
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
- if (sp->constants[i].buffer && sp->constants[i].buffer->size)
- ws->buffer_unmap(ws, sp->constants[i].buffer);
+ if (sp->constants[i] && sp->constants[i]->size)
+ ws->buffer_unmap(ws, sp->constants[i]);
sp->mapped_constants[i] = NULL;
}
}
+/**
+ * Draw vertex arrays, with optional indexing.
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off
+ * the drawing to the 'draw' module.
+ */
+static void
+softpipe_draw_range_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
+
void
softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+ softpipe_draw_range_elements_instanced(pipe,
+ NULL,
+ 0,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ 0,
+ 1);
}
-/**
- * Draw vertex arrays, with optional indexing.
- * Basically, map the vertex buffers (and drawing surfaces), then hand off
- * the drawing to the 'draw' module.
- */
void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
@@ -119,6 +141,91 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
unsigned max_index,
unsigned mode, unsigned start, unsigned count)
{
+ softpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize,
+ min_index,
+ max_index,
+ mode,
+ start,
+ count,
+ 0,
+ 1);
+}
+
+
+void
+softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ softpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ 0,
+ 1);
+}
+
+void
+softpipe_draw_arrays_instanced(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ softpipe_draw_range_elements_instanced(pipe,
+ NULL,
+ 0,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+void
+softpipe_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ softpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+static void
+softpipe_draw_range_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
struct softpipe_context *sp = softpipe_context(pipe);
struct draw_context *draw = sp->draw;
unsigned i;
@@ -128,45 +235,48 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
sp->reduced_api_prim = u_reduced_prim(mode);
- if (sp->dirty)
- softpipe_update_derived( sp );
+ if (sp->dirty) {
+ softpipe_update_derived(sp);
+ }
softpipe_map_transfers(sp);
softpipe_map_constant_buffers(sp);
- /*
- * Map vertex buffers
- */
+ /* Map vertex buffers */
for (i = 0; i < sp->num_vertex_buffers; i++) {
- void *buf
- = pipe_buffer_map(pipe->screen,
- sp->vertex_buffer[i].buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
+ void *buf;
+
+ buf = pipe_buffer_map(pipe->screen,
+ sp->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_vertex_buffer(draw, i, buf);
}
/* Map index buffer, if present */
if (indexBuffer) {
- void *mapped_indexes
- = pipe_buffer_map(pipe->screen, indexBuffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- draw_set_mapped_element_buffer_range(draw, indexSize,
- min_index,
- max_index,
+ void *mapped_indexes;
+
+ mapped_indexes = pipe_buffer_map(pipe->screen,
+ indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer_range(draw,
+ indexSize,
+ minIndex,
+ maxIndex,
mapped_indexes);
- }
- else {
+ } else {
/* no index/element buffer */
- draw_set_mapped_element_buffer_range(draw, 0, start,
- start + count - 1, NULL);
+ draw_set_mapped_element_buffer_range(draw,
+ 0,
+ start,
+ start + count - 1,
+ NULL);
}
/* draw! */
- draw_arrays(draw, mode, start, count);
+ draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount);
- /*
- * unmap vertex/index buffers - will cause draw module to flush
- */
+ /* unmap vertex/index buffers - will cause draw module to flush */
for (i = 0; i < sp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
@@ -176,22 +286,8 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
pipe_buffer_unmap(pipe->screen, indexBuffer);
}
-
/* Note: leave drawing surfaces mapped */
softpipe_unmap_constant_buffers(sp);
sp->dirty_render_cache = TRUE;
}
-
-
-void
-softpipe_draw_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count)
-{
- softpipe_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
-}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 7f573aef3c..5812d1eefe 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -526,6 +526,7 @@ static void
sp_vbuf_destroy(struct vbuf_render *vbr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ align_free(cvbr->vertex_buffer);
sp_setup_destroy_context(cvbr->setup);
FREE(cvbr);
}
@@ -541,7 +542,6 @@ sp_create_vbuf_backend(struct softpipe_context *sp)
assert(sp->draw);
-
cvbr->base.max_indices = SP_MAX_VBUF_INDEXES;
cvbr->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE;
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 9b18dac67b..7f244c4fd4 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -139,7 +139,7 @@ void softpipe_set_clip_state( struct pipe_context *,
void softpipe_set_constant_buffer(struct pipe_context *,
uint shader, uint index,
- const struct pipe_constant_buffer *buf);
+ struct pipe_buffer *buf);
void *softpipe_create_fs_state(struct pipe_context *,
const struct pipe_shader_state *);
@@ -200,6 +200,24 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count);
void
+softpipe_draw_arrays_instanced(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
+void
+softpipe_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
+void
softpipe_map_transfers(struct softpipe_context *sp);
void
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index aa12bb215a..b7ed4441b4 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -159,7 +159,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
void
softpipe_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
@@ -169,8 +169,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
draw_flush(softpipe->draw);
/* note: reference counting */
- pipe_buffer_reference(&softpipe->constants[shader].buffer,
- buf ? buf->buffer : NULL);
+ pipe_buffer_reference(&softpipe->constants[shader], buf);
softpipe->dirty |= SP_NEW_CONSTANTS;
}
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index a9436a3394..fae72c81aa 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -57,13 +57,8 @@ softpipe_texture_layout(struct pipe_screen *screen,
unsigned width = pt->width0;
unsigned height = pt->height0;
unsigned depth = pt->depth0;
-
unsigned buffer_size = 0;
- pt->width0 = width;
- pt->height0 = height;
- pt->depth0 = depth;
-
for (level = 0; level <= pt->last_level; level++) {
spt->stride[level] = util_format_get_stride(pt->format, width);
diff --git a/src/gallium/drivers/softpipe/sp_winsys.c b/src/gallium/drivers/softpipe/sp_winsys.c
new file mode 100644
index 0000000000..8169071dc9
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_winsys.c
@@ -0,0 +1,245 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Malloc softpipe winsys. Uses malloc for all memory allocations.
+ *
+ * @author Keith Whitwell
+ * @author Brian Paul
+ * @author Jose Fonseca
+ */
+
+
+#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */
+#include "pipe/p_format.h"
+#include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "softpipe/sp_winsys.h"
+
+
+struct st_softpipe_buffer
+{
+ struct pipe_buffer base;
+ boolean userBuffer; /** Is this a user-space buffer? */
+ void *data;
+ void *mapped;
+};
+
+
+/** Cast wrapper */
+static INLINE struct st_softpipe_buffer *
+st_softpipe_buffer( struct pipe_buffer *buf )
+{
+ return (struct st_softpipe_buffer *)buf;
+}
+
+
+static void *
+st_softpipe_buffer_map(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf,
+ unsigned flags)
+{
+ struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf);
+ st_softpipe_buf->mapped = st_softpipe_buf->data;
+ return st_softpipe_buf->mapped;
+}
+
+
+static void
+st_softpipe_buffer_unmap(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf)
+{
+ struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf);
+ st_softpipe_buf->mapped = NULL;
+}
+
+
+static void
+st_softpipe_buffer_destroy(struct pipe_buffer *buf)
+{
+ struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf);
+
+ if (oldBuf->data) {
+ if (!oldBuf->userBuffer)
+ align_free(oldBuf->data);
+
+ oldBuf->data = NULL;
+ }
+
+ FREE(oldBuf);
+}
+
+
+static void
+st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys,
+ struct pipe_surface *surf,
+ void *context_private)
+{
+}
+
+
+
+static const char *
+st_softpipe_get_name(struct pipe_winsys *winsys)
+{
+ return "softpipe";
+}
+
+
+static struct pipe_buffer *
+st_softpipe_buffer_create(struct pipe_winsys *winsys,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size)
+{
+ struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer);
+
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.alignment = alignment;
+ buffer->base.usage = usage;
+ buffer->base.size = size;
+
+ buffer->data = align_malloc(size, alignment);
+
+ return &buffer->base;
+}
+
+
+/**
+ * Create buffer which wraps user-space data.
+ */
+static struct pipe_buffer *
+st_softpipe_user_buffer_create(struct pipe_winsys *winsys,
+ void *ptr,
+ unsigned bytes)
+{
+ struct st_softpipe_buffer *buffer;
+
+ buffer = CALLOC_STRUCT(st_softpipe_buffer);
+ if(!buffer)
+ return NULL;
+
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.size = bytes;
+ buffer->userBuffer = TRUE;
+ buffer->data = ptr;
+
+ return &buffer->base;
+}
+
+
+static struct pipe_buffer *
+st_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned tex_usage,
+ unsigned *stride)
+{
+ const unsigned alignment = 64;
+ unsigned nblocksy;
+
+ nblocksy = util_format_get_nblocksy(format, height);
+ *stride = align(util_format_get_stride(format, width), alignment);
+
+ return winsys->buffer_create(winsys, alignment,
+ usage,
+ *stride * nblocksy);
+}
+
+
+static void
+st_softpipe_fence_reference(struct pipe_winsys *winsys,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+}
+
+
+static int
+st_softpipe_fence_signalled(struct pipe_winsys *winsys,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return 0;
+}
+
+
+static int
+st_softpipe_fence_finish(struct pipe_winsys *winsys,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return 0;
+}
+
+
+static void
+st_softpipe_destroy(struct pipe_winsys *winsys)
+{
+ FREE(winsys);
+}
+
+
+struct pipe_screen *
+softpipe_create_screen_malloc(void)
+{
+ static struct pipe_winsys *winsys;
+ struct pipe_screen *screen;
+
+ winsys = CALLOC_STRUCT(pipe_winsys);
+ if(!winsys)
+ return NULL;
+
+ winsys->destroy = st_softpipe_destroy;
+
+ winsys->buffer_create = st_softpipe_buffer_create;
+ winsys->user_buffer_create = st_softpipe_user_buffer_create;
+ winsys->buffer_map = st_softpipe_buffer_map;
+ winsys->buffer_unmap = st_softpipe_buffer_unmap;
+ winsys->buffer_destroy = st_softpipe_buffer_destroy;
+
+ winsys->surface_buffer_create = st_softpipe_surface_buffer_create;
+
+ winsys->fence_reference = st_softpipe_fence_reference;
+ winsys->fence_signalled = st_softpipe_fence_signalled;
+ winsys->fence_finish = st_softpipe_fence_finish;
+
+ winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer;
+ winsys->get_name = st_softpipe_get_name;
+
+ screen = softpipe_create_screen(winsys);
+ if(!screen)
+ st_softpipe_destroy(winsys);
+
+ return screen;
+}
diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h
index f203ded29e..3042e01a05 100644
--- a/src/gallium/drivers/softpipe/sp_winsys.h
+++ b/src/gallium/drivers/softpipe/sp_winsys.h
@@ -49,10 +49,17 @@ struct pipe_buffer;
struct pipe_context *softpipe_create( struct pipe_screen * );
+/**
+ * Create a softpipe screen that uses the
+ * given winsys for allocating buffers.
+ */
+struct pipe_screen *softpipe_create_screen( struct pipe_winsys * );
-struct pipe_screen *
-softpipe_create_screen(struct pipe_winsys *);
-
+/**
+ * Create a softpipe screen that uses
+ * regular malloc to create all its buffers.
+ */
+struct pipe_screen *softpipe_create_screen_malloc(void);
boolean
softpipe_get_texture_buffer( struct pipe_texture *texture,
diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c
index 10e7a12189..ca2c7c49d7 100644
--- a/src/gallium/drivers/svga/svga_pipe_constants.c
+++ b/src/gallium/drivers/svga/svga_pipe_constants.c
@@ -49,7 +49,7 @@ struct svga_constbuf
static void svga_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct svga_context *svga = svga_context(pipe);
@@ -57,7 +57,7 @@ static void svga_set_constant_buffer(struct pipe_context *pipe,
assert(index == 0);
pipe_buffer_reference( &svga->curr.cb[shader],
- buf->buffer );
+ buf );
if (shader == PIPE_SHADER_FRAGMENT)
svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index d29f3762d2..ec2886348b 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -81,8 +81,10 @@ static enum pipe_error compile_fs( struct svga_context *svga,
}
result->id = util_bitmask_add(svga->fs_bm);
- if(result->id == UTIL_BITMASK_INVALID_INDEX)
+ if(result->id == UTIL_BITMASK_INVALID_INDEX) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
goto fail;
+ }
ret = SVGA3D_DefineShader(svga->swc,
result->id,
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index ae1e77e7d4..e7e6c08432 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -71,7 +71,7 @@ static enum pipe_error compile_vs( struct svga_context *svga,
struct svga_shader_result **out_result )
{
struct svga_shader_result *result;
- enum pipe_error ret = PIPE_OK;
+ enum pipe_error ret = PIPE_ERROR;
result = svga_translate_vertex_program( vs, key );
if (result == NULL) {
@@ -80,8 +80,10 @@ static enum pipe_error compile_vs( struct svga_context *svga,
}
result->id = util_bitmask_add(svga->vs_bm);
- if(result->id == UTIL_BITMASK_INVALID_INDEX)
+ if(result->id == UTIL_BITMASK_INVALID_INDEX) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
goto fail;
+ }
ret = SVGA3D_DefineShader(svga->swc,
result->id,
@@ -200,10 +202,12 @@ static int update_zero_stride( struct svga_context *svga,
key.output_stride = 4 * sizeof(float);
key.nr_elements = 1;
+ key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
key.element[0].input_format = vel->src_format;
key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
key.element[0].input_buffer = vel->vertex_buffer_index;
key.element[0].input_offset = vel->src_offset;
+ key.element[0].instance_divisor = vel->instance_divisor;
key.element[0].output_offset = const_idx * 4 * sizeof(float);
translate_key_sanitize(&key);
@@ -222,7 +226,7 @@ static int update_zero_stride( struct svga_context *svga,
translate->set_buffer(translate, vel->vertex_buffer_index,
mapped_buffer,
vbuffer->stride);
- translate->run(translate, 0, 1,
+ translate->run(translate, 0, 1, 0,
svga->curr.zero_stride_constants);
pipe_buffer_unmap(svga->pipe.screen,
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 075e4f9a0b..5a9f0fc690 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -812,13 +812,13 @@ trace_context_set_clip_state(struct pipe_context *_pipe,
static INLINE void
trace_context_set_constant_buffer(struct pipe_context *_pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buffer)
+ struct pipe_buffer *buffer)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
if (buffer)
- trace_screen_user_buffer_update(_pipe->screen, buffer->buffer);
+ trace_screen_user_buffer_update(_pipe->screen, buffer);
trace_dump_call_begin("pipe_context", "set_constant_buffer");
@@ -827,10 +827,11 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe,
trace_dump_arg(uint, index);
trace_dump_arg(constant_buffer, buffer);
+ /* XXX hmm? */
if (buffer) {
- struct pipe_constant_buffer _buffer;
- _buffer.buffer = trace_buffer_unwrap(tr_ctx, buffer->buffer);
- pipe->set_constant_buffer(pipe, shader, index, &_buffer);
+ struct pipe_buffer *_buffer;
+ _buffer = trace_buffer_unwrap(tr_ctx, buffer);
+ pipe->set_constant_buffer(pipe, shader, index, _buffer);
} else {
pipe->set_constant_buffer(pipe, shader, index, buffer);
}
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 86237e03bc..32f61f8c94 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -229,7 +229,7 @@ void trace_dump_clip_state(const struct pipe_clip_state *state)
}
-void trace_dump_constant_buffer(const struct pipe_constant_buffer *state)
+void trace_dump_constant_buffer(const struct pipe_buffer *state)
{
if (!trace_dumping_enabled_locked())
return;
@@ -241,7 +241,7 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state)
trace_dump_struct_begin("pipe_constant_buffer");
- trace_dump_member(buffer_ptr, state, buffer);
+ trace_dump_reference(&state->reference);
trace_dump_struct_end();
}
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 07ad6fbb20..c7860fd6e1 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -47,7 +47,7 @@ void trace_dump_scissor_state(const struct pipe_scissor_state *state);
void trace_dump_clip_state(const struct pipe_clip_state *state);
-void trace_dump_constant_buffer(const struct pipe_constant_buffer *state);
+void trace_dump_constant_buffer(const struct pipe_buffer *state);
void trace_dump_token(const struct tgsi_token *token);
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 26a940593f..272d0308cc 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -104,7 +104,8 @@ typedef unsigned char boolean;
/* Function visibility */
#ifndef PUBLIC
-# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303
+# if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \
+ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define PUBLIC __attribute__((visibility("default")))
# else
# define PUBLIC
@@ -139,22 +140,40 @@ typedef unsigned char boolean;
-#if defined(__GNUC__)
-#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) ))
-#define ALIGN16_ASSIGN(NAME) NAME##___aligned
-#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) ))
-#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) ))
+/* Macros for data alignment. */
+#if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+
+/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */
+#define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment)))
+
+/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */
+#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment)))
+
#if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64)
-#define ALIGN_STACK __attribute__((force_align_arg_pointer))
+#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer))
#else
-#define ALIGN_STACK
+#define PIPE_ALIGN_STACK
#endif
+
+#elif defined(_MSC_VER)
+
+/* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */
+#define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type
+#define PIPE_ALIGN_VAR(_alignment) __declspec(align(_alignment))
+
+#define PIPE_ALIGN_STACK
+
+#elif defined(SWIG)
+
+#define PIPE_ALIGN_TYPE(_alignment, _type) _type
+#define PIPE_ALIGN_VAR(_alignment)
+
+#define PIPE_ALIGN_STACK
+
#else
-#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1]
-#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned)
-#define ALIGN16_ATTRIB
-#define ALIGN8_ATTRIB
-#define ALIGN_STACK
+
+#error "Unsupported compiler"
+
#endif
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index d2f8085b42..0b8f6da2f4 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -69,6 +69,22 @@ struct pipe_context {
unsigned indexSize,
unsigned mode, unsigned start, unsigned count);
+ void (*draw_arrays_instanced)(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
+ void (*draw_elements_instanced)(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
/* XXX: this is (probably) a temporary entrypoint, as the range
* information should be available from the vertex_buffer state.
* Using this to quickly evaluate a specialized path in the draw
@@ -170,7 +186,7 @@ struct pipe_context {
void (*set_constant_buffer)( struct pipe_context *,
uint shader, uint index,
- const struct pipe_constant_buffer *buf );
+ struct pipe_buffer *buf );
void (*set_framebuffer_state)( struct pipe_context *,
const struct pipe_framebuffer_state * );
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 550e2abc32..b489b04466 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -121,17 +121,18 @@ struct tgsi_declaration_range
unsigned Last : 16; /**< UINT */
};
-#define TGSI_SEMANTIC_POSITION 0
-#define TGSI_SEMANTIC_COLOR 1
-#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */
-#define TGSI_SEMANTIC_FOG 3
-#define TGSI_SEMANTIC_PSIZE 4
-#define TGSI_SEMANTIC_GENERIC 5
-#define TGSI_SEMANTIC_NORMAL 6
-#define TGSI_SEMANTIC_FACE 7
-#define TGSI_SEMANTIC_EDGEFLAG 8
-#define TGSI_SEMANTIC_PRIMID 9
-#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */
+#define TGSI_SEMANTIC_POSITION 0
+#define TGSI_SEMANTIC_COLOR 1
+#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */
+#define TGSI_SEMANTIC_FOG 3
+#define TGSI_SEMANTIC_PSIZE 4
+#define TGSI_SEMANTIC_GENERIC 5
+#define TGSI_SEMANTIC_NORMAL 6
+#define TGSI_SEMANTIC_FACE 7
+#define TGSI_SEMANTIC_EDGEFLAG 8
+#define TGSI_SEMANTIC_PRIMID 9
+#define TGSI_SEMANTIC_INSTANCEID 10
+#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */
struct tgsi_declaration_semantic
{
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 60e96b98de..fdd29ed449 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -66,10 +66,6 @@ extern "C" {
#define PIPE_MAX_TEXTURE_LEVELS 16
-/* fwd decls */
-struct pipe_surface;
-
-
/**
* The driver will certainly subclass this to include actual memory
* management information.
@@ -178,15 +174,6 @@ struct pipe_clip_state
};
-/**
- * Constants for vertex/fragment shaders
- */
-struct pipe_constant_buffer
-{
- struct pipe_buffer *buffer;
-};
-
-
struct pipe_shader_state
{
const struct tgsi_token *tokens;
@@ -376,6 +363,11 @@ struct pipe_vertex_element
/** Offset of this attribute, in bytes, from the start of the vertex */
unsigned src_offset;
+ /** Instance data rate divisor. 0 means this is per-vertex data,
+ * n means per-instance data used for n consecutive instances (n > 0).
+ */
+ unsigned instance_divisor;
+
/** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does
* this attribute live in?
*/
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index f131e77ac5..0fdfa96b35 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -180,6 +180,7 @@ dri_get_buffers(__DRIdrawable * dPriv)
switch (buffers[i].attachment) {
case __DRI_BUFFER_FRONT_LEFT:
+ continue;
case __DRI_BUFFER_FAKE_FRONT_LEFT:
index = ST_SURFACE_FRONT_LEFT;
format = drawable->color_format;
@@ -372,6 +373,7 @@ dri_create_buffer(__DRIscreen * sPriv,
/* TODO incase of double buffer visual, delay fake creation */
i = 0;
drawable->attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+ drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT;
if (visual->doubleBufferMode)
drawable->attachments[i++] = __DRI_BUFFER_BACK_LEFT;
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index 793db087ee..d8c054313b 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -83,7 +83,7 @@ dri_fill_in_modes(struct dri_screen *screen,
unsigned num_modes;
uint8_t depth_bits_array[5];
uint8_t stencil_bits_array[5];
- uint8_t msaa_samples_array[1];
+ uint8_t msaa_samples_array[2];
unsigned depth_buffer_factor;
unsigned back_buffer_factor;
unsigned msaa_samples_factor;
@@ -147,8 +147,9 @@ dri_fill_in_modes(struct dri_screen *screen,
}
msaa_samples_array[0] = 0;
+ msaa_samples_array[1] = 4;
back_buffer_factor = 3;
- msaa_samples_factor = 1;
+ msaa_samples_factor = 2;
num_modes =
depth_buffer_factor * back_buffer_factor * msaa_samples_factor * 4;
@@ -158,7 +159,7 @@ dri_fill_in_modes(struct dri_screen *screen,
depth_bits_array, stencil_bits_array,
depth_buffer_factor, back_buffer_modes,
back_buffer_factor,
- msaa_samples_array, 1);
+ msaa_samples_array, msaa_samples_factor);
} else {
__DRIconfig **configs_a8r8g8b8 = NULL;
__DRIconfig **configs_x8r8g8b8 = NULL;
@@ -170,7 +171,8 @@ dri_fill_in_modes(struct dri_screen *screen,
depth_buffer_factor,
back_buffer_modes,
back_buffer_factor,
- msaa_samples_array, 1);
+ msaa_samples_array,
+ msaa_samples_factor);
if (pf_x8r8g8b8)
configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV,
depth_bits_array,
@@ -178,7 +180,8 @@ dri_fill_in_modes(struct dri_screen *screen,
depth_buffer_factor,
back_buffer_modes,
back_buffer_factor,
- msaa_samples_array, 1);
+ msaa_samples_array,
+ msaa_samples_factor);
if (configs_a8r8g8b8 && configs_x8r8g8b8)
configs = driConcatConfigs(configs_x8r8g8b8, configs_a8r8g8b8);
diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c
index 8b2e3b5f85..042e9518c2 100644
--- a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c
@@ -24,7 +24,9 @@
#include <assert.h>
#include <string.h>
+#include "pipe/p_screen.h"
#include "util/u_memory.h"
+#include "util/u_rect.h"
#include "egldriver.h"
#include "eglcurrent.h"
#include "eglconfigutil.h"
@@ -43,8 +45,13 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx)
struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
struct pipe_screen *screen = gdpy->native->screen;
struct egl_g3d_context *gctx = egl_g3d_context(ctx);
- EGLint num_surfaces;
- EGLint s, i;
+ const uint st_att_map[NUM_NATIVE_ATTACHMENTS] = {
+ ST_SURFACE_FRONT_LEFT,
+ ST_SURFACE_BACK_LEFT,
+ ST_SURFACE_FRONT_RIGHT,
+ ST_SURFACE_BACK_RIGHT,
+ };
+ EGLint num_surfaces, s;
/* validate draw and/or read buffers */
num_surfaces = (gctx->base.ReadSurface == gctx->base.DrawSurface) ? 1 : 2;
@@ -52,6 +59,7 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx)
struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS];
struct egl_g3d_surface *gsurf;
struct egl_g3d_buffer *gbuf;
+ EGLint att;
if (s == 0) {
gsurf = egl_g3d_surface(gctx->base.DrawSurface);
@@ -63,35 +71,33 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx)
}
if (!gctx->force_validate) {
- EGLint cur_w, cur_h;
-
- cur_w = gsurf->base.Width;
- cur_h = gsurf->base.Height;
- gsurf->native->validate(gsurf->native,
- gbuf->native_atts, gbuf->num_atts,
- NULL,
- &gsurf->base.Width, &gsurf->base.Height);
- /* validate only when the geometry changed */
- if (gsurf->base.Width == cur_w && gsurf->base.Height == cur_h)
+ unsigned int seq_num;
+
+ gsurf->native->validate(gsurf->native, gbuf->attachment_mask,
+ &seq_num, NULL, NULL, NULL);
+ /* skip validation */
+ if (gsurf->sequence_number == seq_num)
continue;
}
- gsurf->native->validate(gsurf->native,
- gbuf->native_atts, gbuf->num_atts,
- (struct pipe_texture **) textures,
+ pipe_surface_reference(&gsurf->render_surface, NULL);
+ memset(textures, 0, sizeof(textures));
+
+ gsurf->native->validate(gsurf->native, gbuf->attachment_mask,
+ &gsurf->sequence_number, textures,
&gsurf->base.Width, &gsurf->base.Height);
- for (i = 0; i < gbuf->num_atts; i++) {
- struct pipe_texture *pt = textures[i];
+ for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) {
+ struct pipe_texture *pt = textures[att];
struct pipe_surface *ps;
- if (pt) {
+ if (native_attachment_mask_test(gbuf->attachment_mask, att) && pt) {
ps = screen->get_tex_surface(screen, pt, 0, 0, 0,
PIPE_BUFFER_USAGE_GPU_READ |
PIPE_BUFFER_USAGE_GPU_WRITE);
gctx->stapi->st_set_framebuffer_surface(gbuf->st_fb,
- gbuf->st_atts[i], ps);
+ st_att_map[att], ps);
- if (gbuf->native_atts[i] == gsurf->render_att)
+ if (gsurf->render_att == att)
pipe_surface_reference(&gsurf->render_surface, ps);
pipe_surface_reference(&ps, NULL);
@@ -130,13 +136,7 @@ static void
egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx)
{
struct egl_g3d_context *gctx = egl_g3d_context(ctx);
- const uint st_att_map[NUM_NATIVE_ATTACHMENTS] = {
- ST_SURFACE_FRONT_LEFT,
- ST_SURFACE_BACK_LEFT,
- ST_SURFACE_FRONT_RIGHT,
- ST_SURFACE_BACK_RIGHT,
- };
- EGLint s, i;
+ EGLint s;
/* route draw and read buffers' attachments */
for (s = 0; s < 2; s++) {
@@ -152,11 +152,7 @@ egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx)
gbuf = &gctx->read;
}
- gbuf->native_atts[0] = gsurf->render_att;
- gbuf->num_atts = 1;
-
- for (i = 0; i < gbuf->num_atts; i++)
- gbuf->st_atts[i] = st_att_map[gbuf->native_atts[i]];
+ gbuf->attachment_mask = (1 << gsurf->render_att);
/* FIXME OpenGL defaults to draw the front or back buffer when the
* context is single-buffered or double-buffered respectively. In EGL,
@@ -198,19 +194,19 @@ egl_g3d_realloc_context(_EGLDisplay *dpy, _EGLContext *ctx)
if (!gdraw || priv != (void *) &gdraw->base) {
gctx->stapi->st_unreference_framebuffer(gctx->draw.st_fb);
gctx->draw.st_fb = NULL;
- gctx->draw.num_atts = 0;
+ gctx->draw.attachment_mask = 0x0;
}
if (is_equal) {
gctx->read.st_fb = NULL;
- gctx->draw.num_atts = 0;
+ gctx->draw.attachment_mask = 0x0;
}
else {
priv = gctx->stapi->st_framebuffer_private(gctx->read.st_fb);
if (!gread || priv != (void *) &gread->base) {
gctx->stapi->st_unreference_framebuffer(gctx->read.st_fb);
gctx->read.st_fb = NULL;
- gctx->draw.num_atts = 0;
+ gctx->draw.attachment_mask = 0x0;
}
}
}
@@ -459,16 +455,30 @@ egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id)
* Flush the front buffer of the context's draw surface.
*/
static void
-egl_g3d_flush_frontbuffer(void *dummy, struct pipe_surface *surf,
- void *context_private)
+egl_g3d_flush_frontbuffer(struct pipe_screen *screen,
+ struct pipe_surface *surf, void *context_private)
{
struct egl_g3d_context *gctx = egl_g3d_context(context_private);
struct egl_g3d_surface *gsurf = egl_g3d_surface(gctx->base.DrawSurface);
- if (gsurf) {
+ if (gsurf)
gsurf->native->flush_frontbuffer(gsurf->native);
- egl_g3d_validate_context(gctx->base.Display, &gctx->base);
- }
+}
+
+/**
+ * Re-validate the context.
+ */
+static void
+egl_g3d_update_buffer(struct pipe_screen *screen, void *context_private)
+{
+ struct egl_g3d_context *gctx = egl_g3d_context(context_private);
+
+ /**
+ * It is likely that the surface has changed when this function is called.
+ * Set force_validate to skip an unnecessary check.
+ */
+ gctx->force_validate = EGL_TRUE;
+ egl_g3d_validate_context(gctx->base.Display, &gctx->base);
}
static EGLBoolean
@@ -512,13 +522,15 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
}
dpy->DriverData = gdpy;
- gdpy->native =
- native_create_display(dpy->NativeDisplay, egl_g3d_flush_frontbuffer);
+ gdpy->native = native_create_display(dpy->NativeDisplay);
if (!gdpy->native) {
_eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)");
goto fail;
}
+ gdpy->native->screen->flush_frontbuffer = egl_g3d_flush_frontbuffer;
+ gdpy->native->screen->update_buffer = egl_g3d_update_buffer;
+
dpy->ClientAPIsMask = gdrv->api_mask;
if (egl_g3d_add_configs(drv, dpy, 1) == 1) {
@@ -550,7 +562,7 @@ egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf,
_EGLContext *share, const EGLint *attribs)
{
struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
- struct egl_g3d_context *xshare = egl_g3d_context(share);
+ struct egl_g3d_context *gshare = egl_g3d_context(share);
struct egl_g3d_config *gconf = egl_g3d_config(conf);
struct egl_g3d_context *gctx;
const __GLcontextModes *mode;
@@ -575,8 +587,18 @@ egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf,
mode = &gconf->native->mode;
gctx->pipe =
gdpy->native->create_context(gdpy->native, (void *) &gctx->base);
+ if (!gctx->pipe) {
+ free(gctx);
+ return NULL;
+ }
+
gctx->st_ctx = gctx->stapi->st_create_context(gctx->pipe, mode,
- (xshare) ? xshare->st_ctx : NULL);
+ (gshare) ? gshare->st_ctx : NULL);
+ if (!gctx->st_ctx) {
+ gctx->pipe->destroy(gctx->pipe);
+ free(gctx);
+ return NULL;
+ }
return &gctx->base;
}
@@ -599,6 +621,16 @@ egl_g3d_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
return EGL_TRUE;
}
+static EGLBoolean
+init_surface_geometry(_EGLSurface *surf)
+{
+ struct egl_g3d_surface *gsurf = egl_g3d_surface(surf);
+
+ return gsurf->native->validate(gsurf->native, 0x0,
+ &gsurf->sequence_number, NULL,
+ &gsurf->base.Width, &gsurf->base.Height);
+}
+
static _EGLSurface *
egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLConfig *conf, EGLNativeWindowType win,
@@ -626,8 +658,7 @@ egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy,
return NULL;
}
- if (!gsurf->native->validate(gsurf->native, NULL, 0, NULL,
- &gsurf->base.Width, &gsurf->base.Height)) {
+ if (!init_surface_geometry(&gsurf->base)) {
gsurf->native->destroy(gsurf->native);
free(gsurf);
return NULL;
@@ -667,8 +698,7 @@ egl_g3d_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy,
return NULL;
}
- if (!gsurf->native->validate(gsurf->native, NULL, 0, NULL,
- &gsurf->base.Width, &gsurf->base.Height)) {
+ if (!init_surface_geometry(&gsurf->base)) {
gsurf->native->destroy(gsurf->native);
free(gsurf);
return NULL;
@@ -706,6 +736,12 @@ egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy,
return NULL;
}
+ if (!init_surface_geometry(&gsurf->base)) {
+ gsurf->native->destroy(gsurf->native);
+ free(gsurf);
+ return NULL;
+ }
+
gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ?
NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT;
@@ -817,14 +853,110 @@ egl_g3d_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config);
/* force validation if the swap method is not copy */
- if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML)
+ if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) {
gctx->force_validate = EGL_TRUE;
- egl_g3d_validate_context(dpy, &gctx->base);
+ egl_g3d_validate_context(dpy, &gctx->base);
+ }
}
return EGL_TRUE;
}
+/**
+ * Find a config that supports the pixmap.
+ */
+static _EGLConfig *
+find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix)
+{
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ struct egl_g3d_config *gconf;
+ EGLint i;
+
+ for (i = 0; i < dpy->NumConfigs; i++) {
+ gconf = egl_g3d_config(dpy->Configs[i]);
+ if (gdpy->native->is_pixmap_supported(gdpy->native, pix, gconf->native))
+ break;
+ }
+
+ return (i < dpy->NumConfigs) ? &gconf->base : NULL;
+}
+
+/**
+ * Get the pipe surface of the given attachment of the native surface.
+ */
+static struct pipe_surface *
+get_pipe_surface(struct native_display *ndpy, struct native_surface *nsurf,
+ enum native_attachment natt)
+{
+ struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS];
+ struct pipe_surface *psurf;
+
+ textures[natt] = NULL;
+ nsurf->validate(nsurf, 1 << natt, NULL, textures, NULL, NULL);
+ if (!textures[natt])
+ return NULL;
+
+ psurf = ndpy->screen->get_tex_surface(ndpy->screen, textures[natt],
+ 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE);
+ pipe_texture_reference(&textures[natt], NULL);
+
+ return psurf;
+}
+
+static EGLBoolean
+egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
+ NativePixmapType target)
+{
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ struct egl_g3d_surface *gsurf = egl_g3d_surface(surf);
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct egl_g3d_config *gconf;
+ struct native_surface *nsurf;
+ struct pipe_screen *screen = gdpy->native->screen;
+ struct pipe_surface *psurf;
+
+ if (!gsurf->render_surface)
+ return EGL_TRUE;
+
+ gconf = egl_g3d_config(find_pixmap_config(dpy, target));
+ if (!gconf)
+ return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers");
+
+ nsurf = gdpy->native->create_pixmap_surface(gdpy->native,
+ target, gconf->native);
+ if (!nsurf)
+ return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers");
+
+ /* flush if the surface is current */
+ if (ctx && ctx->DrawSurface == &gsurf->base) {
+ struct egl_g3d_context *gctx = egl_g3d_context(ctx);
+ gctx->stapi->st_flush(gctx->st_ctx,
+ PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL);
+ }
+
+ psurf = get_pipe_surface(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT);
+ if (psurf) {
+ struct pipe_context pipe;
+
+ /**
+ * XXX This is hacky. If we might allow the EGLDisplay to create a pipe
+ * context of its own and use the blitter context for this.
+ */
+ memset(&pipe, 0, sizeof(pipe));
+ pipe.screen = screen;
+
+ util_surface_copy(&pipe, FALSE, psurf, 0, 0,
+ gsurf->render_surface, 0, 0, psurf->width, psurf->height);
+
+ pipe_surface_reference(&psurf, NULL);
+ nsurf->flush_frontbuffer(nsurf);
+ }
+
+ nsurf->destroy(nsurf);
+
+ return EGL_TRUE;
+}
+
static EGLBoolean
egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
{
@@ -849,15 +981,14 @@ egl_g3d_wait_native(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine)
}
static _EGLProc
-egl_g3d_get_proc_address(const char *procname)
+egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname)
{
- /* FIXME how come _EGLDriver is not passed? */
- const struct egl_g3d_st *stapi;
+ struct egl_g3d_driver *gdrv = egl_g3d_driver(drv);
_EGLProc proc;
EGLint i;
for (i = 0; i < NUM_EGL_G3D_STS; i++) {
- stapi = egl_g3d_get_st(i);
+ const struct egl_g3d_st *stapi = gdrv->stapis[i];
if (stapi) {
proc = (_EGLProc) stapi->st_get_proc_address(procname);
if (proc)
@@ -1079,6 +1210,7 @@ _eglMain(const char *args)
gdrv->base.API.DestroySurface = egl_g3d_destroy_surface;
gdrv->base.API.MakeCurrent = egl_g3d_make_current;
gdrv->base.API.SwapBuffers = egl_g3d_swap_buffers;
+ gdrv->base.API.CopyBuffers = egl_g3d_copy_buffers;
gdrv->base.API.WaitClient = egl_g3d_wait_client;
gdrv->base.API.WaitNative = egl_g3d_wait_native;
gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address;
diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h
index 33894b614f..1da8af495b 100644
--- a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h
+++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h
@@ -52,9 +52,7 @@ struct egl_g3d_display {
struct egl_g3d_buffer {
struct st_framebuffer *st_fb;
- EGLint num_atts;
- enum native_attachment native_atts[NUM_NATIVE_ATTACHMENTS];
- uint st_atts[NUM_NATIVE_ATTACHMENTS];
+ uint attachment_mask;
};
struct egl_g3d_context {
@@ -73,6 +71,7 @@ struct egl_g3d_surface {
struct native_surface *native;
enum native_attachment render_att;
struct pipe_surface *render_surface;
+ unsigned int sequence_number;
};
struct egl_g3d_config {
diff --git a/src/gallium/state_trackers/egl_g3d/common/native.h b/src/gallium/state_trackers/egl_g3d/common/native.h
index 88d87c6f3a..f374f2e4a6 100644
--- a/src/gallium/state_trackers/egl_g3d/common/native.h
+++ b/src/gallium/state_trackers/egl_g3d/common/native.h
@@ -64,13 +64,19 @@ struct native_surface {
boolean (*flush_frontbuffer)(struct native_surface *nsurf);
/**
- * Validate the buffers of the surface. Those not listed in the attachments
- * will be destroyed. The returned textures are owned by the caller.
+ * Validate the buffers of the surface. textures, if not NULL, points to an
+ * array of size NUM_NATIVE_ATTACHMENTS and the returned textures are owned
+ * by the caller. A sequence number is also returned. The caller can use
+ * it to check if anything has changed since the last call. Any of the
+ * pointers may be NULL and it indicates the caller has no interest in those
+ * values.
+ *
+ * If this function is called multiple times with different attachment
+ * masks, those not listed in the latest call might be destroyed. This
+ * behavior might change in the future.
*/
- boolean (*validate)(struct native_surface *nsurf,
- const enum native_attachment *natts,
- unsigned num_natts,
- struct pipe_texture **textures,
+ boolean (*validate)(struct native_surface *nsurf, uint attachment_mask,
+ unsigned int *seq_num, struct pipe_texture **textures,
int *width, int *height);
/**
@@ -80,6 +86,7 @@ struct native_surface {
};
struct native_config {
+ /* __GLcontextModes should go away some day */
__GLcontextModes mode;
enum pipe_format color_format;
enum pipe_format depth_format;
@@ -107,7 +114,14 @@ struct native_display_modeset;
* the native display server.
*/
struct native_display {
+ /**
+ * The pipe screen of the native display.
+ *
+ * Note that the "flush_frontbuffer" and "update_buffer" callbacks will be
+ * overridden.
+ */
struct pipe_screen *screen;
+
void (*destroy)(struct native_display *ndpy);
/**
@@ -122,6 +136,17 @@ struct native_display {
int *num_configs);
/**
+ * Test if a pixmap is supported by the given config. Required unless no
+ * config has GLX_PIXMAP_BIT set.
+ *
+ * This function is usually called to find a config that supports a given
+ * pixmap. Thus, it is usually called with the same pixmap in a row.
+ */
+ boolean (*is_pixmap_supported)(struct native_display *ndpy,
+ EGLNativePixmapType pix,
+ const struct native_config *nconf);
+
+ /**
* Create a pipe context.
*/
struct pipe_context *(*create_context)(struct native_display *ndpy,
@@ -197,15 +222,19 @@ struct native_display_modeset {
const struct native_mode *nmode);
};
-typedef void (*native_flush_frontbuffer)(void *dummy,
- struct pipe_surface *surf,
- void *context_private);
+/**
+ * Test whether an attachment is set in the mask.
+ */
+static INLINE boolean
+native_attachment_mask_test(uint mask, enum native_attachment att)
+{
+ return !!(mask & (1 << att));
+}
const char *
native_get_name(void);
struct native_display *
-native_create_display(EGLNativeDisplayType dpy,
- native_flush_frontbuffer flush_frontbuffer);
+native_create_display(EGLNativeDisplayType dpy);
#endif /* _NATIVE_H_ */
diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.c b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c
index 0e0babdb14..dc66436630 100644
--- a/src/gallium/state_trackers/egl_g3d/kms/native_kms.c
+++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c
@@ -33,22 +33,17 @@
#include "native_kms.h"
static boolean
-kms_surface_validate(struct native_surface *nsurf,
- const enum native_attachment *natts,
- unsigned num_natts,
- struct pipe_texture **textures,
+kms_surface_validate(struct native_surface *nsurf, uint attachment_mask,
+ unsigned int *seq_num, struct pipe_texture **textures,
int *width, int *height)
{
struct kms_surface *ksurf = kms_surface(nsurf);
struct kms_display *kdpy = ksurf->kdpy;
struct pipe_screen *screen = kdpy->base.screen;
struct pipe_texture templ, *ptex;
- int i;
-
- if (num_natts) {
- if (textures)
- memset(textures, 0, sizeof(*textures) * num_natts);
+ int att;
+ if (attachment_mask) {
memset(&templ, 0, sizeof(templ));
templ.target = PIPE_TEXTURE_2D;
templ.last_level = 0;
@@ -62,19 +57,25 @@ kms_surface_validate(struct native_surface *nsurf,
}
/* create textures */
- for (i = 0; i < num_natts; i++) {
- enum native_attachment natt = natts[i];
+ for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) {
+ /* delay the allocation */
+ if (!native_attachment_mask_test(attachment_mask, att))
+ continue;
- ptex = ksurf->textures[natt];
+ ptex = ksurf->textures[att];
if (!ptex) {
ptex = screen->texture_create(screen, &templ);
- ksurf->textures[natt] = ptex;
+ ksurf->textures[att] = ptex;
}
- if (textures)
- pipe_texture_reference(&textures[i], ptex);
+ if (textures) {
+ textures[att] = NULL;
+ pipe_texture_reference(&textures[att], ptex);
+ }
}
+ if (seq_num)
+ *seq_num = ksurf->sequence_number;
if (width)
*width = ksurf->width;
if (height)
@@ -111,7 +112,7 @@ kms_surface_init_framebuffers(struct native_surface *nsurf, boolean need_back)
if (!fb->texture) {
/* make sure the texture has been allocated */
- kms_surface_validate(&ksurf->base, &natt, 1, NULL, NULL, NULL);
+ kms_surface_validate(&ksurf->base, 1 << natt, NULL, NULL, NULL, NULL);
if (!ksurf->textures[natt])
return FALSE;
@@ -196,6 +197,9 @@ kms_surface_swap_buffers(struct native_surface *nsurf)
ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT];
ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT] = tmp_texture;
+ /* the front/back textures are swapped */
+ ksurf->sequence_number++;
+
return TRUE;
}
@@ -764,8 +768,7 @@ static struct native_display_modeset kms_display_modeset = {
};
static struct native_display *
-kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api,
- native_flush_frontbuffer flush_frontbuffer)
+kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api)
{
struct kms_display *kdpy;
@@ -807,10 +810,6 @@ kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api,
return NULL;
}
- kdpy->base.screen->flush_frontbuffer =
- (void (*)(struct pipe_screen *, struct pipe_surface *, void *))
- flush_frontbuffer;
-
kdpy->base.destroy = kms_display_destroy;
kdpy->base.get_configs = kms_display_get_configs;
kdpy->base.create_context = kms_display_create_context;
@@ -821,13 +820,6 @@ kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api,
return &kdpy->base;
}
-static void
-dummy_flush_frontbuffer(void *dummy, struct pipe_surface *surf,
- void *context_private)
-{
- _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied");
-}
-
/* the api is destroyed with the native display */
static struct drm_api *drm_api;
@@ -848,19 +840,15 @@ native_get_name(void)
}
struct native_display *
-native_create_display(EGLNativeDisplayType dpy,
- native_flush_frontbuffer flush_frontbuffer)
+native_create_display(EGLNativeDisplayType dpy)
{
struct native_display *ndpy = NULL;
if (!drm_api)
drm_api = drm_api_create();
- if (!flush_frontbuffer)
- flush_frontbuffer = dummy_flush_frontbuffer;
-
if (drm_api)
- ndpy = kms_create_display(dpy, drm_api, flush_frontbuffer);
+ ndpy = kms_create_display(dpy, drm_api);
return ndpy;
}
diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.h b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h
index 3f869b25ac..095186e3cf 100644
--- a/src/gallium/state_trackers/egl_g3d/kms/native_kms.h
+++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h
@@ -81,6 +81,7 @@ struct kms_surface {
int width, height;
struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS];
+ unsigned int sequence_number;
struct kms_framebuffer front_fb, back_fb;
boolean is_shown;
diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c
index 0dda786bbd..07f82d878c 100644
--- a/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c
+++ b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c
@@ -64,6 +64,7 @@ struct dri2_surface {
struct pipe_texture *pbuffer_textures[NUM_NATIVE_ATTACHMENTS];
boolean have_back, have_fake;
int width, height;
+ unsigned int sequence_number;
};
struct dri2_config {
@@ -133,21 +134,18 @@ dri2_surface_swap_buffers(struct native_surface *nsurf)
}
static boolean
-dri2_surface_validate(struct native_surface *nsurf,
- const enum native_attachment *natts,
- unsigned num_natts,
- struct pipe_texture **textures,
- int *width, int *height)
+dri2_surface_validate(struct native_surface *nsurf, uint attachment_mask,
+ unsigned int *seq_num, struct pipe_texture **textures,
+ int *width, int *height)
{
struct dri2_surface *dri2surf = dri2_surface(nsurf);
struct dri2_display *dri2dpy = dri2surf->dri2dpy;
unsigned int dri2atts[NUM_NATIVE_ATTACHMENTS];
- EGLint texture_indices[NUM_NATIVE_ATTACHMENTS];
struct pipe_texture templ;
struct x11_drawable_buffer *xbufs;
- int num_ins, num_outs, i;
+ int num_ins, num_outs, att, i;
- if (num_natts) {
+ if (attachment_mask) {
memset(&templ, 0, sizeof(templ));
templ.target = PIPE_TEXTURE_2D;
templ.last_level = 0;
@@ -158,26 +156,31 @@ dri2_surface_validate(struct native_surface *nsurf,
templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET;
if (textures)
- memset(textures, 0, sizeof(*textures) * num_natts);
+ memset(textures, 0, sizeof(*textures) * NUM_NATIVE_ATTACHMENTS);
}
/* create textures for pbuffer */
if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) {
struct pipe_screen *screen = dri2dpy->base.screen;
- for (i = 0; i < num_natts; i++) {
- enum native_attachment natt = natts[i];
- struct pipe_texture *ptex = dri2surf->pbuffer_textures[natt];
+ for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) {
+ struct pipe_texture *ptex = dri2surf->pbuffer_textures[att];
+
+ /* delay the allocation */
+ if (!native_attachment_mask_test(attachment_mask, att))
+ continue;
if (!ptex) {
ptex = screen->texture_create(screen, &templ);
- dri2surf->pbuffer_textures[natt] = ptex;
+ dri2surf->pbuffer_textures[att] = ptex;
}
if (textures)
- pipe_texture_reference(&textures[i], ptex);
+ pipe_texture_reference(&textures[att], ptex);
}
+ if (seq_num)
+ *seq_num = dri2surf->sequence_number;
if (width)
*width = dri2surf->width;
if (height)
@@ -186,48 +189,56 @@ dri2_surface_validate(struct native_surface *nsurf,
return TRUE;
}
- for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++)
- texture_indices[i] = -1;
-
/* prepare the attachments */
- num_ins = num_natts;
- for (i = 0; i < num_natts; i++) {
- unsigned int dri2att;
+ num_ins = 0;
+ for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) {
+ if (native_attachment_mask_test(attachment_mask, att)) {
+ unsigned int dri2att;
+
+ switch (att) {
+ case NATIVE_ATTACHMENT_FRONT_LEFT:
+ dri2att = DRI2BufferFrontLeft;
+ break;
+ case NATIVE_ATTACHMENT_BACK_LEFT:
+ dri2att = DRI2BufferBackLeft;
+ break;
+ case NATIVE_ATTACHMENT_FRONT_RIGHT:
+ dri2att = DRI2BufferFrontRight;
+ break;
+ case NATIVE_ATTACHMENT_BACK_RIGHT:
+ dri2att = DRI2BufferBackRight;
+ break;
+ default:
+ assert(0);
+ dri2att = 0;
+ break;
+ }
- switch (natts[i]) {
- case NATIVE_ATTACHMENT_FRONT_LEFT:
- dri2att = DRI2BufferFrontLeft;
- break;
- case NATIVE_ATTACHMENT_BACK_LEFT:
- dri2att = DRI2BufferBackLeft;
- break;
- case NATIVE_ATTACHMENT_FRONT_RIGHT:
- dri2att = DRI2BufferFrontRight;
- break;
- case NATIVE_ATTACHMENT_BACK_RIGHT:
- dri2att = DRI2BufferBackRight;
- break;
- default:
- assert(0);
- dri2att = 0;
- break;
+ dri2atts[num_ins] = dri2att;
+ num_ins++;
}
- dri2atts[i] = dri2att;
- texture_indices[natts[i]] = i;
}
dri2surf->have_back = FALSE;
dri2surf->have_fake = FALSE;
+ /* remember old geometry */
+ templ.width0 = dri2surf->width;
+ templ.height0 = dri2surf->height;
+
xbufs = x11_drawable_get_buffers(dri2dpy->xscr, dri2surf->drawable,
&dri2surf->width, &dri2surf->height,
dri2atts, FALSE, num_ins, &num_outs);
if (!xbufs)
return FALSE;
- /* update width and height */
- templ.width0 = dri2surf->width;
- templ.height0 = dri2surf->height;
+ if (templ.width0 != dri2surf->width || templ.height0 != dri2surf->height) {
+ /* are there cases where the buffers change and the geometry doesn't? */
+ dri2surf->sequence_number++;
+
+ templ.width0 = dri2surf->width;
+ templ.height0 = dri2surf->height;
+ }
for (i = 0; i < num_outs; i++) {
struct x11_drawable_buffer *xbuf = &xbufs[i];
@@ -254,13 +265,13 @@ dri2_surface_validate(struct native_surface *nsurf,
break;
}
- if (!desc || texture_indices[natt] < 0 ||
- (textures && textures[texture_indices[natt]])) {
+ if (!desc || !native_attachment_mask_test(attachment_mask, natt) ||
+ (textures && textures[natt])) {
if (!desc)
_eglLog(_EGL_WARNING, "unknown buffer %d", xbuf->attachment);
- else if (texture_indices[natt] < 0)
+ else if (!native_attachment_mask_test(attachment_mask, natt))
_eglLog(_EGL_WARNING, "unexpected buffer %d", xbuf->attachment);
- else if (textures && textures[texture_indices[natt]])
+ else
_eglLog(_EGL_WARNING, "both real and fake front buffers are listed");
continue;
}
@@ -272,13 +283,15 @@ dri2_surface_validate(struct native_surface *nsurf,
desc, xbuf->pitch, xbuf->name);
if (ptex) {
/* the caller owns the textures */
- textures[texture_indices[natt]] = ptex;
+ textures[natt] = ptex;
}
}
}
free(xbufs);
+ if (seq_num)
+ *seq_num = dri2surf->sequence_number;
if (width)
*width = dri2surf->width;
if (height)
@@ -575,6 +588,21 @@ dri2_display_get_configs(struct native_display *ndpy, int *num_configs)
return configs;
}
+static boolean
+dri2_display_is_pixmap_supported(struct native_display *ndpy,
+ EGLNativePixmapType pix,
+ const struct native_config *nconf)
+{
+ struct dri2_display *dri2dpy = dri2_display(ndpy);
+ uint depth, nconf_depth;
+
+ depth = x11_drawable_get_depth(dri2dpy->xscr, (Drawable) pix);
+ nconf_depth = util_format_get_blocksizebits(nconf->color_format);
+
+ /* simple depth match for now */
+ return (depth == nconf_depth || (depth == 24 && depth + 8 == nconf_depth));
+}
+
static void
dri2_display_destroy(struct native_display *ndpy)
{
@@ -627,18 +655,8 @@ dri2_display_init_screen(struct native_display *ndpy)
return TRUE;
}
-static void
-dri2_display_flush_frontbuffer(void *dummy, struct pipe_surface *surf,
- void *context_private)
-{
- /* TODO get native surface from context private, and remove the callback */
- _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied");
-}
-
struct native_display *
-x11_create_dri2_display(EGLNativeDisplayType dpy,
- struct drm_api *api,
- native_flush_frontbuffer flush_frontbuffer)
+x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api)
{
struct dri2_display *dri2dpy;
@@ -675,15 +693,9 @@ x11_create_dri2_display(EGLNativeDisplayType dpy,
return NULL;
}
- if (!flush_frontbuffer)
- flush_frontbuffer = dri2_display_flush_frontbuffer;
-
- dri2dpy->base.screen->flush_frontbuffer =
- (void (*)(struct pipe_screen *, struct pipe_surface *, void *))
- flush_frontbuffer;
-
dri2dpy->base.destroy = dri2_display_destroy;
dri2dpy->base.get_configs = dri2_display_get_configs;
+ dri2dpy->base.is_pixmap_supported = dri2_display_is_pixmap_supported;
dri2dpy->base.create_context = dri2_display_create_context;
dri2dpy->base.create_window_surface = dri2_display_create_window_surface;
dri2dpy->base.create_pixmap_surface = dri2_display_create_pixmap_surface;
diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.c b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c
index a4f36e9dec..583ce3d329 100644
--- a/src/gallium/state_trackers/egl_g3d/x11/native_x11.c
+++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c
@@ -48,8 +48,7 @@ native_get_name(void)
}
struct native_display *
-native_create_display(EGLNativeDisplayType dpy,
- native_flush_frontbuffer flush_frontbuffer)
+native_create_display(EGLNativeDisplayType dpy)
{
struct native_display *ndpy = NULL;
boolean force_sw;
@@ -59,14 +58,14 @@ native_create_display(EGLNativeDisplayType dpy,
force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE);
if (api && !force_sw) {
- ndpy = x11_create_dri2_display(dpy, api, flush_frontbuffer);
+ ndpy = x11_create_dri2_display(dpy, api);
}
if (!ndpy) {
EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING;
_eglLog(level, "use software fallback");
- ndpy = x11_create_ximage_display(dpy, TRUE, flush_frontbuffer);
+ ndpy = x11_create_ximage_display(dpy, TRUE);
}
return ndpy;
diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.h b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h
index 9217eb6252..622ddac5df 100644
--- a/src/gallium/state_trackers/egl_g3d/x11/native_x11.h
+++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h
@@ -29,13 +29,9 @@
#include "common/native.h"
struct native_display *
-x11_create_ximage_display(EGLNativeDisplayType dpy,
- boolean use_xshm,
- native_flush_frontbuffer flush_frontbuffer);
+x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm);
struct native_display *
-x11_create_dri2_display(EGLNativeDisplayType dpy,
- struct drm_api *api,
- native_flush_frontbuffer flush_frontbuffer);
+x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api);
#endif /* _NATIVE_X11_H_ */
diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c
index e02faa9b7b..d76107c47f 100644
--- a/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c
+++ b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c
@@ -83,6 +83,7 @@ struct ximage_surface {
GC gc;
struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS];
+ unsigned int sequence_number;
};
struct ximage_config {
@@ -260,6 +261,9 @@ ximage_surface_swap_buffers(struct native_surface *nsurf)
*xfront = *xback;
*xback = xtmp;
+ /* the front/back textures are swapped */
+ xsurf->sequence_number++;
+
return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT);
}
@@ -285,33 +289,29 @@ ximage_surface_update_geometry(struct native_surface *nsurf)
}
static boolean
-ximage_surface_validate(struct native_surface *nsurf,
- const enum native_attachment *natts,
- unsigned num_natts,
- struct pipe_texture **textures,
- int *width, int *height)
+ximage_surface_validate(struct native_surface *nsurf, uint attachment_mask,
+ unsigned int *seq_num, struct pipe_texture **textures,
+ int *width, int *height)
{
struct ximage_surface *xsurf = ximage_surface(nsurf);
- boolean error = FALSE;
- unsigned i;
+ boolean new_buffers = FALSE;
+ int att;
ximage_surface_update_geometry(&xsurf->base);
- if (textures)
- memset(textures, 0, sizeof(*textures) * num_natts);
-
- for (i = 0; i < num_natts; i++) {
- enum native_attachment natt = natts[i];
- struct ximage_buffer *xbuf = &xsurf->buffers[natt];
+ for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) {
+ struct ximage_buffer *xbuf = &xsurf->buffers[att];
- if (!xbuf)
+ /* delay the allocation */
+ if (!native_attachment_mask_test(attachment_mask, att))
continue;
/* reallocate the texture */
if (!xbuf->texture ||
xsurf->width != xbuf->texture->width0 ||
xsurf->height != xbuf->texture->height0) {
- if (ximage_surface_alloc_buffer(&xsurf->base, natt)) {
+ new_buffers = TRUE;
+ if (ximage_surface_alloc_buffer(&xsurf->base, att)) {
/* update ximage */
if (xbuf->ximage) {
xbuf->ximage->width = xbuf->transfer->width;
@@ -321,27 +321,24 @@ ximage_surface_validate(struct native_surface *nsurf,
}
}
- /* allocation failed */
- if (!xbuf->texture) {
- unsigned j;
- for (j = 0; j < i; j++)
- pipe_texture_reference(&textures[j], NULL);
- for (j = i; j < num_natts; j++)
- textures[j] = NULL;
- error = TRUE;
- break;
+ if (textures) {
+ textures[att] = NULL;
+ pipe_texture_reference(&textures[att], xbuf->texture);
}
-
- if (textures)
- pipe_texture_reference(&textures[i], xbuf->texture);
}
+ /* increase the sequence number so that caller knows */
+ if (new_buffers)
+ xsurf->sequence_number++;
+
+ if (seq_num)
+ *seq_num = xsurf->sequence_number;
if (width)
*width = xsurf->width;
if (height)
*height = xsurf->height;
- return !error;
+ return TRUE;
}
static void
@@ -603,6 +600,34 @@ ximage_display_get_configs(struct native_display *ndpy, int *num_configs)
return configs;
}
+static boolean
+ximage_display_is_pixmap_supported(struct native_display *ndpy,
+ EGLNativePixmapType pix,
+ const struct native_config *nconf)
+{
+ struct ximage_display *xdpy = ximage_display(ndpy);
+ enum pipe_format fmt;
+ uint depth;
+
+ depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix);
+ switch (depth) {
+ case 32:
+ fmt = PIPE_FORMAT_A8R8G8B8_UNORM;
+ break;
+ case 24:
+ fmt = PIPE_FORMAT_X8R8G8B8_UNORM;
+ break;
+ case 16:
+ fmt = PIPE_FORMAT_R5G6B5_UNORM;
+ break;
+ default:
+ fmt = PIPE_FORMAT_NONE;
+ break;
+ }
+
+ return (fmt == nconf->color_format);
+}
+
static void
ximage_display_destroy(struct native_display *ndpy)
{
@@ -620,18 +645,8 @@ ximage_display_destroy(struct native_display *ndpy)
free(xdpy);
}
-static void
-ximage_display_flush_frontbuffer(void *dummy, struct pipe_surface *surf,
- void *context_private)
-{
- /* TODO get native surface from context private, and remove the callback */
- _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied");
-}
-
struct native_display *
-x11_create_ximage_display(EGLNativeDisplayType dpy,
- boolean use_xshm,
- native_flush_frontbuffer flush_frontbuffer)
+x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm)
{
struct ximage_display *xdpy;
@@ -660,17 +675,12 @@ x11_create_ximage_display(EGLNativeDisplayType dpy,
(use_xshm && x11_screen_support(xdpy->xscr, X11_SCREEN_EXTENSION_XSHM));
xdpy->winsys = create_sw_winsys();
- if (!flush_frontbuffer)
- flush_frontbuffer = ximage_display_flush_frontbuffer;
- xdpy->winsys->flush_frontbuffer =
- (void (*)(struct pipe_winsys *, struct pipe_surface *, void *))
- flush_frontbuffer;
-
xdpy->base.screen = softpipe_create_screen(xdpy->winsys);
xdpy->base.destroy = ximage_display_destroy;
xdpy->base.get_configs = ximage_display_get_configs;
+ xdpy->base.is_pixmap_supported = ximage_display_is_pixmap_supported;
xdpy->base.create_context = ximage_display_create_context;
xdpy->base.create_window_surface = ximage_display_create_window_surface;
xdpy->base.create_pixmap_surface = ximage_display_create_pixmap_surface;
diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c
index 1e98943242..4d68a88d2e 100644
--- a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c
+++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c
@@ -30,6 +30,7 @@
#include <X11/extensions/XShm.h>
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_format.h"
#include "xf86drm.h"
#include "egllog.h"
@@ -50,6 +51,10 @@ struct x11_screen {
XVisualInfo *visuals;
int num_visuals;
+
+ /* cached values for x11_drawable_get_depth */
+ Drawable last_drawable;
+ unsigned int last_depth;
};
@@ -351,6 +356,37 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable,
}
/**
+ * Return the depth of a drawable.
+ *
+ * Unlike other drawable functions, the drawable needs not be a DRI2 drawable.
+ */
+uint
+x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable)
+{
+ unsigned int depth;
+
+ if (drawable != xscr->last_drawable) {
+ Window root;
+ int x, y;
+ unsigned int w, h, border;
+ Status ok;
+
+ ok = XGetGeometry(xscr->dpy, drawable, &root,
+ &x, &y, &w, &h, &border, &depth);
+ if (!ok)
+ depth = 0;
+
+ xscr->last_drawable = drawable;
+ xscr->last_depth = depth;
+ }
+ else {
+ depth = xscr->last_depth;
+ }
+
+ return depth;
+}
+
+/**
* Create a mode list of the given size.
*/
__GLcontextModes *
diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h
index 86e8e0501a..bf48218905 100644
--- a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h
+++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h
@@ -96,4 +96,7 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable,
int *width, int *height, unsigned int *attachments,
boolean with_format, int num_ins, int *num_outs);
+uint
+x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable);
+
#endif /* _X11_SCREEN_H_ */
diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c
index 84d47b12ed..bd4a85caa0 100644
--- a/src/gallium/state_trackers/glx/xlib/glx_getproc.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c
@@ -193,7 +193,7 @@ _glxapi_get_proc_address(const char *funcName)
}
-__GLXextFuncPtr
+PUBLIC __GLXextFuncPtr
glXGetProcAddressARB(const GLubyte *procName)
{
__GLXextFuncPtr f;
diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i
index 96b13c2258..6879722845 100644
--- a/src/gallium/state_trackers/python/gallium.i
+++ b/src/gallium/state_trackers/python/gallium.i
@@ -76,7 +76,6 @@
%rename(BlendColor) pipe_blend_color;
%rename(Blend) pipe_blend_state;
%rename(Clip) pipe_clip_state;
-%rename(ConstantBuffer) pipe_constant_buffer;
%rename(Depth) pipe_depth_state;
%rename(Stencil) pipe_stencil_state;
%rename(Alpha) pipe_alpha_state;
diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i
index 84ce1a41e6..ce893dad45 100644
--- a/src/gallium/state_trackers/python/p_context.i
+++ b/src/gallium/state_trackers/python/p_context.i
@@ -142,10 +142,7 @@ struct st_context {
void set_constant_buffer(unsigned shader, unsigned index,
struct pipe_buffer *buffer )
{
- struct pipe_constant_buffer state;
- memset(&state, 0, sizeof(state));
- state.buffer = buffer;
- $self->pipe->set_constant_buffer($self->pipe, shader, index, &state);
+ $self->pipe->set_constant_buffer($self->pipe, shader, index, buffer);
}
void set_framebuffer(const struct pipe_framebuffer_state *state )
diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py
index a68709f5cf..b61d47d645 100755
--- a/src/gallium/state_trackers/python/retrace/interpreter.py
+++ b/src/gallium/state_trackers/python/retrace/interpreter.py
@@ -94,7 +94,7 @@ struct_factories = {
"pipe_blend_color": gallium.BlendColor,
"pipe_blend_state": gallium.Blend,
#"pipe_clip_state": gallium.Clip,
- #"pipe_constant_buffer": gallium.ConstantBuffer,
+ #"pipe_buffer": gallium.Buffer,
"pipe_depth_state": gallium.Depth,
"pipe_stencil_state": gallium.Stencil,
"pipe_alpha_state": gallium.Alpha,
@@ -462,10 +462,10 @@ class Context(Object):
sys.stdout.flush()
def set_constant_buffer(self, shader, index, buffer):
- if buffer is not None and buffer.buffer is not None:
- self.real.set_constant_buffer(shader, index, buffer.buffer)
+ if buffer is not None:
+ self.real.set_constant_buffer(shader, index, buffer)
- self.dump_constant_buffer(buffer.buffer)
+ self.dump_constant_buffer(buffer)
def set_framebuffer_state(self, state):
_state = gallium.Framebuffer()
diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c
index a3294e877a..dfe3e465f7 100644
--- a/src/gallium/state_trackers/python/st_softpipe_winsys.c
+++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c
@@ -35,225 +35,10 @@
* @author Jose Fonseca
*/
-
-#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */
-#include "pipe/p_format.h"
-#include "pipe/p_context.h"
-#include "pipe/p_inlines.h"
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
#include "softpipe/sp_winsys.h"
#include "st_winsys.h"
-
-struct st_softpipe_buffer
-{
- struct pipe_buffer base;
- boolean userBuffer; /** Is this a user-space buffer? */
- void *data;
- void *mapped;
-};
-
-
-/** Cast wrapper */
-static INLINE struct st_softpipe_buffer *
-st_softpipe_buffer( struct pipe_buffer *buf )
-{
- return (struct st_softpipe_buffer *)buf;
-}
-
-
-static void *
-st_softpipe_buffer_map(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned flags)
-{
- struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf);
- st_softpipe_buf->mapped = st_softpipe_buf->data;
- return st_softpipe_buf->mapped;
-}
-
-
-static void
-st_softpipe_buffer_unmap(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf);
- st_softpipe_buf->mapped = NULL;
-}
-
-
-static void
-st_softpipe_buffer_destroy(struct pipe_buffer *buf)
-{
- struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf);
-
- if (oldBuf->data) {
- if (!oldBuf->userBuffer)
- align_free(oldBuf->data);
-
- oldBuf->data = NULL;
- }
-
- FREE(oldBuf);
-}
-
-
-static void
-st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
- void *context_private)
-{
-}
-
-
-
-static const char *
-st_softpipe_get_name(struct pipe_winsys *winsys)
-{
- return "softpipe";
-}
-
-
-static struct pipe_buffer *
-st_softpipe_buffer_create(struct pipe_winsys *winsys,
- unsigned alignment,
- unsigned usage,
- unsigned size)
-{
- struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer);
-
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->base.alignment = alignment;
- buffer->base.usage = usage;
- buffer->base.size = size;
-
- buffer->data = align_malloc(size, alignment);
-
- return &buffer->base;
-}
-
-
-/**
- * Create buffer which wraps user-space data.
- */
-static struct pipe_buffer *
-st_softpipe_user_buffer_create(struct pipe_winsys *winsys,
- void *ptr,
- unsigned bytes)
-{
- struct st_softpipe_buffer *buffer;
-
- buffer = CALLOC_STRUCT(st_softpipe_buffer);
- if(!buffer)
- return NULL;
-
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->base.size = bytes;
- buffer->userBuffer = TRUE;
- buffer->data = ptr;
-
- return &buffer->base;
-}
-
-
-static struct pipe_buffer *
-st_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
- unsigned width, unsigned height,
- enum pipe_format format,
- unsigned usage,
- unsigned tex_usage,
- unsigned *stride)
-{
- const unsigned alignment = 64;
- unsigned nblocksy;
-
- nblocksy = util_format_get_nblocksy(format, height);
- *stride = align(util_format_get_stride(format, width), alignment);
-
- return winsys->buffer_create(winsys, alignment,
- usage,
- *stride * nblocksy);
-}
-
-
-static void
-st_softpipe_fence_reference(struct pipe_winsys *winsys,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *fence)
-{
-}
-
-
-static int
-st_softpipe_fence_signalled(struct pipe_winsys *winsys,
- struct pipe_fence_handle *fence,
- unsigned flag)
-{
- return 0;
-}
-
-
-static int
-st_softpipe_fence_finish(struct pipe_winsys *winsys,
- struct pipe_fence_handle *fence,
- unsigned flag)
-{
- return 0;
-}
-
-
-static void
-st_softpipe_destroy(struct pipe_winsys *winsys)
-{
- FREE(winsys);
-}
-
-
-static struct pipe_screen *
-st_softpipe_screen_create(void)
-{
- static struct pipe_winsys *winsys;
- struct pipe_screen *screen;
-
- winsys = CALLOC_STRUCT(pipe_winsys);
- if(!winsys)
- return NULL;
-
- winsys->destroy = st_softpipe_destroy;
-
- winsys->buffer_create = st_softpipe_buffer_create;
- winsys->user_buffer_create = st_softpipe_user_buffer_create;
- winsys->buffer_map = st_softpipe_buffer_map;
- winsys->buffer_unmap = st_softpipe_buffer_unmap;
- winsys->buffer_destroy = st_softpipe_buffer_destroy;
-
- winsys->surface_buffer_create = st_softpipe_surface_buffer_create;
-
- winsys->fence_reference = st_softpipe_fence_reference;
- winsys->fence_signalled = st_softpipe_fence_signalled;
- winsys->fence_finish = st_softpipe_fence_finish;
-
- winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer;
- winsys->get_name = st_softpipe_get_name;
-
- screen = softpipe_create_screen(winsys);
- if(!screen)
- st_softpipe_destroy(winsys);
-
- return screen;
-}
-
-
-static struct pipe_context *
-st_softpipe_context_create(struct pipe_screen *screen)
-{
- return softpipe_create(screen);
-}
-
-
const struct st_winsys st_softpipe_winsys = {
- &st_softpipe_screen_create,
- &st_softpipe_context_create,
+ &softpipe_create_screen_malloc,
+ &softpipe_create,
};
diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore
new file mode 100644
index 0000000000..e33609d251
--- /dev/null
+++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore
@@ -0,0 +1 @@
+*.png
diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore
new file mode 100644
index 0000000000..e33609d251
--- /dev/null
+++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore
@@ -0,0 +1 @@
+*.png
diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c
index 2f984fb7b9..8f69ee0109 100644
--- a/src/gallium/state_trackers/vega/api_filters.c
+++ b/src/gallium/state_trackers/vega/api_filters.c
@@ -147,22 +147,22 @@ static void setup_constant_buffer(struct vg_context *ctx, const void *buffer,
VGint param_bytes)
{
struct pipe_context *pipe = ctx->pipe;
- struct pipe_constant_buffer *cbuf = &ctx->filter.buffer;
+ struct pipe_buffer **cbuf = &ctx->filter.buffer;
/* We always need to get a new buffer, to keep the drivers simple and
* avoid gratuitous rendering synchronization. */
- pipe_buffer_reference(&cbuf->buffer, NULL);
+ pipe_buffer_reference(cbuf, NULL);
- cbuf->buffer = pipe_buffer_create(pipe->screen, 16,
- PIPE_BUFFER_USAGE_CONSTANT,
- param_bytes);
+ *cbuf = pipe_buffer_create(pipe->screen, 16,
+ PIPE_BUFFER_USAGE_CONSTANT,
+ param_bytes);
- if (cbuf->buffer) {
- st_no_flush_pipe_buffer_write(ctx, cbuf->buffer,
+ if (*cbuf) {
+ st_no_flush_pipe_buffer_write(ctx, *cbuf,
0, param_bytes, buffer);
}
- ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf);
+ ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf);
}
static void setup_samplers(struct vg_context *ctx, struct filter_info *info)
diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c
index 4f9f3dae17..97cbe69205 100644
--- a/src/gallium/state_trackers/vega/api_masks.c
+++ b/src/gallium/state_trackers/vega/api_masks.c
@@ -32,7 +32,6 @@
#include "vg_context.h"
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
-#include "pipe/internal/p_winsys_screen.h" /* for winsys->update_buffer */
#include "util/u_pack_color.h"
#include "util/u_draw_quad.h"
@@ -116,8 +115,8 @@ clear_with_quad(struct vg_context *st, float x0, float y0,
x1, y1);
*/
- if (st->pipe->winsys && st->pipe->winsys->update_buffer)
- st->pipe->winsys->update_buffer( st->pipe->winsys,
+ if (st->pipe->screen && st->pipe->screen->update_buffer)
+ st->pipe->screen->update_buffer( st->pipe->screen,
st->pipe->priv );
cso_save_blend(st->cso_context);
diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c
index 42300bb6d5..3e260e7073 100644
--- a/src/gallium/state_trackers/vega/mask.c
+++ b/src/gallium/state_trackers/vega/mask.c
@@ -217,7 +217,7 @@ static void setup_mask_framebuffer(struct pipe_surface *surf,
static void setup_mask_operation(VGMaskOperation operation)
{
struct vg_context *ctx = vg_current_context();
- struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf;
+ struct pipe_buffer **cbuf = &ctx->mask.cbuf;
const VGint param_bytes = 4 * sizeof(VGfloat);
const VGfloat ones[4] = {1.f, 1.f, 1.f, 1.f};
void *shader = 0;
@@ -225,17 +225,17 @@ static void setup_mask_operation(VGMaskOperation operation)
/* We always need to get a new buffer, to keep the drivers simple and
* avoid gratuitous rendering synchronization.
*/
- pipe_buffer_reference(&cbuf->buffer, NULL);
+ pipe_buffer_reference(cbuf, NULL);
- cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1,
- PIPE_BUFFER_USAGE_CONSTANT,
- param_bytes);
- if (cbuf->buffer) {
- st_no_flush_pipe_buffer_write(ctx, cbuf->buffer,
+ *cbuf = pipe_buffer_create(ctx->pipe->screen, 1,
+ PIPE_BUFFER_USAGE_CONSTANT,
+ param_bytes);
+ if (*cbuf) {
+ st_no_flush_pipe_buffer_write(ctx, *cbuf,
0, param_bytes, ones);
}
- ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf);
+ ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf);
switch (operation) {
case VG_UNION_MASK: {
if (!ctx->mask.union_fs) {
@@ -320,22 +320,22 @@ static void setup_mask_samplers(struct pipe_texture *umask)
static void setup_mask_fill(const VGfloat color[4])
{
struct vg_context *ctx = vg_current_context();
- struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf;
+ struct pipe_buffer **cbuf = &ctx->mask.cbuf;
const VGint param_bytes = 4 * sizeof(VGfloat);
/* We always need to get a new buffer, to keep the drivers simple and
* avoid gratuitous rendering synchronization.
*/
- pipe_buffer_reference(&cbuf->buffer, NULL);
+ pipe_buffer_reference(cbuf, NULL);
- cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1,
- PIPE_BUFFER_USAGE_CONSTANT,
- param_bytes);
- if (cbuf->buffer) {
- st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, 0, param_bytes, color);
+ *cbuf = pipe_buffer_create(ctx->pipe->screen, 1,
+ PIPE_BUFFER_USAGE_CONSTANT,
+ param_bytes);
+ if (*cbuf) {
+ st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, color);
}
- ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf);
+ ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf);
cso_set_fragment_shader_handle(ctx->cso_context,
shaders_cache_fill(ctx->sc,
VEGA_SOLID_FILL_SHADER));
diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c
index cc73771d35..d8f6299b2d 100644
--- a/src/gallium/state_trackers/vega/paint.c
+++ b/src/gallium/state_trackers/vega/paint.c
@@ -77,7 +77,8 @@ struct vg_paint {
struct pipe_sampler_state sampler;
} pattern;
- struct pipe_constant_buffer cbuf;
+ /* XXX next 3 all unneded? */
+ struct pipe_buffer *cbuf;
struct pipe_shader_state fs_state;
void *fs;
};
diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c
index b6d282d803..d385ee567f 100644
--- a/src/gallium/state_trackers/vega/polygon.c
+++ b/src/gallium/state_trackers/vega/polygon.c
@@ -293,6 +293,7 @@ static void draw_polygon(struct vg_context *ctx,
/* tell pipe about the vertex attributes */
velement.src_offset = 0;
+ velement.instance_divisor = 0;
velement.vertex_buffer_index = 0;
velement.src_format = PIPE_FORMAT_R32G32_FLOAT;
velement.nr_components = COMPONENTS;
diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c
index d9074a377b..bd5ae79e55 100644
--- a/src/gallium/state_trackers/vega/shader.c
+++ b/src/gallium/state_trackers/vega/shader.c
@@ -51,7 +51,7 @@ struct shader {
VGImageMode image_mode;
float constants[MAX_CONSTANTS];
- struct pipe_constant_buffer cbuf;
+ struct pipe_buffer *cbuf;
struct pipe_shader_state fs_state;
void *fs;
};
@@ -96,25 +96,25 @@ static void setup_constant_buffer(struct shader *shader)
{
struct vg_context *ctx = shader->context;
struct pipe_context *pipe = shader->context->pipe;
- struct pipe_constant_buffer *cbuf = &shader->cbuf;
+ struct pipe_buffer **cbuf = &shader->cbuf;
VGint param_bytes = paint_constant_buffer_size(shader->paint);
float temp_buf[MAX_CONSTANTS];
assert(param_bytes <= sizeof(temp_buf));
paint_fill_constant_buffer(shader->paint, temp_buf);
- if (cbuf->buffer == NULL ||
+ if (*cbuf == NULL ||
memcmp(temp_buf, shader->constants, param_bytes) != 0)
{
- pipe_buffer_reference(&cbuf->buffer, NULL);
+ pipe_buffer_reference(cbuf, NULL);
memcpy(shader->constants, temp_buf, param_bytes);
- cbuf->buffer = pipe_user_buffer_create(pipe->screen,
- &shader->constants,
- sizeof(shader->constants));
+ *cbuf = pipe_user_buffer_create(pipe->screen,
+ &shader->constants,
+ sizeof(shader->constants));
}
- ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf);
+ ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf);
}
static VGint blend_bind_samplers(struct vg_context *ctx,
diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c
index 00d23f5c22..c16ac036e3 100644
--- a/src/gallium/state_trackers/vega/vg_context.c
+++ b/src/gallium/state_trackers/vega/vg_context.c
@@ -122,8 +122,8 @@ struct vg_context * vg_create_context(struct pipe_context *pipe,
void vg_destroy_context(struct vg_context *ctx)
{
- struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf;
- struct pipe_constant_buffer *vsbuf = &ctx->vs_const_buffer;
+ struct pipe_buffer **cbuf = &ctx->mask.cbuf;
+ struct pipe_buffer **vsbuf = &ctx->vs_const_buffer;
util_destroy_blit(ctx->blit);
renderer_destroy(ctx->renderer);
@@ -131,11 +131,11 @@ void vg_destroy_context(struct vg_context *ctx)
shader_destroy(ctx->shader);
paint_destroy(ctx->default_paint);
- if (cbuf && cbuf->buffer)
- pipe_buffer_reference(&cbuf->buffer, NULL);
+ if (*cbuf)
+ pipe_buffer_reference(cbuf, NULL);
- if (vsbuf && vsbuf->buffer)
- pipe_buffer_reference(&vsbuf->buffer, NULL);
+ if (*vsbuf)
+ pipe_buffer_reference(vsbuf, NULL);
if (ctx->clear.fs) {
cso_delete_fragment_shader(ctx->cso_context, ctx->clear.fs);
@@ -371,20 +371,20 @@ void vg_validate_state(struct vg_context *ctx)
2.f/fb->width, 2.f/fb->height, 1, 1,
-1, -1, 0, 0
};
- struct pipe_constant_buffer *cbuf = &ctx->vs_const_buffer;
+ struct pipe_buffer **cbuf = &ctx->vs_const_buffer;
vg_set_viewport(ctx, VEGA_Y0_BOTTOM);
- pipe_buffer_reference(&cbuf->buffer, NULL);
- cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 16,
+ pipe_buffer_reference(cbuf, NULL);
+ *cbuf = pipe_buffer_create(ctx->pipe->screen, 16,
PIPE_BUFFER_USAGE_CONSTANT,
param_bytes);
- if (cbuf->buffer) {
- st_no_flush_pipe_buffer_write(ctx, cbuf->buffer,
+ if (*cbuf) {
+ st_no_flush_pipe_buffer_write(ctx, *cbuf,
0, param_bytes, vs_consts);
}
- ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, cbuf);
+ ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, *cbuf);
}
if ((ctx->state.dirty & VS_DIRTY)) {
cso_set_vertex_shader_handle(ctx->cso_context,
diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h
index ccc8889c8c..bc88c8d139 100644
--- a/src/gallium/state_trackers/vega/vg_context.h
+++ b/src/gallium/state_trackers/vega/vg_context.h
@@ -50,7 +50,7 @@ struct st_renderbuffer {
};
struct st_framebuffer {
- VGint init_width, init_height;
+ VGint width, height;
struct st_renderbuffer *strb;
struct st_renderbuffer *dsrb;
@@ -113,7 +113,7 @@ struct vg_context
} clear;
struct {
- struct pipe_constant_buffer cbuf;
+ struct pipe_buffer *cbuf;
struct pipe_sampler_state sampler;
struct vg_shader *union_fs;
@@ -135,7 +135,7 @@ struct vg_context
struct pipe_sampler_state blend_sampler;
struct {
- struct pipe_constant_buffer buffer;
+ struct pipe_buffer *buffer;
void *color_matrix_fs;
} filter;
struct vg_paint *default_paint;
@@ -145,7 +145,7 @@ struct vg_context
struct vg_shader *plain_vs;
struct vg_shader *clear_vs;
struct vg_shader *texture_vs;
- struct pipe_constant_buffer vs_const_buffer;
+ struct pipe_buffer *vs_const_buffer;
};
struct vg_object {
diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
index ff80aab03a..617c174eb6 100644
--- a/src/gallium/state_trackers/vega/vg_tracker.c
+++ b/src/gallium/state_trackers/vega/vg_tracker.c
@@ -193,8 +193,8 @@ struct st_framebuffer * st_create_framebuffer(const void *visual,
*/
stfb->alpha_mask = 0;
- stfb->init_width = width;
- stfb->init_height = height;
+ stfb->width = width;
+ stfb->height = height;
stfb->privateData = privateData;
}
@@ -282,11 +282,14 @@ void st_resize_framebuffer(struct st_framebuffer *stfb,
/* If this is a noop, exit early and don't do the clear, etc below.
*/
- if (strb->width == width &&
- strb->height == height &&
+ if (stfb->width == width &&
+ stfb->height == height &&
state->zsbuf)
return;
+ stfb->width = width;
+ stfb->height = height;
+
if (strb->width != width || strb->height != height)
st_renderbuffer_alloc_storage(ctx, strb,
width, height);
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index d80f341e6c..8f729b565b 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -379,14 +379,14 @@ struct xorg_renderer * renderer_create(struct pipe_context *pipe)
void renderer_destroy(struct xorg_renderer *r)
{
- struct pipe_constant_buffer *vsbuf = &r->vs_const_buffer;
- struct pipe_constant_buffer *fsbuf = &r->fs_const_buffer;
+ struct pipe_buffer **vsbuf = &r->vs_const_buffer;
+ struct pipe_buffer **fsbuf = &r->fs_const_buffer;
- if (vsbuf && vsbuf->buffer)
- pipe_buffer_reference(&vsbuf->buffer, NULL);
+ if (*vsbuf)
+ pipe_buffer_reference(vsbuf, NULL);
- if (fsbuf && fsbuf->buffer)
- pipe_buffer_reference(&fsbuf->buffer, NULL);
+ if (*fsbuf)
+ pipe_buffer_reference(fsbuf, NULL);
if (r->shaders) {
xorg_shaders_destroy(r->shaders);
@@ -409,20 +409,20 @@ void renderer_set_constants(struct xorg_renderer *r,
const float *params,
int param_bytes)
{
- struct pipe_constant_buffer *cbuf =
+ struct pipe_buffer **cbuf =
(shader_type == PIPE_SHADER_VERTEX) ? &r->vs_const_buffer :
&r->fs_const_buffer;
- pipe_buffer_reference(&cbuf->buffer, NULL);
- cbuf->buffer = pipe_buffer_create(r->pipe->screen, 16,
- PIPE_BUFFER_USAGE_CONSTANT,
- param_bytes);
+ pipe_buffer_reference(cbuf, NULL);
+ *cbuf = pipe_buffer_create(r->pipe->screen, 16,
+ PIPE_BUFFER_USAGE_CONSTANT,
+ param_bytes);
- if (cbuf->buffer) {
- pipe_buffer_write(r->pipe->screen, cbuf->buffer,
+ if (*cbuf) {
+ pipe_buffer_write(r->pipe->screen, *cbuf,
0, param_bytes, params);
}
- r->pipe->set_constant_buffer(r->pipe, shader_type, 0, cbuf);
+ r->pipe->set_constant_buffer(r->pipe, shader_type, 0, *cbuf);
}
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.h b/src/gallium/state_trackers/xorg/xorg_renderer.h
index 5272cde2b3..af6aa0567d 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.h
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.h
@@ -23,8 +23,8 @@ struct xorg_renderer {
int fb_width;
int fb_height;
- struct pipe_constant_buffer vs_const_buffer;
- struct pipe_constant_buffer fs_const_buffer;
+ struct pipe_buffer *vs_const_buffer;
+ struct pipe_buffer *fs_const_buffer;
float buffer[BUF_SIZE];
int buffer_size;
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index 6b5a41a372..5bf0e94b62 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -486,8 +486,11 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
int dxo, dyo;
Bool hdtv;
int x, y, w, h;
- struct exa_pixmap_priv *dst = exaGetPixmapDriverPrivate(pPixmap);
- struct pipe_surface *dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst);
+ struct exa_pixmap_priv *dst;
+ struct pipe_surface *dst_surf = NULL;
+
+ exaMoveInPixmap(pPixmap);
+ dst = exaGetPixmapDriverPrivate(pPixmap);
if (dst && !dst->tex) {
xorg_exa_set_shared_usage(pPixmap);
@@ -497,6 +500,7 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
if (!dst || !dst->tex)
XORG_FALLBACK("Xv destination %s", !dst ? "!dst" : "!dst->tex");
+ dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst);
hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y));
REGION_TRANSLATE(pScrn->pScreen, dstRegion, -pPixmap->screen_x,
@@ -516,7 +520,6 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
bind_samplers(pPriv);
setup_fs_video_constants(pPriv->r, hdtv);
- exaMoveInPixmap(pPixmap);
DamageDamageRegion(&pPixmap->drawable, dstRegion);
while (nbox--) {
diff --git a/src/gallium/winsys/drm/i965/xorg/Makefile b/src/gallium/winsys/drm/i965/xorg/Makefile
index d91d0006ef..c25726b0bb 100644
--- a/src/gallium/winsys/drm/i965/xorg/Makefile
+++ b/src/gallium/winsys/drm/i965/xorg/Makefile
@@ -1,19 +1,25 @@
-TARGET = modesetting_drv.so
-CFILES = $(wildcard ./*.c)
-OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES))
TOP = ../../../../../..
-include $(TOP)/configs/current
-INCLUDES = \
- $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \
- -I../gem \
- -I$(TOP)/src/gallium/include \
- -I$(TOP)/src/gallium/drivers \
- -I$(TOP)/src/gallium/auxiliary \
- -I$(TOP)/src/mesa \
- -I$(TOP)/include \
- -I$(TOP)/src/egl/main
+GALLIUMDIR = $(TOP)/src/gallium
+
+TARGET = i965g_drv.so
+
+CFILES = $(wildcard ./*.c)
+
+include ${TOP}/configs/current
+
+OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES))
+
+CFLAGS = -DHAVE_CONFIG_H \
+ -g -Wall -Wimplicit-function-declaration -fPIC \
+ $(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \
+ -I${GALLIUMDIR}/include \
+ -I${GALLIUMDIR}/drivers \
+ -I${GALLIUMDIR}/auxiliary \
+ -I${TOP}/src/mesa \
+ -I$(TOP)/include \
+ -I$(TOP)/src/egl/main
LIBS = \
$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
@@ -23,20 +29,21 @@ LIBS = \
$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
$(GALLIUM_AUXILIARIES)
-DRIVER_DEFINES = \
- -DHAVE_CONFIG_H
-
-
+TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET)
#############################################
+all default: $(TARGET) $(TARGET_STAGING)
-
-all default: $(TARGET)
-
-$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS)
+$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS)
$(TOP)/bin/mklib -noprefix -o $@ \
$(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel
+$(TOP)/$(LIB_DIR)/gallium:
+ mkdir -p $@
+
+$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium
+ $(INSTALL) $(TARGET) $(TOP)/$(LIB_DIR)/gallium
+
clean:
rm -rf $(OBJECTS) $(TARGET)
@@ -44,14 +51,4 @@ install:
$(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
$(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
-
-##############################################
-
-
-.c.o:
- $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@
-
-
-##############################################
-
.PHONY = all clean install
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 385fa857b5..b020ff38fa 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -58,6 +58,7 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
{
struct radeon_winsys *radeon_ws = (struct radeon_winsys *)ws;
struct radeon_pipe_buffer *radeon_buffer;
+ struct pb_desc desc;
uint32_t domain;
radeon_buffer = CALLOC_STRUCT(radeon_pipe_buffer);
@@ -70,6 +71,14 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
radeon_buffer->base.usage = usage;
radeon_buffer->base.size = size;
+ if (usage == PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) {
+ /* Don't bother allocating a BO, as it'll never get to the card. */
+ desc.alignment = alignment;
+ desc.usage = usage;
+ radeon_buffer->pb = pb_malloc_buffer_create(size, &desc);
+ return &radeon_buffer->base;
+ }
+
domain = 0;
if (usage & PIPE_BUFFER_USAGE_PIXEL) {
@@ -133,8 +142,16 @@ static void radeon_buffer_del(struct pipe_buffer *buffer)
struct radeon_pipe_buffer *radeon_buffer =
(struct radeon_pipe_buffer*)buffer;
- radeon_bo_unref(radeon_buffer->bo);
- free(radeon_buffer);
+ if (radeon_buffer->pb) {
+ pipe_reference_init(&radeon_buffer->pb->base.reference, 0);
+ pb_destroy(radeon_buffer->pb);
+ }
+
+ if (radeon_buffer->bo) {
+ radeon_bo_unref(radeon_buffer->bo);
+ }
+
+ FREE(radeon_buffer);
}
static void *radeon_buffer_map(struct pipe_winsys *ws,
@@ -146,6 +163,10 @@ static void *radeon_buffer_map(struct pipe_winsys *ws,
(struct radeon_pipe_buffer*)buffer;
int write = 0;
+ if (radeon_buffer->pb) {
+ return pb_map(radeon_buffer->pb, flags);
+ }
+
if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) {
uint32_t domain;
@@ -174,7 +195,11 @@ static void radeon_buffer_unmap(struct pipe_winsys *ws,
struct radeon_pipe_buffer *radeon_buffer =
(struct radeon_pipe_buffer*)buffer;
- radeon_bo_unmap(radeon_buffer->bo);
+ if (radeon_buffer->pb) {
+ pb_unmap(radeon_buffer->pb);
+ } else {
+ radeon_bo_unmap(radeon_buffer->bo);
+ }
}
static void radeon_fence_reference(struct pipe_winsys *ws,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
index d7f17564a9..de71cb2f42 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
@@ -36,7 +36,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-//#include "state_tracker/st_public.h"
+#include "pipebuffer/pb_buffer.h"
#include "util/u_memory.h"
@@ -49,7 +49,10 @@
struct radeon_pipe_buffer {
struct pipe_buffer base;
+ /* Pointer to GPU-backed BO. */
struct radeon_bo *bo;
+ /* Pointer to fallback PB buffer. */
+ struct pb_buffer *pb;
boolean flinked;
uint32_t flink;
};
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index 851c223697..9552f0ad6a 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -40,12 +40,16 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys)
struct drm_radeon_info info = {0};
int target = 0;
int retval;
+ drmVersionPtr version;
info.value = (unsigned long)&target;
/* We do things in a specific order here.
*
- * First, the PCI ID. This is essential and should return usable numbers
+ * DRM version first. We need to be sure we're running on a KMS chipset.
+ * This is also for some features.
+ *
+ * Then, the PCI ID. This is essential and should return usable numbers
* for all Radeons. If this fails, we probably got handed an FD for some
* non-Radeon card.
*
@@ -55,8 +59,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys)
*
* The GEM info is actually bogus on the kernel side, as well as our side
* (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
- * we don't actually use the info for anything yet.
- * XXX update the above when we can safely use vram_size instead of vram_visible */
+ * we don't actually use the info for anything yet. */
+
+ version = drmGetVersion(fd);
+ if (version->version_major != 2) {
+ fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
+ "only compatible with 2.x.x\n", __FUNCTION__,
+ version->version_major, version->version_minor,
+ version->version_patchlevel);
+ drmFreeVersion(version);
+ exit(1);
+ }
+
info.request = RADEON_INFO_DEVICE_ID;
retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
if (retval) {
@@ -92,16 +106,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys)
exit(1);
}
winsys->gart_size = gem_info.gart_size;
- /* XXX */
- winsys->vram_size = gem_info.vram_visible;
-}
-
-/* Guess at whether this chipset should use r300g.
- *
- * I believe that this check is valid, but I haven't been exhaustive. */
-static boolean is_r3xx(int pciid)
-{
- return (pciid > 0x3150) && (pciid < 0x796f);
+ winsys->vram_size = gem_info.vram_size;
+
+ debug_printf("radeon: Successfully grabbed chipset info from kernel!\n"
+ "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n"
+ "radeon: GART size: %d MB VRAM size: %d MB\n",
+ version->version_major, version->version_minor,
+ version->version_patchlevel, winsys->pci_id,
+ winsys->gb_pipes, winsys->z_pipes,
+ winsys->gart_size / 1024 / 1024,
+ winsys->vram_size / 1024 / 1024);
+
+ drmFreeVersion(version);
}
/* Create a pipe_screen. */
@@ -136,12 +152,13 @@ struct pipe_context* radeon_create_context(struct drm_api* api,
}
boolean radeon_buffer_from_texture(struct drm_api* api,
+ struct pipe_screen* screen,
struct pipe_texture* texture,
struct pipe_buffer** buffer,
unsigned* stride)
{
/* XXX fix this */
- return r300_get_texture_buffer(texture, buffer, stride);
+ return r300_get_texture_buffer(screen, texture, buffer, stride);
}
/* Create a buffer from a handle. */
@@ -208,7 +225,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api,
struct radeon_pipe_buffer* radeon_buffer;
struct pipe_buffer *buffer = NULL;
- if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) {
+ if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) {
return FALSE;
}
@@ -240,7 +257,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api,
unsigned *handle)
{
struct pipe_buffer *buffer = NULL;
- if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) {
+ if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) {
return FALSE;
}
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h
index bf0e78138d..ddd7983824 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h
@@ -56,6 +56,7 @@ struct pipe_context* radeon_create_context(struct drm_api* api,
struct pipe_screen* screen);
boolean radeon_buffer_from_texture(struct drm_api* api,
+ struct pipe_screen* screen,
struct pipe_texture* texture,
struct pipe_buffer** buffer,
unsigned* stride);
@@ -76,4 +77,13 @@ boolean radeon_global_handle_from_buffer(struct drm_api* api,
unsigned* handle);
void radeon_destroy_drm_api(struct drm_api* api);
+
+/* Guess at whether this chipset should use r300g.
+ *
+ * I believe that this check is valid, but I haven't been exhaustive. */
+static boolean is_r3xx(int pciid)
+{
+ return (pciid > 0x3150) && (pciid < 0x796f);
+}
+
#endif
diff --git a/src/gallium/winsys/drm/radeon/dri/Makefile b/src/gallium/winsys/drm/radeon/dri/Makefile
index a9889444de..eaa3418032 100644
--- a/src/gallium/winsys/drm/radeon/dri/Makefile
+++ b/src/gallium/winsys/drm/radeon/dri/Makefile
@@ -2,7 +2,7 @@
TOP = ../../../../../..
include $(TOP)/configs/current
-LIBNAME = radeon_dri.so
+LIBNAME = radeong_dri.so
MINIGLX_SOURCES =
diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c
index 420dccc92c..1d9bac3871 100644
--- a/src/gallium/winsys/egl_xlib/egl_xlib.c
+++ b/src/gallium/winsys/egl_xlib/egl_xlib.c
@@ -274,7 +274,7 @@ xlib_eglTerminate(_EGLDriver *drv, _EGLDisplay *dpy)
static _EGLProc
-xlib_eglGetProcAddress(const char *procname)
+xlib_eglGetProcAddress(_EGLDriver *drv, const char *procname)
{
return (_EGLProc) st_get_proc_address(procname);
}
diff --git a/src/glsl/cl/sl_cl_parse.c b/src/glsl/cl/sl_cl_parse.c
index e9b3707ac1..e256ab8213 100644
--- a/src/glsl/cl/sl_cl_parse.c
+++ b/src/glsl/cl/sl_cl_parse.c
@@ -345,7 +345,7 @@ struct parse_state {
};
-static __inline unsigned int
+static unsigned int
_emit(struct parse_context *ctx,
unsigned int *out,
unsigned char b)
diff --git a/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h b/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h
index 449329665c..ba6868a306 100644
--- a/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h
+++ b/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h
@@ -33,6 +33,7 @@
#ifndef _BEZIERPATCHMESH_H
#define _BEZIERPATCHMESH_H
+#include <GL/gl.h>
#include "bezierPatch.h"
typedef struct bezierPatchMesh{
diff --git a/src/glu/sgi/libnurbs/interface/glsurfeval.h b/src/glu/sgi/libnurbs/interface/glsurfeval.h
index 1567c6b098..621e59391a 100644
--- a/src/glu/sgi/libnurbs/interface/glsurfeval.h
+++ b/src/glu/sgi/libnurbs/interface/glsurfeval.h
@@ -83,7 +83,7 @@ typedef struct surfEvalMachine{
class StoredVertex {
public:
- StoredVertex() { type = 0; }
+ StoredVertex() { type = 0; coord[0] = 0; coord[1] = 0; point[0] = 0; point[1] = 0; }
~StoredVertex(void) {}
void saveEvalCoord(REAL x, REAL y)
{coord[0] = x; coord[1] = y; type = TYPECOORD; }
diff --git a/src/glu/sgi/libnurbs/internals/mesher.cc b/src/glu/sgi/libnurbs/internals/mesher.cc
index 9cc436adbf..b2d83f4128 100644
--- a/src/glu/sgi/libnurbs/internals/mesher.cc
+++ b/src/glu/sgi/libnurbs/internals/mesher.cc
@@ -58,6 +58,9 @@ Mesher::Mesher( Backend& b )
{
stacksize = 0;
vdata = 0;
+ last[0] = 0;
+ last[1] = 0;
+ itop = 0;
lastedge = 0; //needed to prevent purify UMR
}
diff --git a/src/glu/sgi/libnurbs/internals/reader.cc b/src/glu/sgi/libnurbs/internals/reader.cc
index 6135eef60e..c59240d26a 100644
--- a/src/glu/sgi/libnurbs/internals/reader.cc
+++ b/src/glu/sgi/libnurbs/internals/reader.cc
@@ -64,6 +64,7 @@ O_pwlcurve::O_pwlcurve( long _type, long count, INREAL *array, long byte_stride,
owner = 0;
pts = trimpts;
npts = (int) count;
+ save = 0;
int i;
/* copy user data into internal trimming data structures */
@@ -115,6 +116,7 @@ O_pwlcurve::O_pwlcurve( long _type, long count, INREAL *array, long byte_stride,
owner = 0;
pts = trimpts;
npts = (int) count;
+ save = 0;
/* copy user data into internal trimming data structures */
switch( _type ) {
diff --git a/src/glu/sgi/libnurbs/internals/renderhints.cc b/src/glu/sgi/libnurbs/internals/renderhints.cc
index a3aa62d42c..7025f74f5b 100644
--- a/src/glu/sgi/libnurbs/internals/renderhints.cc
+++ b/src/glu/sgi/libnurbs/internals/renderhints.cc
@@ -54,6 +54,10 @@ Renderhints::Renderhints()
errorchecking = N_MSG;
subdivisions = 6.0;
tmp1 = 0.0;
+ displaydomain = 0;
+ maxsubdivisions = (int) subdivisions;
+ wiretris = 0;
+ wirequads = 0;
}
void
diff --git a/src/glu/sgi/libnurbs/internals/simplemath.h b/src/glu/sgi/libnurbs/internals/simplemath.h
index 0a060c57ea..d00062dc70 100644
--- a/src/glu/sgi/libnurbs/internals/simplemath.h
+++ b/src/glu/sgi/libnurbs/internals/simplemath.h
@@ -38,6 +38,8 @@
/* simple inline routines */
+#include "types.h"
+
inline int
max( int x, int y ) { return ( x < y ) ? y : x; }
diff --git a/src/glu/sgi/libnurbs/internals/slicer.cc b/src/glu/sgi/libnurbs/internals/slicer.cc
index 27d2a650d1..1b18d73c17 100644
--- a/src/glu/sgi/libnurbs/internals/slicer.cc
+++ b/src/glu/sgi/libnurbs/internals/slicer.cc
@@ -1181,6 +1181,10 @@ void Slicer::slice(Arc_ptr loop)
Slicer::Slicer( Backend &b )
: CoveAndTiler( b ), Mesher( b ), backend( b )
{
+ oneOverDu = 0;
+ du = 0;
+ dv = 0;
+ isolines = 0;
ulinear = 0;
vlinear = 0;
}
diff --git a/src/glx/x11/dri2_glx.c b/src/glx/x11/dri2_glx.c
index 83149062f3..7b0c52b50d 100644
--- a/src/glx/x11/dri2_glx.c
+++ b/src/glx/x11/dri2_glx.c
@@ -205,14 +205,14 @@ static int
dri2DrawableGetMSC(__GLXscreenConfigs *psc, __GLXDRIdrawable *pdraw,
int64_t *ust, int64_t *msc, int64_t *sbc)
{
- return DRI2GetMSC(psc->dpy, pdraw->drawable, ust, msc, sbc);
+ return DRI2GetMSC(psc->dpy, pdraw->xDrawable, ust, msc, sbc);
}
static int
dri2WaitForMSC(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
int64_t remainder, int64_t *ust, int64_t *msc, int64_t *sbc)
{
- return DRI2WaitMSC(pdraw->psc->dpy, pdraw->drawable, target_msc, divisor,
+ return DRI2WaitMSC(pdraw->psc->dpy, pdraw->xDrawable, target_msc, divisor,
remainder, ust, msc, sbc);
}
@@ -220,7 +220,7 @@ static int
dri2WaitForSBC(__GLXDRIdrawable *pdraw, int64_t target_sbc, int64_t *ust,
int64_t *msc, int64_t *sbc)
{
- return DRI2WaitSBC(pdraw->psc->dpy, pdraw->drawable, target_sbc, ust, msc,
+ return DRI2WaitSBC(pdraw->psc->dpy, pdraw->xDrawable, target_sbc, ust, msc,
sbc);
}
@@ -381,7 +381,7 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
return 0;
}
- DRI2SwapBuffers(pdraw->psc->dpy, pdraw->drawable, target_msc, divisor,
+ DRI2SwapBuffers(pdraw->psc->dpy, pdraw->xDrawable, target_msc, divisor,
remainder, &ret);
#if __DRI2_FLUSH_VERSION >= 2
@@ -575,11 +575,19 @@ dri2CreateScreen(__GLXscreenConfigs * psc, int screen,
psp->swapBuffers = dri2SwapBuffers;
psp->waitGL = dri2WaitGL;
psp->waitX = dri2WaitX;
- psp->getDrawableMSC = dri2DrawableGetMSC;
- psp->waitForMSC = dri2WaitForMSC;
- psp->waitForSBC = dri2WaitForSBC;
- psp->setSwapInterval = dri2SetSwapInterval;
- psp->getSwapInterval = dri2GetSwapInterval;
+ if (pdp->driMinor >= 2) {
+ psp->getDrawableMSC = dri2DrawableGetMSC;
+ psp->waitForMSC = dri2WaitForMSC;
+ psp->waitForSBC = dri2WaitForSBC;
+ psp->setSwapInterval = dri2SetSwapInterval;
+ psp->getSwapInterval = dri2GetSwapInterval;
+ } else {
+ psp->getDrawableMSC = NULL;
+ psp->waitForMSC = NULL;
+ psp->waitForSBC = NULL;
+ psp->setSwapInterval = NULL;
+ psp->getSwapInterval = NULL;
+ }
/* DRI2 suports SubBuffer through DRI2CopyRegion, so it's always
* available.*/
diff --git a/src/glx/x11/indirect.c b/src/glx/x11/indirect.c
index ea90ce4463..262637a947 100644
--- a/src/glx/x11/indirect.c
+++ b/src/glx/x11/indirect.c
@@ -47,7 +47,7 @@
# else
# define FASTCALL
# endif
-# if defined(__GNUC__)
+# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define NOINLINE __attribute__((noinline))
# else
# define NOINLINE
diff --git a/src/glx/x11/indirect.h b/src/glx/x11/indirect.h
index 19a8c0d134..9e73b33818 100644
--- a/src/glx/x11/indirect.h
+++ b/src/glx/x11/indirect.h
@@ -37,7 +37,7 @@
* \author Ian Romanick <idr@us.ibm.com>
*/
-# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__)
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__)
# define HIDDEN __attribute__((visibility("hidden")))
# else
# define HIDDEN
@@ -47,7 +47,7 @@
# else
# define FASTCALL
# endif
-# if defined(__GNUC__)
+# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define NOINLINE __attribute__((noinline))
# else
# define NOINLINE
diff --git a/src/glx/x11/indirect_size.c b/src/glx/x11/indirect_size.c
index cdaf02ffe6..f8541b5758 100644
--- a/src/glx/x11/indirect_size.c
+++ b/src/glx/x11/indirect_size.c
@@ -29,7 +29,7 @@
#include <GL/gl.h>
#include "indirect_size.h"
-# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define PURE __attribute__((pure))
# else
# define PURE
@@ -41,7 +41,7 @@
# define FASTCALL
# endif
-# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__)
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__)
# define INTERNAL __attribute__((visibility("internal")))
# else
# define INTERNAL
diff --git a/src/glx/x11/indirect_size.h b/src/glx/x11/indirect_size.h
index 9ba0bd6907..af0919f964 100644
--- a/src/glx/x11/indirect_size.h
+++ b/src/glx/x11/indirect_size.h
@@ -36,7 +36,7 @@
* \author Ian Romanick <idr@us.ibm.com>
*/
-# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define PURE __attribute__((pure))
# else
# define PURE
@@ -48,7 +48,7 @@
# define FASTCALL
# endif
-# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__)
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__)
# define INTERNAL __attribute__((visibility("internal")))
# else
# define INTERNAL
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index bac1c3a49c..016f27a6a3 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -295,8 +295,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
key, sizeof(*key),
&brw->cc.vp_bo, 1,
- &cc, sizeof(cc),
- NULL, NULL);
+ &cc, sizeof(cc));
/* Emit CC viewport relocation */
dri_bo_emit_reloc(bo,
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index af1d975de9..d3275c7a89 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -130,13 +130,14 @@ static void compile_clip_prog( struct brw_context *brw,
/* Upload
*/
dri_bo_unreference(brw->clip.prog_bo);
- brw->clip.prog_bo = brw_upload_cache( &brw->cache,
- BRW_CLIP_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->clip.prog_data );
+ brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache,
+ BRW_CLIP_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ sizeof(c.prog_data),
+ &brw->clip.prog_data);
}
/* Calculate interpolants for triangle and line rasterization.
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index c8f24a94e4..22df7722b6 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -143,8 +143,7 @@ clip_unit_create_from_key(struct brw_context *brw,
bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
key, sizeof(*key),
&brw->clip.prog_bo, 1,
- &clip, sizeof(clip),
- NULL, NULL);
+ &clip, sizeof(clip));
/* Emit clip program relocation */
assert(brw->clip.prog_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0dd3087143..79818b92b7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -131,7 +131,6 @@ struct brw_context;
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
#define BRW_NEW_PSP 0x800
#define BRW_NEW_WM_SURFACES 0x1000
-#define BRW_NEW_FENCE 0x2000
#define BRW_NEW_INDICES 0x4000
#define BRW_NEW_VERTICES 0x8000
/**
@@ -332,7 +331,6 @@ struct brw_cache {
struct brw_cache_item **items;
GLuint size, n_items;
- GLuint aux_size[BRW_MAX_CACHE];
char *name[BRW_MAX_CACHE];
/* Record of the last BOs chosen for each cache_id. Used to set
@@ -583,6 +581,7 @@ struct brw_context
struct {
struct brw_vs_prog_data *prog_data;
+ int8_t *constant_map; /* variable array following prog_data */
dri_bo *prog_bo;
dri_bo *state_bo;
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 190310afbb..22e3e732f4 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -256,13 +256,24 @@ static void prepare_constant_buffer(struct brw_context *brw)
*/
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
- /* XXX just use a memcpy here */
- for (i = 0; i < nr; i++) {
- const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
- buf[offset + i * 4 + 0] = value[0];
- buf[offset + i * 4 + 1] = value[1];
- buf[offset + i * 4 + 2] = value[2];
- buf[offset + i * 4 + 3] = value[3];
+ if (vp->use_const_buffer) {
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
+ }
+ } else {
+ for (i = 0; i < nr; i++) {
+ memcpy(buf + offset + i * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 1bc3eccf49..7261b316c1 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -125,12 +125,13 @@ static void compile_gs_prog( struct brw_context *brw,
/* Upload
*/
dri_bo_unreference(brw->gs.prog_bo);
- brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->gs.prog_data );
+ brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ sizeof(c.prog_data),
+ &brw->gs.prog_data);
}
static const GLenum gs_prim[GL_POLYGON+1] = {
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c
index 1af5790a67..7d5a944bf7 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -108,8 +108,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
key, sizeof(*key),
&brw->gs.prog_bo, 1,
- &gs, sizeof(gs),
- NULL, NULL);
+ &gs, sizeof(gs));
if (key->prog_active) {
/* Emit GS program relocation */
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 968890f7fb..8e6839b812 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -117,12 +117,13 @@ static void compile_sf_prog( struct brw_context *brw,
/* Upload
*/
dri_bo_unreference(brw->sf.prog_bo);
- brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->sf.prog_data );
+ brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ sizeof(c.prog_data),
+ &brw->sf.prog_data);
}
/* Calculate interpolants for triangle and line rasterization.
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 09223b7cfb..b9b42cd6d5 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -309,8 +309,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
key, sizeof(*key),
reloc_bufs, 2,
- &sf, sizeof(sf),
- NULL, NULL);
+ &sf, sizeof(sf));
/* STATE_PREFETCH command description describes this state as being
* something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 9c9d145c4b..536fe8b249 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -124,16 +124,26 @@ dri_bo *brw_cache_data(struct brw_cache *cache,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs);
-dri_bo *brw_upload_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_sz,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
- const void *data,
- GLuint data_sz,
- const void *aux,
- void *aux_return );
+drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_sz);
+
+drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_sz,
+ const void *aux,
+ GLuint aux_sz,
+ void *aux_return);
dri_bo *brw_search_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index e4c9ba7d87..5fc47b0420 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -71,25 +71,23 @@
static GLuint
-hash_key(const void *key, GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+hash_key(struct brw_cache_item *item)
{
- GLuint *ikey = (GLuint *)key;
- GLuint hash = 0, i;
+ GLuint *ikey = (GLuint *)item->key;
+ GLuint hash = item->cache_id, i;
- assert(key_size % 4 == 0);
+ assert(item->key_size % 4 == 0);
/* I'm sure this can be improved on:
*/
- for (i = 0; i < key_size/4; i++) {
+ for (i = 0; i < item->key_size/4; i++) {
hash ^= ikey[i];
hash = (hash << 5) | (hash >> 27);
}
/* Include the BO pointers as key data as well */
- ikey = (GLuint *)reloc_bufs;
- key_size = nr_reloc_bufs * sizeof(dri_bo *);
- for (i = 0; i < key_size/4; i++) {
+ ikey = (GLuint *)item->reloc_bufs;
+ for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) {
hash ^= ikey[i];
hash = (hash << 5) | (hash >> 27);
}
@@ -114,11 +112,22 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
cache->brw->state.dirty.cache |= 1 << cache_id;
}
+static int
+brw_cache_item_equals(const struct brw_cache_item *a,
+ const struct brw_cache_item *b)
+{
+ return a->cache_id == b->cache_id &&
+ a->hash == b->hash &&
+ a->key_size == b->key_size &&
+ (memcmp(a->key, b->key, a->key_size) == 0) &&
+ a->nr_reloc_bufs == b->nr_reloc_bufs &&
+ (memcmp(a->reloc_bufs, b->reloc_bufs,
+ a->nr_reloc_bufs * sizeof(dri_bo *)) == 0);
+}
static struct brw_cache_item *
-search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
- GLuint hash, const void *key, GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+search_cache(struct brw_cache *cache, GLuint hash,
+ struct brw_cache_item *lookup)
{
struct brw_cache_item *c;
@@ -133,13 +142,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
#endif
for (c = cache->items[hash % cache->size]; c; c = c->next) {
- if (c->cache_id == cache_id &&
- c->hash == hash &&
- c->key_size == key_size &&
- memcmp(c->key, key, key_size) == 0 &&
- c->nr_reloc_bufs == nr_reloc_bufs &&
- memcmp(c->reloc_bufs, reloc_bufs,
- nr_reloc_bufs * sizeof(dri_bo *)) == 0)
+ if (brw_cache_item_equals(lookup, c))
return c;
}
@@ -182,10 +185,18 @@ brw_search_cache(struct brw_cache *cache,
void *aux_return)
{
struct brw_cache_item *item;
- GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+ struct brw_cache_item lookup;
+ GLuint hash;
- item = search_cache(cache, cache_id, hash, key, key_size,
- reloc_bufs, nr_reloc_bufs);
+ lookup.cache_id = cache_id;
+ lookup.key = key;
+ lookup.key_size = key_size;
+ lookup.reloc_bufs = reloc_bufs;
+ lookup.nr_reloc_bufs = nr_reloc_bufs;
+ hash = hash_key(&lookup);
+ lookup.hash = hash;
+
+ item = search_cache(cache, hash, &lookup);
if (item == NULL)
return NULL;
@@ -200,26 +211,34 @@ brw_search_cache(struct brw_cache *cache,
}
-dri_bo *
-brw_upload_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
- const void *data,
- GLuint data_size,
- const void *aux,
- void *aux_return )
+drm_intel_bo *
+brw_upload_cache_with_auxdata(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_size,
+ const void *aux,
+ GLuint aux_size,
+ void *aux_return)
{
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
- GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+ GLuint hash;
GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
- GLuint aux_size = cache->aux_size[cache_id];
void *tmp;
dri_bo *bo;
int i;
+ item->cache_id = cache_id;
+ item->key = key;
+ item->key_size = key_size;
+ item->reloc_bufs = reloc_bufs;
+ item->nr_reloc_bufs = nr_reloc_bufs;
+ hash = hash_key(item);
+ item->hash = hash;
+
/* Create the buffer object to contain the data */
bo = dri_bo_alloc(cache->brw->intel.bufmgr,
cache->name[cache_id], data_size, 1 << 6);
@@ -229,19 +248,15 @@ brw_upload_cache( struct brw_cache *cache,
tmp = _mesa_malloc(key_size + aux_size + relocs_size);
memcpy(tmp, key, key_size);
- memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
+ memcpy(tmp + key_size, aux, aux_size);
memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
for (i = 0; i < nr_reloc_bufs; i++) {
if (reloc_bufs[i] != NULL)
dri_bo_reference(reloc_bufs[i]);
}
- item->cache_id = cache_id;
item->key = tmp;
- item->hash = hash;
- item->key_size = key_size;
item->reloc_bufs = tmp + key_size + aux_size;
- item->nr_reloc_bufs = nr_reloc_bufs;
item->bo = bo;
dri_bo_reference(bo);
@@ -255,7 +270,6 @@ brw_upload_cache( struct brw_cache *cache,
cache->n_items++;
if (aux_return) {
- assert(cache->aux_size[cache_id]);
*(void **)aux_return = (void *)((char *)item->key + item->key_size);
}
@@ -272,6 +286,23 @@ brw_upload_cache( struct brw_cache *cache,
return bo;
}
+drm_intel_bo *
+brw_upload_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_size)
+{
+ return brw_upload_cache_with_auxdata(cache, cache_id,
+ key, key_size,
+ reloc_bufs, nr_reloc_bufs,
+ data, data_size,
+ NULL, 0,
+ NULL);
+}
/**
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
@@ -292,11 +323,18 @@ brw_cache_data(struct brw_cache *cache,
GLuint nr_reloc_bufs)
{
dri_bo *bo;
- struct brw_cache_item *item;
- GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
-
- item = search_cache(cache, cache_id, hash, data, data_size,
- reloc_bufs, nr_reloc_bufs);
+ struct brw_cache_item *item, lookup;
+ GLuint hash;
+
+ lookup.cache_id = cache_id;
+ lookup.key = data;
+ lookup.key_size = data_size;
+ lookup.reloc_bufs = reloc_bufs;
+ lookup.nr_reloc_bufs = nr_reloc_bufs;
+ hash = hash_key(&lookup);
+ lookup.hash = hash;
+
+ item = search_cache(cache, hash, &lookup);
if (item) {
update_cache_last(cache, cache_id, item->bo);
dri_bo_reference(item->bo);
@@ -306,8 +344,7 @@ brw_cache_data(struct brw_cache *cache,
bo = brw_upload_cache(cache, cache_id,
data, data_size,
reloc_bufs, nr_reloc_bufs,
- data, data_size,
- NULL, NULL);
+ data, data_size);
return bo;
}
@@ -321,11 +358,9 @@ enum pool_type {
static void
brw_init_cache_id(struct brw_cache *cache,
const char *name,
- enum brw_cache_id id,
- GLuint aux_size)
+ enum brw_cache_id id)
{
cache->name[id] = strdup(name);
- cache->aux_size[id] = aux_size;
}
@@ -341,80 +376,28 @@ brw_init_non_surface_cache(struct brw_context *brw)
cache->items = (struct brw_cache_item **)
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
- brw_init_cache_id(cache,
- "CC_VP",
- BRW_CC_VP,
- 0);
-
- brw_init_cache_id(cache,
- "CC_UNIT",
- BRW_CC_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "WM_PROG",
- BRW_WM_PROG,
- sizeof(struct brw_wm_prog_data));
-
- brw_init_cache_id(cache,
- "SAMPLER_DEFAULT_COLOR",
- BRW_SAMPLER_DEFAULT_COLOR,
- 0);
-
- brw_init_cache_id(cache,
- "SAMPLER",
- BRW_SAMPLER,
- 0);
-
- brw_init_cache_id(cache,
- "WM_UNIT",
- BRW_WM_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "SF_PROG",
- BRW_SF_PROG,
- sizeof(struct brw_sf_prog_data));
-
- brw_init_cache_id(cache,
- "SF_VP",
- BRW_SF_VP,
- 0);
-
- brw_init_cache_id(cache,
- "SF_UNIT",
- BRW_SF_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "VS_UNIT",
- BRW_VS_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "VS_PROG",
- BRW_VS_PROG,
- sizeof(struct brw_vs_prog_data));
-
- brw_init_cache_id(cache,
- "CLIP_UNIT",
- BRW_CLIP_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "CLIP_PROG",
- BRW_CLIP_PROG,
- sizeof(struct brw_clip_prog_data));
-
- brw_init_cache_id(cache,
- "GS_UNIT",
- BRW_GS_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "GS_PROG",
- BRW_GS_PROG,
- sizeof(struct brw_gs_prog_data));
+ brw_init_cache_id(cache, "CC_VP", BRW_CC_VP);
+ brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT);
+ brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG);
+ brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR);
+ brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER);
+ brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT);
+ brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG);
+ brw_init_cache_id(cache, "SF_VP", BRW_SF_VP);
+
+ brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT);
+
+ brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT);
+
+ brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG);
+
+ brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT);
+
+ brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG);
+
+ brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT);
+
+ brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG);
}
@@ -430,15 +413,8 @@ brw_init_surface_cache(struct brw_context *brw)
cache->items = (struct brw_cache_item **)
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
- brw_init_cache_id(cache,
- "SS_SURFACE",
- BRW_SS_SURFACE,
- 0);
-
- brw_init_cache_id(cache,
- "SS_SURF_BIND",
- BRW_SS_SURF_BIND,
- 0);
+ brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE);
+ brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index af8dfb4c15..0ecbef1ef9 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -36,13 +36,7 @@
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
-/* This is used to initialize brw->state.atoms[]. We could use this
- * list directly except for a single atom, brw_constant_buffer, which
- * has a .dirty value which changes according to the parameters of the
- * current fragment and vertex programs, and so cannot be a static
- * value.
- */
-const struct brw_tracked_state *atoms[] =
+static const struct brw_tracked_state *atoms[] =
{
&brw_check_fallback,
@@ -208,7 +202,6 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
DEFINE_BIT(BRW_NEW_PSP),
- DEFINE_BIT(BRW_NEW_FENCE),
DEFINE_BIT(BRW_NEW_INDICES),
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index fd055e225e..44b085e214 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -35,6 +35,7 @@
#include "brw_util.h"
#include "brw_state.h"
#include "shader/prog_print.h"
+#include "shader/prog_parameter.h"
@@ -42,9 +43,11 @@ static void do_vs_prog( struct brw_context *brw,
struct brw_vertex_program *vp,
struct brw_vs_prog_key *key )
{
+ GLcontext *ctx = &brw->intel.ctx;
GLuint program_size;
const GLuint *program;
struct brw_vs_compile c;
+ int aux_size;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -73,13 +76,27 @@ static void do_vs_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ /* We upload from &c.prog_data including the constant_map assuming
+ * they're packed together. It would be nice to have a
+ * compile-time assert macro here.
+ */
+ assert(c.constant_map == (int8_t *)&c.prog_data +
+ sizeof(c.prog_data));
+ assert(ctx->Const.VertexProgram.MaxNativeParameters ==
+ ARRAY_SIZE(c.constant_map));
+
+ aux_size = sizeof(c.prog_data);
+ if (c.vp->use_const_buffer)
+ aux_size += c.vp->program.Base.Parameters->NumParameters;
+
dri_bo_unreference(brw->vs.prog_bo);
- brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->vs.prog_data );
+ brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ aux_size,
+ &brw->vs.prog_data);
}
@@ -109,6 +126,8 @@ static void brw_upload_vs_prog(struct brw_context *brw)
&brw->vs.prog_data);
if (brw->vs.prog_bo == NULL)
do_vs_prog(brw, vp, &key);
+ brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
+ sizeof(*brw->vs.prog_data));
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 4a591365c9..95e0501b1e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -51,6 +51,7 @@ struct brw_vs_compile {
struct brw_compile func;
struct brw_vs_prog_key key;
struct brw_vs_prog_data prog_data;
+ int8_t constant_map[1024];
struct brw_vertex_program *vp;
@@ -81,6 +82,8 @@ struct brw_vs_compile {
GLint index;
struct brw_reg reg;
} current_const[3];
+
+ GLboolean needs_stack;
};
void brw_vs_emit( struct brw_vs_compile *c );
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 1b84dd505f..52cc04fee8 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -104,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
if (c->vp->use_const_buffer) {
- /* get constants from a real constant buffer */
- c->prog_data.curb_read_length = 0;
- c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+ int constant = 0;
+
+ /* We've got more constants than we can load with the push
+ * mechanism. This is often correlated with reladdr loads where
+ * we should probably be using a pull mechanism anyway to avoid
+ * excessive reading. However, the pull mechanism is slow in
+ * general. So, we try to allocate as many non-reladdr-loaded
+ * constants through the push buffer as we can before giving up.
+ */
+ memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
+ for (i = 0;
+ i < c->vp->program.Base.NumInstructions && constant < max_constant;
+ i++) {
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+ int arg;
+
+ for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+ if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+ inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+ inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+ inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+ inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
+ inst->SrcReg[arg].RelAddr)
+ continue;
+
+ if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+ c->constant_map[inst->SrcReg[arg].Index] = constant++;
+ }
+ }
+ }
+
+ for (i = 0; i < constant; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
+ (i%2) * 4),
+ 0, 4, 1);
+ }
+ reg += (constant + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ /* XXX 0 causes a bug elsewhere... */
+ c->prog_data.nr_params = MAX2(constant * 4, 4);
}
else {
/* use a section of the GRF for constants */
@@ -214,8 +252,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
}
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
- reg += 2;
+ if (c->needs_stack) {
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+ }
/* Some opcodes need an internal temporary:
*/
@@ -762,15 +802,14 @@ get_constant(struct brw_vs_compile *c,
{
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
- struct brw_reg const_reg;
- struct brw_reg const2_reg;
- const GLboolean relAddr = src->RelAddr;
+ struct brw_reg const_reg = c->current_const[argIndex].reg;
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index || relAddr) {
+ if (c->current_const[argIndex].index != src->Index) {
struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+ /* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
#if 0
@@ -779,48 +818,74 @@ get_constant(struct brw_vs_compile *c,
#endif
/* need to fetch the constant now */
brw_dp_READ_4_vs(p,
- c->current_const[argIndex].reg,/* writeback dest */
+ const_reg, /* writeback dest */
0, /* oword */
- relAddr, /* relative indexing? */
+ 0, /* relative indexing? */
addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
-
- if (relAddr) {
- /* second read */
- const2_reg = get_tmp(c);
-
- /* use upper half of address reg for second read */
- addrReg = stride(addrReg, 0, 4, 0);
- addrReg.subnr = 16;
-
- brw_dp_READ_4_vs(p,
- const2_reg, /* writeback dest */
- 1, /* oword */
- relAddr, /* relative indexing? */
- addrReg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER
- );
- }
}
- const_reg = c->current_const[argIndex].reg;
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
- if (relAddr) {
- /* merge the two Owords into the constant register */
- /* const_reg[7..4] = const2_reg[7..4] */
- brw_MOV(p,
- suboffset(stride(const_reg, 0, 4, 1), 4),
- suboffset(stride(const2_reg, 0, 4, 1), 4));
- release_tmp(c, const2_reg);
- }
- else {
- /* replicate lower four floats into upper half (to get XYZWXYZW) */
- const_reg = stride(const_reg, 0, 4, 0);
- const_reg.subnr = 0;
- }
+ return const_reg;
+}
+
+static struct brw_reg
+get_reladdr_constant(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex)
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg = c->current_const[argIndex].reg;
+ struct brw_reg const2_reg;
+ struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+ assert(argIndex < 3);
+
+ /* Can't reuse a reladdr constant load. */
+ c->current_const[argIndex].index = -1;
+
+ #if 0
+ printf(" fetch const[a0.x+%d] for arg %d into reg %d\n",
+ src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+
+ /* fetch the first vec4 */
+ brw_dp_READ_4_vs(p,
+ const_reg, /* writeback dest */
+ 0, /* oword */
+ 1, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+ /* second vec4 */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ 1, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+
+ /* merge the two Owords into the constant register */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
return const_reg;
}
@@ -928,7 +993,13 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
if (c->vp->use_const_buffer) {
- return get_constant(c, inst, argIndex);
+ if (!relAddr && c->constant_map[index] != -1) {
+ assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+ } else if (relAddr)
+ return get_reladdr_constant(c, inst, argIndex);
+ else
+ return get_constant(c, inst, argIndex);
}
else if (relAddr) {
return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
@@ -1380,12 +1451,14 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_access_mode(p, BRW_ALIGN_16);
-
- /* Message registers can't be read, so copy the output into GRF register
- if they are used in source registers */
+
for (insn = 0; insn < nr_insns; insn++) {
GLuint i;
struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+
+ /* Message registers can't be read, so copy the output into GRF
+ * register if they are used in source registers
+ */
for (i = 0; i < 3; i++) {
struct prog_src_register *src = &inst->SrcReg[i];
GLuint index = src->Index;
@@ -1393,12 +1466,23 @@ void brw_vs_emit(struct brw_vs_compile *c )
if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
c->output_regs[index].used_in_src = GL_TRUE;
}
+
+ switch (inst->Opcode) {
+ case OPCODE_CAL:
+ case OPCODE_RET:
+ c->needs_stack = GL_TRUE;
+ break;
+ default:
+ break;
+ }
}
/* Static register allocation
*/
brw_vs_alloc_regs(c);
- brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+ if (c->needs_stack)
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
for (insn = 0; insn < nr_insns; insn++) {
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 345ffa7ee1..fd9f2fee42 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -164,8 +164,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
key, sizeof(*key),
&brw->vs.prog_bo, 1,
- &vs, sizeof(vs),
- NULL, NULL);
+ &vs, sizeof(vs));
/* Emit VS program relocation */
dri_bo_emit_reloc(bo,
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 3bc9840a97..3f6e16fcb0 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -168,8 +168,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, BRW_VS_MAX_SURF,
- data, data_size,
- NULL, NULL);
+ data, data_size);
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_VS_MAX_SURF; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 72749b3859..bb7a293812 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -172,12 +172,6 @@ static void brw_new_batch( struct intel_context *intel )
}
}
-
-static void brw_note_fence( struct intel_context *intel, GLuint fence )
-{
- brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
-}
-
static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
{
/* nothing */
@@ -193,7 +187,6 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.update_texture_state = 0;
brw->intel.vtbl.invalidate_state = brw_invalidate_state;
- brw->intel.vtbl.note_fence = brw_note_fence;
brw->intel.vtbl.new_batch = brw_new_batch;
brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 6895f64410..fb24379c90 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -199,12 +199,13 @@ static void do_wm_prog( struct brw_context *brw,
program = brw_get_program(&c->func, &program_size);
dri_bo_unreference(brw->wm.prog_bo);
- brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
- &c->key, sizeof(c->key),
- NULL, 0,
- program, program_size,
- &c->prog_data,
- &brw->wm.prog_data );
+ brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
+ &c->key, sizeof(c->key),
+ NULL, 0,
+ program, program_size,
+ &c->prog_data,
+ sizeof(c->prog_data),
+ &brw->wm.prog_data);
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index ad267a4e6a..87387b1e2d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -326,8 +326,7 @@ static void upload_wm_samplers( struct brw_context *brw )
brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
&key, sizeof(key),
brw->wm.sdc_bo, key.sampler_count,
- &sampler, sizeof(sampler),
- NULL, NULL);
+ &sampler, sizeof(sampler));
/* Emit SDC relocations */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index d3373ea79e..a7f80db554 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -210,8 +210,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
key, sizeof(*key),
reloc_bufs, 3,
- &wm, sizeof(wm),
- NULL, NULL);
+ &wm, sizeof(wm));
/* Emit WM program relocation */
dri_bo_emit_reloc(bo,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index f26cfabb7d..357c8c90de 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -256,8 +256,7 @@ brw_create_texture_surface( struct brw_context *brw,
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
- &surf, sizeof(surf),
- NULL, NULL);
+ &surf, sizeof(surf));
if (key->bo) {
/* Emit relocation to surface contents */
@@ -351,8 +350,7 @@ brw_create_constant_surface( struct brw_context *brw,
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
- &surf, sizeof(surf),
- NULL, NULL);
+ &surf, sizeof(surf));
if (key->bo) {
/* Emit relocation to surface contents. Section 5.1.1 of the gen4
@@ -653,8 +651,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
- &surf, sizeof(surf),
- NULL, NULL);
+ &surf, sizeof(surf));
if (region_bo != NULL) {
/* We might sample from it, and we might render to it, so flag
* them both. We might be able to figure out from other state
@@ -701,8 +698,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
- data, data_size,
- NULL, NULL);
+ data, data_size);
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_WM_MAX_SURF; i++) {
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 3f6634c65a..d52fe2eef2 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -506,27 +506,7 @@ intelFlush(GLcontext * ctx)
static void
intel_glFlush(GLcontext *ctx)
{
- struct intel_context *intel = intel_context(ctx);
-
intel_flush(ctx, GL_TRUE);
-
- /* We're using glFlush as an indicator that a frame is done, which is
- * what DRI2 does before calling SwapBuffers (and means we should catch
- * people doing front-buffer rendering, as well)..
- *
- * Wait for the swapbuffers before the one we just emitted, so we don't
- * get too many swaps outstanding for apps that are GPU-heavy but not
- * CPU-heavy.
- *
- * Unfortunately, we don't have a handle to the batch containing the swap,
- * and getting our hands on that doesn't seem worth it, so we just us the
- * first batch we emitted after the last swap.
- */
- if (intel->first_post_swapbuffers_batch != NULL) {
- drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
- drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
- intel->first_post_swapbuffers_batch = NULL;
- }
}
void
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 07207bfbec..6ba281cc14 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -107,7 +107,6 @@ struct intel_context
void (*finish_batch) (struct intel_context * intel);
void (*new_batch) (struct intel_context * intel);
void (*emit_invarient_state) (struct intel_context * intel);
- void (*note_fence) (struct intel_context *intel, GLuint fence);
void (*update_texture_state) (struct intel_context * intel);
void (*render_start) (struct intel_context * intel);
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index e240957197..6c2cb3b57e 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -128,8 +128,29 @@ intelDRI2Flush(__DRIdrawable *drawable)
static void
intelDRI2FlushInvalidate(__DRIdrawable *drawable)
{
+ struct intel_context *intel = drawable->driContextPriv->driverPrivate;
+
intelDRI2Flush(drawable);
drawable->validBuffers = GL_FALSE;
+
+ /* We're using FlushInvalidate as an indicator that a frame is
+ * done. It's only called immediately after SwapBuffers, so it
+ * won't affect front-buffer rendering or applications explicitly
+ * managing swap regions using MESA_copy_buffer.
+ *
+ * Wait for the swapbuffers before the one we just emitted, so we don't
+ * get too many swaps outstanding for apps that are GPU-heavy but not
+ * CPU-heavy.
+ *
+ * Unfortunately, we don't have a handle to the batch containing the swap,
+ * and getting our hands on that doesn't seem worth it, so we just use the
+ * first batch we emitted after the last swap.
+ */
+ if (intel->first_post_swapbuffers_batch != NULL) {
+ drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
+ }
}
static const struct __DRI2flushExtensionRec intelFlushExtension = {
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 8212dc1203..ca33faff87 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -29,8 +29,8 @@ RADEON_COMMON_SOURCES = \
radeon_mipmap_tree.c \
radeon_queryobj.c \
radeon_span.c \
- radeon_texture.c
-
+ radeon_texture.c \
+ radeon_tex_copy.c
DRIVER_SOURCES = r200_context.c \
r200_ioctl.c \
@@ -46,6 +46,7 @@ DRIVER_SOURCES = r200_context.c \
r200_sanity.c \
r200_fragshader.c \
r200_vertprog.c \
+ r200_blit.c \
radeon_screen.c \
$(EGL_SOURCES) \
$(RADEON_COMMON_SOURCES) \
diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c
new file mode 100644
index 0000000000..f899f7efdc
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_blit.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r200_context.h"
+#include "r200_blit.h"
+
+static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
+ int reg, int count)
+{
+ if (count)
+ return CP_PACKET0(reg, count - 1);
+ return CP_PACKET2;
+}
+
+/* common formats supported as both textures and render targets */
+static unsigned is_blit_supported(gl_format mesa_format)
+{
+ /* XXX others? BE/LE? */
+ switch (mesa_format) {
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_A8:
+ break;
+ default:
+ return 0;
+ }
+
+ /* ??? */
+ if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
+ return 0;
+
+ return 1;
+}
+
+static inline void emit_vtx_state(struct r200_context *r200)
+{
+ BATCH_LOCALS(&r200->radeon);
+
+ BEGIN_BATCH(14);
+ if (r200->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, 0);
+ } else {
+ OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS);
+ }
+ OUT_BATCH_REGVAL(R200_SE_VAP_CNTL, (R200_VAP_FORCE_W_TO_ONE |
+ (9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT)));
+ OUT_BATCH_REGVAL(R200_SE_VTX_STATE_CNTL, 0);
+ OUT_BATCH_REGVAL(R200_SE_VTE_CNTL, 0);
+ OUT_BATCH_REGVAL(R200_SE_VTX_FMT_0, R200_VTX_XY);
+ OUT_BATCH_REGVAL(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+ OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
+ RADEON_BFACE_SOLID |
+ RADEON_FFACE_SOLID |
+ RADEON_VTX_PIX_CENTER_OGL |
+ RADEON_ROUND_MODE_ROUND |
+ RADEON_ROUND_PREC_4TH_PIX));
+ END_BATCH();
+}
+
+static void inline emit_tx_setup(struct r200_context *r200,
+ gl_format mesa_format,
+ struct radeon_bo *bo,
+ intptr_t offset,
+ unsigned width,
+ unsigned height,
+ unsigned pitch)
+{
+ uint32_t txformat = R200_TXFORMAT_NON_POWER2;
+ BATCH_LOCALS(&r200->radeon);
+
+ assert(width <= 2047);
+ assert(height <= 2047);
+ assert(offset % 32 == 0);
+
+ /* XXX others? BE/LE? */
+ switch (mesa_format) {
+ case MESA_FORMAT_ARGB8888:
+ txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ case MESA_FORMAT_XRGB8888:
+ txformat |= R200_TXFORMAT_ARGB8888;
+ break;
+ case MESA_FORMAT_RGB565:
+ txformat |= R200_TXFORMAT_RGB565;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ case MESA_FORMAT_A8:
+ txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ default:
+ break;
+ }
+
+ BEGIN_BATCH(28);
+ OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+ OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0);
+ OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0);
+ OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
+ R200_TXC_ARG_B_ZERO |
+ R200_TXC_ARG_C_R0_COLOR |
+ R200_TXC_OP_MADD));
+ OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+ OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_R0_ALPHA |
+ R200_TXA_OP_MADD));
+ OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+ OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST |
+ R200_MAG_FILTER_NEAREST |
+ R200_MIN_FILTER_NEAREST));
+ OUT_BATCH_REGVAL(R200_PP_TXFORMAT_0, txformat);
+ OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0);
+ OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) |
+ ((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
+ OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32);
+
+ OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1);
+ OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+
+ END_BATCH();
+}
+
+static inline void emit_cb_setup(struct r200_context *r200,
+ struct radeon_bo *bo,
+ intptr_t offset,
+ gl_format mesa_format,
+ unsigned pitch,
+ unsigned width,
+ unsigned height)
+{
+ uint32_t dst_pitch = pitch;
+ uint32_t dst_format = 0;
+ BATCH_LOCALS(&r200->radeon);
+
+ /* XXX others? BE/LE? */
+ switch (mesa_format) {
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
+ dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+ break;
+ case MESA_FORMAT_RGB565:
+ dst_format = RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ dst_format = RADEON_COLOR_FORMAT_ARGB4444;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+ break;
+ case MESA_FORMAT_A8:
+ dst_format = RADEON_COLOR_FORMAT_RGB8;
+ break;
+ default:
+ break;
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(22);
+ OUT_BATCH_REGVAL(R200_RE_AUX_SCISSOR_CNTL, 0);
+ OUT_BATCH_REGVAL(R200_RE_CNTL, 0);
+ OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
+ OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) |
+ (height << RADEON_RE_HEIGHT_SHIFT)));
+ OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
+ OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+ OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
+
+ OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
+ OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
+ OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+
+ END_BATCH();
+}
+
+static GLboolean validate_buffers(struct r200_context *r200,
+ struct radeon_bo *src_bo,
+ struct radeon_bo *dst_bo)
+{
+ int ret;
+ radeon_cs_space_add_persistent_bo(r200->radeon.cmdbuf.cs,
+ src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ radeon_cs_space_add_persistent_bo(r200->radeon.cmdbuf.cs,
+ dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+
+ ret = radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs,
+ first_elem(&r200->radeon.dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+/**
+ * Calculate texcoords for given image region.
+ * Output values are [minx, maxx, miny, maxy]
+ */
+static inline void calc_tex_coords(float img_width, float img_height,
+ float x, float y,
+ float reg_width, float reg_height,
+ unsigned flip_y, float *buf)
+{
+ buf[0] = x / img_width;
+ buf[1] = buf[0] + reg_width / img_width;
+ buf[2] = y / img_height;
+ buf[3] = buf[2] + reg_height / img_height;
+ if (flip_y)
+ {
+ buf[2] = 1.0 - buf[1];
+ buf[3] = 1.0 - buf[3];
+ }
+}
+
+static inline void emit_draw_packet(struct r200_context *r200,
+ unsigned src_width, unsigned src_height,
+ unsigned src_x_offset, unsigned src_y_offset,
+ unsigned dst_x_offset, unsigned dst_y_offset,
+ unsigned reg_width, unsigned reg_height,
+ unsigned flip_y)
+{
+ float texcoords[4];
+ float verts[12];
+ BATCH_LOCALS(&r200->radeon);
+
+ calc_tex_coords(src_width, src_height,
+ src_x_offset, src_y_offset,
+ reg_width, reg_height,
+ flip_y, texcoords);
+
+ verts[0] = dst_x_offset;
+ verts[1] = dst_y_offset + reg_height;
+ verts[2] = texcoords[0];
+ verts[3] = texcoords[3];
+
+ verts[4] = dst_x_offset + reg_width;
+ verts[5] = dst_y_offset + reg_height;
+ verts[6] = texcoords[1];
+ verts[7] = texcoords[3];
+
+ verts[8] = dst_x_offset + reg_width;
+ verts[9] = dst_y_offset;
+ verts[10] = texcoords[1];
+ verts[11] = texcoords[2];
+
+ BEGIN_BATCH(14);
+ OUT_BATCH(R200_CP_CMD_3D_DRAW_IMMD_2 | (12 << 16));
+ OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ (3 << 16));
+ OUT_BATCH_TABLE(verts, 12);
+ END_BATCH();
+}
+
+/**
+ * Copy a region of [@a width x @a height] pixels from source buffer
+ * to destination buffer.
+ * @param[in] r200 r200 context
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] width region width
+ * @param[in] height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ */
+unsigned r200_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned reg_width,
+ unsigned reg_height,
+ unsigned flip_y)
+{
+ struct r200_context *r200 = R200_CONTEXT(ctx);
+
+ if (!is_blit_supported(dst_mesaformat))
+ return GL_FALSE;
+
+ /* Make sure that colorbuffer has even width - hw limitation */
+ if (dst_pitch % 2 > 0)
+ ++dst_pitch;
+
+ /* Rendering to small buffer doesn't work.
+ * Looks like a hw limitation.
+ */
+ if (dst_pitch < 32)
+ return GL_FALSE;
+
+ /* Need to clamp the region size to make sure
+ * we don't read outside of the source buffer
+ * or write outside of the destination buffer.
+ */
+ if (reg_width + src_x_offset > src_width)
+ reg_width = src_width - src_x_offset;
+ if (reg_height + src_y_offset > src_height)
+ reg_height = src_height - src_y_offset;
+ if (reg_width + dst_x_offset > dst_width)
+ reg_width = dst_width - dst_x_offset;
+ if (reg_height + dst_y_offset > dst_height)
+ reg_height = dst_height - dst_y_offset;
+
+ if (src_bo == dst_bo) {
+ return GL_FALSE;
+ }
+
+ if (0) {
+ fprintf(stderr, "src: size [%d x %d], pitch %d, "
+ "offset [%d x %d], format %s, bo %p\n",
+ src_width, src_height, src_pitch,
+ src_x_offset, src_y_offset,
+ _mesa_get_format_name(src_mesaformat),
+ src_bo);
+ fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+ dst_pitch, dst_x_offset, dst_y_offset,
+ _mesa_get_format_name(dst_mesaformat), dst_bo);
+ fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
+ }
+
+ /* Flush is needed to make sure that source buffer has correct data */
+ radeonFlush(r200->radeon.glCtx);
+
+ rcommonEnsureCmdBufSpace(&r200->radeon, 78, __FUNCTION__);
+
+ if (!validate_buffers(r200, src_bo, dst_bo))
+ return GL_FALSE;
+
+ /* 14 */
+ emit_vtx_state(r200);
+ /* 28 */
+ emit_tx_setup(r200, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+ /* 22 */
+ emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+ /* 14 */
+ emit_draw_packet(r200, src_width, src_height,
+ src_x_offset, src_y_offset,
+ dst_x_offset, dst_y_offset,
+ reg_width, reg_height,
+ flip_y);
+
+ radeonFlush(ctx);
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_blit.h b/src/mesa/drivers/dri/r200/r200_blit.h
new file mode 100644
index 0000000000..38487266ae
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_blit.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef R200_BLIT_H
+#define R200_BLIT_H
+
+void r200_blit_init(struct r200_context *r200);
+
+unsigned r200_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned width,
+ unsigned height,
+ unsigned flip_y);
+
+#endif // R200_BLIT_H
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index f34e319222..3d6d0f5ec0 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -61,6 +61,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_maos.h"
#include "r200_vertprog.h"
#include "radeon_queryobj.h"
+#include "r200_blit.h"
#include "radeon_span.h"
@@ -268,6 +269,7 @@ static void r200_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.fallback = r200Fallback;
radeon->vtbl.update_scissor = r200_vtbl_update_scissor;
radeon->vtbl.emit_query_finish = r200_emit_query_finish;
+ radeon->vtbl.blit = r200_blit;
}
@@ -294,6 +296,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
if ( !rmesa )
return GL_FALSE;
+ rmesa->radeon.radeonScreen = screen;
r200_init_vtbl(&rmesa->radeon);
/* init exp fog table data */
r200InitStaticFogData();
@@ -326,10 +329,14 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
r200InitDriverFuncs(&functions);
r200InitIoctlFuncs(&functions);
r200InitStateFuncs(&functions);
- r200InitTextureFuncs(&functions);
+ r200InitTextureFuncs(&rmesa->radeon, &functions);
r200InitShaderFuncs(&functions);
radeonInitQueryObjFunctions(&functions);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ r200_init_texcopy_functions(&functions);
+ }
+
if (!radeonInitContext(&rmesa->radeon, &functions,
glVisual, driContextPriv,
sharedContextPrivate)) {
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
index 17e4d8962e..a9dce310ae 100644
--- a/src/mesa/drivers/dri/r200/r200_context.h
+++ b/src/mesa/drivers/dri/r200/r200_context.h
@@ -645,6 +645,8 @@ extern GLboolean r200MakeCurrent( __DRIcontext *driContextPriv,
__DRIdrawable *driReadPriv );
extern GLboolean r200UnbindContext( __DRIcontext *driContextPriv );
+extern void r200_init_texcopy_functions(struct dd_function_table *table);
+
/* ================================================================
* Debugging:
*/
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index 5b87ba6ccd..0916df6476 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -477,7 +477,7 @@ static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx,
-void r200InitTextureFuncs( struct dd_function_table *functions )
+void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
{
/* Note: we only plug in the functions we implement in the driver
* since _mesa_init_driver_functions() was already called.
@@ -511,6 +511,11 @@ void r200InitTextureFuncs( struct dd_function_table *functions )
functions->CompressedTexImage2D = radeonCompressedTexImage2D;
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+ if (radeon->radeonScreen->kernel_mm) {
+ functions->CopyTexImage2D = radeonCopyTexImage2D;
+ functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+ }
+
functions->GenerateMipmap = radeonGenerateMipmap;
functions->NewTextureImage = radeonNewTextureImage;
diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
index e122de6e5e..1a1e7038df 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.h
+++ b/src/mesa/drivers/dri/r200/r200_tex.h
@@ -48,7 +48,7 @@ extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint
extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
-extern void r200InitTextureFuncs( struct dd_function_table *functions );
+extern void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
extern void r200UpdateFragmentShader( GLcontext *ctx );
diff --git a/src/mesa/drivers/dri/r200/radeon_tex_copy.c b/src/mesa/drivers/dri/r200/radeon_tex_copy.c
new file mode 120000
index 0000000000..dfa5ba34e6
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_tex_copy.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_copy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index be005bd164..0d0fbcc408 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -39,7 +39,8 @@ RADEON_COMMON_SOURCES = \
radeon_mipmap_tree.c \
radeon_span.c \
radeon_queryobj.c \
- radeon_texture.c
+ radeon_texture.c \
+ radeon_tex_copy.c
DRIVER_SOURCES = \
radeon_screen.c \
@@ -50,7 +51,6 @@ DRIVER_SOURCES = \
r300_state.c \
r300_render.c \
r300_tex.c \
- r300_texcopy.c \
r300_texstate.c \
r300_vertprog.c \
r300_fragprog_common.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 731adc1af2..f27f858652 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -23,6 +23,8 @@
#ifndef RADEON_COMPILER_H
#define RADEON_COMPILER_H
+#include "../../../../main/compiler.h"
+
#include "memory_pool.h"
#include "radeon_code.h"
#include "radeon_program.h"
diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c
index 2eec27e900..e24c7955d4 100644
--- a/src/mesa/drivers/dri/r300/r300_blit.c
+++ b/src/mesa/drivers/dri/r300/r300_blit.c
@@ -150,8 +150,8 @@ static void r300_emit_tx_setup(struct r300_context *r300,
(R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT) |
(R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT) |
R300_TX_MIN_FILTER_MIP_NONE |
- R300_TX_MIN_FILTER_LINEAR |
- R300_TX_MAG_FILTER_LINEAR |
+ R300_TX_MIN_FILTER_NEAREST |
+ R300_TX_MAG_FILTER_NEAREST |
(0 << 28));
OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0);
OUT_BATCH_REGVAL(R300_TX_SIZE_0,
@@ -403,9 +403,8 @@ static void calc_tex_coords(float img_width, float img_height,
buf[3] = buf[2] + reg_height / img_height;
if (flip_y)
{
- float tmp = buf[2];
- buf[2] = 1.0 - buf[3];
- buf[3] = 1.0 - tmp;
+ buf[2] = 1.0 - buf[2];
+ buf[3] = 1.0 - buf[3];
}
}
@@ -424,13 +423,13 @@ static void emit_draw_packet(struct r300_context *r300,
flip_y, texcoords);
float verts[] = { dst_x_offset, dst_y_offset,
- texcoords[0], texcoords[3],
- dst_x_offset, dst_y_offset + reg_height,
texcoords[0], texcoords[2],
+ dst_x_offset, dst_y_offset + reg_height,
+ texcoords[0], texcoords[3],
dst_x_offset + reg_width, dst_y_offset + reg_height,
- texcoords[1], texcoords[2],
+ texcoords[1], texcoords[3],
dst_x_offset + reg_width, dst_y_offset,
- texcoords[1], texcoords[3] };
+ texcoords[1], texcoords[2] };
BATCH_LOCALS(&r300->radeon);
@@ -495,6 +494,27 @@ static void emit_cb_setup(struct r300_context *r300,
END_BATCH();
}
+static unsigned is_blit_supported(gl_format dst_format)
+{
+ switch (dst_format) {
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_XRGB8888:
+ break;
+ default:
+ return 0;
+ }
+
+ if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0)
+ return 0;
+
+ return 1;
+}
+
/**
* Copy a region of [@a width x @a height] pixels from source buffer
* to destination buffer.
@@ -519,29 +539,31 @@ static void emit_cb_setup(struct r300_context *r300,
* @param[in] height region height
* @param[in] flip_y set if y coords of the source image need to be flipped
*/
-GLboolean r300_blit(struct r300_context *r300,
- struct radeon_bo *src_bo,
- intptr_t src_offset,
- gl_format src_mesaformat,
- unsigned src_pitch,
- unsigned src_width,
- unsigned src_height,
- unsigned src_x_offset,
- unsigned src_y_offset,
- struct radeon_bo *dst_bo,
- intptr_t dst_offset,
- gl_format dst_mesaformat,
- unsigned dst_pitch,
- unsigned dst_width,
- unsigned dst_height,
- unsigned dst_x_offset,
- unsigned dst_y_offset,
- unsigned reg_width,
- unsigned reg_height,
- unsigned flip_y)
+unsigned r300_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned reg_width,
+ unsigned reg_height,
+ unsigned flip_y)
{
- if (_mesa_get_format_bits(src_mesaformat, GL_DEPTH_BITS) > 0)
- return GL_FALSE;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (!is_blit_supported(dst_mesaformat))
+ return 0;
/* Make sure that colorbuffer has even width - hw limitation */
if (dst_pitch % 2 > 0)
@@ -551,7 +573,7 @@ GLboolean r300_blit(struct r300_context *r300,
* Looks like a hw limitation.
*/
if (dst_pitch < 32)
- return GL_FALSE;
+ return 0;
/* Need to clamp the region size to make sure
* we don't read outside of the source buffer
@@ -567,6 +589,10 @@ GLboolean r300_blit(struct r300_context *r300,
reg_height = dst_height - dst_y_offset;
if (src_bo == dst_bo) {
+ return 0;
+ }
+
+ if (src_offset % 32 || dst_offset % 32) {
return GL_FALSE;
}
@@ -587,7 +613,7 @@ GLboolean r300_blit(struct r300_context *r300,
radeonFlush(r300->radeon.glCtx);
if (!validate_buffers(r300, src_bo, dst_bo))
- return GL_FALSE;
+ return 0;
rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__);
@@ -618,5 +644,5 @@ GLboolean r300_blit(struct r300_context *r300,
radeonFlush(r300->radeon.glCtx);
- return GL_TRUE;
-} \ No newline at end of file
+ return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h
index dc21e88098..735acaddd7 100644
--- a/src/mesa/drivers/dri/r300/r300_blit.h
+++ b/src/mesa/drivers/dri/r300/r300_blit.h
@@ -30,25 +30,25 @@
void r300_blit_init(struct r300_context *r300);
-GLboolean r300_blit(struct r300_context *r300,
- struct radeon_bo *src_bo,
- intptr_t src_offset,
- gl_format src_mesaformat,
- unsigned src_pitch,
- unsigned src_width,
- unsigned src_height,
- unsigned src_x_offset,
- unsigned src_y_offset,
- struct radeon_bo *dst_bo,
- intptr_t dst_offset,
- gl_format dst_mesaformat,
- unsigned dst_pitch,
- unsigned dst_width,
- unsigned dst_height,
- unsigned dst_x_offset,
- unsigned dst_y_offset,
- unsigned width,
- unsigned height,
- unsigned flip_y);
+unsigned r300_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned reg_width,
+ unsigned reg_height,
+ unsigned flip_y);
#endif // R300_BLIT_H \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 1f6ccf6ddc..bb0e6db313 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -93,8 +93,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/remap_helper.h"
-void r300_init_texcopy_functions(struct dd_function_table *table);
-
static const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
{"GL_ARB_depth_texture", NULL},
@@ -326,6 +324,8 @@ static void r300_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z;
} else
radeon->vtbl.emit_query_finish = r300_emit_query_finish;
+
+ radeon->vtbl.blit = r300_blit;
}
static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
@@ -488,15 +488,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
_mesa_init_driver_functions(&functions);
r300InitIoctlFuncs(&functions);
r300InitStateFuncs(&functions);
- r300InitTextureFuncs(&functions);
+ r300InitTextureFuncs(&r300->radeon, &functions);
r300InitShaderFuncs(&functions);
radeonInitQueryObjFunctions(&functions);
radeonInitBufferObjectFuncs(&functions);
- if (r300->radeon.radeonScreen->kernel_mm) {
- r300_init_texcopy_functions(&functions);
- }
-
if (!radeonInitContext(&r300->radeon, &functions,
glVisual, driContextPriv,
sharedContextPrivate)) {
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 546cd8ddde..78ab43a99f 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -554,8 +554,6 @@ extern void r300InitShaderFunctions(r300ContextPtr r300);
extern void r300InitDraw(GLcontext *ctx);
-extern void r300_init_texcopy_functions(struct dd_function_table *table);
-
#define r300PackFloat32 radeonPackFloat32
#define r300PackFloat24 radeonPackFloat24
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 963f648cb1..eb5d2d5004 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -312,7 +312,7 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
return &t->base;
}
-void r300InitTextureFuncs(struct dd_function_table *functions)
+void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
{
/* Note: we only plug in the functions we implement in the driver
* since _mesa_init_driver_functions() was already called.
@@ -340,6 +340,11 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
functions->CompressedTexImage2D = radeonCompressedTexImage2D;
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+ if (radeon->radeonScreen->kernel_mm) {
+ functions->CopyTexImage2D = radeonCopyTexImage2D;
+ functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+ }
+
functions->GenerateMipmap = radeonGenerateMipmap;
driInitTextureFormats();
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
index 6ede0fe25c..9694e703b8 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.h
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -49,7 +49,7 @@ extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
extern GLboolean r300ValidateBuffers(GLcontext * ctx);
-extern void r300InitTextureFuncs(struct dd_function_table *functions);
+extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions);
int32_t r300TranslateTexFormat(gl_format mesaFormat);
diff --git a/src/mesa/drivers/dri/r300/radeon_tex_copy.c b/src/mesa/drivers/dri/r300/radeon_tex_copy.c
new file mode 120000
index 0000000000..dfa5ba34e6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tex_copy.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_copy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index 26f47b7268..e55d0babd8 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -39,7 +39,8 @@ RADEON_COMMON_SOURCES = \
radeon_mipmap_tree.c \
radeon_span.c \
radeon_texture.c \
- radeon_queryobj.c
+ radeon_queryobj.c \
+ radeon_tex_copy.c
DRIVER_SOURCES = \
radeon_screen.c \
@@ -59,6 +60,7 @@ DRIVER_SOURCES = \
r700_render.c \
r600_tex.c \
r600_texstate.c \
+ r600_blit.c \
r700_debug.c \
$(RADEON_COMMON_SOURCES) \
$(EGL_SOURCES) \
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
new file mode 100644
index 0000000000..d7cd59ade6
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -0,0 +1,1660 @@
+/*
+ * Copyright (C) 2009 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r600_context.h"
+
+#include "r600_blit.h"
+#include "r600_blit_shaders.h"
+#include "r600_cmdbuf.h"
+
+/* common formats supported as both textures and render targets */
+static unsigned is_blit_supported(gl_format mesa_format)
+{
+ switch (mesa_format) {
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_SIGNED_RGBA8888:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_RGB565_REV:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB4444_REV:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_ARGB1555_REV:
+ case MESA_FORMAT_AL88:
+ case MESA_FORMAT_AL88_REV:
+ case MESA_FORMAT_RGB332:
+ case MESA_FORMAT_A8:
+ case MESA_FORMAT_I8:
+ case MESA_FORMAT_CI8:
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_RGBA_FLOAT32:
+ case MESA_FORMAT_RGBA_FLOAT16:
+ case MESA_FORMAT_ALPHA_FLOAT32:
+ case MESA_FORMAT_ALPHA_FLOAT16:
+ case MESA_FORMAT_LUMINANCE_FLOAT32:
+ case MESA_FORMAT_LUMINANCE_FLOAT16:
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+ case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+ case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
+ case MESA_FORMAT_Z24_S8:
+ case MESA_FORMAT_Z16:
+ case MESA_FORMAT_Z32:
+ case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SLA8:
+ case MESA_FORMAT_SL8:
+ break;
+ default:
+ return 0;
+ }
+
+ /* ??? */
+ /* not sure blit to depth works or not yet */
+ if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
+ return 0;
+
+ return 1;
+}
+
+static inline void
+set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_format,
+ int nPitchInPixel, int w, int h, intptr_t dst_offset)
+{
+ uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view = 0;
+ int id = 0;
+ uint32_t comp_swap, format;
+ BATCH_LOCALS(&context->radeon);
+
+ cb_color0_base = dst_offset / 256;
+
+ SETfield(cb_color0_size, (nPitchInPixel / 8) - 1,
+ PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+ SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1,
+ SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+
+ SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+ SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL,
+ CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+
+ SETbit(cb_color0_info, BLEND_BYPASS_bit);
+
+ switch(mesa_format) {
+ case MESA_FORMAT_RGBA8888:
+ format = COLOR_8_8_8_8;
+ comp_swap = SWAP_STD_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_SIGNED_RGBA8888:
+ format = COLOR_8_8_8_8;
+ comp_swap = SWAP_STD_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_RGBA8888_REV:
+ format = COLOR_8_8_8_8;
+ comp_swap = SWAP_STD;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
+ format = COLOR_8_8_8_8;
+ comp_swap = SWAP_STD;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
+ format = COLOR_8_8_8_8;
+ comp_swap = SWAP_ALT;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_XRGB8888_REV:
+ format = COLOR_8_8_8_8;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_RGB565:
+ format = COLOR_5_6_5;
+ comp_swap = SWAP_STD_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_RGB565_REV:
+ format = COLOR_5_6_5;
+ comp_swap = SWAP_STD;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ARGB4444:
+ format = COLOR_4_4_4_4;
+ comp_swap = SWAP_ALT;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ARGB4444_REV:
+ format = COLOR_4_4_4_4;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ARGB1555:
+ format = COLOR_1_5_5_5;
+ comp_swap = SWAP_ALT;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ARGB1555_REV:
+ format = COLOR_1_5_5_5;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_AL88:
+ format = COLOR_8_8;
+ comp_swap = SWAP_STD;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_AL88_REV:
+ format = COLOR_8_8;
+ comp_swap = SWAP_STD_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_RGB332:
+ format = COLOR_3_3_2;
+ comp_swap = SWAP_STD_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_A8:
+ format = COLOR_8;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_I8:
+ case MESA_FORMAT_CI8:
+ format = COLOR_8;
+ comp_swap = SWAP_STD;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_L8:
+ format = COLOR_8;
+ comp_swap = SWAP_ALT;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_RGBA_FLOAT32:
+ format = COLOR_32_32_32_32_FLOAT;
+ comp_swap = SWAP_STD_REV;
+ SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_RGBA_FLOAT16:
+ format = COLOR_16_16_16_16_FLOAT;
+ comp_swap = SWAP_STD_REV;
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT32:
+ format = COLOR_32_FLOAT;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT16:
+ format = COLOR_16_FLOAT;
+ comp_swap = SWAP_ALT_REV;
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT32:
+ format = COLOR_32_FLOAT;
+ comp_swap = SWAP_ALT;
+ SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT16:
+ format = COLOR_16_FLOAT;
+ comp_swap = SWAP_ALT;
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+ format = COLOR_32_32_FLOAT;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+ format = COLOR_16_16_FLOAT;
+ comp_swap = SWAP_ALT_REV;
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+ format = COLOR_32_FLOAT;
+ comp_swap = SWAP_STD;
+ SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+ format = COLOR_16_FLOAT;
+ comp_swap = SWAP_STD;
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
+ format = COLOR_8_24;
+ comp_swap = SWAP_STD;
+ SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+ CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_Z24_S8:
+ format = COLOR_24_8;
+ comp_swap = SWAP_STD;
+ SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+ CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_Z16:
+ format = COLOR_16;
+ comp_swap = SWAP_STD;
+ SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+ CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_Z32:
+ format = COLOR_32;
+ comp_swap = SWAP_STD;
+ SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+ CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+ CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_SRGBA8:
+ format = COLOR_8_8_8_8;
+ comp_swap = SWAP_STD_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_SLA8:
+ format = COLOR_8_8;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ case MESA_FORMAT_SL8:
+ format = COLOR_8;
+ comp_swap = SWAP_ALT_REV;
+ SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+ SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+ break;
+ default:
+ fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format));
+ assert("Invalid format for US output\n");
+ return;
+ }
+
+ SETfield(cb_color0_info, format, CB_COLOR0_INFO__FORMAT_shift,
+ CB_COLOR0_INFO__FORMAT_mask);
+ SETfield(cb_color0_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
+ R600_OUT_BATCH(cb_color0_base);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+
+ if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+ (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+ R600_OUT_BATCH((2 << id));
+ END_BATCH();
+ }
+
+ /* Set CMASK & TILE buffer to the offset of color buffer as
+ * we don't use those this shouldn't cause any issue and we
+ * then have a valid cmd stream
+ */
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
+ R600_OUT_BATCH(cb_color0_base);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
+ R600_OUT_BATCH(cb_color0_base);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(12);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+
+static inline void load_shaders(GLcontext * ctx)
+{
+
+ radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
+ context_t *context = R700_CONTEXT(ctx);
+ int i, size;
+ uint32_t *shader;
+
+ if (context->blit_bo_loaded == 1)
+ return;
+
+ size = 4096;
+ context->blit_bo = radeon_bo_open(radeonctx->radeonScreen->bom, 0,
+ size, 256, RADEON_GEM_DOMAIN_GTT, 0);
+ radeon_bo_map(context->blit_bo, 1);
+ shader = context->blit_bo->ptr;
+
+ for(i=0; i<sizeof(r6xx_vs)/4; i++) {
+ shader[128+i] = r6xx_vs[i];
+ }
+ for(i=0; i<sizeof(r6xx_ps)/4; i++) {
+ shader[256+i] = r6xx_ps[i];
+ }
+
+ radeon_bo_unmap(context->blit_bo);
+ context->blit_bo_loaded = 1;
+
+}
+
+static inline void
+set_shaders(context_t *context)
+{
+ struct radeon_bo * pbo = context->blit_bo;
+ BATCH_LOCALS(&context->radeon);
+
+ uint32_t sq_pgm_start_fs = (512 >> 8);
+ uint32_t sq_pgm_resources_fs = 0;
+ uint32_t sq_pgm_cf_offset_fs = 0;
+
+ uint32_t sq_pgm_start_vs = (512 >> 8);
+ uint32_t sq_pgm_resources_vs = (1 << NUM_GPRS_shift);
+ uint32_t sq_pgm_cf_offset_vs = 0;
+
+ uint32_t sq_pgm_start_ps = (1024 >> 8);
+ uint32_t sq_pgm_resources_ps = (1 << NUM_GPRS_shift);
+ uint32_t sq_pgm_cf_offset_ps = 0;
+ uint32_t sq_pgm_exports_ps = (1 << 1);
+
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ /* FS */
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
+ R600_OUT_BATCH(sq_pgm_start_fs);
+ R600_OUT_BATCH_RELOC(sq_pgm_start_fs,
+ pbo,
+ sq_pgm_start_fs,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, sq_pgm_resources_fs);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, sq_pgm_cf_offset_fs);
+ END_BATCH();
+
+ /* VS */
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+ R600_OUT_BATCH(sq_pgm_start_vs);
+ R600_OUT_BATCH_RELOC(sq_pgm_start_vs,
+ pbo,
+ sq_pgm_start_vs,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, sq_pgm_resources_vs);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, sq_pgm_cf_offset_vs);
+ END_BATCH();
+
+ /* PS */
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+ R600_OUT_BATCH(sq_pgm_start_ps);
+ R600_OUT_BATCH_RELOC(sq_pgm_start_ps,
+ pbo,
+ sq_pgm_start_ps,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, sq_pgm_resources_ps);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, sq_pgm_exports_ps);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, sq_pgm_cf_offset_ps);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(18);
+ R600_OUT_BATCH_REGVAL(SPI_VS_OUT_CONFIG, 0); //EXPORT_COUNT is - 1
+ R600_OUT_BATCH_REGVAL(SPI_VS_OUT_ID_0, 0);
+ R600_OUT_BATCH_REGVAL(SPI_PS_INPUT_CNTL_0, SEL_CENTROID_bit);
+ R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
+ R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_1, 0);
+ R600_OUT_BATCH_REGVAL(SPI_INTERP_CONTROL_0, 0);
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+
+static inline void
+set_vtx_resource(context_t *context)
+{
+ struct radeon_bo *bo = context->blit_bo;
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+ R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0);
+ END_BATCH();
+ COMMIT_BATCH();
+
+ if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+ r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+ else
+ r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+ R600_OUT_BATCH(SQ_FETCH_RESOURCE_VS_OFFSET * FETCH_RESOURCE_STRIDE);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(48 - 1);
+ R600_OUT_BATCH(16 << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+ R600_OUT_BATCH(1 << MEM_REQUEST_SIZE_shift);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift);
+ R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0,
+ bo,
+ SQ_VTX_CONSTANT_WORD0_0,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+
+}
+
+static inline void
+set_tex_resource(context_t * context,
+ gl_format mesa_format, struct radeon_bo *bo, int w, int h,
+ int TexelPitch, intptr_t src_offset)
+{
+ uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6;
+
+ sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0;
+ BATCH_LOCALS(&context->radeon);
+
+ SETfield(sq_tex_resource0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+ SETfield(sq_tex_resource0, ARRAY_LINEAR_GENERAL,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+
+ switch (mesa_format) {
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_SIGNED_RGBA8888:
+ SETfield(sq_tex_resource1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) {
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
+ break;
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
+ SETfield(sq_tex_resource1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) {
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
+ break;
+ case MESA_FORMAT_ARGB8888:
+ SETfield(sq_tex_resource1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_XRGB8888:
+ SETfield(sq_tex_resource1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB8888_REV:
+ SETfield(sq_tex_resource1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_XRGB8888_REV:
+ SETfield(sq_tex_resource1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB565:
+ SETfield(sq_tex_resource1, FMT_5_6_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB565_REV:
+ SETfield(sq_tex_resource1, FMT_5_6_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB4444:
+ SETfield(sq_tex_resource1, FMT_4_4_4_4,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB4444_REV:
+ SETfield(sq_tex_resource1, FMT_4_4_4_4,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB1555:
+ SETfield(sq_tex_resource1, FMT_1_5_5_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB1555_REV:
+ SETfield(sq_tex_resource1, FMT_1_5_5_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_AL88:
+ case MESA_FORMAT_AL88_REV: /* TODO : Check this. */
+ SETfield(sq_tex_resource1, FMT_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB332:
+ SETfield(sq_tex_resource1, FMT_3_3_2,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */
+ SETfield(sq_tex_resource1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_L8: /* X, X, X, ONE */
+ SETfield(sq_tex_resource1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_I8: /* X, X, X, X */
+ case MESA_FORMAT_CI8:
+ SETfield(sq_tex_resource1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGBA_FLOAT32:
+ SETfield(sq_tex_resource1, FMT_32_32_32_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGBA_FLOAT16:
+ SETfield(sq_tex_resource1, FMT_16_16_16_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */
+ SETfield(sq_tex_resource1, FMT_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */
+ SETfield(sq_tex_resource1, FMT_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */
+ SETfield(sq_tex_resource1, FMT_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */
+ SETfield(sq_tex_resource1, FMT_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+ SETfield(sq_tex_resource1, FMT_32_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+ SETfield(sq_tex_resource1, FMT_16_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+ SETfield(sq_tex_resource1, FMT_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+ SETfield(sq_tex_resource1, FMT_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_Z16:
+ SETbit(sq_tex_resource0, TILE_TYPE_bit);
+ SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+ SETfield(sq_tex_resource1, FMT_16,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_X8_Z24:
+ SETbit(sq_tex_resource0, TILE_TYPE_bit);
+ SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+ SETfield(sq_tex_resource1, FMT_8_24,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_S8_Z24:
+ SETbit(sq_tex_resource0, TILE_TYPE_bit);
+ SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+ SETfield(sq_tex_resource1, FMT_8_24,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_Z24_S8:
+ SETbit(sq_tex_resource0, TILE_TYPE_bit);
+ SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+ SETfield(sq_tex_resource1, FMT_24_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_Z32:
+ SETbit(sq_tex_resource0, TILE_TYPE_bit);
+ SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+ SETfield(sq_tex_resource1, FMT_32,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_S8:
+ SETbit(sq_tex_resource0, TILE_TYPE_bit);
+ SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+ SETfield(sq_tex_resource1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_SRGBA8:
+ SETfield(sq_tex_resource1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SLA8:
+ SETfield(sq_tex_resource1, FMT_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SL8: /* X, X, X, ONE */
+ SETfield(sq_tex_resource1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ default:
+ fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format));
+ assert("Invalid format for US output\n");
+ return;
+ };
+
+ SETfield(sq_tex_resource0, (TexelPitch/8)-1, PITCH_shift, PITCH_mask);
+ SETfield(sq_tex_resource0, w - 1, TEX_WIDTH_shift, TEX_WIDTH_mask);
+ SETfield(sq_tex_resource1, h - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+ sq_tex_resource2 = src_offset / 256;
+
+ SETfield(sq_tex_resource6, SQ_TEX_VTX_VALID_TEXTURE,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+ r700SyncSurf(context, bo,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+ 0, TC_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+ R600_OUT_BATCH(0 * 7);
+
+ R600_OUT_BATCH(sq_tex_resource0);
+ R600_OUT_BATCH(sq_tex_resource1);
+ R600_OUT_BATCH(sq_tex_resource2);
+ R600_OUT_BATCH(0); //SQ_TEX_RESOURCE3
+ R600_OUT_BATCH(sq_tex_resource4);
+ R600_OUT_BATCH(0); //SQ_TEX_RESOURCE5
+ R600_OUT_BATCH(sq_tex_resource6);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static inline void
+set_tex_sampler(context_t * context)
+{
+ uint32_t sq_tex_sampler_word0 = 0, sq_tex_sampler_word1 = 0, sq_tex_sampler_word2 = 0;
+ int i = 0;
+
+ SETbit(sq_tex_sampler_word2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+ R600_OUT_BATCH(i * 3);
+ R600_OUT_BATCH(sq_tex_sampler_word0);
+ R600_OUT_BATCH(sq_tex_sampler_word1);
+ R600_OUT_BATCH(sq_tex_sampler_word2);
+ END_BATCH();
+
+}
+
+static inline void
+set_scissors(context_t *context, int x1, int y1, int x2, int y2)
+{
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(17);
+ R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
+ R600_OUT_BATCH((x1 << 0) | (y1 << 16));
+ R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+ R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 3);
+ R600_OUT_BATCH(0); //PA_SC_WINDOW_OFFSET
+ R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); //PA_SC_WINDOW_SCISSOR_TL
+ R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+ R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
+ R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit));
+ R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+ /* XXX 16 of these PA_SC_VPORT_SCISSOR_0_TL_num ... */
+ R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL, 2 );
+ R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit));
+ R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+
+static inline void
+set_vb_data(context_t * context, int src_x, int src_y, int dst_x, int dst_y,
+ int w, int h, int src_h, unsigned flip_y)
+{
+ float *vb;
+ radeon_bo_map(context->blit_bo, 1);
+ vb = context->blit_bo->ptr;
+
+ vb[0] = (float)(dst_x);
+ vb[1] = (float)(dst_y);
+ vb[2] = (float)(src_x);
+ vb[3] = (flip_y) ? (float)(src_h - src_y) : (float)src_y;
+
+ vb[4] = (float)(dst_x);
+ vb[5] = (float)(dst_y + h);
+ vb[6] = (float)(src_x);
+ vb[7] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h);
+
+ vb[8] = (float)(dst_x + w);
+ vb[9] = (float)(dst_y + h);
+ vb[10] = (float)(src_x + w);
+ vb[11] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h);
+
+ radeon_bo_unmap(context->blit_bo);
+
+}
+
+static inline void
+draw_auto(context_t *context)
+{
+ BATCH_LOCALS(&context->radeon);
+ uint32_t vgt_primitive_type = 0, vgt_index_type = 0, vgt_draw_initiator = 0, vgt_num_indices;
+
+ SETfield(vgt_primitive_type, DI_PT_RECTLIST,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift,
+ INDEX_TYPE_mask);
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift,
+ MAJOR_MODE_mask);
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift,
+ SOURCE_SELECT_mask);
+
+ vgt_num_indices = 3;
+
+ BEGIN_BATCH_NO_AUTOSTATE(10);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ //
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static inline void
+set_default_state(context_t *context)
+{
+ int ps_prio = 0;
+ int vs_prio = 1;
+ int gs_prio = 2;
+ int es_prio = 3;
+ int num_ps_gprs;
+ int num_vs_gprs;
+ int num_gs_gprs;
+ int num_es_gprs;
+ int num_temp_gprs;
+ int num_ps_threads;
+ int num_vs_threads;
+ int num_gs_threads;
+ int num_es_threads;
+ int num_ps_stack_entries;
+ int num_vs_stack_entries;
+ int num_gs_stack_entries;
+ int num_es_stack_entries;
+ uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+ uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+ uint32_t ta_cntl_aux, db_watermarks, sq_dyn_gpr_cntl_ps_flush_req, db_debug;
+ BATCH_LOCALS(&context->radeon);
+
+ switch (context->radeon.radeonScreen->chip_family) {
+ case CHIP_FAMILY_R600:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV630:
+ case CHIP_FAMILY_RV635:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 40;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV610:
+ case CHIP_FAMILY_RV620:
+ case CHIP_FAMILY_RS780:
+ case CHIP_FAMILY_RS880:
+ default:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV670:
+ num_ps_gprs = 144;
+ num_vs_gprs = 40;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV770:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 256;
+ num_vs_stack_entries = 256;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV730:
+ case CHIP_FAMILY_RV740:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV710:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 48;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ }
+
+ sq_config = 0;
+ if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+ CLEARbit(sq_config, VC_ENABLE_bit);
+ else
+ SETbit(sq_config, VC_ENABLE_bit);
+ SETbit(sq_config, DX9_CONSTS_bit);
+ SETbit(sq_config, ALU_INST_PREFER_VECTOR_bit);
+ SETfield(sq_config, ps_prio, PS_PRIO_shift, PS_PRIO_mask);
+ SETfield(sq_config, vs_prio, VS_PRIO_shift, VS_PRIO_mask);
+ SETfield(sq_config, gs_prio, GS_PRIO_shift, GS_PRIO_mask);
+ SETfield(sq_config, es_prio, ES_PRIO_shift, ES_PRIO_mask);
+
+ sq_gpr_resource_mgmt_1 = 0;
+ SETfield(sq_gpr_resource_mgmt_1, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+ SETfield(sq_gpr_resource_mgmt_1, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+ SETfield(sq_gpr_resource_mgmt_1, num_temp_gprs,
+ NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
+
+ sq_gpr_resource_mgmt_2 = 0;
+ SETfield(sq_gpr_resource_mgmt_2, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
+ SETfield(sq_gpr_resource_mgmt_2, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
+
+ sq_thread_resource_mgmt = 0;
+ SETfield(sq_thread_resource_mgmt, num_ps_threads,
+ NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
+ SETfield(sq_thread_resource_mgmt, num_vs_threads,
+ NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
+ SETfield(sq_thread_resource_mgmt, num_gs_threads,
+ NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
+ SETfield(sq_thread_resource_mgmt, num_es_threads,
+ NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
+
+ sq_stack_resource_mgmt_1 = 0;
+ SETfield(sq_stack_resource_mgmt_1, num_ps_stack_entries,
+ NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
+ SETfield(sq_stack_resource_mgmt_1, num_vs_stack_entries,
+ NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
+
+ sq_stack_resource_mgmt_2 = 0;
+ SETfield(sq_stack_resource_mgmt_2, num_gs_stack_entries,
+ NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
+ SETfield(sq_stack_resource_mgmt_2, num_es_stack_entries,
+ NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
+
+ ta_cntl_aux = 0;
+ SETfield(ta_cntl_aux, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask);
+ db_watermarks = 0;
+ SETfield(db_watermarks, 4, DEPTH_FREE_shift, DEPTH_FREE_mask);
+ SETfield(db_watermarks, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask);
+ SETfield(db_watermarks, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask);
+ SETfield(db_watermarks, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask);
+ sq_dyn_gpr_cntl_ps_flush_req = 0;
+ db_debug = 0;
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+ SETfield(ta_cntl_aux, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+ db_debug = 0x82000000;
+ SETfield(db_watermarks, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+ } else {
+ SETfield(ta_cntl_aux, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+ SETfield(db_watermarks, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+ SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(117);
+ R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
+ R600_OUT_BATCH(sq_config);
+ R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
+ R600_OUT_BATCH(sq_gpr_resource_mgmt_2);
+ R600_OUT_BATCH(sq_thread_resource_mgmt);
+ R600_OUT_BATCH(sq_stack_resource_mgmt_1);
+ R600_OUT_BATCH(sq_stack_resource_mgmt_2);
+
+ R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, ta_cntl_aux);
+ R600_OUT_BATCH_REGVAL(VC_ENHANCE, 0);
+ R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, sq_dyn_gpr_cntl_ps_flush_req);
+ R600_OUT_BATCH_REGVAL(DB_DEBUG, db_debug);
+ R600_OUT_BATCH_REGVAL(DB_WATERMARKS, db_watermarks);
+
+ R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+
+ R600_OUT_BATCH_REGVAL(CB_CLRCMP_CONTROL,
+ (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
+ R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
+ R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
+ R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
+ R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
+ R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
+ R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
+ R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+ R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, (0xcc << ROP3_shift));
+
+ R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+ R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, 0);
+ R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+ R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, (FACE_bit) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift));
+ R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) |
+ (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+ (X_1_256TH << QUANT_MODE_shift));
+
+ R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
+ R600_OUT_BATCH(2048);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+
+ R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+
+ R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, 0);
+ R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, 0);
+ R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, 0);
+ R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, 0);
+
+ R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+
+ R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static GLboolean validate_buffers(context_t *rmesa,
+ struct radeon_bo *src_bo,
+ struct radeon_bo *dst_bo)
+{
+ int ret;
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rmesa->blit_bo, RADEON_GEM_DOMAIN_GTT, 0);
+
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+ rmesa->blit_bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+ first_elem(&rmesa->radeon.dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+unsigned r600_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x,
+ unsigned src_y,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x,
+ unsigned dst_y,
+ unsigned w,
+ unsigned h,
+ unsigned flip_y)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ int id = 0;
+
+ if (!is_blit_supported(dst_mesaformat))
+ return GL_FALSE;
+
+ if (src_bo == dst_bo) {
+ return GL_FALSE;
+ }
+
+ if (src_offset % 256 || dst_offset % 256) {
+ return GL_FALSE;
+ }
+
+ if (0) {
+ fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n",
+ src_width, src_height, src_pitch,
+ _mesa_format_row_stride(src_mesaformat, src_width),
+ _mesa_get_format_name(src_mesaformat));
+ fprintf(stderr, "dst: width %d, height %d, pitch %d, format %s\n",
+ dst_width, dst_height,
+ _mesa_format_row_stride(dst_mesaformat, dst_width),
+ _mesa_get_format_name(dst_mesaformat));
+ }
+
+ /* Flush is needed to make sure that source buffer has correct data */
+ radeonFlush(ctx);
+
+ rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__);
+
+ /* load shaders */
+ load_shaders(context->radeon.glCtx);
+
+ if (!validate_buffers(context, src_bo, dst_bo))
+ return GL_FALSE;
+
+ /* set clear state */
+ /* 117 */
+ set_default_state(context);
+
+ /* shaders */
+ /* 72 */
+ set_shaders(context);
+
+ /* src */
+ /* 20 */
+ set_tex_resource(context, src_mesaformat, src_bo,
+ src_width, src_height, src_pitch, src_offset);
+
+ /* 5 */
+ set_tex_sampler(context);
+
+ /* dst */
+ /* 27 */
+ set_render_target(context, dst_bo, dst_mesaformat,
+ dst_pitch, dst_width, dst_height, dst_offset);
+ /* scissors */
+ /* 17 */
+ set_scissors(context, dst_x, dst_y, dst_x + dst_width, dst_y + dst_height);
+
+ set_vb_data(context, src_x, src_y, dst_x, dst_y, w, h, src_height, flip_y);
+ /* Vertex buffer setup */
+ /* 24 */
+ set_vtx_resource(context);
+
+ /* draw */
+ /* 10 */
+ draw_auto(context);
+
+ /* 7 */
+ r700SyncSurf(context, dst_bo, 0,
+ RADEON_GEM_DOMAIN_VRAM|RADEON_GEM_DOMAIN_GTT,
+ CB_ACTION_ENA_bit | (1 << (id + 6)));
+
+ /* 5 */
+ r700WaitForIdleClean(context);
+
+ radeonFlush(ctx);
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r600/r600_blit.h b/src/mesa/drivers/dri/r600/r600_blit.h
new file mode 100644
index 0000000000..f280e23489
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit.h
@@ -0,0 +1,21 @@
+unsigned r600_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned w,
+ unsigned h,
+ unsigned flip_y);
+
diff --git a/src/mesa/drivers/dri/r600/r600_blit_shaders.h b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
new file mode 100644
index 0000000000..492dde9636
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
@@ -0,0 +1,28 @@
+const uint32_t r6xx_vs[] =
+{
+ 0x00000004, // CF_DWORD0(ADDR(4))
+ 0x81000000, // SQ_CF_INST_VTX COUNT(1)
+ 0x0000203c, // CF_EXP_IMP CF_POS0 SQ_EXPORT_POS RW_GPR(0) ELEM_SIZE(0)
+ 0x94000b08, // SQ_CF_INST_EXPORT_DONE SWZ XY01 BARRIER(1)
+ 0x00004000, // CF_EXP_IMP 0 SQ_EXPORT_PARAM RW_GPR(0) ELEM_SIZE(0)
+ 0x14200b1a, // SQ_CF_INST_EXPORT_DONE SWZ ZW01 EOP(1) BARRIER(0)
+ 0x00000000,
+ 0x00000000,
+ 0x3c000000, // SQ_VTX_INST_FETCH BUFFER_ID(0) MEGA_FETCH_COUNT(16)
+ 0x68cd1000, // DST_GPR(0) DST_SWZ: XYZW DATA_FORMAT(35) SQ_NUM_FORMAT_SCALED SQ_FORMAT_COMP_SIGNED
+ 0x00080000, // ENDIAN_SWAP(SQ_ENDIAN_NONE) MEGA_FETCH(1)
+ 0x00000000, // VTX_DWORD_PAD
+};
+
+const uint32_t r6xx_ps[] =
+{
+ 0x00000002, // CF_DWORD0 AADR(2)
+ 0x80800000, // SQ_CF_INST_TEX COUNT(1)
+ 0x00000000, // CF_ALLOC_IMP_EXP0 SQ_EXPORT_PIXEL RW_GPR(0) ELEM_SIZE(0)
+ 0x94200688, // SQ_CF_INST_EXPORT_DONE EOP(1) BARRIER(1) SWZ: XYZW
+ 0x00000010, // SQ_TEX_INST_SAMPLE SRC_GPR(0) RESOURCE_ID(0)
+ 0x000d1000, // DST_GPR(0) SWZ: XYZW TEX_UNNORMALIZED
+ 0xb0800000, // SAMPLER_ID(0) SRC_SWZ XYZW
+ 0x00000000, // TEX_DWORD_PAD
+};
+
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index cb549497f5..68112c49dc 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -65,6 +65,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_queryobj.h"
+#include "r600_blit.h"
#include "r700_state.h"
#include "r700_ioctl.h"
@@ -240,6 +241,7 @@ static void r600_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
radeon->vtbl.fallback = r600_fallback;
radeon->vtbl.emit_query_finish = r600_emit_query_finish;
+ radeon->vtbl.blit = r600_blit;
}
static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
@@ -378,7 +380,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
_mesa_init_driver_functions(&functions);
r700InitStateFuncs(&functions);
- r600InitTextureFuncs(&functions);
+ r600InitTextureFuncs(&r600->radeon, &functions);
r700InitShaderFuncs(&functions);
radeonInitQueryObjFunctions(&functions);
r700InitIoctlFuncs(&functions);
diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h
index a1b4af715e..72c8c869b7 100644
--- a/src/mesa/drivers/dri/r600/r600_context.h
+++ b/src/mesa/drivers/dri/r600/r600_context.h
@@ -148,6 +148,8 @@ struct r600_context {
GLint nNumActiveAos;
StreamDesc stream_desc[VERT_ATTRIB_MAX];
struct r700_index_buffer ind_buf;
+ struct radeon_bo *blit_bo;
+ GLboolean blit_bo_loaded;
};
#define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx))
@@ -178,6 +180,8 @@ extern GLboolean r700SyncSurf(context_t *context,
uint32_t write_domain,
uint32_t sync_type);
+extern void r700WaitForIdleClean(context_t *context);
+
extern void r700Start3D(context_t *context);
extern void r600InitAtoms(context_t *context);
extern void r700InitDraw(GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index f745fe3e8a..71dfd7e059 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -396,7 +396,7 @@ static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx,
return &t->base;
}
-void r600InitTextureFuncs(struct dd_function_table *functions)
+void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
{
/* Note: we only plug in the functions we implement in the driver
* since _mesa_init_driver_functions() was already called.
@@ -424,6 +424,11 @@ void r600InitTextureFuncs(struct dd_function_table *functions)
functions->CompressedTexImage2D = radeonCompressedTexImage2D;
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+ if (radeon->radeonScreen->kernel_mm) {
+ functions->CopyTexImage2D = radeonCopyTexImage2D;
+ functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+ }
+
functions->GenerateMipmap = radeonGenerateMipmap;
driInitTextureFormats();
diff --git a/src/mesa/drivers/dri/r600/r600_tex.h b/src/mesa/drivers/dri/r600/r600_tex.h
index fb0e1a023e..c2141ef5e5 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.h
+++ b/src/mesa/drivers/dri/r600/r600_tex.h
@@ -58,6 +58,6 @@ extern void r600SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
extern GLboolean r600ValidateBuffers(GLcontext * ctx);
-extern void r600InitTextureFuncs(struct dd_function_table *functions);
+extern void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions);
#endif /* __r600_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 0ff16b4ddd..c01b2fbb14 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -4469,7 +4469,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
}
pAsm->D2.dst2.SaturateMode = 1;
- pAsm->S[0].src.rtype = pAsm->D.dst.rtype;
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[0].src.reg = pAsm->D.dst.reg;
noswizzle_PVSSRC(&(pAsm->S[0].src));
noneg_PVSSRC(&(pAsm->S[0].src));
@@ -5090,15 +5090,15 @@ void add_return_inst(r700_AssemblerBase *pAsm)
{
if(GL_FALSE == add_cf_instruction(pAsm) )
{
- return GL_FALSE;
+ return;
}
//pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
@@ -5302,7 +5302,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
{
- GLfloat fLiteral[2] = {0.1, 0.0};
+ /*GLfloat fLiteral[2] = {0.1, 0.0};*/
pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
pAsm->D.dst.op3 = 0;
@@ -5353,7 +5353,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
GLboolean testFlag(r700_AssemblerBase *pAsm)
{
- GLfloat fLiteral[2] = {0.1, 0.0};
+ /*GLfloat fLiteral[2] = {0.1, 0.0};*/
//Test flag
GLuint tmp = gethelpr(pAsm);
@@ -6123,7 +6123,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm,
R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
- /* copy srcs to presub inputs */
+ /* copy srcs to presub inputs */
pAsm->alu_x_opcode = SQ_CF_INST_ALU;
for(i=0; i<uNumValidSrc; i++)
{
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index 56baf5b0d9..0064d0814f 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -619,6 +619,7 @@ GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+GLboolean assemble_CONT(r700_AssemblerBase *pAsm);
GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode);
GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 3bc2d2ba02..1a1a87c3cf 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -453,13 +453,31 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
R600_OUT_BATCH((2 << id));
END_BATCH();
}
+ /* Set CMASK & TILE buffer to the offset of color buffer as
+ * we don't use those this shouldn't cause any issue and we
+ * then have a valid cmd stream
+ */
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
+ R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All);
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ rrb->bo,
+ r700->render_target[id].CB_COLOR0_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
+ R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All);
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ rrb->bo,
+ r700->render_target[id].CB_COLOR0_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(18);
+ BEGIN_BATCH_NO_AUTOSTATE(12);
R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_TILE + (4 * id), r700->render_target[id].CB_COLOR0_TILE.u32All);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_FRAG + (4 * id), r700->render_target[id].CB_COLOR0_FRAG.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
END_BATCH();
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index eab27cbd84..3a6210c53a 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -422,7 +422,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end,
}
/* start 3d, idle, cb/db flush */
-#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
+#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 18
static GLuint r700PredictRenderSize(GLcontext* ctx,
const struct _mesa_prim *prim,
diff --git a/src/mesa/drivers/dri/r600/radeon_tex_copy.c b/src/mesa/drivers/dri/r600/radeon_tex_copy.c
new file mode 120000
index 0000000000..dfa5ba34e6
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_tex_copy.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_copy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index 2b2f2c4aa7..c776be0e60 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -26,7 +26,8 @@ RADEON_COMMON_SOURCES = \
radeon_mipmap_tree.c \
radeon_queryobj.c \
radeon_span.c \
- radeon_texture.c
+ radeon_texture.c \
+ radeon_tex_copy.c
DRIVER_SOURCES = \
radeon_context.c \
@@ -40,6 +41,7 @@ DRIVER_SOURCES = \
radeon_swtcl.c \
radeon_maos.c \
radeon_sanity.c \
+ radeon_blit.c \
$(RADEON_COMMON_SOURCES)
C_SOURCES = \
diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c
new file mode 100644
index 0000000000..0df4fbb33c
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_blit.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "radeon_context.h"
+#include "radeon_blit.h"
+
+static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
+ int reg, int count)
+{
+ if (count)
+ return CP_PACKET0(reg, count - 1);
+ return CP_PACKET2;
+}
+
+/* common formats supported as both textures and render targets */
+static unsigned is_blit_supported(gl_format mesa_format)
+{
+ /* XXX others? BE/LE? */
+ switch (mesa_format) {
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_A8:
+ break;
+ default:
+ return 0;
+ }
+
+ /* ??? */
+ if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
+ return 0;
+
+ return 1;
+}
+
+static inline void emit_vtx_state(struct r100_context *r100)
+{
+ BATCH_LOCALS(&r100->radeon);
+
+ BEGIN_BATCH(8);
+ if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, 0);
+ } else {
+ OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
+
+ }
+ OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+ RADEON_TEX1_W_ROUTING_USE_W0));
+ OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0);
+ OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
+ RADEON_BFACE_SOLID |
+ RADEON_FFACE_SOLID |
+ RADEON_VTX_PIX_CENTER_OGL |
+ RADEON_ROUND_MODE_ROUND |
+ RADEON_ROUND_PREC_4TH_PIX));
+ END_BATCH();
+}
+
+static void inline emit_tx_setup(struct r100_context *r100,
+ gl_format mesa_format,
+ struct radeon_bo *bo,
+ intptr_t offset,
+ unsigned width,
+ unsigned height,
+ unsigned pitch)
+{
+ uint32_t txformat = RADEON_TXFORMAT_NON_POWER2;
+ BATCH_LOCALS(&r100->radeon);
+
+ assert(width <= 2047);
+ assert(height <= 2047);
+ assert(offset % 32 == 0);
+
+ /* XXX others? BE/LE? */
+ switch (mesa_format) {
+ case MESA_FORMAT_ARGB8888:
+ txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ case MESA_FORMAT_XRGB8888:
+ txformat |= RADEON_TXFORMAT_ARGB8888;
+ break;
+ case MESA_FORMAT_RGB565:
+ txformat |= RADEON_TXFORMAT_RGB565;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ case MESA_FORMAT_A8:
+ txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+ break;
+ default:
+ break;
+ }
+
+ BEGIN_BATCH(18);
+ OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+ OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO |
+ RADEON_COLOR_ARG_B_ZERO |
+ RADEON_COLOR_ARG_C_T0_COLOR |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX));
+ OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO |
+ RADEON_ALPHA_ARG_B_ZERO |
+ RADEON_ALPHA_ARG_C_T0_ALPHA |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX));
+ OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST |
+ RADEON_CLAMP_T_CLAMP_LAST |
+ RADEON_MAG_FILTER_NEAREST |
+ RADEON_MIN_FILTER_NEAREST));
+ OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat);
+ OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) |
+ ((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
+ OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32);
+
+ OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1);
+ OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+
+ END_BATCH();
+}
+
+static inline void emit_cb_setup(struct r100_context *r100,
+ struct radeon_bo *bo,
+ intptr_t offset,
+ gl_format mesa_format,
+ unsigned pitch,
+ unsigned width,
+ unsigned height)
+{
+ uint32_t dst_pitch = pitch;
+ uint32_t dst_format = 0;
+ BATCH_LOCALS(&r100->radeon);
+
+ /* XXX others? BE/LE? */
+ switch (mesa_format) {
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
+ dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+ break;
+ case MESA_FORMAT_RGB565:
+ dst_format = RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ dst_format = RADEON_COLOR_FORMAT_ARGB4444;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+ break;
+ case MESA_FORMAT_A8:
+ dst_format = RADEON_COLOR_FORMAT_RGB8;
+ break;
+ default:
+ break;
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(18);
+ OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
+ OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) |
+ (height << RADEON_RE_HEIGHT_SHIFT)));
+ OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
+ OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+ OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
+
+ OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
+ OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
+ OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+
+ END_BATCH();
+}
+
+static GLboolean validate_buffers(struct r100_context *r100,
+ struct radeon_bo *src_bo,
+ struct radeon_bo *dst_bo)
+{
+ int ret;
+ radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs,
+ src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs,
+ dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+
+ ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs,
+ first_elem(&r100->radeon.dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+/**
+ * Calculate texcoords for given image region.
+ * Output values are [minx, maxx, miny, maxy]
+ */
+static inline void calc_tex_coords(float img_width, float img_height,
+ float x, float y,
+ float reg_width, float reg_height,
+ unsigned flip_y, float *buf)
+{
+ buf[0] = x / img_width;
+ buf[1] = buf[0] + reg_width / img_width;
+ buf[2] = y / img_height;
+ buf[3] = buf[2] + reg_height / img_height;
+ if (flip_y)
+ {
+ buf[2] = 1.0 - buf[2];
+ buf[3] = 1.0 - buf[3];
+ }
+}
+
+static inline void emit_draw_packet(struct r100_context *r100,
+ unsigned src_width, unsigned src_height,
+ unsigned src_x_offset, unsigned src_y_offset,
+ unsigned dst_x_offset, unsigned dst_y_offset,
+ unsigned reg_width, unsigned reg_height,
+ unsigned flip_y)
+{
+ float texcoords[4];
+ float verts[12];
+ BATCH_LOCALS(&r100->radeon);
+
+ calc_tex_coords(src_width, src_height,
+ src_x_offset, src_y_offset,
+ reg_width, reg_height,
+ flip_y, texcoords);
+
+ verts[0] = dst_x_offset;
+ verts[1] = dst_y_offset + reg_height;
+ verts[2] = texcoords[0];
+ verts[3] = texcoords[3];
+
+ verts[4] = dst_x_offset + reg_width;
+ verts[5] = dst_y_offset + reg_height;
+ verts[6] = texcoords[1];
+ verts[7] = texcoords[3];
+
+ verts[8] = dst_x_offset + reg_width;
+ verts[9] = dst_y_offset;
+ verts[10] = texcoords[1];
+ verts[11] = texcoords[2];
+
+ BEGIN_BATCH(15);
+ OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16));
+ OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0);
+ OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ RADEON_CP_VC_CNTL_MAOS_ENABLE |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+ (3 << 16));
+ OUT_BATCH_TABLE(verts, 12);
+ END_BATCH();
+}
+
+/**
+ * Copy a region of [@a width x @a height] pixels from source buffer
+ * to destination buffer.
+ * @param[in] r100 r100 context
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] width region width
+ * @param[in] height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ */
+unsigned r100_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned reg_width,
+ unsigned reg_height,
+ unsigned flip_y)
+{
+ struct r100_context *r100 = R100_CONTEXT(ctx);
+
+ if (!is_blit_supported(dst_mesaformat))
+ return GL_FALSE;
+
+ /* Make sure that colorbuffer has even width - hw limitation */
+ if (dst_pitch % 2 > 0)
+ ++dst_pitch;
+
+ /* Rendering to small buffer doesn't work.
+ * Looks like a hw limitation.
+ */
+ if (dst_pitch < 32)
+ return GL_FALSE;
+
+ /* Need to clamp the region size to make sure
+ * we don't read outside of the source buffer
+ * or write outside of the destination buffer.
+ */
+ if (reg_width + src_x_offset > src_width)
+ reg_width = src_width - src_x_offset;
+ if (reg_height + src_y_offset > src_height)
+ reg_height = src_height - src_y_offset;
+ if (reg_width + dst_x_offset > dst_width)
+ reg_width = dst_width - dst_x_offset;
+ if (reg_height + dst_y_offset > dst_height)
+ reg_height = dst_height - dst_y_offset;
+
+ if (src_bo == dst_bo) {
+ return GL_FALSE;
+ }
+
+ if (src_offset % 32 || dst_offset % 32) {
+ return GL_FALSE;
+ }
+
+ if (0) {
+ fprintf(stderr, "src: size [%d x %d], pitch %d, "
+ "offset [%d x %d], format %s, bo %p\n",
+ src_width, src_height, src_pitch,
+ src_x_offset, src_y_offset,
+ _mesa_get_format_name(src_mesaformat),
+ src_bo);
+ fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+ dst_pitch, dst_x_offset, dst_y_offset,
+ _mesa_get_format_name(dst_mesaformat), dst_bo);
+ fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
+ }
+
+ /* Flush is needed to make sure that source buffer has correct data */
+ radeonFlush(ctx);
+
+ rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__);
+
+ if (!validate_buffers(r100, src_bo, dst_bo))
+ return GL_FALSE;
+
+ /* 8 */
+ emit_vtx_state(r100);
+ /* 18 */
+ emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+ /* 18 */
+ emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+ /* 15 */
+ emit_draw_packet(r100, src_width, src_height,
+ src_x_offset, src_y_offset,
+ dst_x_offset, dst_y_offset,
+ reg_width, reg_height,
+ flip_y);
+
+ radeonFlush(ctx);
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.h b/src/mesa/drivers/dri/radeon/radeon_blit.h
new file mode 100644
index 0000000000..d36366ff79
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_blit.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_BLIT_H
+#define RADEON_BLIT_H
+
+void r100_blit_init(struct r100_context *r100);
+
+unsigned r100_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned width,
+ unsigned height,
+ unsigned flip_y);
+
+#endif // RADEON_BLIT_H
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index ab79d2dc0f..e397ee8c22 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -518,6 +518,26 @@ struct radeon_context {
void (*free_context)(GLcontext *ctx);
void (*emit_query_finish)(radeonContextPtr radeon);
void (*update_scissor)(GLcontext *ctx);
+ unsigned (*blit)(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned reg_width,
+ unsigned reg_height,
+ unsigned flip_y);
} vtbl;
};
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 3cd305b0a2..6c08a90bbd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -63,6 +63,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_tcl.h"
#include "radeon_maos.h"
#include "radeon_queryobj.h"
+#include "radeon_blit.h"
#define need_GL_ARB_occlusion_query
#define need_GL_EXT_blend_minmax
@@ -202,6 +203,7 @@ static void r100_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.fallback = radeonFallback;
radeon->vtbl.free_context = r100_vtbl_free_context;
radeon->vtbl.emit_query_finish = r100_emit_query_finish;
+ radeon->vtbl.blit = r100_blit;
}
/* Create the device specific context.
@@ -228,6 +230,7 @@ r100CreateContext( const __GLcontextModes *glVisual,
if ( !rmesa )
return GL_FALSE;
+ rmesa->radeon.radeonScreen = screen;
r100_init_vtbl(&rmesa->radeon);
/* init exp fog table data */
@@ -257,7 +260,7 @@ r100CreateContext( const __GLcontextModes *glVisual,
* (the texture functions are especially important)
*/
_mesa_init_driver_functions( &functions );
- radeonInitTextureFuncs( &functions );
+ radeonInitTextureFuncs( &rmesa->radeon, &functions );
radeonInitQueryObjFunctions(&functions);
if (!radeonInitContext(&rmesa->radeon, &functions,
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
index dfedc38bfd..d84760bf74 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -453,7 +453,6 @@ struct r100_context {
extern GLboolean r100CreateContext( const __GLcontextModes *glVisual,
__DRIcontext *driContextPriv,
void *sharedContextPrivate);
-
#endif /* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 14163f13af..882ee5c194 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -434,7 +434,7 @@ radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
-void radeonInitTextureFuncs( struct dd_function_table *functions )
+void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
{
functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
functions->TexImage1D = radeonTexImage1D;
@@ -455,6 +455,11 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
functions->CompressedTexImage2D = radeonCompressedTexImage2D;
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+ if (radeon->radeonScreen->kernel_mm) {
+ functions->CopyTexImage2D = radeonCopyTexImage2D;
+ functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+ }
+
functions->GenerateMipmap = radeonGenerateMipmap;
functions->NewTextureImage = radeonNewTextureImage;
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
index a4aaddc74f..0113ffd3da 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.h
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
@@ -52,6 +52,6 @@ extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
-extern void radeonInitTextureFuncs( struct dd_function_table *functions );
+extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
#endif /* __RADEON_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_texcopy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index ebc9c05b8a..44e144c80f 100644
--- a/src/mesa/drivers/dri/r300/r300_texcopy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -26,7 +26,7 @@
*/
#include "radeon_common.h"
-#include "r300_context.h"
+#include "radeon_texture.h"
#include "main/image.h"
#include "main/teximage.h"
@@ -34,11 +34,8 @@
#include "drivers/common/meta.h"
#include "radeon_mipmap_tree.h"
-#include "r300_blit.h"
#include <main/debug.h>
-// TODO:
-// need to pass correct pitch for small dst textures!
static GLboolean
do_copy_texsubimage(GLcontext *ctx,
GLenum target, GLint level,
@@ -48,13 +45,13 @@ do_copy_texsubimage(GLcontext *ctx,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
- struct r300_context *r300 = R300_CONTEXT(ctx);
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) {
- rrb = radeon_get_depthbuffer(&r300->radeon);
+ rrb = radeon_get_depthbuffer(radeon);
} else {
- rrb = radeon_get_colorbuffer(&r300->radeon);
+ rrb = radeon_get_colorbuffer(radeon);
}
if (!timg->mt) {
@@ -69,10 +66,6 @@ do_copy_texsubimage(GLcontext *ctx,
intptr_t src_offset = rrb->draw_offset;
intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level);
- if (src_offset % 32 || dst_offset % 32) {
- return GL_FALSE;
- }
-
if (0) {
fprintf(stderr, "%s: copying to face %d, level %d\n",
__FUNCTION__, _mesa_tex_target_to_face(target), level);
@@ -84,18 +77,19 @@ do_copy_texsubimage(GLcontext *ctx,
}
/* blit from src buffer to texture */
- return r300_blit(r300, rrb->bo, src_offset, rrb->base.Format, rrb->pitch/rrb->cpp,
- rrb->base.Width, rrb->base.Height, x, y,
- timg->mt->bo, dst_offset, timg->base.TexFormat,
- timg->base.Width, timg->base.Width, timg->base.Height,
- dstx, dsty, width, height, 1);
+ return radeon->vtbl.blit(ctx, rrb->bo, src_offset, rrb->base.Format, rrb->pitch/rrb->cpp,
+ rrb->base.Width, rrb->base.Height, x, y,
+ timg->mt->bo, dst_offset, timg->base.TexFormat,
+ timg->mt->levels[level].rowstride / _mesa_get_format_bytes(timg->base.TexFormat),
+ timg->base.Width, timg->base.Height,
+ dstx, dsty, width, height, 1);
}
-static void
-r300CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border)
+void
+radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+ GLenum internalFormat,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLint border)
{
struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
struct gl_texture_object *texObj =
@@ -139,11 +133,11 @@ fail:
width, height, border);
}
-static void
-r300CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset,
- GLint x, GLint y,
- GLsizei width, GLsizei height)
+void
+radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLint x, GLint y,
+ GLsizei width, GLsizei height)
{
struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target);
@@ -159,10 +153,3 @@ r300CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
xoffset, yoffset, x, y, width, height);
}
}
-
-
-void r300_init_texcopy_functions(struct dd_function_table *table)
-{
- table->CopyTexImage2D = r300CopyTexImage2D;
- table->CopyTexSubImage2D = r300CopyTexSubImage2D;
-} \ No newline at end of file
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index 03178116c1..20a27ad9a7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -197,21 +197,6 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
radeon_bo_unmap(t->mt->bo);
}
-GLuint radeon_face_for_target(GLenum target)
-{
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- default:
- return 0;
- }
-}
-
/**
* Wraps Mesa's implementation to ensure that the base level image is mapped.
*
@@ -248,7 +233,7 @@ static void radeon_generate_mipmap(GLcontext *ctx, GLenum target,
void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
{
- GLuint face = radeon_face_for_target(target);
+ GLuint face = _mesa_tex_target_to_face(target);
radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]);
radeon_teximage_map(baseimage, GL_FALSE);
@@ -710,7 +695,7 @@ static void radeon_teximage(
radeon_texture_image* image = get_radeon_texture_image(texImage);
GLint postConvWidth = width;
GLint postConvHeight = height;
- GLuint face = radeon_face_for_target(target);
+ GLuint face = _mesa_tex_target_to_face(target);
{
struct radeon_bo *bo;
@@ -863,7 +848,7 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve
if (RADEON_DEBUG & RADEON_TEXTURE) {
fprintf(stderr, "radeon_texsubimage%dd: texObj %p, texImage %p, face %d, level %d\n",
- dims, texObj, texImage, radeon_face_for_target(target), level);
+ dims, texObj, texImage, _mesa_tex_target_to_face(target), level);
}
t->validated = GL_FALSE;
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index 906daf12d0..f09dd65214 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -44,7 +44,6 @@ void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj);
int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj);
-GLuint radeon_face_for_target(GLenum target);
gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx,
GLint internalFormat,
@@ -126,4 +125,14 @@ void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage);
+void radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+ GLenum internalFormat,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLint border);
+
+void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLint x, GLint y,
+ GLsizei width, GLsizei height);
+
#endif
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
index e81d7fdcd0..1b33de1edf 100644
--- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
@@ -1959,7 +1959,30 @@
#define RADEON_SE_ZBIAS_FACTOR 0x1db0
#define RADEON_SE_ZBIAS_CONSTANT 0x1db4
-
+#define RADEON_SE_VTX_FMT 0x2080
+# define RADEON_SE_VTX_FMT_XY 0x00000000
+# define RADEON_SE_VTX_FMT_W0 0x00000001
+# define RADEON_SE_VTX_FMT_FPCOLOR 0x00000002
+# define RADEON_SE_VTX_FMT_FPALPHA 0x00000004
+# define RADEON_SE_VTX_FMT_PKCOLOR 0x00000008
+# define RADEON_SE_VTX_FMT_FPSPEC 0x00000010
+# define RADEON_SE_VTX_FMT_FPFOG 0x00000020
+# define RADEON_SE_VTX_FMT_PKSPEC 0x00000040
+# define RADEON_SE_VTX_FMT_ST0 0x00000080
+# define RADEON_SE_VTX_FMT_ST1 0x00000100
+# define RADEON_SE_VTX_FMT_Q1 0x00000200
+# define RADEON_SE_VTX_FMT_ST2 0x00000400
+# define RADEON_SE_VTX_FMT_Q2 0x00000800
+# define RADEON_SE_VTX_FMT_ST3 0x00001000
+# define RADEON_SE_VTX_FMT_Q3 0x00002000
+# define RADEON_SE_VTX_FMT_Q0 0x00004000
+# define RADEON_SE_VTX_FMT_BLND_WEIGHT_CNT_MASK 0x00038000
+# define RADEON_SE_VTX_FMT_N0 0x00040000
+# define RADEON_SE_VTX_FMT_XY1 0x08000000
+# define RADEON_SE_VTX_FMT_Z1 0x10000000
+# define RADEON_SE_VTX_FMT_W1 0x20000000
+# define RADEON_SE_VTX_FMT_N1 0x40000000
+# define RADEON_SE_VTX_FMT_Z 0x80000000
/* Registers for CP and Microcode Engine */
#define RADEON_CP_ME_RAM_ADDR 0x07d4
diff --git a/src/mesa/glapi/gl_XML.py b/src/mesa/glapi/gl_XML.py
index bafb00306f..a10a35e513 100644
--- a/src/mesa/glapi/gl_XML.py
+++ b/src/mesa/glapi/gl_XML.py
@@ -184,7 +184,7 @@ class gl_print_base:
The name is also added to the file's undef_list.
"""
self.undef_list.append("PURE")
- print """# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+ print """# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define PURE __attribute__((pure))
# else
# define PURE
@@ -224,7 +224,7 @@ class gl_print_base:
"""
self.undef_list.append(S)
- print """# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__)
+ print """# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__)
# define %s __attribute__((visibility("%s")))
# else
# define %s
@@ -244,7 +244,7 @@ class gl_print_base:
"""
self.undef_list.append("NOINLINE")
- print """# if defined(__GNUC__)
+ print """# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define NOINLINE __attribute__((noinline))
# else
# define NOINLINE
diff --git a/src/mesa/glapi/glapitemp.h b/src/mesa/glapi/glapitemp.h
index 6767a07673..b8bfcc1a16 100644
--- a/src/mesa/glapi/glapitemp.h
+++ b/src/mesa/glapi/glapitemp.h
@@ -27,7 +27,7 @@
*/
-# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__)
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__)
# define HIDDEN __attribute__((visibility("hidden")))
# else
# define HIDDEN
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index 4eb249b4af..9eab1ead24 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -173,7 +173,8 @@ extern "C" {
* We also need to define a USED attribute, so the optimizer doesn't
* inline a static function that we later use in an alias. - ajax
*/
-#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303
+#if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \
+ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define PUBLIC __attribute__((visibility("default")))
# define USED __attribute__((used))
#else
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 7b3599f932..4da245ab49 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -861,6 +861,9 @@ _mesa_GenRenderbuffersEXT(GLsizei n, GLuint *renderbuffers)
*
* \return one of GL_RGB, GL_RGBA, GL_STENCIL_INDEX, GL_DEPTH_COMPONENT
* GL_DEPTH_STENCIL_EXT or zero if error.
+ *
+ * XXX in the future when we support red-only and red-green formats
+ * we'll also return GL_RED and GL_RG.
*/
GLenum
_mesa_base_fbo_format(GLcontext *ctx, GLenum internalFormat)
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 77153889b6..d70cf877e8 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -57,7 +57,7 @@ void st_upload_constants( struct st_context *st,
unsigned shader_type)
{
struct pipe_context *pipe = st->pipe;
- struct pipe_constant_buffer *cbuf = &st->state.constants[shader_type];
+ struct pipe_buffer **cbuf = &st->state.constants[shader_type];
assert(shader_type == PIPE_SHADER_VERTEX ||
shader_type == PIPE_SHADER_FRAGMENT);
@@ -71,8 +71,8 @@ void st_upload_constants( struct st_context *st,
/* We always need to get a new buffer, to keep the drivers simple and
* avoid gratuitous rendering synchronization.
*/
- pipe_buffer_reference(&cbuf->buffer, NULL );
- cbuf->buffer = pipe_buffer_create(pipe->screen, 16,
+ pipe_buffer_reference(cbuf, NULL );
+ *cbuf = pipe_buffer_create(pipe->screen, 16,
PIPE_BUFFER_USAGE_CONSTANT,
paramBytes );
@@ -84,12 +84,12 @@ void st_upload_constants( struct st_context *st,
}
/* load Mesa constants into the constant buffer */
- if (cbuf->buffer)
- st_no_flush_pipe_buffer_write(st, cbuf->buffer,
+ if (cbuf)
+ st_no_flush_pipe_buffer_write(st, *cbuf,
0, paramBytes,
params->ParameterValues);
- st->pipe->set_constant_buffer(st->pipe, shader_type, 0, cbuf);
+ st->pipe->set_constant_buffer(st->pipe, shader_type, 0, *cbuf);
}
else {
st->constants.tracked_state[shader_type].dirty.mesa = 0x0;
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index 46c8cbb309..176f3ea68d 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -35,8 +35,6 @@
* Brian Paul
*/
-
-
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"
@@ -57,9 +55,7 @@
-
-
-/*
+/**
* Translate fragment program if needed.
*/
static void
@@ -155,8 +151,10 @@ find_translated_vp(struct st_context *st,
}
-
-
+/**
+ * Return pointer to a pass-through fragment shader.
+ * This shader is used when a texture is missing/incomplete.
+ */
static void *
get_passthrough_fs(struct st_context *st)
{
@@ -168,6 +166,11 @@ get_passthrough_fs(struct st_context *st)
return st->passthrough_fs;
}
+
+/**
+ * Update fragment program state/atom. This involves translating the
+ * Mesa fragment program into a gallium fragment program and binding it.
+ */
static void
update_fp( struct st_context *st )
{
@@ -191,6 +194,7 @@ update_fp( struct st_context *st )
}
}
+
const struct st_tracked_state st_update_fp = {
"st_update_fp", /* name */
{ /* dirty */
@@ -202,7 +206,10 @@ const struct st_tracked_state st_update_fp = {
-
+/**
+ * Update vertex program state/atom. This involves translating the
+ * Mesa vertex program into a gallium fragment program and binding it.
+ */
static void
update_vp( struct st_context *st )
{
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 145bd62b83..9e6ce30db0 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -218,8 +218,8 @@ static void st_destroy_context_priv( struct st_context *st )
}
for (i = 0; i < Elements(st->state.constants); i++) {
- if (st->state.constants[i].buffer) {
- pipe_buffer_reference(&st->state.constants[i].buffer, NULL);
+ if (st->state.constants[i]) {
+ pipe_buffer_reference(&st->state.constants[i], NULL);
}
}
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 831909a3f8..2c4943cfb0 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -92,7 +92,7 @@ struct st_context
struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS];
struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS];
struct pipe_clip_state clip;
- struct pipe_constant_buffer constants[2];
+ struct pipe_buffer *constants[2];
struct pipe_framebuffer_state framebuffer;
struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS];
struct pipe_scissor_state scissor;
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index e54f21be60..b0d5b993a7 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -365,6 +365,7 @@ setup_interleaved_attribs(GLcontext *ctx,
velements[attr].src_offset =
(unsigned) (arrays[mesaAttr]->Ptr - offset0);
+ velements[attr].instance_divisor = 0;
velements[attr].vertex_buffer_index = 0;
velements[attr].nr_components = arrays[mesaAttr]->Size;
velements[attr].src_format =
@@ -454,6 +455,7 @@ setup_non_interleaved_attribs(GLcontext *ctx,
/* common-case setup */
vbuffer[attr].stride = stride; /* in bytes */
vbuffer[attr].max_index = max_index;
+ velements[attr].instance_divisor = 0;
velements[attr].vertex_buffer_index = attr;
velements[attr].nr_components = arrays[mesaAttr]->Size;
velements[attr].src_format
@@ -522,7 +524,6 @@ st_draw_vbo(GLcontext *ctx,
struct pipe_context *pipe = ctx->st->pipe;
const struct st_vertex_program *vp;
const struct st_vp_varient *vpv;
- const struct pipe_shader_state *vs;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
GLuint attr;
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
@@ -550,7 +551,6 @@ st_draw_vbo(GLcontext *ctx,
/* must get these after state validation! */
vp = ctx->st->vp;
vpv = ctx->st->vp_varient;
- vs = &vpv->state;
#if 0
if (MESA_VERBOSE & VERBOSE_GLSL) {
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index cfc0caac98..a05d6dd06b 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -177,6 +177,7 @@ st_feedback_draw_vbo(GLcontext *ctx,
/* common-case setup */
vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */
vbuffers[attr].max_index = max_index;
+ velements[attr].instance_divisor = 0;
velements[attr].vertex_buffer_index = attr;
velements[attr].nr_components = arrays[mesaAttr]->Size;
velements[attr].src_format =
@@ -239,11 +240,11 @@ st_feedback_draw_vbo(GLcontext *ctx,
/* map constant buffers */
mapped_constants = pipe_buffer_map(pipe->screen,
- st->state.constants[PIPE_SHADER_VERTEX].buffer,
+ st->state.constants[PIPE_SHADER_VERTEX],
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_constant_buffer(st->draw, PIPE_SHADER_VERTEX,
mapped_constants,
- st->state.constants[PIPE_SHADER_VERTEX].buffer->size);
+ st->state.constants[PIPE_SHADER_VERTEX]->size);
/* draw here */
@@ -253,7 +254,7 @@ st_feedback_draw_vbo(GLcontext *ctx,
/* unmap constant buffers */
- pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer);
+ pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX]);
/*
* unmap vertex/index buffers
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index e25a613d8a..2a5fb27d8f 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -168,6 +168,7 @@ void st_init_extensions(struct st_context *st)
ctx->Extensions.EXT_blend_subtract = GL_TRUE;
ctx->Extensions.EXT_framebuffer_blit = GL_TRUE;
ctx->Extensions.EXT_framebuffer_object = GL_TRUE;
+ ctx->Extensions.EXT_framebuffer_multisample = GL_TRUE;
ctx->Extensions.EXT_fog_coord = GL_TRUE;
ctx->Extensions.EXT_multi_draw_arrays = GL_TRUE;
ctx->Extensions.EXT_pixel_buffer_object = GL_TRUE;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index e788008dfe..05b56c9b58 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -48,6 +48,10 @@ struct label {
unsigned token;
};
+
+/**
+ * Intermediate state used during shader translation.
+ */
struct st_translate {
struct ureg_program *ureg;
@@ -730,6 +734,7 @@ emit_face_var( struct st_translate *t,
t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
}
+
static void
emit_edgeflags( struct st_translate *t,
const struct gl_program *program )
@@ -741,6 +746,7 @@ emit_edgeflags( struct st_translate *t,
ureg_MOV( ureg, edge_dst, edge_src );
}
+
/**
* Translate Mesa program to TGSI format.
* \param program the program to translate
@@ -758,7 +764,7 @@ emit_edgeflags( struct st_translate *t,
* \param outputSemanticIndex the semantic index (ex: which texcoord) for
* each output
*
- * \return array of translated tokens, caller's responsibility to free
+ * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
*/
enum pipe_error
st_translate_mesa_program(
@@ -779,6 +785,7 @@ st_translate_mesa_program(
{
struct st_translate translate, *t;
unsigned i;
+ enum pipe_error ret = PIPE_OK;
t = &translate;
memset(t, 0, sizeof *t);
@@ -865,8 +872,10 @@ st_translate_mesa_program(
t->constants = CALLOC( program->Parameters->NumParameters,
sizeof t->constants[0] );
- if (t->constants == NULL)
+ if (t->constants == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out;
+ }
for (i = 0; i < program->Parameters->NumParameters; i++) {
switch (program->Parameters->Parameters[i].Type) {
@@ -920,8 +929,6 @@ st_translate_mesa_program(
t->insn[t->labels[i].branch_target] );
}
- return PIPE_OK;
-
out:
FREE(t->insn);
FREE(t->labels);
@@ -931,7 +938,7 @@ out:
debug_printf("%s: translate error flag set\n", __FUNCTION__);
}
- return PIPE_ERROR_OUT_OF_MEMORY;
+ return ret;
}
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 6a869fae90..5c87e47ca3 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -44,7 +44,6 @@
#include "st_debug.h"
#include "st_context.h"
-#include "st_atom.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "cso_cache/cso_context.h"
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 8a3e4cd3ac..b210ac9187 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -35,7 +35,6 @@
#include "main/texfetch.h"
#include "main/teximage.h"
#include "main/texobj.h"
-#include "main/texstore.h"
#undef Elements /* fix re-defined macro warning */
diff --git a/src/mesa/swrast/s_accum.c b/src/mesa/swrast/s_accum.c
index 0e0876efcb..cf53f01b7c 100644
--- a/src/mesa/swrast/s_accum.c
+++ b/src/mesa/swrast/s_accum.c
@@ -27,7 +27,6 @@
#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
-#include "main/fbobject.h"
#include "s_accum.h"
#include "s_context.h"
diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c
index e88ff19123..353e9999d6 100644
--- a/src/mesa/swrast/s_atifragshader.c
+++ b/src/mesa/swrast/s_atifragshader.c
@@ -23,7 +23,6 @@
#include "main/colormac.h"
#include "main/context.h"
#include "main/macros.h"
-#include "shader/program.h"
#include "shader/atifragshader.h"
#include "swrast/s_atifragshader.h"
diff --git a/src/mesa/swrast/s_bitmap.c b/src/mesa/swrast/s_bitmap.c
index 46c63aa645..59e26e9ea3 100644
--- a/src/mesa/swrast/s_bitmap.c
+++ b/src/mesa/swrast/s_bitmap.c
@@ -33,7 +33,6 @@
#include "main/condrender.h"
#include "main/image.h"
#include "main/macros.h"
-#include "main/pixel.h"
#include "s_context.h"
#include "s_span.h"
diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c
index 986b6aff4f..e881d1be30 100644
--- a/src/mesa/swrast/s_copypix.c
+++ b/src/mesa/swrast/s_copypix.c
@@ -28,11 +28,9 @@
#include "main/colormac.h"
#include "main/condrender.h"
#include "main/convolve.h"
-#include "main/histogram.h"
#include "main/image.h"
#include "main/macros.h"
#include "main/imports.h"
-#include "main/pixel.h"
#include "s_context.h"
#include "s_depth.h"
diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c
index c37a54eb3e..0b6bb7e3ec 100644
--- a/src/mesa/swrast/s_depth.c
+++ b/src/mesa/swrast/s_depth.c
@@ -28,7 +28,6 @@
#include "main/formats.h"
#include "main/macros.h"
#include "main/imports.h"
-#include "main/fbobject.h"
#include "s_depth.h"
#include "s_context.h"
diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c
index 55a4c4c3c6..248d6cc1c0 100644
--- a/src/mesa/swrast/s_drawpix.c
+++ b/src/mesa/swrast/s_drawpix.c
@@ -31,7 +31,6 @@
#include "main/image.h"
#include "main/macros.h"
#include "main/imports.h"
-#include "main/pixel.h"
#include "main/state.h"
#include "s_context.h"
diff --git a/src/mesa/swrast/s_feedback.c b/src/mesa/swrast/s_feedback.c
index 47ed25ee10..2e6066983d 100644
--- a/src/mesa/swrast/s_feedback.c
+++ b/src/mesa/swrast/s_feedback.c
@@ -25,7 +25,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
#include "main/context.h"
-#include "main/enums.h"
#include "main/feedback.h"
#include "main/macros.h"
diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c
index a22d34415d..9ac33a26a6 100644
--- a/src/mesa/swrast/s_fragprog.c
+++ b/src/mesa/swrast/s_fragprog.c
@@ -25,7 +25,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
#include "main/context.h"
-#include "main/texstate.h"
#include "shader/prog_instruction.h"
#include "s_fragprog.h"
diff --git a/src/mesa/swrast/s_lines.c b/src/mesa/swrast/s_lines.c
index 23cb9b57ef..5411229d70 100644
--- a/src/mesa/swrast/s_lines.c
+++ b/src/mesa/swrast/s_lines.c
@@ -29,7 +29,6 @@
#include "main/macros.h"
#include "s_aaline.h"
#include "s_context.h"
-#include "s_depth.h"
#include "s_feedback.h"
#include "s_lines.h"
#include "s_span.h"
diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c
index 50ec2063a5..6b955429e9 100644
--- a/src/mesa/swrast/s_points.c
+++ b/src/mesa/swrast/s_points.c
@@ -27,7 +27,6 @@
#include "main/colormac.h"
#include "main/context.h"
#include "main/macros.h"
-#include "main/texstate.h"
#include "s_context.h"
#include "s_feedback.h"
#include "s_points.h"
diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c
index 44a11cd6dd..94fb974eab 100644
--- a/src/mesa/swrast/s_readpix.c
+++ b/src/mesa/swrast/s_readpix.c
@@ -33,7 +33,6 @@
#include "main/image.h"
#include "main/macros.h"
#include "main/imports.h"
-#include "main/pixel.h"
#include "main/state.h"
#include "s_context.h"
diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c
index 889164b986..594b71a03c 100644
--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -29,7 +29,6 @@
#include "main/colormac.h"
#include "main/image.h"
#include "main/imports.h"
-#include "main/pixel.h"
#include "shader/prog_instruction.h"
#include "s_context.h"
diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c
index db21b4589d..5a14e595a0 100644
--- a/src/mesa/tnl/t_context.c
+++ b/src/mesa/tnl/t_context.c
@@ -38,7 +38,6 @@
#include "tnl.h"
#include "t_context.h"
#include "t_pipeline.h"
-#include "t_vp_build.h"
#include "vbo/vbo.h"
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index d31b29b9b4..38757a0e28 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -29,15 +29,11 @@
#include "main/condrender.h"
#include "main/context.h"
#include "main/imports.h"
-#include "main/state.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/enums.h"
#include "t_context.h"
-#include "t_pipeline.h"
-#include "t_vp_build.h"
-#include "t_vertex.h"
#include "tnl.h"
diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c
index 01b30babb4..946b29e250 100644
--- a/src/mesa/tnl/t_pipeline.c
+++ b/src/mesa/tnl/t_pipeline.c
@@ -28,7 +28,6 @@
#include "main/glheader.h"
#include "main/context.h"
#include "main/imports.h"
-#include "main/state.h"
#include "main/mtypes.h"
#include "t_context.h"
diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c
index 99b6787455..13b84a7d77 100644
--- a/src/mesa/tnl/t_rasterpos.c
+++ b/src/mesa/tnl/t_rasterpos.c
@@ -29,7 +29,6 @@
#include "main/feedback.h"
#include "main/light.h"
#include "main/macros.h"
-#include "main/rastpos.h"
#include "main/simple_list.h"
#include "main/mtypes.h"
diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c
index 15a8a67b91..5396548666 100644
--- a/src/mesa/tnl/t_vb_program.c
+++ b/src/mesa/tnl/t_vb_program.c
@@ -40,7 +40,6 @@
#include "shader/prog_statevars.h"
#include "shader/prog_execute.h"
#include "swrast/s_context.h"
-#include "swrast/s_texfilter.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c
index e168a89ea5..a057befed0 100644
--- a/src/mesa/vbo/vbo_exec.c
+++ b/src/mesa/vbo/vbo_exec.c
@@ -28,9 +28,6 @@
#include "main/api_arrayelt.h"
#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/context.h"
-#include "main/macros.h"
#include "main/mtypes.h"
#include "main/vtxfmt.h"
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 6de8f059b7..2c82f7c9c5 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -35,7 +35,6 @@
#include "main/bufferobj.h"
#include "main/enums.h"
#include "main/macros.h"
-#include "glapi/dispatch.h"
#include "vbo_context.h"
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 4f43856016..d7dbbceb1b 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -30,7 +30,6 @@
#include "main/context.h"
#include "main/enums.h"
#include "main/state.h"
-#include "main/macros.h"
#include "vbo_context.h"
diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c
index 9757c3d9f6..10f705cf84 100644
--- a/src/mesa/vbo/vbo_save.c
+++ b/src/mesa/vbo/vbo_save.c
@@ -28,8 +28,6 @@
#include "main/mtypes.h"
#include "main/bufferobj.h"
-#include "main/dlist.h"
-#include "main/vtxfmt.h"
#include "main/imports.h"
#include "vbo_context.h"
diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c
index b7a74e4535..f13a16e3b5 100644
--- a/src/mesa/vbo/vbo_save_loopback.c
+++ b/src/mesa/vbo/vbo_save_loopback.c
@@ -29,7 +29,6 @@
#include "main/glheader.h"
#include "main/enums.h"
#include "main/imports.h"
-#include "main/macros.h"
#include "main/mtypes.h"
#include "glapi/dispatch.h"
#include "glapi/glapi.h"
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index c45190b9dd..2ca111217c 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -34,7 +34,6 @@
#include "main/imports.h"
#include "main/image.h"
#include "main/macros.h"
-#include "main/enums.h"
#include "main/mtypes.h"
#include "vbo_split.h"
@@ -221,8 +220,6 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
{
struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
-/* _mesa_printf("begin %s (%d)\n", _mesa_lookup_prim_by_nr(mode), begin_flag); */
-
prim->mode = mode;
prim->begin = begin_flag;
}
diff --git a/src/mesa/x86/x86_xform.c b/src/mesa/x86/x86_xform.c
index 52f6b25d81..c834e2b468 100644
--- a/src/mesa/x86/x86_xform.c
+++ b/src/mesa/x86/x86_xform.c
@@ -30,7 +30,6 @@
#include "main/glheader.h"
#include "main/context.h"
#include "math/m_xform.h"
-#include "tnl/t_context.h"
#include "x86_xform.h"
#include "common_x86_asm.h"