summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c11
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h7
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c32
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h16
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c433
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h23
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c218
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.c19
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h11
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.c604
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.h59
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c26
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_printf.c21
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_printf.h4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c28
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c725
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h113
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c490
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h9
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c582
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c77
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h77
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c479
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c75
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c33
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_execmem.c1
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_sw_helper.h39
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h18
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c52
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.h9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h2
-rw-r--r--src/gallium/auxiliary/util/u_atomic.h47
-rw-r--r--src/gallium/auxiliary/util/u_format.csv3
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.c53
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.h16
-rw-r--r--src/gallium/auxiliary/util/u_math.h5
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h50
-rw-r--r--src/gallium/auxiliary/util/u_sse.h156
-rw-r--r--src/gallium/auxiliary/util/u_tile.c84
-rw-r--r--src/gallium/docs/source/tgsi.rst8
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c1
-rw-r--r--src/gallium/drivers/i965/intel_decode.c20
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile7
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c412
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c45
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_limits.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_debug.c40
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c286
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h136
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.h28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c170
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h35
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.c279
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.h64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c228
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c184
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_point.c228
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c513
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_vbuf.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c85
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c524
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.c759
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.h80
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_round.c29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_sincos.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py267
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h3
-rw-r--r--src/gallium/drivers/nv50/nv50_pc_optimize.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c10
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragprog.c2
-rw-r--r--src/gallium/drivers/r300/r300_debug.c1
-rw-r--r--src/gallium/drivers/r300/r300_fs.c5
-rw-r--r--src/gallium/drivers/r300/r300_query.c6
-rw-r--r--src/gallium/drivers/r300/r300_screen.c1
-rw-r--r--src/gallium/drivers/r300/r300_screen.h1
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h18
-rw-r--r--src/gallium/drivers/r300/r300_texture.c49
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c6
-rw-r--r--src/gallium/drivers/r300/r300_vs.c5
-rw-r--r--src/gallium/drivers/r600/Makefile17
-rw-r--r--src/gallium/drivers/r600/SConscript10
-rw-r--r--src/gallium/drivers/r600/eg_asm.c13
-rw-r--r--src/gallium/drivers/r600/eg_hw_states.c1088
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h213
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c789
-rw-r--r--src/gallium/drivers/r600/evergreend.h72
-rw-r--r--src/gallium/drivers/r600/r600.h99
-rw-r--r--src/gallium/drivers/r600/r600_asm.c16
-rw-r--r--src/gallium/drivers/r600/r600_asm.h15
-rw-r--r--src/gallium/drivers/r600/r600_blit.c236
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c53
-rw-r--r--src/gallium/drivers/r600/r600_context.c178
-rw-r--r--src/gallium/drivers/r600/r600_context.h312
-rw-r--r--src/gallium/drivers/r600/r600_draw.c188
-rw-r--r--src/gallium/drivers/r600/r600_formats.h56
-rw-r--r--src/gallium/drivers/r600/r600_helper.c3
-rw-r--r--src/gallium/drivers/r600/r600_hw_states.c1215
-rw-r--r--src/gallium/drivers/r600/r600_opcodes.h8
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c (renamed from src/gallium/drivers/r600/r600_screen.c)284
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h117
-rw-r--r--src/gallium/drivers/r600/r600_public.h27
-rw-r--r--src/gallium/drivers/r600/r600_query.c209
-rw-r--r--src/gallium/drivers/r600/r600_resource.c24
-rw-r--r--src/gallium/drivers/r600/r600_resource.h42
-rw-r--r--src/gallium/drivers/r600/r600_screen.h90
-rw-r--r--src/gallium/drivers/r600/r600_shader.c646
-rw-r--r--src/gallium/drivers/r600/r600_shader.h4
-rw-r--r--src/gallium/drivers/r600/r600_state.c1827
-rw-r--r--src/gallium/drivers/r600/r600_state2.c2490
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h183
-rw-r--r--src/gallium/drivers/r600/r600_texture.c279
-rw-r--r--src/gallium/drivers/r600/r600d.h52
-rw-r--r--src/gallium/drivers/r600/r700_asm.c7
-rw-r--r--src/gallium/drivers/r600/radeon.h224
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c12
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c12
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.h1
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c61
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_pipe.c3
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c1
-rw-r--r--src/gallium/drivers/trace/tr_context.c30
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c29
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.h2
-rw-r--r--src/gallium/include/pipe/p_compiler.h21
-rw-r--r--src/gallium/include/pipe/p_defines.h3
-rw-r--r--src/gallium/include/pipe/p_format.h4
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h3
-rw-r--r--src/gallium/state_trackers/dri/common/dri_context.c2
-rw-r--r--src/gallium/state_trackers/dri/common/dri_context.h2
-rw-r--r--src/gallium/state_trackers/dri/common/dri_drawable.c2
-rw-r--r--src/gallium/state_trackers/dri/common/dri_drawable.h2
-rw-r--r--src/gallium/state_trackers/dri/common/dri_screen.c2
-rw-r--r--src/gallium/state_trackers/dri/common/dri_screen.h2
-rw-r--r--src/gallium/state_trackers/dri/drm/dri2.c6
-rw-r--r--src/gallium/state_trackers/dri/sw/drisw.c2
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.c61
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_api.c6
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_st.c1
-rw-r--r--src/gallium/state_trackers/egl/x11/glcore.h181
-rw-r--r--src/gallium/state_trackers/egl/x11/glxinit.c2
-rw-r--r--src/gallium/state_trackers/egl/x11/native_dri2.c4
-rw-r--r--src/gallium/state_trackers/egl/x11/x11_screen.h2
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.c19
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.h3
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_st.c8
-rw-r--r--src/gallium/state_trackers/python/st_device.c1
-rw-r--r--src/gallium/state_trackers/vega/api_images.c1
-rw-r--r--src/gallium/state_trackers/vega/vg_manager.c2
-rw-r--r--src/gallium/state_trackers/xorg/Makefile2
-rw-r--r--src/gallium/state_trackers/xorg/xorg_crtc.c12
-rw-r--r--src/gallium/state_trackers/xorg/xorg_dri2.c24
-rw-r--r--src/gallium/state_trackers/xorg/xorg_driver.c57
-rw-r--r--src/gallium/state_trackers/xorg/xorg_exa.c9
-rw-r--r--src/gallium/state_trackers/xorg/xorg_output.c8
-rw-r--r--src/gallium/state_trackers/xorg/xorg_tracker.h1
-rw-r--r--src/gallium/state_trackers/xorg/xorg_xv.c11
-rw-r--r--src/gallium/targets/Makefile.xorg4
-rw-r--r--src/gallium/targets/dri-i915/target.c3
-rw-r--r--src/gallium/targets/dri-i965/target.c3
-rw-r--r--src/gallium/targets/dri-r600/target.c29
-rw-r--r--src/gallium/targets/egl/Makefile10
-rw-r--r--src/gallium/targets/egl/pipe_i915.c1
-rw-r--r--src/gallium/targets/egl/pipe_i965.c5
-rw-r--r--src/gallium/targets/libgl-xlib/Makefile2
-rw-r--r--src/gallium/targets/xorg-i965/intel_target.c3
-rw-r--r--src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c5
-rw-r--r--src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c6
-rw-r--r--src/gallium/targets/xorg-vmwgfx/vmw_screen.c1
-rw-r--r--src/gallium/targets/xorg-vmwgfx/vmw_video.c10
-rwxr-xr-xsrc/gallium/tests/python/retrace/interpreter.py42
-rw-r--r--src/gallium/tests/python/tests/regress/fragment-shader/frag-face.sh14
-rw-r--r--src/gallium/winsys/r600/drm/Makefile15
-rw-r--r--src/gallium/winsys/r600/drm/SConscript12
-rw-r--r--src/gallium/winsys/r600/drm/eg_states.h453
-rw-r--r--src/gallium/winsys/r600/drm/evergreen_hw_context.c919
-rw-r--r--src/gallium/winsys/r600/drm/evergreen_state.c885
-rw-r--r--src/gallium/winsys/r600/drm/gen_eg_states.py39
-rw-r--r--src/gallium/winsys/r600/drm/gen_r600_states.py39
-rw-r--r--src/gallium/winsys/r600/drm/r600.c8
-rw-r--r--src/gallium/winsys/r600/drm/r600_bo.c138
-rw-r--r--src/gallium/winsys/r600/drm/r600_drm.c219
-rw-r--r--src/gallium/winsys/r600/drm/r600_drm_public.h26
-rw-r--r--src/gallium/winsys/r600/drm/r600_hw_context.c1362
-rw-r--r--src/gallium/winsys/r600/drm/r600_priv.h128
-rw-r--r--src/gallium/winsys/r600/drm/r600_state.c662
-rw-r--r--src/gallium/winsys/r600/drm/r600_state2.c1317
-rw-r--r--src/gallium/winsys/r600/drm/r600_states.h522
-rw-r--r--src/gallium/winsys/r600/drm/r600d.h9
-rw-r--r--src/gallium/winsys/r600/drm/radeon.c200
-rw-r--r--src/gallium/winsys/r600/drm/radeon_bo.c151
-rw-r--r--src/gallium/winsys/r600/drm/radeon_bo_pb.c44
-rw-r--r--src/gallium/winsys/r600/drm/radeon_ctx.c376
-rw-r--r--src/gallium/winsys/r600/drm/radeon_draw.c57
-rw-r--r--src/gallium/winsys/r600/drm/radeon_pciid.c2
-rw-r--r--src/gallium/winsys/r600/drm/radeon_priv.h150
-rw-r--r--src/gallium/winsys/r600/drm/radeon_state.c203
-rw-r--r--src/gallium/winsys/r600/drm/radeon_ws_bo.c95
-rw-r--r--src/gallium/winsys/svga/drm/vmw_screen_dri.c1
-rw-r--r--src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c15
-rw-r--r--src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h11
228 files changed, 13127 insertions, 16357 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 02af4d9280..abd33f6eef 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -176,6 +176,7 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
gallivm/lp_bld_tgsi_aos.c \
+ gallivm/lp_bld_tgsi_info.c \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 48547c4b2c..94cd74424a 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -227,6 +227,7 @@ if env['llvm']:
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
'gallivm/lp_bld_tgsi_aos.c',
+ 'gallivm/lp_bld_tgsi_info.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index c52234d8e3..39d82f3289 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -710,6 +710,11 @@ draw_set_samplers(struct draw_context *draw,
draw->samplers[i] = NULL;
draw->num_samplers = num;
+
+#ifdef HAVE_LLVM
+ if (draw->llvm)
+ draw_llvm_set_sampler_state(draw);
+#endif
}
void
@@ -717,9 +722,9 @@ draw_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS])
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS])
{
#ifdef HAVE_LLVM
if(draw->llvm)
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 1f27cbf488..ff4f753604 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -49,7 +49,6 @@ struct draw_geometry_shader;
struct draw_fragment_shader;
struct tgsi_sampler;
-#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
struct draw_context *draw_create( struct pipe_context *pipe );
@@ -120,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS]);
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS]);
/*
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 622250e7f7..338127dafe 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -47,6 +47,7 @@
#include "tgsi/tgsi_dump.h"
#include "util/u_cpu_detect.h"
+#include "util/u_math.h"
#include "util/u_pointer.h"
#include "util/u_string.h"
@@ -74,12 +75,12 @@ init_globals(struct draw_llvm *llvm)
elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
- LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
- LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_DATA] =
LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
- DRAW_MAX_TEXTURE_LEVELS);
+ PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
@@ -1458,9 +1459,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS])
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS])
{
unsigned j;
struct draw_jit_texture *jit_tex;
@@ -1482,6 +1483,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
}
}
+
+void
+draw_llvm_set_sampler_state(struct draw_context *draw)
+{
+ unsigned i;
+
+ for (i = 0; i < draw->num_samplers; i++) {
+ struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
+
+ if (draw->samplers[i]) {
+ jit_tex->min_lod = draw->samplers[i]->min_lod;
+ jit_tex->max_lod = draw->samplers[i]->max_lod;
+ jit_tex->lod_bias = draw->samplers[i]->lod_bias;
+ COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
+ }
+ }
+}
+
+
void
draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
{
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index b5b8c668d7..aa984ed3a2 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -41,7 +41,6 @@
#include <llvm-c/Target.h>
#include <llvm-c/ExecutionEngine.h>
-#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
struct draw_llvm;
struct llvm_vertex_shader;
@@ -52,9 +51,9 @@ struct draw_jit_texture
uint32_t height;
uint32_t depth;
uint32_t last_level;
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
- const void *data[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
+ const void *data[PIPE_MAX_TEXTURE_LEVELS];
float min_lod;
float max_lod;
float lod_bias;
@@ -292,12 +291,15 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
LLVMValueRef context_ptr);
void
+draw_llvm_set_sampler_state(struct draw_context *draw);
+
+void
draw_llvm_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS]);
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS]);
#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index ff0c7f7ca8..f9a12a41a1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -983,41 +983,134 @@ enum lp_build_round_sse41_mode
};
+/**
+ * Helper for SSE4.1's ROUNDxx instructions.
+ *
+ * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the
+ * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
+ */
static INLINE LLVMValueRef
lp_build_round_sse41(struct lp_build_context *bld,
LLVMValueRef a,
enum lp_build_round_sse41_mode mode)
{
const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef i32t = LLVMInt32Type();
const char *intrinsic;
+ LLVMValueRef res;
assert(type.floating);
- assert(type.width*type.length == 128);
+
assert(lp_check_value(type, a));
assert(util_cpu_caps.has_sse4_1);
- switch(type.width) {
- case 32:
- intrinsic = "llvm.x86.sse41.round.ps";
- break;
- case 64:
- intrinsic = "llvm.x86.sse41.round.pd";
- break;
- default:
- assert(0);
- return bld->undef;
+ if (type.length == 1) {
+ LLVMTypeRef vec_type;
+ LLVMValueRef undef;
+ LLVMValueRef args[3];
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ss";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.sd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ vec_type = LLVMVectorType(bld->elem_type, 4);
+
+ undef = LLVMGetUndef(vec_type);
+
+ args[0] = undef;
+ args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
+ args[2] = LLVMConstInt(i32t, mode, 0);
+
+ res = lp_build_intrinsic(bld->builder, intrinsic,
+ vec_type, args, Elements(args));
+
+ res = LLVMBuildExtractElement(bld->builder, res, index0, "");
+ }
+ else {
+ assert(type.width*type.length == 128);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ps";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.pd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ res = lp_build_intrinsic_binary(bld->builder, intrinsic,
+ bld->vec_type, a,
+ LLVMConstInt(i32t, mode, 0));
+ }
+
+ return res;
+}
+
+
+static INLINE LLVMValueRef
+lp_build_iround_nearest_sse2(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMTypeRef ret_type = lp_build_int_vec_type(type);
+ const char *intrinsic;
+ LLVMValueRef res;
+
+ assert(type.floating);
+ /* using the double precision conversions is a bit more complicated */
+ assert(type.width == 32);
+
+ assert(lp_check_value(type, a));
+ assert(util_cpu_caps.has_sse2);
+
+ /* This is relying on MXCSR rounding mode, which should always be nearest. */
+ if (type.length == 1) {
+ LLVMTypeRef vec_type;
+ LLVMValueRef undef;
+ LLVMValueRef arg;
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+
+ vec_type = LLVMVectorType(bld->elem_type, 4);
+
+ intrinsic = "llvm.x86.sse.cvtss2si";
+
+ undef = LLVMGetUndef(vec_type);
+
+ arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
+
+ res = lp_build_intrinsic_unary(bld->builder, intrinsic,
+ ret_type, arg);
+ }
+ else {
+ assert(type.width*type.length == 128);
+
+ intrinsic = "llvm.x86.sse2.cvtps2dq";
+
+ res = lp_build_intrinsic_unary(bld->builder, intrinsic,
+ ret_type, a);
}
- return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
- LLVMConstInt(LLVMInt32Type(), mode, 0));
+ return res;
}
/**
- * Return the integer part of a float (vector) value. The returned value is
- * a float (vector).
- * Ex: trunc(-1.5) = 1.0
+ * Return the integer part of a float (vector) value (== round toward zero).
+ * The returned value is a float (vector).
+ * Ex: trunc(-1.5) = -1.0
*/
LLVMValueRef
lp_build_trunc(struct lp_build_context *bld,
@@ -1028,8 +1121,10 @@ lp_build_trunc(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
@@ -1056,8 +1151,10 @@ lp_build_round(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1082,8 +1179,10 @@ lp_build_floor(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1108,8 +1207,10 @@ lp_build_ceil(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1134,9 +1235,9 @@ lp_build_fract(struct lp_build_context *bld,
/**
- * Return the integer part of a float (vector) value. The returned value is
- * an integer (vector).
- * Ex: itrunc(-1.5) = 1
+ * Return the integer part of a float (vector) value (== round toward zero).
+ * The returned value is an integer (vector).
+ * Ex: itrunc(-1.5) = -1
*/
LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
@@ -1163,31 +1264,40 @@ lp_build_iround(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse2 &&
+ ((type.width == 32) && (type.length == 1 || type.length == 4))) {
+ return lp_build_iround_nearest_sse2(bld, a);
+ }
+ else if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
}
else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
LLVMValueRef half;
- /* get sign bit */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
-
- /* sign * 0.5 */
half = lp_build_const_vec(type, 0.5);
- half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
- half = LLVMBuildOr(bld->builder, sign, half, "");
- half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+
+ if (type.sign) {
+ LLVMTypeRef vec_type = bld->vec_type;
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* get sign bit */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+
+ /* sign * 0.5 */
+ half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
+ half = LLVMBuildOr(bld->builder, sign, half, "");
+ half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+ }
res = LLVMBuildFAdd(bld->builder, a, half, "");
}
@@ -1208,37 +1318,42 @@ lp_build_ifloor(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
}
else {
- /* Take the sign bit and add it to 1 constant */
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
- LLVMValueRef offset;
-
- /* sign = a < 0 ? ~0 : 0 */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
-
- /* offset = -0.99999(9)f */
- offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
- offset = LLVMConstBitCast(offset, int_vec_type);
-
- /* offset = a < 0 ? offset : 0.0f */
- offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
-
- res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
+ res = a;
+
+ if (type.sign) {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef vec_type = bld->vec_type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? ~0 : 0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
+
+ /* offset = -0.99999(9)f */
+ offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? offset : 0.0f */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
+
+ res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res");
+ }
}
/* round to nearest (toward zero) */
@@ -1258,35 +1373,39 @@ lp_build_iceil(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef vec_type = bld->vec_type;
unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
LLVMValueRef offset;
- /* sign = a < 0 ? 0 : ~0 */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
- sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
-
/* offset = 0.99999(9)f */
offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
- offset = LLVMConstBitCast(offset, int_vec_type);
- /* offset = a < 0 ? 0.0 : offset */
- offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+ if (type.sign) {
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* sign = a < 0 ? 0 : ~0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
+ sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
+
+ /* offset = a < 0 ? 0.0 : offset */
+ offset = LLVMConstBitCast(offset, int_vec_type);
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+ }
res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
@@ -1298,6 +1417,46 @@ lp_build_iceil(struct lp_build_context *bld,
}
+/**
+ * Combined ifloor() & fract().
+ *
+ * Preferred to calling the functions separately, as it will ensure that the
+ * strategy (floor() vs ifloor()) that results in less redundant work is used.
+ *
+ * \param bld        build context; bld->type must be a floating point type
+ * \param a          value to split into integer and fractional parts
+ * \param out_ipart  receives the floor of a, as an integer value
+ * \param out_fpart  receives a minus its floor, as a floating point value
+ */
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef ipart;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
+ /*
+ * floor() is easier.
+ *
+ * Compute the floor in floating point first, subtract it to get the
+ * fraction, and only then convert the floor to integer.
+ */
+
+ ipart = lp_build_floor(bld, a);
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart");
+ }
+ else {
+ /*
+ * ifloor() is easier.
+ *
+ * Compute the integer floor first, then convert it back to floating
+ * point so it can be subtracted from a to get the fraction.
+ */
+
+ *out_ipart = lp_build_ifloor(bld, a);
+ ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart");
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ }
+}
+
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
@@ -2107,6 +2266,71 @@ lp_build_exp2(struct lp_build_context *bld,
/**
+ * Extract the exponent of an IEEE-754 floating point value.
+ *
+ * Optionally apply an integer bias.
+ *
+ * Result is an integer value with
+ *
+ * ifloor(log2(x)) + bias
+ *
+ * \param bld   build context; bld->type must be a floating point type
+ * \param x     floating point value whose exponent field is extracted
+ * \param bias  integer added to the unbiased exponent
+ *
+ * NOTE(review): the exponent mask (255) and exponent bias (127) below are the
+ * IEEE-754 single precision values, so this looks correct only for 32-bit
+ * floats -- confirm no caller passes another float width.
+ */
+LLVMValueRef
+lp_build_extract_exponent(struct lp_build_context *bld,
+ LLVMValueRef x,
+ int bias)
+{
+ const struct lp_type type = bld->type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef res;
+
+ assert(type.floating);
+
+ assert(lp_check_value(bld->type, x));
+
+ /* Reinterpret the float bits as integers so they can be shifted/masked */
+ x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");
+
+ /* Shift out the mantissa, mask off the sign bit, then remove the bias */
+ res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
+ res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), "");
+ res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - bias), "");
+
+ return res;
+}
+
+
+/**
+ * Extract the mantissa of a floating point value.
+ *
+ * Result is a floating point value with
+ *
+ * x / 2**floor(log2(x))
+ *
+ * It is built by keeping the mantissa bits of x and replacing everything
+ * else (sign and exponent) with the corresponding bits of bld->one.
+ *
+ * \param bld  build context; bld->type must be a floating point type
+ * \param x    floating point value whose mantissa is extracted
+ */
+LLVMValueRef
+lp_build_extract_mantissa(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ const struct lp_type type = bld->type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1);
+ LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type);
+ LLVMValueRef res;
+
+ assert(lp_check_value(bld->type, x));
+
+ assert(type.floating);
+
+ /* Reinterpret the float bits as integers so they can be masked */
+ x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");
+
+ /* res = x / 2**ipart */
+ res = LLVMBuildAnd(bld->builder, x, mantmask, "");
+ res = LLVMBuildOr(bld->builder, res, one, "");
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+
+ return res;
+}
+
+
+
+/**
* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
* These coefficients can be generated with
* http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
@@ -2225,3 +2449,62 @@ lp_build_log2(struct lp_build_context *bld,
lp_build_log2_approx(bld, x, NULL, NULL, &res);
return res;
}
+
+
+/**
+ * Faster (and less accurate) log2.
+ *
+ * log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x))
+ *
+ * Piece-wise linear approximation, with exact results when x is a
+ * power of two.
+ *
+ * See http://www.flipcode.com/archives/Fast_log_Function.shtml
+ *
+ * \param bld  build context; bld->type must be a floating point type
+ * \param x    floating point value to take the logarithm of
+ * \return     floating point approximation of log2(x)
+ */
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef ipart;
+ LLVMValueRef fpart;
+
+ assert(lp_check_value(bld->type, x));
+
+ assert(bld->type.floating);
+
+ /* ipart = floor(log2(x)) - 1 */
+ ipart = lp_build_extract_exponent(bld, x, -1);
+ ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, "");
+
+ /* fpart = x / 2**floor(log2(x)) */
+ fpart = lp_build_extract_mantissa(bld, x);
+
+ /* ipart + fpart */
+ return LLVMBuildFAdd(bld->builder, ipart, fpart, "");
+}
+
+
+/**
+ * Fast implementation of iround(log2(x)).
+ *
+ * Not an approximation -- it should give accurate results all the time.
+ *
+ * \param bld  build context; bld->type must be a floating point type
+ * \param x    floating point value to take the logarithm of
+ * \return     integer value, the exponent of x * sqrt(2)
+ */
+LLVMValueRef
+lp_build_ilog2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef sqrt2 = lp_build_const_vec(bld->type, M_SQRT2);
+ LLVMValueRef ipart;
+
+ assert(bld->type.floating);
+
+ assert(lp_check_value(bld->type, x));
+
+ /* x * 2^(0.5) i.e., add 0.5 to the log2(x) */
+ x = LLVMBuildFMul(bld->builder, x, sqrt2, "");
+
+ /* ipart = floor(log2(x) + 0.5) */
+ ipart = lp_build_extract_exponent(bld, x, 0);
+
+ return ipart;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 31efa9921c..c78b61decf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -171,6 +171,12 @@ LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a);
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart);
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
@@ -209,9 +215,26 @@ lp_build_exp2(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_extract_exponent(struct lp_build_context *bld,
+ LLVMValueRef x,
+ int bias);
+
+LLVMValueRef
+lp_build_extract_mantissa(struct lp_build_context *bld,
+ LLVMValueRef x);
+
+LLVMValueRef
lp_build_log2(struct lp_build_context *bld,
LLVMValueRef a);
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ilog2(struct lp_build_context *bld,
+ LLVMValueRef x);
+
void
lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef x,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 8b477313d4..6967dd2622 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -63,6 +63,7 @@
#include "util/u_debug.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -96,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
LLVMValueRef res;
unsigned mantissa;
- unsigned n;
- unsigned long long ubound;
- unsigned long long mask;
- double scale;
- double bias;
assert(src_type.floating);
+ assert(dst_width <= src_type.width);
+ src_type.sign = FALSE;
mantissa = lp_mantissa(src_type);
- /* We cannot carry more bits than the mantissa */
- n = MIN2(mantissa, dst_width);
+ if (dst_width <= mantissa) {
+ /*
+ * Apply magic coefficients that will make the desired result to appear
+ * in the lowest significant bits of the mantissa, with correct rounding.
+ *
+ * This only works if the destination width fits in the mantissa.
+ */
- /* This magic coefficients will make the desired result to appear in the
- * lowest significant bits of the mantissa.
- */
- ubound = ((unsigned long long)1 << n);
- mask = ubound - 1;
- scale = (double)mask/ubound;
- bias = (double)((unsigned long long)1 << (mantissa - n));
+ unsigned long long ubound;
+ unsigned long long mask;
+ double scale;
+ double bias;
- res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
- res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
- res = LLVMBuildBitCast(builder, res, int_vec_type, "");
+ ubound = (1ULL << dst_width);
+ mask = ubound - 1;
+ scale = (double)mask/ubound;
+ bias = (double)(1ULL << (mantissa - dst_width));
- if(dst_width > n) {
- int shift = dst_width - n;
- res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), "");
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
+ res = LLVMBuildBitCast(builder, res, int_vec_type, "");
+ res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");
+ }
+ else if (dst_width == (mantissa + 1)) {
+ /*
+ * The destination width matches exactly what can be represented in
+ * floating point (i.e., mantissa + 1 bits). So do a straight
+ * multiplication followed by casting. No further rounding is necessary.
+ */
+
+ double scale;
- /* TODO: Fill in the empty lower bits for additional precision? */
- /* YES: this fixes progs/trivial/tri-z-eq.c.
- * Otherwise vertex Z=1.0 values get converted to something like
- * 0xfffffb00 and the test for equality with 0xffffffff fails.
+ scale = (double)((1ULL << dst_width) - 1);
+
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+ }
+ else {
+ /*
+ * The destination exceeds what can be represented in the floating point.
+ * So multiply by the largest power of two we can get away with, and then
+ * subtract the most significant bit to rescale to normalized values.
+ *
+ * The largest power of two factor we can get away with is
+ * (1 << (src_type.width - 1)), because we need to use a signed conversion.
+ * In theory it should be (1 << (src_type.width - 2)), but IEEE 754 rules
+ * state that INT_MIN should be returned in FPToSI, which is the correct
+ * result for values near 1.0!
+ *
+ * This means we get (src_type.width - 1) correct bits for values near 0.0,
+ * and (mantissa + 1) correct bits for values near 1.0. Equally or more
+ * important, we also get exact results for 0.0 and 1.0.
*/
-#if 0
- {
- LLVMValueRef msb;
- msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), "");
- msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), "");
- msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), "");
- res = LLVMBuildOr(builder, res, msb, "");
- }
-#elif 0
- while(shift > 0) {
- res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), "");
- shift -= n;
- n *= 2;
+
+ unsigned n = MIN2(src_type.width - 1, dst_width);
+
+ double scale = (double)(1ULL << n);
+ unsigned lshift = dst_width - n;
+ unsigned rshift = n;
+ LLVMValueRef lshifted;
+ LLVMValueRef rshifted;
+
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+
+ /*
+ * Align the most significant bit to its final place.
+ *
+ * This will cause 1.0 to overflow to 0, but the later adjustment will
+ * get it right.
+ */
+ if (lshift) {
+ lshifted = LLVMBuildShl(builder, res,
+ lp_build_const_int_vec(src_type, lshift), "");
+ } else {
+ lshifted = res;
}
-#endif
+
+ /*
+ * Align the most significant bit to the right.
+ *
+ * This must be a logical shift: when n == src_type.width - 1 an input of
+ * 1.0 converts to INT_MIN, and an arithmetic shift would sign-extend that
+ * to -1, so the subtraction below would add one instead of subtracting
+ * it and 1.0 would map to 1 rather than to all ones.
+ */
+ rshifted = LLVMBuildLShr(builder, res,
+ lp_build_const_int_vec(src_type, rshift), "");
+
+ /*
+ * Subtract the right-aligned MSB from the value, therefore re-scaling from
+ * (1 << dst_width) to ((1 << dst_width) - 1).
+ */
+
+ res = LLVMBuildSub(builder, lshifted, rshifted, "");
}
- else
- res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");
return res;
}
@@ -177,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
assert(dst_type.floating);
+ /* Special-case int8->float, though most cases could be handled
+ * this way:
+ */
+ if (src_width == 8) {
+ scale = 1.0/255.0;
+ res = LLVMBuildSIToFP(builder, src, vec_type, "");
+ res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
+ return res;
+ }
+
mantissa = lp_mantissa(dst_type);
n = MIN2(mantissa, src_width);
@@ -241,6 +298,87 @@ lp_build_conv(LLVMBuilderRef builder,
}
num_tmps = num_srcs;
+
+ /* Special case 4x4f --> 1x16ub
+ */
+ if (src_type.floating == 1 &&
+ src_type.fixed == 0 &&
+ src_type.sign == 1 &&
+ src_type.norm == 0 &&
+ src_type.width == 32 &&
+ src_type.length == 4 &&
+
+ dst_type.floating == 0 &&
+ dst_type.fixed == 0 &&
+ dst_type.sign == 0 &&
+ dst_type.norm == 1 &&
+ dst_type.width == 8 &&
+ dst_type.length == 16 &&
+
+ util_cpu_caps.has_sse2)
+ {
+ int i;
+
+ for (i = 0; i < num_dsts; i++, src += 4) {
+ struct lp_type int16_type = dst_type;
+ struct lp_type int32_type = dst_type;
+ LLVMValueRef lo, hi;
+ LLVMValueRef src_int0;
+ LLVMValueRef src_int1;
+ LLVMValueRef src_int2;
+ LLVMValueRef src_int3;
+ LLVMTypeRef int16_vec_type;
+ LLVMTypeRef int32_vec_type;
+ LLVMTypeRef src_vec_type;
+ LLVMTypeRef dst_vec_type;
+ LLVMValueRef const_255f;
+ LLVMValueRef a, b, c, d;
+
+ int16_type.width *= 2;
+ int16_type.length /= 2;
+ int16_type.sign = 1;
+
+ int32_type.width *= 4;
+ int32_type.length /= 4;
+ int32_type.sign = 1;
+
+ src_vec_type = lp_build_vec_type(src_type);
+ dst_vec_type = lp_build_vec_type(dst_type);
+ int16_vec_type = lp_build_vec_type(int16_type);
+ int32_vec_type = lp_build_vec_type(int32_type);
+
+ const_255f = lp_build_const_vec(src_type, 255.0f);
+
+ a = LLVMBuildFMul(builder, src[0], const_255f, "");
+ b = LLVMBuildFMul(builder, src[1], const_255f, "");
+ c = LLVMBuildFMul(builder, src[2], const_255f, "");
+ d = LLVMBuildFMul(builder, src[3], const_255f, "");
+
+ {
+ struct lp_build_context bld;
+
+ bld.builder = builder;
+ bld.type = src_type;
+ bld.vec_type = src_vec_type;
+ bld.int_elem_type = lp_build_elem_type(int32_type);
+ bld.int_vec_type = int32_vec_type;
+ bld.undef = lp_build_undef(src_type);
+ bld.zero = lp_build_zero(src_type);
+ bld.one = lp_build_one(src_type);
+
+ src_int0 = lp_build_iround(&bld, a);
+ src_int1 = lp_build_iround(&bld, b);
+ src_int2 = lp_build_iround(&bld, c);
+ src_int3 = lp_build_iround(&bld, d);
+ }
+ /* relying on clamping behavior of sse2 intrinsics here */
+ lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1);
+ hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3);
+ dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi);
+ }
+ return;
+ }
+
/*
* Clamp if necessary
*/
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index d3a5afff8c..93e56553d7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -57,6 +57,8 @@ lp_disassemble(const void* func)
#ifdef HAVE_UDIS86
ud_t ud_obj;
uint64_t max_jmp_pc;
+ uint inst_no = 0; /* running instruction index, printed when emit_line_nos is set */
+ boolean emit_addrs = TRUE, emit_line_nos = FALSE;
ud_init(&ud_obj);
@@ -76,13 +78,18 @@ lp_disassemble(const void* func)
while (ud_disassemble(&ud_obj)) {
+ if (emit_addrs) {
#ifdef PIPE_ARCH_X86
- debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
+ debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
#endif
#ifdef PIPE_ARCH_X86_64
- debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
+ debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
#endif
-
+ }
+ else if (emit_line_nos) {
+ debug_printf("%6d:\t", inst_no);
+ inst_no++;
+ }
#if 0
debug_printf("%-16s ", ud_insn_hex(&ud_obj));
#endif
@@ -115,8 +122,10 @@ lp_disassemble(const void* func)
}
}
- if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
- ud_obj.mnemonic == UD_Iinvalid)
+ if (ud_obj.mnemonic == UD_Iinvalid ||
+ (ud_insn_off(&ud_obj) >= max_jmp_pc &&
+ (ud_obj.mnemonic == UD_Iret ||
+ ud_obj.mnemonic == UD_Ijmp)))
break;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 369c1bbf09..eb11dcd4ef 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -36,11 +36,12 @@
#include "util/u_string.h"
-#define GALLIVM_DEBUG_TGSI 0x1
-#define GALLIVM_DEBUG_IR 0x2
-#define GALLIVM_DEBUG_ASM 0x4
-#define GALLIVM_DEBUG_NO_OPT 0x8
-#define GALLIVM_DEBUG_PERF 0x10
+#define GALLIVM_DEBUG_TGSI (1 << 0)
+#define GALLIVM_DEBUG_IR (1 << 1)
+#define GALLIVM_DEBUG_ASM (1 << 2)
+#define GALLIVM_DEBUG_NO_OPT (1 << 3)
+#define GALLIVM_DEBUG_PERF (1 << 4)
+#define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5)
#ifdef DEBUG
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index 5bc9c741a8..a2cee199a0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -38,273 +38,15 @@
#include "lp_bld_flow.h"
-#define LP_BUILD_FLOW_MAX_VARIABLES 64
-#define LP_BUILD_FLOW_MAX_DEPTH 32
-
-/**
- * Enumeration of all possible flow constructs.
- */
-enum lp_build_flow_construct_kind {
- LP_BUILD_FLOW_SCOPE,
- LP_BUILD_FLOW_SKIP,
- LP_BUILD_FLOW_IF
-};
-
-
-/**
- * Variable declaration scope.
- */
-struct lp_build_flow_scope
-{
- /** Number of variables declared in this scope */
- unsigned num_variables;
-};
-
-
-/**
- * Early exit. Useful to skip to the end of a function or block when
- * the execution mask becomes zero or when there is an error condition.
- */
-struct lp_build_flow_skip
-{
- /** Block to skip to */
- LLVMBasicBlockRef block;
-
- /** Number of variables declared at the beginning */
- unsigned num_variables;
-
- LLVMValueRef *phi; /**< array [num_variables] */
-};
-
-
-/**
- * if/else/endif.
- */
-struct lp_build_flow_if
-{
- unsigned num_variables;
-
- LLVMValueRef *phi; /**< array [num_variables] */
-
- LLVMValueRef condition;
- LLVMBasicBlockRef entry_block, true_block, false_block, merge_block;
-};
-
-
-/**
- * Union of all possible flow constructs' data
- */
-union lp_build_flow_construct_data
-{
- struct lp_build_flow_scope scope;
- struct lp_build_flow_skip skip;
- struct lp_build_flow_if ifthen;
-};
-
-
-/**
- * Element of the flow construct stack.
- */
-struct lp_build_flow_construct
-{
- enum lp_build_flow_construct_kind kind;
- union lp_build_flow_construct_data data;
-};
-
-
/**
- * All necessary data to generate LLVM control flow constructs.
+ * Insert a new block, right where builder is pointing to.
*
- * Besides keeping track of the control flow construct themselves we also
- * need to keep track of variables in order to generate SSA Phi values.
- */
-struct lp_build_flow_context
-{
- LLVMBuilderRef builder;
-
- /**
- * Control flow stack.
- */
- struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
- unsigned num_constructs;
-
- /**
- * Variable stack
- */
- LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
- unsigned num_variables;
-};
-
-
-struct lp_build_flow_context *
-lp_build_flow_create(LLVMBuilderRef builder)
-{
- struct lp_build_flow_context *flow;
-
- flow = CALLOC_STRUCT(lp_build_flow_context);
- if(!flow)
- return NULL;
-
- flow->builder = builder;
-
- return flow;
-}
-
-
-void
-lp_build_flow_destroy(struct lp_build_flow_context *flow)
-{
- assert(flow->num_constructs == 0);
- assert(flow->num_variables == 0);
- FREE(flow);
-}
-
-
-/**
- * Begin/push a new flow control construct, such as a loop, skip block
- * or variable scope.
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_push(struct lp_build_flow_context *flow,
- enum lp_build_flow_construct_kind kind)
-{
- assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
- if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
- return NULL;
-
- flow->constructs[flow->num_constructs].kind = kind;
- return &flow->constructs[flow->num_constructs++].data;
-}
-
-
-/**
- * Return the current/top flow control construct on the stack.
- * \param kind the expected type of the top-most construct
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_peek(struct lp_build_flow_context *flow,
- enum lp_build_flow_construct_kind kind)
-{
- assert(flow->num_constructs);
- if(!flow->num_constructs)
- return NULL;
-
- assert(flow->constructs[flow->num_constructs - 1].kind == kind);
- if(flow->constructs[flow->num_constructs - 1].kind != kind)
- return NULL;
-
- return &flow->constructs[flow->num_constructs - 1].data;
-}
-
-
-/**
- * End/pop the current/top flow control construct on the stack.
- * \param kind the expected type of the top-most construct
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_pop(struct lp_build_flow_context *flow,
- enum lp_build_flow_construct_kind kind)
-{
- assert(flow->num_constructs);
- if(!flow->num_constructs)
- return NULL;
-
- assert(flow->constructs[flow->num_constructs - 1].kind == kind);
- if(flow->constructs[flow->num_constructs - 1].kind != kind)
- return NULL;
-
- return &flow->constructs[--flow->num_constructs].data;
-}
-
-
-/**
- * Begin a variable scope.
+ * This is important not only for aesthetic reasons, but also for
+ * performance reasons, as frequently run blocks should be laid out next to
+ * each other and fall-throughs maximized.
*
+ * See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp.
*
- */
-void
-lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
-{
- struct lp_build_flow_scope *scope;
-
- scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope;
- if(!scope)
- return;
-
- scope->num_variables = 0;
-}
-
-
-/**
- * Declare a variable.
- *
- * A variable is a named entity which can have different LLVMValueRef's at
- * different points of the program. This is relevant for control flow because
- * when there are multiple branches to a same location we need to replace
- * the variable's value with a Phi function as explained in
- * http://en.wikipedia.org/wiki/Static_single_assignment_form .
- *
- * We keep track of variables by keeping around a pointer to where they're
- * current.
- *
- * There are a few cautions to observe:
- *
- * - Variable's value must not be NULL. If there is no initial value then
- * LLVMGetUndef() should be used.
- *
- * - Variable's value must be kept up-to-date. If the variable is going to be
- * modified by a function then a pointer should be passed so that its value
- * is accurate. Failure to do this will cause some of the variables'
- * transient values to be lost, leading to wrong results.
- *
- * - A program should be written from top to bottom, by always appending
- * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
- * modifying existing statements will most likely lead to wrong results.
- *
- */
-void
-lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
- LLVMValueRef *variable)
-{
- struct lp_build_flow_scope *scope;
-
- scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope;
- if(!scope)
- return;
-
- assert(*variable);
- if(!*variable)
- return;
-
- assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
- if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
- return;
-
- flow->variables[flow->num_variables++] = variable;
- ++scope->num_variables;
-}
-
-
-void
-lp_build_flow_scope_end(struct lp_build_flow_context *flow)
-{
- struct lp_build_flow_scope *scope;
-
- scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope;
- if(!scope)
- return;
-
- assert(flow->num_variables >= scope->num_variables);
- if(flow->num_variables < scope->num_variables) {
- flow->num_variables = 0;
- return;
- }
-
- flow->num_variables -= scope->num_variables;
-}
-
-
-/**
* Note: this function has no dependencies on the flow code and could
* be used elsewhere.
*/
@@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name)
}
-static LLVMBasicBlockRef
-lp_build_flow_insert_block(struct lp_build_flow_context *flow)
-{
- return lp_build_insert_new_block(flow->builder, "");
-}
-
-
/**
* Begin a "skip" block. Inside this block we can test a condition and
* skip to the end of the block if the condition is false.
*/
void
-lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
+lp_build_flow_skip_begin(struct lp_build_skip_context *skip,
+ LLVMBuilderRef builder)
{
- struct lp_build_flow_skip *skip;
- LLVMBuilderRef builder;
- unsigned i;
-
- skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
- if(!skip)
- return;
+ skip->builder = builder;
/* create new basic block */
- skip->block = lp_build_flow_insert_block(flow);
-
- skip->num_variables = flow->num_variables;
- if(!skip->num_variables) {
- skip->phi = NULL;
- return;
- }
-
- /* Allocate a Phi node for each variable in this skip scope */
- skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
- if(!skip->phi) {
- skip->num_variables = 0;
- return;
- }
-
- builder = LLVMCreateBuilder();
- LLVMPositionBuilderAtEnd(builder, skip->block);
-
- /* create a Phi node for each variable */
- for(i = 0; i < skip->num_variables; ++i)
- skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
-
- LLVMDisposeBuilder(builder);
+ skip->block = lp_build_insert_new_block(skip->builder, "skip");
}
@@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
* skip block if the condition is true.
*/
void
-lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip,
LLVMValueRef cond)
{
- struct lp_build_flow_skip *skip;
- LLVMBasicBlockRef current_block;
LLVMBasicBlockRef new_block;
- unsigned i;
-
- skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
- if(!skip)
- return;
- current_block = LLVMGetInsertBlock(flow->builder);
-
- new_block = lp_build_flow_insert_block(flow);
-
- /* for each variable, update the Phi node with a (variable, block) pair */
- for(i = 0; i < skip->num_variables; ++i) {
- assert(*flow->variables[i]);
- assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
- LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
- }
+ new_block = lp_build_insert_new_block(skip->builder, "");
/* if cond is true, goto skip->block, else goto new_block */
- LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
+ LLVMBuildCondBr(skip->builder, cond, skip->block, new_block);
- LLVMPositionBuilderAtEnd(flow->builder, new_block);
+ LLVMPositionBuilderAtEnd(skip->builder, new_block);
}
void
-lp_build_flow_skip_end(struct lp_build_flow_context *flow)
+lp_build_flow_skip_end(struct lp_build_skip_context *skip)
{
- struct lp_build_flow_skip *skip;
- LLVMBasicBlockRef current_block;
- unsigned i;
-
- skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
- if(!skip)
- return;
-
- current_block = LLVMGetInsertBlock(flow->builder);
-
- /* add (variable, block) tuples to the phi nodes */
- for(i = 0; i < skip->num_variables; ++i) {
- assert(*flow->variables[i]);
- assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
- LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
- *flow->variables[i] = skip->phi[i];
- }
-
/* goto block */
- LLVMBuildBr(flow->builder, skip->block);
- LLVMPositionBuilderAtEnd(flow->builder, skip->block);
-
- FREE(skip->phi);
+ LLVMBuildBr(skip->builder, skip->block);
+ LLVMPositionBuilderAtEnd(skip->builder, skip->block);
}
/**
* Check if the mask predicate is zero. If so, jump to the end of the block.
*/
-static void
+void
lp_build_mask_check(struct lp_build_mask_context *mask)
{
- LLVMBuilderRef builder = mask->flow->builder;
+ LLVMBuilderRef builder = mask->skip.builder;
+ LLVMValueRef value;
LLVMValueRef cond;
+ value = lp_build_mask_value(mask);
+
/* cond = (mask == 0) */
cond = LLVMBuildICmp(builder,
LLVMIntEQ,
- LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
+ LLVMBuildBitCast(builder, value, mask->reg_type, ""),
LLVMConstNull(mask->reg_type),
"");
/* if cond, goto end of block */
- lp_build_flow_skip_cond_break(mask->flow, cond);
+ lp_build_flow_skip_cond_break(&mask->skip, cond);
}
@@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask)
*/
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
- struct lp_build_flow_context *flow,
+ LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef value)
{
memset(mask, 0, sizeof *mask);
- mask->flow = flow;
mask->reg_type = LLVMIntType(type.width * type.length);
- mask->value = value;
+ mask->var = lp_build_alloca(builder,
+ lp_build_int_vec_type(type),
+ "execution_mask");
- lp_build_flow_scope_begin(flow);
- lp_build_flow_scope_declare(flow, &mask->value);
- lp_build_flow_skip_begin(flow);
+ LLVMBuildStore(builder, value, mask->var);
- lp_build_mask_check(mask);
+ lp_build_flow_skip_begin(&mask->skip, builder);
+}
+
+
+/**
+ * Return the current value of the mask.
+ *
+ * The mask lives in the variable mask->var, so this emits a load from it
+ * using the builder stored in mask->skip.
+ */
+LLVMValueRef
+lp_build_mask_value(struct lp_build_mask_context *mask)
+{
+ return LLVMBuildLoad(mask->skip.builder, mask->var, "");
}
@@ -504,9 +185,10 @@ void
lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef value)
{
- mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
-
- lp_build_mask_check(mask);
+ value = LLVMBuildAnd(mask->skip.builder,
+ lp_build_mask_value(mask),
+ value, "");
+ LLVMBuildStore(mask->skip.builder, value, mask->var);
}
@@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef
lp_build_mask_end(struct lp_build_mask_context *mask)
{
- lp_build_flow_skip_end(mask->flow);
- lp_build_flow_scope_end(mask->flow);
- return mask->value;
+ lp_build_flow_skip_end(&mask->skip);
+ return lp_build_mask_value(mask);
}
@@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder,
LLVMValueRef start,
struct lp_build_loop_state *state)
{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- LLVMValueRef function = LLVMGetBasicBlockParent(block);
+ state->block = lp_build_insert_new_block(builder, "loop_begin");
- state->block = LLVMAppendBasicBlock(function, "loop");
+ state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter");
+
+ LLVMBuildStore(builder, start, state->counter_var);
LLVMBuildBr(builder, state->block);
LLVMPositionBuilderAtEnd(builder, state->block);
- state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), "");
-
- LLVMAddIncoming(state->counter, &start, &block, 1);
-
+ state->counter = LLVMBuildLoad(builder, state->counter_var, "");
}
void
-lp_build_loop_end(LLVMBuilderRef builder,
- LLVMValueRef end,
- LLVMValueRef step,
- struct lp_build_loop_state *state)
-{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- LLVMValueRef function = LLVMGetBasicBlockParent(block);
- LLVMValueRef next;
- LLVMValueRef cond;
- LLVMBasicBlockRef after_block;
-
- if (!step)
- step = LLVMConstInt(LLVMTypeOf(end), 1, 0);
-
- next = LLVMBuildAdd(builder, state->counter, step, "");
-
- cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, "");
-
- after_block = LLVMAppendBasicBlock(function, "");
-
- LLVMBuildCondBr(builder, cond, after_block, state->block);
-
- LLVMAddIncoming(state->counter, &next, &block, 1);
-
- LLVMPositionBuilderAtEnd(builder, after_block);
-}
-
-void
lp_build_loop_end_cond(LLVMBuilderRef builder,
LLVMValueRef end,
LLVMValueRef step,
- int llvm_cond,
+ LLVMIntPredicate llvm_cond,
struct lp_build_loop_state *state)
{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- LLVMValueRef function = LLVMGetBasicBlockParent(block);
LLVMValueRef next;
LLVMValueRef cond;
LLVMBasicBlockRef after_block;
@@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
next = LLVMBuildAdd(builder, state->counter, step, "");
+ LLVMBuildStore(builder, next, state->counter_var);
+
cond = LLVMBuildICmp(builder, llvm_cond, next, end, "");
- after_block = LLVMAppendBasicBlock(function, "");
+ after_block = lp_build_insert_new_block(builder, "loop_end");
LLVMBuildCondBr(builder, cond, after_block, state->block);
- LLVMAddIncoming(state->counter, &next, &block, 1);
-
LLVMPositionBuilderAtEnd(builder, after_block);
+
+ state->counter = LLVMBuildLoad(builder, state->counter_var, "");
+}
+
+
+/**
+ * End a loop, comparing the incremented counter against end with the
+ * "not equal" predicate.
+ *
+ * Thin wrapper that forwards to lp_build_loop_end_cond() with LLVMIntNE.
+ *
+ * NOTE(review): lp_build_loop_end_cond() branches to the block after the
+ * loop when its comparison is true, so with LLVMIntNE the loop appears to be
+ * left as soon as the incremented counter differs from end -- confirm this
+ * is the intended exit condition.
+ */
+void
+lp_build_loop_end(LLVMBuilderRef builder,
+ LLVMValueRef end,
+ LLVMValueRef step,
+ struct lp_build_loop_state *state)
+{
+ lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state);
}
@@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
Is built with:
- LLVMValueRef x = LLVMGetUndef(); // or something else
+ // x needs an alloca variable
+ x = lp_build_alloca(builder, type, "x");
- flow = lp_build_flow_create(builder);
- lp_build_flow_scope_begin(flow);
+ lp_build_if(ctx, builder, cond);
+ LLVMBuildStore(LLVMBuildAdd(1, 2), x);
+ lp_build_else(ctx);
+ LLVMBuildStore(LLVMBuildAdd(2, 3), x);
+ lp_build_endif(ctx);
- // x needs a phi node
- lp_build_flow_scope_declare(flow, &x);
-
- lp_build_if(ctx, flow, builder, cond);
- x = LLVMAdd(1, 2);
- lp_build_else(ctx);
- x = LLVMAdd(2, 3);
- lp_build_endif(ctx);
-
- lp_build_flow_scope_end(flow);
-
- lp_build_flow_destroy(flow);
*/
@@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
* Begin an if/else/endif construct.
*/
void
-lp_build_if(struct lp_build_if_state *ctx,
- struct lp_build_flow_context *flow,
+lp_build_if(struct lp_build_if_state *ifthen,
LLVMBuilderRef builder,
LLVMValueRef condition)
{
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- struct lp_build_flow_if *ifthen;
- unsigned i;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->builder = builder;
- ctx->flow = flow;
- /* push/create new scope */
- ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen;
- assert(ifthen);
-
- ifthen->num_variables = flow->num_variables;
+ memset(ifthen, 0, sizeof *ifthen);
+ ifthen->builder = builder;
ifthen->condition = condition;
ifthen->entry_block = block;
- /* create a Phi node for each variable in this flow scope */
- ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi));
- if (!ifthen->phi) {
- ifthen->num_variables = 0;
- return;
- }
-
/* create endif/merge basic block for the phi functions */
ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block");
- LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
-
- /* create a phi node for each variable */
- for (i = 0; i < flow->num_variables; i++) {
- ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
-
- /* add add the initial value of the var from the entry block */
- if (!LLVMIsUndef(*flow->variables[i]))
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i],
- &ifthen->entry_block, 1);
- }
/* create/insert true_block before merge_block */
ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block");
@@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx,
* Begin else-part of a conditional
*/
void
-lp_build_else(struct lp_build_if_state *ctx)
+lp_build_else(struct lp_build_if_state *ifthen)
{
- struct lp_build_flow_context *flow = ctx->flow;
- struct lp_build_flow_if *ifthen;
- unsigned i;
-
- ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen;
- assert(ifthen);
-
- /* for each variable, update the Phi node with a (variable, block) pair */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
- for (i = 0; i < flow->num_variables; i++) {
- assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);
- }
+ /* Append an unconditional Br(anch) instruction on the true_block */
+ LLVMBuildBr(ifthen->builder, ifthen->merge_block);
/* create/insert false_block before the merge block */
ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block");
/* successive code goes into the else block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
+ LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block);
}
@@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx)
* End a conditional.
*/
void
-lp_build_endif(struct lp_build_if_state *ctx)
+lp_build_endif(struct lp_build_if_state *ifthen)
{
- struct lp_build_flow_context *flow = ctx->flow;
- struct lp_build_flow_if *ifthen;
- LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder);
- unsigned i;
-
- ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
- assert(ifthen);
-
/* Insert branch to the merge block from current block */
- LLVMBuildBr(ctx->builder, ifthen->merge_block);
+ LLVMBuildBr(ifthen->builder, ifthen->merge_block);
- if (ifthen->false_block) {
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
- /* for each variable, update the Phi node with a (variable, block) pair */
- for (i = 0; i < flow->num_variables; i++) {
- assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1);
- /* replace the variable ref with the phi function */
- *flow->variables[i] = ifthen->phi[i];
- }
- }
- else {
- /* no else clause */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
- for (i = 0; i < flow->num_variables; i++) {
- assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);
-
- /* replace the variable ref with the phi function */
- *flow->variables[i] = ifthen->phi[i];
- }
- }
-
- FREE(ifthen->phi);
-
- /***
- *** Now patch in the various branch instructions.
- ***/
+ /*
+ * Now patch in the various branch instructions.
+ */
/* Insert the conditional branch instruction at the end of entry_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block);
+ LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block);
if (ifthen->false_block) {
/* we have an else clause */
- LLVMBuildCondBr(ctx->builder, ifthen->condition,
+ LLVMBuildCondBr(ifthen->builder, ifthen->condition,
ifthen->true_block, ifthen->false_block);
}
else {
/* no else clause */
- LLVMBuildCondBr(ctx->builder, ifthen->condition,
+ LLVMBuildCondBr(ifthen->builder, ifthen->condition,
ifthen->true_block, ifthen->merge_block);
}
- /* Insert branch from end of true_block to merge_block */
- if (ifthen->false_block) {
- /* Append an unconditional Br(anch) instruction on the true_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
- LLVMBuildBr(ctx->builder, ifthen->merge_block);
- }
- else {
- /* No else clause.
- * Note that we've already inserted the branch at the end of
- * true_block. See the very first LLVMBuildBr() call in this function.
- */
- }
-
/* Resume building code at end of the ifthen->merge_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
+ LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block);
}
@@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder,
}
res = LLVMBuildAlloca(first_builder, type, name);
+ LLVMBuildStore(builder, LLVMConstNull(type), res);
LLVMDisposeBuilder(first_builder);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index fffb493a93..e729ee6eaa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -41,52 +41,49 @@
struct lp_type;
-struct lp_build_flow_context;
-
-
-struct lp_build_flow_context *
-lp_build_flow_create(LLVMBuilderRef builder);
-
-void
-lp_build_flow_destroy(struct lp_build_flow_context *flow);
-
-void
-lp_build_flow_scope_begin(struct lp_build_flow_context *flow);
-
-void
-lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
- LLVMValueRef *variable);
+/**
+ * Early exit. Useful to skip to the end of a function or block when
+ * the execution mask becomes zero or when there is an error condition.
+ */
+struct lp_build_skip_context
+{
+ LLVMBuilderRef builder;
-void
-lp_build_flow_scope_end(struct lp_build_flow_context *flow);
+ /** Block to skip to */
+ LLVMBasicBlockRef block;
+};
void
-lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
+lp_build_flow_skip_begin(struct lp_build_skip_context *ctx,
+ LLVMBuilderRef builder);
void
-lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx,
LLVMValueRef cond);
void
-lp_build_flow_skip_end(struct lp_build_flow_context *flow);
+lp_build_flow_skip_end(struct lp_build_skip_context *ctx);
struct lp_build_mask_context
{
- struct lp_build_flow_context *flow;
+ struct lp_build_skip_context skip;
LLVMTypeRef reg_type;
- LLVMValueRef value;
+ LLVMValueRef var;
};
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
- struct lp_build_flow_context *flow,
+ LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef value);
+LLVMValueRef
+lp_build_mask_value(struct lp_build_mask_context *mask);
+
/**
* Bitwise AND the mask with the given value, if a previous mask was set.
*/
@@ -94,6 +91,9 @@ void
lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef value);
+void
+lp_build_mask_check(struct lp_build_mask_context *mask);
+
LLVMValueRef
lp_build_mask_end(struct lp_build_mask_context *mask);
@@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask);
struct lp_build_loop_state
{
LLVMBasicBlockRef block;
+ LLVMValueRef counter_var;
LLVMValueRef counter;
};
@@ -128,22 +129,28 @@ void
lp_build_loop_end_cond(LLVMBuilderRef builder,
LLVMValueRef end,
LLVMValueRef step,
- int cond, /* LLVM condition */
+ LLVMIntPredicate cond,
struct lp_build_loop_state *state);
+/**
+ * if/else/endif.
+ */
struct lp_build_if_state
{
LLVMBuilderRef builder;
- struct lp_build_flow_context *flow;
+ LLVMValueRef condition;
+ LLVMBasicBlockRef entry_block;
+ LLVMBasicBlockRef true_block;
+ LLVMBasicBlockRef false_block;
+ LLVMBasicBlockRef merge_block;
};
void
lp_build_if(struct lp_build_if_state *ctx,
- struct lp_build_flow_context *flow,
LLVMBuilderRef builder,
LLVMValueRef condition);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 761f33b578..5598ca5c48 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
{ "asm", GALLIVM_DEBUG_ASM, NULL },
{ "nopt", GALLIVM_DEBUG_NO_OPT, NULL },
{ "perf", GALLIVM_DEBUG_PERF, NULL },
+ { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL },
DEBUG_NAMED_VALUE_END
};
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index f26fdac466..0b4b1ca7d1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -47,4 +47,10 @@ lp_build_init(void);
extern void
lp_func_delete_body(LLVMValueRef func);
+
+extern LLVMValueRef
+lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name);
+
+
#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index d5c62a3f73..026b60ac36 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -92,9 +92,23 @@ lp_build_compare(LLVMBuilderRef builder,
if(func == PIPE_FUNC_ALWAYS)
return ones;
- /* TODO: optimize the constant case */
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * There are no unsigned integer comparison instructions in SSE.
+ */
- /* XXX: It is not clear if we should use the ordered or unordered operators */
+ if (!type.floating && !type.sign &&
+ type.width * type.length == 128 &&
+ util_cpu_caps.has_sse2 &&
+ (func == PIPE_FUNC_LESS ||
+ func == PIPE_FUNC_LEQUAL ||
+ func == PIPE_FUNC_GREATER ||
+ func == PIPE_FUNC_GEQUAL) &&
+ (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+ debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+ __FUNCTION__, type.length, type.width);
+ }
+#endif
#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -225,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder,
#endif
#endif /* HAVE_LLVM < 0x0207 */
+ /* XXX: It is not clear if we should use the ordered or unordered operators */
+
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
@@ -446,10 +462,12 @@ lp_build_select(struct lp_build_context *bld,
LLVMTypeRef arg_type;
LLVMValueRef args[3];
- if (type.width == 64) {
+ if (type.floating &&
+ type.width == 64) {
intrinsic = "llvm.x86.sse41.blendvpd";
arg_type = LLVMVectorType(LLVMDoubleType(), 2);
- } else if (type.width == 32) {
+ } else if (type.floating &&
+ type.width == 32) {
intrinsic = "llvm.x86.sse41.blendvps";
arg_type = LLVMVectorType(LLVMFloatType(), 4);
} else {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 48baf7c425..f56ddee7fd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF)
llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
func->deleteBody();
}
+
+
+extern "C" /* C linkage: the matching prototype is declared for the C sources in lp_bld_init.h */
+LLVMValueRef
+lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name)
+{ /* Unwrap the C handles, emit the load through the C++ builder, and wrap the result back into a C handle. */
+ return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name)); /* NOTE(review): 'true' is presumably CreateLoad's isVolatile argument -- confirm against the LLVM headers in use */
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
index 153ba5b15b..f418e96aff 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -29,6 +29,8 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "lp_bld_const.h"
#include "lp_bld_printf.h"
@@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...)
return LLVMBuildCall(builder, func_printf, params, argcount + 1, "");
}
+
+
+/**
+ * Print a float[4] vector.
+ *
+ * Emits IR that extracts elements 0..3 of \p vec and hands them to
+ * lp_build_printf() together with the format "<msg> %f %f %f %f\n".
+ *
+ * \param builder  builder used for the extract-element instructions and
+ *                 passed on to lp_build_printf()
+ * \param msg      prefix text; it is pasted into the printf format string
+ *                 itself, so any '%' it contains is treated as a conversion
+ * \param vec      vector value whose elements 0..3 are printed
+ * \return whatever lp_build_printf() returns
+ *
+ * NOTE(review): the format is assembled in a 1000-byte stack buffer; a
+ * longer msg is presumably truncated by util_snprintf() -- confirm.
+ */
+LLVMValueRef
+lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec)
+{
+ char format[1000];
+ LLVMValueRef x, y, z, w;
+
+ x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(0), "");
+ y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(1), "");
+ z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), "");
+ w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(3), "");
+
+ util_snprintf(format, sizeof(format), "%s %%f %%f %%f %%f\n", msg);
+ return lp_build_printf(builder, format, x, y, z, w);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
index 83bd8f1d55..b6222c62eb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
@@ -35,5 +35,9 @@
LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len);
LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...);
+LLVMValueRef
+lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec);
+
+
#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index 7b1088939b..c18c8b4710 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -81,11 +81,15 @@ LLVMValueRef
lp_build_scalar_ddx(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
- LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
- LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
- LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
- return lp_build_sub(bld, a_right, a_left);
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
+ LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0);
+ LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left");
+ LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right");
+ if (bld->type.floating)
+ return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx");
+ else
+ return LLVMBuildSub(bld->builder, a_right, a_left, "ddx");
}
@@ -93,9 +97,13 @@ LLVMValueRef
lp_build_scalar_ddy(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
- LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
- LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
- LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
- return lp_build_sub(bld, a_bottom, a_top);
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
+ LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0);
+ LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top");
+ LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom");
+ if (bld->type.floating)
+ return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy");
+ else
+ return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy");
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index d9fbc0f305..844d1d935b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -39,12 +39,52 @@
#include "lp_bld_arit.h"
#include "lp_bld_const.h"
#include "lp_bld_debug.h"
+#include "lp_bld_printf.h"
#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_type.h"
+/*
+ * Bri-linear factor. Should be greater than one.
+ */
+#define BRILINEAR_FACTOR 2
+
+
+/**
+ * Does the given texture wrap mode allow sampling the texture border color?
+ * XXX maybe move this into gallium util code.
+ *
+ * \param mode            wrap mode, one of the PIPE_TEX_WRAP_x values
+ * \param min_img_filter  compared against PIPE_TEX_FILTER_NEAREST
+ * \param mag_img_filter  compared against PIPE_TEX_FILTER_NEAREST
+ * \return TRUE for the two CLAMP_TO_BORDER modes, and for CLAMP and
+ *         MIRROR_CLAMP unless both image filters are NEAREST; FALSE for
+ *         the REPEAT and CLAMP_TO_EDGE families.  Any other mode trips the
+ *         assert and yields FALSE.
+ */
+boolean
+lp_sampler_wrap_mode_uses_border_color(unsigned mode,
+ unsigned min_img_filter,
+ unsigned mag_img_filter)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return FALSE;
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ if (min_img_filter == PIPE_TEX_FILTER_NEAREST && /* the filters only matter for these two modes */
+ mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return TRUE;
+ default:
+ assert(0 && "unexpected wrap mode");
+ return FALSE; /* reached only when asserts are compiled out */
+ }
+}
+
+
/**
* Initialize lp_sampler_static_state object with the gallium sampler
* and texture state.
@@ -93,17 +133,32 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
state->mag_img_filter = sampler->mag_img_filter;
- if (view->last_level) {
+
+ if (view->last_level && sampler->max_lod > 0.0f) {
state->min_mip_filter = sampler->min_mip_filter;
} else {
state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
}
- /* If min_lod == max_lod we can greatly simplify mipmap selection.
- * This is a case that occurs during automatic mipmap generation.
- */
- if (sampler->min_lod == sampler->max_lod) {
- state->min_max_lod_equal = 1;
+ if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ if (sampler->lod_bias != 0.0f) {
+ state->lod_bias_non_zero = 1;
+ }
+
+ /* If min_lod == max_lod we can greatly simplify mipmap selection.
+ * This is a case that occurs during automatic mipmap generation.
+ */
+ if (sampler->min_lod == sampler->max_lod) {
+ state->min_max_lod_equal = 1;
+ } else {
+ if (sampler->min_lod > 0.0f) {
+ state->apply_min_lod = 1;
+ }
+
+ if (sampler->max_lod < (float)view->last_level) {
+ state->apply_max_lod = 1;
+ }
+ }
}
state->compare_mode = sampler->compare_mode;
@@ -120,6 +175,220 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
+ * Generate code to compute coordinate gradient (rho).
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
+ * \return scalar rho: the largest, over the first bld->dims coordinates,
+ *         of max(|d/dx|, |d/dy|) scaled by the matching element of
+ *         bld->int_size
+ *
+ * XXX: The resulting rho is scalar, so we ignore all but the first element of
+ * derivatives that are passed by the shader.
+ */
+static LLVMValueRef
+lp_build_rho(struct lp_build_sample_context *bld,
+             const LLVMValueRef ddx[4],
+             const LLVMValueRef ddy[4])
+{
+   struct lp_build_context *float_size_bld = &bld->float_size_bld;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   const unsigned dims = bld->dims;
+   LLVMTypeRef i32t = LLVMInt32Type();
+   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
+   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
+   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+   LLVMValueRef rho_x, rho_y;
+   LLVMValueRef rho_vec;
+   LLVMValueRef float_size;
+   LLVMValueRef rho;
+
+   dsdx = ddx[0];
+   dsdy = ddy[0];
+
+   if (dims <= 1) {
+      /* 1D: the s derivatives are used directly, no packing needed */
+      rho_x = dsdx;
+      rho_y = dsdy;
+   }
+   else {
+      /* Pack the per-coordinate derivatives into elements 0 (s), 1 (t), 2 (r) */
+      rho_x = float_size_bld->undef;
+      rho_y = float_size_bld->undef;
+
+      rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, "");
+      rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, "");
+
+      dtdx = ddx[1];
+      dtdy = ddy[1];
+
+      rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, "");
+      rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, "");
+
+      if (dims >= 3) {
+         drdx = ddx[2];
+         drdy = ddy[2];
+
+         rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, "");
+         rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, "");
+      }
+   }
+
+   rho_x = lp_build_abs(float_size_bld, rho_x);
+   rho_y = lp_build_abs(float_size_bld, rho_y);
+
+   rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
+
+   /* scale each coordinate's gradient by the texture size along that axis */
+   float_size = lp_build_int_to_float(float_size_bld, bld->int_size);
+
+   rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
+
+   if (dims <= 1) {
+      rho = rho_vec;
+   }
+   else {
+      /* Reduce the packed result to a scalar: max over the used elements */
+      LLVMValueRef rho_s, rho_t, rho_r;
+
+      rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
+      rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, "");
+
+      rho = lp_build_max(float_bld, rho_s, rho_t);
+
+      if (dims >= 3) {
+         /*
+          * The r gradient was inserted at element 2 above; reading element 0
+          * here would fetch rho_s a second time and drop r from the result.
+          */
+         rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index2, "");
+         rho = lp_build_max(float_bld, rho, rho_r);
+      }
+   }
+
+   return rho;
+}
+
+
+/*
+ * Bri-linear lod computation
+ *
+ * Use a piece-wise linear approximation of log2 such that:
+ * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
+ * - linear approximation for values in the neighborhood of 0.5, 1.5, etc,
+ *   with the steepness specified in 'factor'
+ * - exact result for 0.5, 1.5, etc.
+ *
+ *
+ *   1.0 -              /----*
+ *                     /
+ *                    /
+ *                   /
+ *   0.5 -          *
+ *                 /
+ *                /
+ *               /
+ *   0.0 - *----/
+ *
+ *         |                 |
+ *        2^0               2^1
+ *
+ * This is a technique also commonly used in hardware:
+ * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
+ *
+ * TODO: For correctness, this should only be applied when texture is known to
+ * have regular mipmaps, i.e., mipmaps derived from the base level.
+ *
+ * TODO: This could be done in fixed point, where applicable.
+ *
+ * \param bld            build context used for all the arithmetic
+ * \param lod            lod value to split
+ * \param factor         steepness of the linear segment
+ * \param out_lod_ipart  receives the integer part produced by
+ *                       lp_build_ifloor_fract() for lod + pre_offset
+ * \param out_lod_fpart  receives factor * fract(lod + pre_offset) + post_offset;
+ *                       it is not clamped here (see the comment in the body)
+ */
+static void
+lp_build_brilinear_lod(struct lp_build_context *bld,
+ LLVMValueRef lod,
+ double factor,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
+{
+ LLVMValueRef lod_fpart;
+ double pre_offset = (factor - 0.5)/factor - 0.5;
+ double post_offset = 1 - factor;
+
+ if (0) {
+ lp_build_printf(bld->builder, "lod = %f\n", lod);
+ }
+
+ lod = lp_build_add(bld, lod,
+ lp_build_const_vec(bld->type, pre_offset));
+
+ lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
+
+ lod_fpart = lp_build_mul(bld, lod_fpart,
+ lp_build_const_vec(bld->type, factor));
+
+ lod_fpart = lp_build_add(bld, lod_fpart,
+ lp_build_const_vec(bld->type, post_offset));
+
+ /*
+ * It's not necessary to clamp lod_fpart since:
+ * - the above expression will never produce numbers greater than one.
+ * - the mip filtering branch is only taken if lod_fpart is positive
+ */
+
+ *out_lod_fpart = lod_fpart;
+
+ if (0) {
+ lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart);
+ lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart);
+ }
+}
+
+
+/*
+ * Combined log2 and brilinear lod computation.
+ *
+ * It is in all respects identical to calling lp_build_fast_log2() and
+ * lp_build_brilinear_lod() above, but by combining them we can compute the
+ * integer and fractional parts independently.
+ *
+ * \param bld            build context; its type must be floating point
+ *                       (asserted below)
+ * \param rho            gradient value, checked against bld->type
+ * \param factor         steepness of the linear segment
+ * \param out_lod_ipart  receives the exponent extracted from the pre-scaled rho
+ * \param out_lod_fpart  receives factor * mantissa + post_offset, where the
+ *                       mantissa is taken from the pre-scaled rho; it is not
+ *                       clamped here (see the comment in the body)
+ */
+static void
+lp_build_brilinear_rho(struct lp_build_context *bld,
+ LLVMValueRef rho,
+ double factor,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
+{
+ LLVMValueRef lod_ipart;
+ LLVMValueRef lod_fpart;
+
+ const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
+ const double post_offset = 1 - 2*factor;
+
+ assert(bld->type.floating);
+
+ assert(lp_check_value(bld->type, rho));
+
+ /*
+ * The pre factor will make the intersections with the exact powers of two
+ * happen precisely where we want them to be, which means that the integer
+ * part will not need any post adjustments.
+ */
+ rho = lp_build_mul(bld, rho,
+ lp_build_const_vec(bld->type, pre_factor));
+
+ /* ipart = ifloor(log2(rho)) */
+ lod_ipart = lp_build_extract_exponent(bld, rho, 0);
+
+ /* fpart = rho / 2**ipart */
+ lod_fpart = lp_build_extract_mantissa(bld, rho);
+
+ lod_fpart = lp_build_mul(bld, lod_fpart,
+ lp_build_const_vec(bld->type, factor));
+
+ lod_fpart = lp_build_add(bld, lod_fpart,
+ lp_build_const_vec(bld->type, post_offset));
+
+ /*
+ * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
+ * - the above expression will never produce numbers greater than one.
+ * - the mip filtering branch is only taken if lod_fpart is positive
+ */
+
+ *out_lod_ipart = lod_ipart;
+ *out_lod_fpart = lod_fpart;
+}
+
+
+/**
* Generate code to compute texture level of detail (lambda).
* \param ddx partial derivatives of (s, t, r, q) with respect to X
* \param ddy partial derivatives of (s, t, r, q) with respect to Y
@@ -132,85 +401,81 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
* XXX: The resulting lod is scalar, so ignore all but the first element of
* derivatives, lod_bias, etc that are passed by the shader.
*/
-LLVMValueRef
+void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth)
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
{
- LLVMValueRef min_lod =
- bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef lod;
+
+ *out_lod_ipart = bld->int_bld.zero;
+ *out_lod_fpart = bld->float_bld.zero;
if (bld->static_state->min_max_lod_equal) {
/* User is forcing sampling from a particular mipmap level.
* This is hit during mipmap generation.
*/
- return min_lod;
+ LLVMValueRef min_lod =
+ bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
+
+ lod = min_lod;
}
else {
- struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef sampler_lod_bias =
bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
- LLVMValueRef max_lod =
- bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit);
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef lod;
if (explicit_lod) {
lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
index0, "");
}
else {
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef dsdx, dsdy;
- LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMValueRef rho;
- dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
- drdy = lp_build_abs(float_bld, drdy);
- }
- }
+ rho = lp_build_rho(bld, ddx, ddy);
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
+ /*
+ * Compute lod = log2(rho)
*/
- rho = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
- rho = lp_build_max(float_bld, rho, max);
+
+ if (!lod_bias &&
+ !bld->static_state->lod_bias_non_zero &&
+ !bld->static_state->apply_max_lod &&
+ !bld->static_state->apply_min_lod) {
+ /*
+ * Special case when there are no post-log2 adjustments, which
+ * saves instructions by keeping the integer and fractional lod
+ * computations separate from the start.
+ */
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+ mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+ *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+ *out_lod_fpart = bld->float_bld.zero;
+ return;
+ }
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
+ !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+ lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
+ out_lod_ipart, out_lod_fpart);
+ return;
}
}
- /* compute lod = log2(rho) */
- lod = lp_build_log2(float_bld, rho);
+ if (0) {
+ lod = lp_build_log2(float_bld, rho);
+ }
+ else {
+ lod = lp_build_fast_log2(float_bld, rho);
+ }
/* add shader lod bias */
if (lod_bias) {
@@ -221,13 +486,43 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
}
/* add sampler lod bias */
- lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+ if (bld->static_state->lod_bias_non_zero)
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+
/* clamp lod */
- lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+ if (bld->static_state->apply_max_lod) {
+ LLVMValueRef max_lod =
+ bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit);
+
+ lod = lp_build_min(float_bld, lod, max_lod);
+ }
+ if (bld->static_state->apply_min_lod) {
+ LLVMValueRef min_lod =
+ bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
- return lod;
+ lod = lp_build_max(float_bld, lod, min_lod);
+ }
}
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+ lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
+ out_lod_ipart, out_lod_fpart);
+ }
+ else {
+ lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
+ }
+
+ lp_build_name(*out_lod_fpart, "lod_fpart");
+ }
+ else {
+ *out_lod_ipart = lp_build_iround(float_bld, lod);
+ }
+
+ lp_build_name(*out_lod_ipart, "lod_ipart");
+
+ return;
}
@@ -241,10 +536,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
LLVMValueRef *level_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;
@@ -254,7 +548,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
bld->builder, unit);
/* convert float lod to integer */
- level = lp_build_iround(float_bld, lod);
+ level = lod_ipart;
/* clamp level to legal range of levels */
*level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -269,43 +563,77 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
+ LLVMValueRef *lod_fpart_inout,
LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out)
+ LLVMValueRef *level1_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMBuilderRef builder = bld->builder;
struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef last_level;
+ LLVMValueRef clamp_min;
+ LLVMValueRef clamp_max;
+
+ *level0_out = lod_ipart;
+ *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one);
last_level = bld->dynamic_state->last_level(bld->dynamic_state,
bld->builder, unit);
- /* convert float lod to integer */
- level = lp_build_ifloor(float_bld, lod);
-
- /* compute level 0 and clamp to legal range of levels */
- *level0_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
- /* compute level 1 and clamp to legal range of levels */
- level = lp_build_add(int_bld, level, int_bld->one);
- *level1_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
-
- *weight_out = lp_build_fract(float_bld, lod);
+ /*
+ * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
+ * minimum number of comparisons, and zeroing lod_fpart in the extreme
+ * ends in the process.
+ */
+
+ /* lod_ipart < 0 */
+ clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
+ lod_ipart, int_bld->zero,
+ "clamp_lod_to_zero");
+
+ *level0_out = LLVMBuildSelect(builder, clamp_min,
+ int_bld->zero, *level0_out, "");
+
+ *level1_out = LLVMBuildSelect(builder, clamp_min,
+ int_bld->zero, *level1_out, "");
+
+ *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
+ float_bld->zero, *lod_fpart_inout, "");
+
+ /* lod_ipart >= last_level */
+ clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, last_level,
+ "clamp_lod_to_last");
+
+ *level0_out = LLVMBuildSelect(builder, clamp_max,
+ last_level, *level0_out, "");
+
+ *level1_out = LLVMBuildSelect(builder, clamp_max,
+ last_level, *level1_out, "");
+
+ *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
+ float_bld->zero, *lod_fpart_inout, "");
+
+ lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
+ lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
+ lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
}
+/**
+ * Return pointer to a single mipmap level.
+ * \param bld sample context; bld->data_array is the array of pointers to mipmap levels
+ * \param level integer mipmap level
+ */
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, LLVMValueRef level)
+ LLVMValueRef level)
{
LLVMValueRef indexes[2], data_ptr;
indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
indexes[1] = level;
- data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+ data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, "");
data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
return data_ptr;
}
@@ -313,10 +641,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
LLVMValueRef
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, int level)
+ int level)
{
LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_mipmap_level(bld, data_array, lvl);
+ return lp_build_get_mipmap_level(bld, lvl);
}
@@ -325,13 +653,24 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
* Return max(1, base_size >> level);
*/
static LLVMValueRef
-lp_build_minify(struct lp_build_sample_context *bld,
+lp_build_minify(struct lp_build_context *bld,
LLVMValueRef base_size,
LLVMValueRef level)
{
- LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
- size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
- return size;
+ assert(lp_check_value(bld->type, base_size));
+ assert(lp_check_value(bld->type, level));
+
+ if (level == bld->zero) {
+ /* if we're using mipmap level zero, no minification is needed */
+ return base_size;
+ }
+ else {
+ LLVMValueRef size =
+ LLVMBuildLShr(bld->builder, base_size, level, "minify");
+ assert(bld->type.sign);
+ size = lp_build_max(bld, size, bld->one);
+ return size;
+ }
}
@@ -360,71 +699,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
*/
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
- unsigned dims,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef ilevel0,
- LLVMValueRef ilevel1,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef *width0_vec,
- LLVMValueRef *width1_vec,
- LLVMValueRef *height0_vec,
- LLVMValueRef *height1_vec,
- LLVMValueRef *depth0_vec,
- LLVMValueRef *depth1_vec,
- LLVMValueRef *row_stride0_vec,
- LLVMValueRef *row_stride1_vec,
- LLVMValueRef *img_stride0_vec,
- LLVMValueRef *img_stride1_vec)
+ LLVMValueRef ilevel,
+ LLVMValueRef *out_size,
+ LLVMValueRef *row_stride_vec,
+ LLVMValueRef *img_stride_vec)
{
- const unsigned mip_filter = bld->static_state->min_mip_filter;
- LLVMValueRef ilevel0_vec, ilevel1_vec;
+ const unsigned dims = bld->dims;
+ LLVMValueRef ilevel_vec;
- ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
+ ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
/*
- * Compute width, height, depth at mipmap level 'ilevel0'
+ * Compute width, height, depth at mipmap level 'ilevel'
*/
- *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
+ *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+
if (dims >= 2) {
- *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
- *row_stride0_vec = lp_build_get_level_stride_vec(bld,
- row_stride_array,
- ilevel0);
+ *row_stride_vec = lp_build_get_level_stride_vec(bld,
+ bld->row_stride_array,
+ ilevel);
if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- *img_stride0_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel0);
- if (dims == 3) {
- *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
- }
+ *img_stride_vec = lp_build_get_level_stride_vec(bld,
+ bld->img_stride_array,
+ ilevel);
}
}
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* compute width, height, depth for second mipmap level at 'ilevel1' */
- *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
- if (dims >= 2) {
- *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
- *row_stride1_vec = lp_build_get_level_stride_vec(bld,
- row_stride_array,
- ilevel1);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- *img_stride1_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel1);
- if (dims == 3) {
- *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
- }
- }
+}
+
+
+/**
+ * Extract and broadcast texture size.
+ *
+ * @param size_type type of the texture size vector (either
+ * bld->int_size_type or bld->float_size_type)
+ * @param coord_type type of the broadcast output vectors (either
+ * bld->int_coord_type or bld->coord_type)
+ * @param size vector of type size_type with the texture size (width, height,
+ * depth)
+ */
+void
+lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
+ struct lp_type size_type,
+ struct lp_type coord_type,
+ LLVMValueRef size,
+ LLVMValueRef *out_width,
+ LLVMValueRef *out_height,
+ LLVMValueRef *out_depth)
+{
+ const unsigned dims = bld->dims;
+ LLVMTypeRef i32t = LLVMInt32Type();
+
+ *out_width = lp_build_extract_broadcast(bld->builder,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 0, 0));
+ if (dims >= 2) {
+ *out_height = lp_build_extract_broadcast(bld->builder,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 1, 0));
+ if (dims == 3) {
+ *out_depth = lp_build_extract_broadcast(bld->builder,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 2, 0));
}
}
}
+/**
+ * Unnormalize coords.
+ *
+ * @param flt_size vector with the floating-point texture size (width, height, depth)
+ */
+void
+lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
+ LLVMValueRef flt_size,
+ LLVMValueRef *s,
+ LLVMValueRef *t,
+ LLVMValueRef *r)
+{
+ const unsigned dims = bld->dims;
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef depth;
+
+ lp_build_extract_image_sizes(bld,
+ bld->float_size_type,
+ bld->coord_type,
+ flt_size,
+ &width,
+ &height,
+ &depth);
+
+ /* s = s * width, t = t * height */
+ *s = lp_build_mul(&bld->coord_bld, *s, width);
+ if (dims >= 2) {
+ *t = lp_build_mul(&bld->coord_bld, *t, height);
+ if (dims >= 3) {
+ *r = lp_build_mul(&bld->coord_bld, *r, depth);
+ }
+ }
+}
+
/** Helper used by lp_build_cube_lookup() */
static LLVMValueRef
@@ -543,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
{
- struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
+ LLVMValueRef face_s_var;
+ LLVMValueRef face_t_var;
+ LLVMValueRef face_var;
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- *face_s = bld->coord_bld.undef;
- *face_t = bld->coord_bld.undef;
- *face = bld->int_bld.undef;
+ face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var");
+ face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var");
+ face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var");
- lp_build_name(*face_s, "face_s");
- lp_build_name(*face_t, "face_t");
- lp_build_name(*face, "face");
-
- lp_build_flow_scope_declare(flow_ctx, face_s);
- lp_build_flow_scope_declare(flow_ctx, face_t);
- lp_build_flow_scope_declare(flow_ctx, face);
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
+ lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz);
{
/* +/- X face */
LLVMValueRef sign = lp_build_sgn(float_bld, rx);
@@ -571,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
*face = lp_build_cube_face(bld, rx,
PIPE_TEX_FACE_POS_X,
PIPE_TEX_FACE_NEG_X);
+ LLVMBuildStore(bld->builder, *face_s, face_s_var);
+ LLVMBuildStore(bld->builder, *face_t, face_t_var);
+ LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_else(&if_ctx);
{
- struct lp_build_flow_context *flow_ctx2;
struct lp_build_if_state if_ctx2;
- LLVMValueRef face_s2 = bld->coord_bld.undef;
- LLVMValueRef face_t2 = bld->coord_bld.undef;
- LLVMValueRef face2 = bld->int_bld.undef;
-
- flow_ctx2 = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx2);
- lp_build_flow_scope_declare(flow_ctx2, &face_s2);
- lp_build_flow_scope_declare(flow_ctx2, &face_t2);
- lp_build_flow_scope_declare(flow_ctx2, &face2);
-
ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
- lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+ lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz);
{
/* +/- Y face */
LLVMValueRef sign = lp_build_sgn(float_bld, ry);
LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
- face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
- face2 = lp_build_cube_face(bld, ry,
+ *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+ *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+ *face = lp_build_cube_face(bld, ry,
PIPE_TEX_FACE_POS_Y,
PIPE_TEX_FACE_NEG_Y);
+ LLVMBuildStore(bld->builder, *face_s, face_s_var);
+ LLVMBuildStore(bld->builder, *face_t, face_t_var);
+ LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_else(&if_ctx2);
{
/* +/- Z face */
LLVMValueRef sign = lp_build_sgn(float_bld, rz);
LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
- face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- face2 = lp_build_cube_face(bld, rz,
+ *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+ *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ *face = lp_build_cube_face(bld, rz,
PIPE_TEX_FACE_POS_Z,
PIPE_TEX_FACE_NEG_Z);
+ LLVMBuildStore(bld->builder, *face_s, face_s_var);
+ LLVMBuildStore(bld->builder, *face_t, face_t_var);
+ LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_endif(&if_ctx2);
- lp_build_flow_scope_end(flow_ctx2);
- lp_build_flow_destroy(flow_ctx2);
- *face_s = face_s2;
- *face_t = face_t2;
- *face = face2;
}
lp_build_endif(&if_ctx);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
+
+ *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s");
+ *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t");
+ *face = LLVMBuildLoad(bld->builder, face_var, "face");
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index bb485784ef..ffed27cee8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -83,6 +83,9 @@ struct lp_sampler_static_state
unsigned compare_func:3;
unsigned normalized_coords:1;
unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */
+ unsigned lod_bias_non_zero:1;
+ unsigned apply_min_lod:1; /**< min_lod > 0 ? */
+ unsigned apply_max_lod:1; /**< max_lod < last_level ? */
};
@@ -99,61 +102,64 @@ struct lp_sampler_static_state
struct lp_sampler_dynamic_state
{
- /** Obtain the base texture width. */
+ /** Obtain the base texture width (returns int32) */
LLVMValueRef
(*width)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the base texture height. */
+ /** Obtain the base texture height (returns int32) */
LLVMValueRef
(*height)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the base texture depth. */
+ /** Obtain the base texture depth (returns int32) */
LLVMValueRef
(*depth)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the number of mipmap levels (minus one). */
+ /** Obtain the number of mipmap levels minus one (returns int32) */
LLVMValueRef
(*last_level)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain stride in bytes between image rows/blocks (returns int32) */
LLVMValueRef
(*row_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain stride in bytes between image slices (returns int32) */
LLVMValueRef
(*img_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain pointer to array of pointers to mipmap levels */
LLVMValueRef
(*data_ptr)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain texture min lod */
+ /** Obtain texture min lod (returns float) */
LLVMValueRef
(*min_lod)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);
- /** Obtain texture max lod */
+ /** Obtain texture max lod (returns float) */
LLVMValueRef
(*max_lod)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);
- /** Obtain texture lod bias */
+ /** Obtain texture lod bias (returns float) */
LLVMValueRef
(*lod_bias)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);
- /** Obtain texture border color */
+ /** Obtain texture border color (returns ptr to float[4]) */
LLVMValueRef
(*border_color)(const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder, unsigned unit);
@@ -173,6 +179,9 @@ struct lp_build_sample_context
const struct util_format_description *format_desc;
+ /* See texture_dims() */
+ unsigned dims;
+
/** regular scalar float type */
struct lp_type float_type;
struct lp_build_context float_bld;
@@ -188,17 +197,32 @@ struct lp_build_sample_context
struct lp_type coord_type;
struct lp_build_context coord_bld;
- /** Unsigned integer coordinates */
- struct lp_type uint_coord_type;
- struct lp_build_context uint_coord_bld;
-
/** Signed integer coordinates */
struct lp_type int_coord_type;
struct lp_build_context int_coord_bld;
+ /** Signed integer texture size (lp_build_minify asserts the type is signed) */
+ struct lp_type int_size_type;
+ struct lp_build_context int_size_bld;
+
+ /** Floating-point texture size */
+ struct lp_type float_size_type;
+ struct lp_build_context float_size_bld;
+
/** Output texels type and build context */
struct lp_type texel_type;
struct lp_build_context texel_bld;
+
+ /* Common dynamic state values */
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef depth;
+ LLVMValueRef row_stride_array;
+ LLVMValueRef img_stride_array;
+ LLVMValueRef data_array;
+
+ /** Integer vector with texture width, height, depth */
+ LLVMValueRef int_size;
};
@@ -235,7 +259,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
}
-static INLINE int
+static INLINE unsigned
texture_dims(enum pipe_texture_target tex)
{
switch (tex) {
@@ -254,6 +278,11 @@ texture_dims(enum pipe_texture_target tex)
}
+boolean
+lp_sampler_wrap_mode_uses_border_color(unsigned mode,
+ unsigned min_img_filter,
+ unsigned mag_img_filter);
+
/**
* Derive the sampler static state.
*/
@@ -263,16 +292,16 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
-LLVMValueRef
+void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth);
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart);
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -283,40 +312,44 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
+ LLVMValueRef *lod_fpart_inout,
LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out);
+ LLVMValueRef *level1_out);
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, LLVMValueRef level);
+ LLVMValueRef level);
LLVMValueRef
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, int level);
+ int level);
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
- unsigned dims,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef ilevel0,
- LLVMValueRef ilevel1,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef *width0_vec,
- LLVMValueRef *width1_vec,
- LLVMValueRef *height0_vec,
- LLVMValueRef *height1_vec,
- LLVMValueRef *depth0_vec,
- LLVMValueRef *depth1_vec,
- LLVMValueRef *row_stride0_vec,
- LLVMValueRef *row_stride1_vec,
- LLVMValueRef *img_stride0_vec,
- LLVMValueRef *img_stride1_vec);
+ LLVMValueRef ilevel,
+ LLVMValueRef *out_size_vec,
+ LLVMValueRef *row_stride_vec,
+ LLVMValueRef *img_stride_vec);
+
+
+void
+lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
+ struct lp_type size_type,
+ struct lp_type coord_type,
+ LLVMValueRef size,
+ LLVMValueRef *out_width,
+ LLVMValueRef *out_height,
+ LLVMValueRef *out_depth);
+
+
+void
+lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
+ LLVMValueRef flt_size,
+ LLVMValueRef *s,
+ LLVMValueRef *t,
+ LLVMValueRef *r);
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 49a6eed615..d3e3b242af 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -45,6 +45,7 @@
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
+#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
@@ -80,11 +81,10 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
LLVMValueRef *out_offset,
LLVMValueRef *out_i)
{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;
- length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
switch(wrap_mode) {
case PIPE_TEX_WRAP_REPEAT:
@@ -92,7 +92,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
else {
/* Add a bias to the texcoord to handle negative coords */
- LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
coord = LLVMBuildAdd(bld->builder, coord, bias, "");
coord = LLVMBuildURem(bld->builder, coord, length, "");
}
@@ -113,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
assert(0);
}
- lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
out_offset, out_i);
}
@@ -146,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
LLVMValueRef *i0,
LLVMValueRef *i1)
{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;
LLVMValueRef lmask, umask, mask;
@@ -188,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
* multiplication.
*/
- *i0 = uint_coord_bld->zero;
- *i1 = uint_coord_bld->zero;
+ *i0 = int_coord_bld->zero;
+ *i1 = int_coord_bld->zero;
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
@@ -200,7 +199,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
}
else {
/* Add a bias to the texcoord to handle negative coords */
- LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
}
@@ -208,9 +207,9 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
mask = lp_build_compare(bld->builder, int_coord_bld->type,
PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
*offset1 = LLVMBuildAnd(bld->builder,
- lp_build_add(uint_coord_bld, *offset0, stride),
+ lp_build_add(int_coord_bld, *offset0, stride),
mask, "");
break;
@@ -225,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
- *offset1 = lp_build_add(uint_coord_bld,
+ *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(int_coord_bld,
*offset0,
LLVMBuildAnd(bld->builder, stride, mask, ""));
break;
@@ -239,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
default:
assert(0);
- *offset0 = uint_coord_bld->zero;
- *offset1 = uint_coord_bld->zero;
+ *offset0 = int_coord_bld->zero;
+ *offset1 = int_coord_bld->zero;
break;
}
}
@@ -253,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ LLVMValueRef int_size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -265,11 +262,12 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef *colors_lo,
LLVMValueRef *colors_hi)
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
LLVMValueRef i32_c8;
+ LLVMValueRef width_vec, height_vec, depth_vec;
LLVMValueRef s_ipart, t_ipart, r_ipart;
LLVMValueRef x_stride;
LLVMValueRef x_offset, offset;
@@ -283,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ int_size,
+ &width_vec,
+ &height_vec,
+ &depth_vec);
+
if (bld->static_state->normalized_coords) {
- /* s = s * width, t = t * height */
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
- coord_vec_type, "");
- s = lp_build_mul(&bld->coord_bld, s, fp_width);
- if (dims >= 2) {
- LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
- coord_vec_type, "");
- t = lp_build_mul(&bld->coord_bld, t, fp_height);
- if (dims >= 3) {
- LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
- coord_vec_type, "");
- r = lp_build_mul(&bld->coord_bld, r, fp_depth);
- }
- }
- }
+ LLVMValueRef scaled_size;
+ LLVMValueRef flt_size;
- /* scale coords by 256 (8 fractional bits) */
- s = lp_build_mul_imm(&bld->coord_bld, s, 256);
- if (dims >= 2)
- t = lp_build_mul_imm(&bld->coord_bld, t, 256);
- if (dims >= 3)
- r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ /* scale size by 256 (8 fractional bits) */
+ scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
+
+ lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
+ }
+ else {
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ }
/* convert float to int */
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -324,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
/* get pixel, row, image strides */
- x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ x_stride = lp_build_const_vec(bld->int_coord_bld.type,
bld->format_desc->block.bits/8);
/* Do texcoord wrapping, compute texel offset */
@@ -343,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
bld->static_state->pot_height,
bld->static_state->wrap_t,
&y_offset, &y_subcoord);
- offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
+ offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
if (dims >= 3) {
LLVMValueRef z_offset;
lp_build_sample_wrap_nearest_int(bld,
@@ -352,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
bld->static_state->pot_height,
bld->static_state->wrap_r,
&z_offset, &z_subcoord);
- offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
}
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
LLVMValueRef z_offset;
/* The r coord is the cube face in [0,5] */
- z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
- offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
+ offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
}
}
@@ -417,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ LLVMValueRef int_size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -429,11 +428,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef *colors_lo,
LLVMValueRef *colors_hi)
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
LLVMValueRef i32_c8, i32_c128, i32_c255;
+ LLVMValueRef width_vec, height_vec, depth_vec;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
@@ -458,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ int_size,
+ &width_vec,
+ &height_vec,
+ &depth_vec);
+
if (bld->static_state->normalized_coords) {
- /* s = s * width, t = t * height */
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
- coord_vec_type, "");
- s = lp_build_mul(&bld->coord_bld, s, fp_width);
- if (dims >= 2) {
- LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
- coord_vec_type, "");
- t = lp_build_mul(&bld->coord_bld, t, fp_height);
- }
- if (dims >= 3) {
- LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
- coord_vec_type, "");
- r = lp_build_mul(&bld->coord_bld, r, fp_depth);
- }
- }
+ LLVMValueRef scaled_size;
+ LLVMValueRef flt_size;
- /* scale coords by 256 (8 fractional bits) */
- s = lp_build_mul_imm(&bld->coord_bld, s, 256);
- if (dims >= 2)
- t = lp_build_mul_imm(&bld->coord_bld, t, 256);
- if (dims >= 3)
- r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ /* scale size by 256 (8 fractional bits) */
+ scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
+
+ lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
+ }
+ else {
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ }
/* convert float to int */
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -517,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
/* get pixel, row and image strides */
- x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ x_stride = lp_build_const_vec(bld->int_coord_bld.type,
bld->format_desc->block.bits/8);
y_stride = row_stride_vec;
z_stride = img_stride_vec;
@@ -548,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
for (z = 0; z < 2; z++) {
for (x = 0; x < 2; x++) {
- offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
offset[z][0][x], y_offset0);
- offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
offset[z][1][x], y_offset1);
}
}
@@ -566,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
&z_subcoord[0], &z_subcoord[1]);
for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) {
- offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
offset[0][y][x], z_offset0);
- offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
offset[1][y][x], z_offset1);
}
}
}
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
LLVMValueRef z_offset;
- z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
+ z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) {
/* The r coord is the cube face in [0,5] */
- offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
offset[0][y][x], z_offset);
}
}
@@ -781,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
LLVMValueRef lod_fpart,
- LLVMValueRef width0_vec,
- LLVMValueRef width1_vec,
- LLVMValueRef height0_vec,
- LLVMValueRef height1_vec,
- LLVMValueRef depth0_vec,
- LLVMValueRef depth1_vec,
- LLVMValueRef row_stride0_vec,
- LLVMValueRef row_stride1_vec,
- LLVMValueRef img_stride0_vec,
- LLVMValueRef img_stride1_vec,
- LLVMValueRef data_ptr0,
- LLVMValueRef data_ptr1,
- LLVMValueRef *colors_lo,
- LLVMValueRef *colors_hi)
+ LLVMValueRef colors_lo_var,
+ LLVMValueRef colors_hi_var)
{
+ LLVMBuilderRef builder = bld->builder;
+ LLVMValueRef size0;
+ LLVMValueRef size1;
+ LLVMValueRef row_stride0_vec;
+ LLVMValueRef row_stride1_vec;
+ LLVMValueRef img_stride0_vec;
+ LLVMValueRef img_stride1_vec;
+ LLVMValueRef data_ptr0;
+ LLVMValueRef data_ptr1;
LLVMValueRef colors0_lo, colors0_hi;
LLVMValueRef colors1_lo, colors1_hi;
+
+ /* sample the first mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel0,
+ &size0,
+ &row_stride0_vec, &img_stride0_vec);
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
- /* sample the first mipmap level */
lp_build_sample_image_nearest(bld,
- width0_vec, height0_vec, depth0_vec,
+ size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r,
&colors0_lo, &colors0_hi);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_nearest(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
- &colors1_lo, &colors1_hi);
- }
}
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
-
- /* sample the first mipmap level */
lp_build_sample_image_linear(bld,
- width0_vec, height0_vec, depth0_vec,
+ size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r,
&colors0_lo, &colors0_hi);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_linear(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
- &colors1_lo, &colors1_hi);
- }
}
+ /* Store the first level's colors in the output variables */
+ LLVMBuildStore(builder, colors0_lo, colors_lo_var);
+ LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* interpolate samples from the two mipmap levels */
- struct lp_build_context h16;
- lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));
-
- *colors_lo = lp_build_lerp(&h16, lod_fpart,
- colors0_lo, colors1_lo);
- *colors_hi = lp_build_lerp(&h16, lod_fpart,
- colors0_hi, colors1_hi);
- }
- else {
- /* use first/only level's colors */
- *colors_lo = colors0_lo;
- *colors_hi = colors0_hi;
+ LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
+ LLVMTypeRef i32_type = LLVMIntType(32);
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef need_lerp;
+
+ lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
+ lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
+
+ /* need_lerp = lod_fpart > 0 */
+ need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
+ lod_fpart, LLVMConstNull(i32_type),
+ "need_lerp");
+
+ lp_build_if(&if_ctx, builder, need_lerp);
+ {
+ struct lp_build_context h16_bld;
+
+ lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
+
+ /* sample the second mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel1,
+ &size1,
+ &row_stride1_vec, &img_stride1_vec);
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+ else {
+ lp_build_sample_image_linear(bld,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+
+ /* interpolate samples from the two mipmap levels */
+
+ lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
+ lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
+
+#if HAVE_LLVM == 0x208
+ /* This is a work-around for a bug in LLVM 2.8.
+ * Evidently, something goes wrong in the construction of the
+ * lod_fpart short[8] vector. Adding this no-effect shuffle seems
+ * to force the vector to be properly constructed.
+ * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
+ */
+ {
+ LLVMValueRef shuffles[8], shuffle;
+ int i;
+ assert(h16_bld.type.length <= Elements(shuffles));
+ for (i = 0; i < h16_bld.type.length; i++)
+ shuffles[i] = lp_build_const_int32(2 * (i & 1));
+ shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
+ lod_fpart = LLVMBuildShuffleVector(builder,
+ lod_fpart, lod_fpart,
+ shuffle, "");
+ }
+#endif
+
+ colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
+ colors0_lo, colors1_lo);
+ colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
+ colors0_hi, colors1_hi);
+
+ LLVMBuildStore(builder, colors0_lo, colors_lo_var);
+ LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+ }
+ lp_build_endif(&if_ctx);
}
}
@@ -871,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef data_array,
LLVMValueRef texel_out[4])
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef lod = NULL, lod_fpart = NULL;
+ const unsigned dims = bld->dims;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
- LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
- LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
- LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
- LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
- LLVMValueRef data_ptr0, data_ptr1 = NULL;
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
LLVMValueRef face_ddx[4], face_ddy[4];
- struct lp_build_context h16;
- LLVMTypeRef h16_vec_type;
+ struct lp_build_context h16_bld;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
/* we only support the common/simple wrap modes at this time */
assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
@@ -910,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
/* make 16-bit fixed-pt builder context */
- lp_build_context_init(&h16, builder, lp_type_ufixed(16));
- h16_vec_type = lp_build_vec_type(h16.type);
-
+ lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
/* cube face selection, compute pre-face coords, etc. */
if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
@@ -924,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
/* recompute ddx, ddy using the new (s,t) face texcoords */
- face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
- face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
face_ddx[2] = NULL;
face_ddx[3] = NULL;
- face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
- face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
face_ddy[2] = NULL;
face_ddy[3] = NULL;
ddx = face_ddx;
ddy = face_ddy;
}
-
/*
* Compute the level of detail (float).
*/
@@ -945,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, unit, ddx, ddy,
- lod_bias, explicit_lod,
- width, height, depth);
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = i32t_zero;
}
/*
* Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
- * If mipfilter=linear, also compute the weight between the two
- * mipmap levels: lod_fpart
*/
switch (mip_filter) {
default:
@@ -966,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
- lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ ilevel0 = i32t_zero;
}
break;
case PIPE_TEX_MIPFILTER_NEAREST:
- assert(lod);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- {
- LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
- LLVMValueRef i255 = lp_build_const_int32(255);
- LLVMTypeRef i16_type = LLVMIntType(16);
-
- assert(lod);
-
- lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
- &lod_fpart);
- lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
- lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
- lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
- lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
- lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);
-
- /* the lod_fpart values will be fixed pt values in [0,1) */
- }
+ assert(lod_ipart);
+ assert(lod_fpart);
+ lp_build_linear_mip_levels(bld, unit,
+ lod_ipart, &lod_fpart,
+ &ilevel0, &ilevel1);
break;
}
- /* compute image size(s) of source mipmap level(s) */
- lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
- ilevel0, ilevel1,
- row_stride_array, img_stride_array,
- &width0_vec, &width1_vec,
- &height0_vec, &height1_vec,
- &depth0_vec, &depth1_vec,
- &row_stride0_vec, &row_stride1_vec,
- &img_stride0_vec, &img_stride1_vec);
-
/*
- * Get pointer(s) to image data for mipmap level(s).
+ * Get/interpolate texture colors.
*/
- data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
- }
+ packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
+ packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");
- /*
- * Get/interpolate texture colors.
- */
if (min_filter == mag_filter) {
/* no need to distinguish between minification and magnification */
- lp_build_sample_mipmap(bld, min_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- &packed_lo, &packed_hi);
+ lp_build_sample_mipmap(bld,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ packed_lo, packed_hi);
}
else {
/* Emit conditional to choose min image filter or mag image filter
* depending on the lod being > 0 or <= 0, respectively.
*/
- struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
LLVMValueRef minify;
- flow_ctx = lp_build_flow_create(builder);
- lp_build_flow_scope_begin(flow_ctx);
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
- packed_lo = LLVMGetUndef(h16_vec_type);
- packed_hi = LLVMGetUndef(h16_vec_type);
-
- lp_build_flow_scope_declare(flow_ctx, &packed_lo);
- lp_build_flow_scope_declare(flow_ctx, &packed_hi);
-
- /* minify = lod > 0.0 */
- minify = LLVMBuildFCmp(builder, LLVMRealUGE,
- lod, float_bld->zero, "");
-
- lp_build_if(&if_ctx, flow_ctx, builder, minify);
+ lp_build_if(&if_ctx, builder, minify);
{
/* Use the minification filter */
- lp_build_sample_mipmap(bld, min_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- &packed_lo, &packed_hi);
+ lp_build_sample_mipmap(bld,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ packed_lo, packed_hi);
}
lp_build_else(&if_ctx);
{
/* Use the magnification filter */
- lp_build_sample_mipmap(bld, mag_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- &packed_lo, &packed_hi);
+ lp_build_sample_mipmap(bld,
+ mag_filter, PIPE_TEX_MIPFILTER_NONE,
+ s, t, r,
+ i32t_zero, NULL, NULL,
+ packed_lo, packed_hi);
}
lp_build_endif(&if_ctx);
-
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
}
- /* combine 'packed_lo', 'packed_hi' into 'packed' */
- {
- struct lp_build_context h16, u8n;
-
- lp_build_context_init(&h16, builder, lp_type_ufixed(16));
- lp_build_context_init(&u8n, builder, lp_type_unorm(8));
-
- packed = lp_build_pack2(builder, h16.type, u8n.type,
- packed_lo, packed_hi);
- }
+ /*
+ * combine the values stored in 'packed_lo' and 'packed_hi' variables
+ * into 'packed'
+ */
+ packed = lp_build_pack2(builder,
+ h16_bld.type, lp_type_unorm(8),
+ LLVMBuildLoad(builder, packed_lo, ""),
+ LLVMBuildLoad(builder, packed_hi, ""));
/*
* Convert to SoA and swizzle.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
index e1045bbbc2..5d9ecac4d5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
@@ -50,15 +50,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef data_array,
LLVMValueRef texel_out[4]);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 33740f9759..53cc0c5f34 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -59,31 +59,6 @@
/**
- * Does the given texture wrap mode allow sampling the texture border color?
- * XXX maybe move this into gallium util code.
- */
-static boolean
-wrap_mode_uses_border_color(unsigned mode)
-{
- switch (mode) {
- case PIPE_TEX_WRAP_REPEAT:
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- return FALSE;
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- return TRUE;
- default:
- assert(0 && "unexpected wrap mode");
- return FALSE;
- }
-}
-
-
-/**
* Generate code to fetch a texel from a texture at int coords (x, y, z).
* The computation depends on whether the texture is 1D, 2D or 3D.
* The result, texel, will be float vectors:
@@ -106,21 +81,27 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
LLVMValueRef data_ptr,
LLVMValueRef texel_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const struct lp_sampler_static_state *static_state = bld->static_state;
+ const unsigned dims = bld->dims;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef offset;
LLVMValueRef i, j;
LLVMValueRef use_border = NULL;
/* use_border = x < 0 || x >= width || y < 0 || y >= height */
- if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
+ if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
}
- if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
+ if (dims >= 2 &&
+ lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
@@ -133,7 +114,10 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
- if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
+ if (dims == 3 &&
+ lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
@@ -147,7 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
/* convert x,y,z coords to linear offset from start of texture, in bytes */
- lp_build_sample_offset(&bld->uint_coord_bld,
+ lp_build_sample_offset(&bld->int_coord_bld,
bld->format_desc,
x, y, z, y_stride, z_stride,
&offset, &i, &j);
@@ -161,7 +145,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
* coords which are out of bounds to become zero. Zero's guaranteed
* to be inside the texture image.
*/
- offset = lp_build_andnot(&bld->uint_coord_bld, offset, use_border);
+ offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
}
lp_build_fetch_rgba_soa(bld->builder,
@@ -218,11 +202,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef fract, flr, isOdd;
- /* fract = coord - floor(coord) */
- fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
-
- /* flr = ifloor(coord); */
- flr = lp_build_ifloor(coord_bld, coord);
+ lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
/* isOdd = flr & 1 */
isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
@@ -250,6 +230,7 @@ static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
LLVMValueRef coord,
LLVMValueRef length,
+ LLVMValueRef length_f,
boolean is_pot,
unsigned wrap_mode,
LLVMValueRef *x0_out,
@@ -258,10 +239,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
{
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
- LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
- LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
LLVMValueRef coord0, coord1, weight;
switch(wrap_mode) {
@@ -269,23 +248,25 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
/* mul by size and subtract 0.5 */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
- coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
/* repeat wrap */
if (is_pot) {
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
}
else {
/* Add a bias to the texcoord to handle negative coords */
- LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
+ LLVMValueRef mask;
coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
- coord1 = LLVMBuildAdd(bld->builder, coord1, bias, "");
coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
- coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+ coord1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
+ mask, "");
}
break;
@@ -300,53 +281,47 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- if (bld->static_state->normalized_coords) {
- /* clamp to [0,1] */
- coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
- /* mul by tex size and subtract 0.5 */
- coord = lp_build_mul(coord_bld, coord, length_f);
+ {
+ struct lp_build_context abs_coord_bld = bld->coord_bld;
+ abs_coord_bld.type.sign = FALSE;
+
+ if (bld->static_state->normalized_coords) {
+ /* mul by tex size */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ /* clamp to length max */
+ coord = lp_build_min(coord_bld, coord, length_f);
+ /* subtract 0.5 */
coord = lp_build_sub(coord_bld, coord, half);
+ /* clamp to [0, length - 0.5] */
+ coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ /* coord1 = min(coord1, length-1) */
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ break;
}
- else {
- LLVMValueRef min, max;
- /* clamp to [0.5, length - 0.5] */
- min = half;
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- }
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* coord0 = floor(coord); */
- coord0 = lp_build_ifloor(coord_bld, coord);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
- /* coord0 = max(coord0, 0) */
- coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
- /* coord1 = min(coord1, length-1) */
- coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
- break;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
{
- LLVMValueRef min, max;
+ LLVMValueRef min;
if (bld->static_state->normalized_coords) {
/* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_const_vec(coord_bld->type, -0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
+ /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ min = lp_build_const_vec(coord_bld->type, -1.0F);
+ coord = lp_build_clamp(coord_bld, coord, min, length_f);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -359,11 +334,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
-
- /* convert to int coords */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
@@ -385,15 +357,16 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
{
LLVMValueRef min, max;
-
+ struct lp_build_context abs_coord_bld = bld->coord_bld;
+ abs_coord_bld.type.sign = FALSE;
coord = lp_build_abs(coord_bld, coord);
if (bld->static_state->normalized_coords) {
@@ -408,16 +381,14 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
- LLVMValueRef min, max;
-
coord = lp_build_abs(coord_bld, coord);
if (bld->static_state->normalized_coords) {
@@ -425,15 +396,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_negate(coord_bld, half);
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
-
+ /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
+ /* skip -0.5 clamp (always positive), do sub first */
coord = lp_build_sub(coord_bld, coord, half);
+ coord = lp_build_min(coord_bld, coord, length_f);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -462,14 +431,13 @@ static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
LLVMValueRef coord,
LLVMValueRef length,
+ LLVMValueRef length_f,
boolean is_pot,
unsigned wrap_mode)
{
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
- LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
LLVMValueRef icoord;
switch(wrap_mode) {
@@ -480,7 +448,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
else {
/* Add a bias to the texcoord to handle negative coords */
- LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
icoord = LLVMBuildAdd(bld->builder, icoord, bias, "");
icoord = LLVMBuildURem(bld->builder, icoord, length, "");
}
@@ -494,7 +462,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
}
/* floor */
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* use itrunc instead since we clamp to 0 anyway */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length - 1]. */
icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
@@ -528,7 +497,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
assert(bld->static_state->normalized_coords);
coord = lp_build_mul(coord_bld, coord, length_f);
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* itrunc == ifloor here */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length - 1] */
icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
@@ -543,7 +513,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* itrunc == ifloor here */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length - 1] */
icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
@@ -557,7 +528,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* itrunc == ifloor here */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length] */
icoord = lp_build_min(int_coord_bld, icoord, length);
@@ -579,9 +551,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ LLVMValueRef size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -590,25 +560,46 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef r,
LLVMValueRef colors_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
+ LLVMValueRef width_vec;
+ LLVMValueRef height_vec;
+ LLVMValueRef depth_vec;
+ LLVMValueRef flt_size;
+ LLVMValueRef flt_width_vec;
+ LLVMValueRef flt_height_vec;
+ LLVMValueRef flt_depth_vec;
LLVMValueRef x, y, z;
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ size,
+ &width_vec, &height_vec, &depth_vec);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
+
+ lp_build_extract_image_sizes(bld,
+ bld->float_size_type,
+ bld->coord_type,
+ flt_size,
+ &flt_width_vec, &flt_height_vec, &flt_depth_vec);
+
/*
* Compute integer texcoords.
*/
- x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+ x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec,
bld->static_state->pot_width,
bld->static_state->wrap_s);
lp_build_name(x, "tex.x.wrapped");
if (dims >= 2) {
- y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+ y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec,
bld->static_state->pot_height,
bld->static_state->wrap_t);
lp_build_name(y, "tex.y.wrapped");
if (dims == 3) {
- z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
+ z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec,
bld->static_state->pot_depth,
bld->static_state->wrap_r);
lp_build_name(z, "tex.z.wrapped");
@@ -642,9 +633,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ LLVMValueRef size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -653,16 +642,37 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef r,
LLVMValueRef colors_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
+ LLVMValueRef width_vec;
+ LLVMValueRef height_vec;
+ LLVMValueRef depth_vec;
+ LLVMValueRef flt_size;
+ LLVMValueRef flt_width_vec;
+ LLVMValueRef flt_height_vec;
+ LLVMValueRef flt_depth_vec;
LLVMValueRef x0, y0, z0, x1, y1, z1;
LLVMValueRef s_fpart, t_fpart, r_fpart;
LLVMValueRef neighbors[2][2][4];
int chan;
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ size,
+ &width_vec, &height_vec, &depth_vec);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
+
+ lp_build_extract_image_sizes(bld,
+ bld->float_size_type,
+ bld->coord_type,
+ flt_size,
+ &flt_width_vec, &flt_height_vec, &flt_depth_vec);
+
/*
* Compute integer texcoords.
*/
- lp_build_sample_wrap_linear(bld, s, width_vec,
+ lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec,
bld->static_state->pot_width,
bld->static_state->wrap_s,
&x0, &x1, &s_fpart);
@@ -670,7 +680,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
lp_build_name(x1, "tex.x1.wrapped");
if (dims >= 2) {
- lp_build_sample_wrap_linear(bld, t, height_vec,
+ lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec,
bld->static_state->pot_height,
bld->static_state->wrap_t,
&y0, &y1, &t_fpart);
@@ -678,7 +688,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
lp_build_name(y1, "tex.y1.wrapped");
if (dims == 3) {
- lp_build_sample_wrap_linear(bld, r, depth_vec,
+ lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec,
bld->static_state->pot_depth,
bld->static_state->wrap_r,
&z0, &z1, &r_fpart);
@@ -815,69 +825,92 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
LLVMValueRef lod_fpart,
- LLVMValueRef width0_vec,
- LLVMValueRef width1_vec,
- LLVMValueRef height0_vec,
- LLVMValueRef height1_vec,
- LLVMValueRef depth0_vec,
- LLVMValueRef depth1_vec,
- LLVMValueRef row_stride0_vec,
- LLVMValueRef row_stride1_vec,
- LLVMValueRef img_stride0_vec,
- LLVMValueRef img_stride1_vec,
- LLVMValueRef data_ptr0,
- LLVMValueRef data_ptr1,
LLVMValueRef *colors_out)
{
+ LLVMBuilderRef builder = bld->builder;
+ LLVMValueRef size0;
+ LLVMValueRef size1;
+ LLVMValueRef row_stride0_vec;
+ LLVMValueRef row_stride1_vec;
+ LLVMValueRef img_stride0_vec;
+ LLVMValueRef img_stride1_vec;
+ LLVMValueRef data_ptr0;
+ LLVMValueRef data_ptr1;
LLVMValueRef colors0[4], colors1[4];
- int chan;
+ unsigned chan;
+ /* sample the first mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel0,
+ &size0,
+ &row_stride0_vec, &img_stride0_vec);
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
- /* sample the first mipmap level */
lp_build_sample_image_nearest(bld, unit,
- width0_vec, height0_vec, depth0_vec,
+ size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r, colors0);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_nearest(bld, unit,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r, colors1);
- }
+ data_ptr0, s, t, r,
+ colors0);
}
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
-
- /* sample the first mipmap level */
lp_build_sample_image_linear(bld, unit,
- width0_vec, height0_vec, depth0_vec,
+ size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r, colors0);
+ data_ptr0, s, t, r,
+ colors0);
+ }
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_linear(bld, unit,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r, colors1);
- }
+ /* Store the first level's colors in the output variables */
+ for (chan = 0; chan < 4; chan++) {
+ LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* interpolate samples from the two mipmap levels */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef need_lerp;
+
+ /* need_lerp = lod_fpart > 0 */
+ need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
+ lod_fpart,
+ bld->float_bld.zero,
+ "need_lerp");
+
+ lp_build_if(&if_ctx, builder, need_lerp);
+ {
+ /* sample the second mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel1,
+ &size1,
+ &row_stride1_vec, &img_stride1_vec);
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld, unit,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ colors1);
+ }
+ else {
+ lp_build_sample_image_linear(bld, unit,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ colors1);
+ }
+
+ /* interpolate samples from the two mipmap levels */
+
+ lod_fpart = lp_build_broadcast_scalar(&bld->texel_bld, lod_fpart);
+
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
colors0[chan], colors1[chan]);
+ LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
+ }
}
- }
- else {
- /* use first/only level's colors */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = colors0[chan];
- }
+ lp_build_endif(&if_ctx);
}
}
@@ -898,30 +931,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef data_array,
LLVMValueRef *colors_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef lod = NULL, lod_fpart = NULL;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
- LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
- LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
- LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
- LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
- LLVMValueRef data_ptr0, data_ptr1 = NULL;
LLVMValueRef face_ddx[4], face_ddy[4];
+ LLVMValueRef texels[4];
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
+ unsigned chan;
/*
printf("%s mip %d min %d mag %d\n", __FUNCTION__,
@@ -940,12 +963,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
/* recompute ddx, ddy using the new (s,t) face texcoords */
- face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
- face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
face_ddx[2] = NULL;
face_ddx[3] = NULL;
- face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
- face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
face_ddy[2] = NULL;
face_ddy[3] = NULL;
ddx = face_ddx;
@@ -960,126 +983,100 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, unit, ddx, ddy,
- lod_bias, explicit_lod,
- width, height, depth);
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = i32t_zero;
}
/*
- * Compute integer mipmap level(s) to fetch texels from.
+ * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
*/
- if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ switch (mip_filter) {
+ default:
+ assert(0 && "bad mip_filter value in lp_build_sample_soa()");
+ /* fall-through */
+ case PIPE_TEX_MIPFILTER_NONE:
/* always use mip level 0 */
if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
/* XXX this is a work-around for an apparent bug in LLVM 2.7.
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
- lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
- }
- else {
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- }
- }
- else {
- assert(lod);
- if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
- assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
- lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
- &lod_fpart);
- lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
+ ilevel0 = i32t_zero;
}
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ assert(lod_ipart);
+ assert(lod_fpart);
+ lp_build_linear_mip_levels(bld, unit,
+ lod_ipart, &lod_fpart,
+ &ilevel0, &ilevel1);
+ break;
}
- /* compute image size(s) of source mipmap level(s) */
- lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
- ilevel0, ilevel1,
- row_stride_array, img_stride_array,
- &width0_vec, &width1_vec,
- &height0_vec, &height1_vec,
- &depth0_vec, &depth1_vec,
- &row_stride0_vec, &row_stride1_vec,
- &img_stride0_vec, &img_stride1_vec);
-
/*
- * Get pointer(s) to image data for mipmap level(s).
+ * Get/interpolate texture colors.
*/
- data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
+
+ for (chan = 0; chan < 4; ++chan) {
+ texels[chan] = lp_build_alloca(builder, bld->texel_bld.vec_type, "");
+ lp_build_name(texels[chan], "sampler%u_texel_%c_var", unit, "xyzw"[chan]);
}
- /*
- * Get/interpolate texture colors.
- */
if (min_filter == mag_filter) {
/* no need to distinquish between minification and magnification */
lp_build_sample_mipmap(bld, unit,
- min_filter, mip_filter, s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ texels);
}
else {
/* Emit conditional to choose min image filter or mag image filter
- * depending on the lod being >0 or <= 0, respectively.
+ * depending on the lod being > 0 or <= 0, respectively.
*/
- struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
LLVMValueRef minify;
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
-
- /* minify = lod > 0.0 */
- minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
- lod, float_bld->zero, "");
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
- lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
+ lp_build_if(&if_ctx, builder, minify);
{
/* Use the minification filter */
lp_build_sample_mipmap(bld, unit,
min_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ texels);
}
lp_build_else(&if_ctx);
{
/* Use the magnification filter */
lp_build_sample_mipmap(bld, unit,
- mag_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ mag_filter, PIPE_TEX_MIPFILTER_NONE,
+ s, t, r,
+ i32t_zero, NULL, NULL,
+ texels);
}
lp_build_endif(&if_ctx);
+ }
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
+ for (chan = 0; chan < 4; ++chan) {
+ colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
+ lp_build_name(colors_out[chan], "sampler%u_texel_%c", unit, "xyzw"[chan]);
}
}
@@ -1163,12 +1160,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef texel_out[4])
{
+ unsigned dims = texture_dims(static_state->target);
struct lp_build_sample_context bld;
- LLVMValueRef width, width_vec;
- LLVMValueRef height, height_vec;
- LLVMValueRef depth, depth_vec;
- LLVMValueRef row_stride_array, img_stride_array;
- LLVMValueRef data_array;
+ LLVMTypeRef i32t = LLVMInt32Type();
+
LLVMValueRef s;
LLVMValueRef t;
LLVMValueRef r;
@@ -1187,12 +1182,15 @@ lp_build_sample_soa(LLVMBuilderRef builder,
bld.static_state = static_state;
bld.dynamic_state = dynamic_state;
bld.format_desc = util_format_description(static_state->format);
+ bld.dims = dims;
bld.float_type = lp_type_float(32);
bld.int_type = lp_type_int(32);
bld.coord_type = type;
- bld.uint_coord_type = lp_uint_type(type);
bld.int_coord_type = lp_int_type(type);
+ bld.float_size_type = lp_type_float(32);
+ bld.float_size_type.length = dims > 1 ? 4 : 1;
+ bld.int_size_type = lp_int_type(bld.float_size_type);
bld.texel_type = type;
float_vec_type = lp_type_float_vec(32);
@@ -1201,27 +1199,40 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type);
lp_build_context_init(&bld.int_bld, builder, bld.int_type);
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
- lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+ lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type);
+ lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type);
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
/* Get the dynamic state */
- width = dynamic_state->width(dynamic_state, builder, unit);
- height = dynamic_state->height(dynamic_state, builder, unit);
- depth = dynamic_state->depth(dynamic_state, builder, unit);
- row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
- img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
- data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
+ bld.width = dynamic_state->width(dynamic_state, builder, unit);
+ bld.height = dynamic_state->height(dynamic_state, builder, unit);
+ bld.depth = dynamic_state->depth(dynamic_state, builder, unit);
+ bld.row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+ bld.img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
+ bld.data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
/* Note that data_array is an array[level] of pointers to texture images */
s = coords[0];
t = coords[1];
r = coords[2];
- /* width, height, depth as uint vectors */
- width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
- height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
- depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
+ /* width, height, depth as single int vector */
+ if (dims <= 1) {
+ bld.int_size = bld.width;
+ }
+ else {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef,
+ bld.width, LLVMConstInt(i32t, 0, 0), "");
+ if (dims >= 2) {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
+ bld.height, LLVMConstInt(i32t, 1, 0), "");
+ if (dims >= 3) {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
+ bld.depth, LLVMConstInt(i32t, 2, 0), "");
+ }
+ }
+ }
if (0) {
/* For debug: no-op texture sampling */
@@ -1233,10 +1244,7 @@ lp_build_sample_soa(LLVMBuilderRef builder,
/* do sampling/filtering with fixed pt arithmetic */
lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy,
lod_bias, explicit_lod,
- width, height, depth,
- width_vec, height_vec, depth_vec,
- row_stride_array, img_stride_array,
- data_array, texel_out);
+ texel_out);
}
else {
@@ -1254,10 +1262,6 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
lod_bias, explicit_lod,
- width, height, depth,
- width_vec, height_vec, depth_vec,
- row_stride_array, img_stride_array,
- data_array,
texel_out);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 2e9e8386de..4685a90e41 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -101,6 +101,83 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
/**
+ * Combined extract and broadcast (or a mere shuffle when the two types match)
+ *
+ * Selects one element of a source value and replicates it into every element
+ * of the destination type.
+ *
+ * - src_type / dst_type: types of the source and of the result; they must
+ *   agree on floating-ness and element width, only the length may differ.
+ * - vector: the source value, of type src_type (a scalar when
+ *   src_type.length == 1).
+ * - index: 32-bit integer element index; it is not used when the source is
+ *   a scalar.
+ *
+ * Returns a value of type dst_type (a scalar when dst_type.length == 1).
+ */
+LLVMValueRef
+lp_build_extract_broadcast(LLVMBuilderRef builder,
+                           struct lp_type src_type,
+                           struct lp_type dst_type,
+                           LLVMValueRef vector,
+                           LLVMValueRef index)
+{
+   LLVMTypeRef i32t = LLVMInt32Type();
+   LLVMValueRef res;
+
+   assert(src_type.floating == dst_type.floating);
+   assert(src_type.width == dst_type.width);
+
+   assert(lp_check_value(src_type, vector));
+   assert(LLVMTypeOf(index) == i32t);
+
+   if (src_type.length == 1) {
+      if (dst_type.length == 1) {
+         /*
+          * Trivial scalar -> scalar.
+          */
+
+         res = vector;
+      }
+      else {
+         /*
+          * Broadcast scalar -> vector.
+          */
+
+         res = lp_build_broadcast(builder,
+                                  lp_build_vec_type(dst_type),
+                                  vector);
+      }
+   }
+   else {
+      if (dst_type.length == src_type.length) {
+         /*
+          * Special shuffle of the same size: a shuffle mask with every
+          * lane set to index replicates the selected element.
+          */
+
+         LLVMValueRef shuffle;
+         shuffle = lp_build_broadcast(builder,
+                                      LLVMVectorType(i32t, dst_type.length),
+                                      index);
+         res = LLVMBuildShuffleVector(builder, vector,
+                                      LLVMGetUndef(lp_build_vec_type(dst_type)),
+                                      shuffle, "");
+      }
+      else {
+         LLVMValueRef scalar;
+         scalar = LLVMBuildExtractElement(builder, vector, index, "");
+         if (dst_type.length == 1) {
+            /*
+             * Trivial extract scalar from vector.
+             */
+
+            res = scalar;
+         }
+         else {
+            /*
+             * General case of different sized vectors: broadcast the
+             * extracted element, not the whole source vector.
+             */
+
+            res = lp_build_broadcast(builder,
+                                     lp_build_vec_type(dst_type),
+                                     scalar);
+         }
+      }
+   }
+
+   return res;
+}
+
+
+/**
* Swizzle one channel into all other three channels.
*/
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index f9b6a5e725..fdea8442ae 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -55,6 +55,14 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar);
+/**
+ * Extract the element selected by index from vector (of type src_type) and
+ * replicate it across a value of type dst_type.
+ * Defined in lp_bld_swizzle.c.
+ */
+LLVMValueRef
+lp_build_extract_broadcast(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef vector,
+ LLVMValueRef index);
+
+
/**
* Broadcast one channel of a vector composed of arrays of XYZW structures into
* all four channel.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 97318b3456..a4d3b750c3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -36,6 +36,9 @@
#define LP_BLD_TGSI_H
#include "gallivm/lp_bld.h"
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
struct tgsi_token;
@@ -55,6 +58,75 @@ enum lp_build_tex_modifier {
/**
+ * Describe a channel of a register.
+ *
+ * The value can be a:
+ * - immediate value (i.e. derived from an IMM register)
+ * - CONST[n].x/y/z/w
+ * - IN[n].x/y/z/w
+ * - undetermined (when .file == TGSI_FILE_NULL)
+ *
+ * This is one of the analysis results, and is used to describe
+ * the output color in terms of inputs.
+ */
+struct lp_tgsi_channel_info
+{
+ unsigned file:4; /* TGSI_FILE_* */
+ unsigned swizzle:3; /* PIPE_SWIZZLE_x; source channel, set for non-immediate files */
+ union {
+ uint32_t index; /* register index, for files other than TGSI_FILE_IMMEDIATE */
+ float value; /* for TGSI_FILE_IMMEDIATE */
+ } u;
+};
+
+
+/**
+ * Describe a texture sampler interpolator.
+ *
+ * The interpolation is described in terms of regular inputs.
+ *
+ * One of these is filled in per texture opcode encountered while analysing
+ * the shader (see lp_tgsi_info::tex).
+ */
+struct lp_tgsi_texture_info
+{
+ /* Origin of each texture coordinate channel; unread channels are zeroed */
+ struct lp_tgsi_channel_info coord[4];
+ unsigned target:8; /* TGSI_TEXTURE_* */
+ unsigned unit:8; /* Sampler unit */
+ unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
+};
+
+
+/**
+ * Results of the shader analysis done by lp_build_tgsi_info().
+ */
+struct lp_tgsi_info
+{
+ /* Generic shader information, filled in by tgsi_scan_shader() */
+ struct tgsi_shader_info base;
+
+ /*
+ * Whether any of the texture opcodes access a register file other than
+ * TGSI_FILE_INPUT.
+ *
+ * This is also set for texture opcodes with explicit derivatives, and
+ * when there are more texture opcodes than entries in tex[].
+ *
+ * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
+ * benefit.
+ */
+ unsigned indirect_textures:1;
+
+ /*
+ * Texture opcode description. Aimed at detecting and describing direct
+ * texture opcodes.
+ *
+ * Only the first num_texs entries of tex[] are valid.
+ */
+ unsigned num_texs;
+ struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];
+
+ /*
+ * Output description. Aimed at detecting and describing simple blit
+ * shaders.
+ */
+ struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];
+
+ /*
+ * Shortcut pointers into the above (for fragment shaders).
+ *
+ * Entry n points at the output[] row whose semantic is
+ * TGSI_SEMANTIC_COLOR with semantic index n, when the shader has one.
+ */
+ const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
+};
+
+/**
* Sampler code generation interface.
*
* Although texture sampling is a requirement for TGSI translation, it is
@@ -97,6 +169,11 @@ struct lp_build_sampler_aos
void
+lp_build_tgsi_info(const struct tgsi_token *tokens,
+ struct lp_tgsi_info *info);
+
+
+void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
struct lp_type type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
new file mode 100644
index 0000000000..ad514463de
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -0,0 +1,479 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_tgsi.h"
+
+
+/**
+ * Analysis context.
+ *
+ * This is where we store the value of each channel of the IMM/TEMP
+ * register values, as we walk the shader.  The OUT register values are
+ * tracked directly in info->output.
+ */
+struct analysis_context
+{
+ /* Analysis results being built */
+ struct lp_tgsi_info *info;
+
+ /* Immediates seen so far; only the first num_imms rows of imm[] are set */
+ unsigned num_imms;
+ float imm[32][4];
+
+ /* Current known description of each temporary register channel */
+ struct lp_tgsi_channel_info temp[32][4];
+};
+
+
+/**
+ * Describe the specified channel of the src register.
+ *
+ * - ctx: analysis state holding the immediates and temporaries seen so far.
+ * - chan_info: output; always fully written by this function.
+ * - src: source register operand to describe.
+ * - chan: destination channel (0..3) whose source swizzle is looked up.
+ *
+ * The result is left as TGSI_FILE_NULL (undetermined) when the operand uses
+ * indirect addressing, absolute value or negation, or when it refers to a
+ * temporary/immediate beyond what the context tracks.
+ */
+static void
+analyse_src(struct analysis_context *ctx,
+            struct lp_tgsi_channel_info *chan_info,
+            const struct tgsi_src_register *src,
+            unsigned chan)
+{
+   unsigned swizzle;
+
+   /*
+    * Callers may hand us an uninitialized stack variable and later copy it
+    * wholesale, so never leave any field indeterminate.
+    */
+   memset(chan_info, 0, sizeof *chan_info);
+   chan_info->file = TGSI_FILE_NULL;
+
+   if (src->Indirect || src->Absolute || src->Negate) {
+      return;
+   }
+
+   swizzle = tgsi_util_get_src_register_swizzle(src, chan);
+
+   if (src->File == TGSI_FILE_TEMPORARY) {
+      /* Forward whatever is currently known about the temporary */
+      if (src->Index < Elements(ctx->temp)) {
+         *chan_info = ctx->temp[src->Index][swizzle];
+      }
+   } else if (src->File == TGSI_FILE_IMMEDIATE) {
+      assert(src->Index < Elements(ctx->imm));
+      /*
+       * Only report an immediate when its value was actually recorded;
+       * otherwise the channel stays undetermined.
+       */
+      if (src->Index < Elements(ctx->imm)) {
+         chan_info->file = TGSI_FILE_IMMEDIATE;
+         chan_info->u.value = ctx->imm[src->Index][swizzle];
+      }
+   } else {
+      chan_info->file = src->File;
+      chan_info->u.index = src->Index;
+      chan_info->swizzle = swizzle;
+   }
+}
+
+
+/**
+ * Test whether a channel description is an immediate holding exactly the
+ * given value.
+ */
+static boolean
+is_immediate(const struct lp_tgsi_channel_info *chan_info, float value)
+{
+   if (chan_info->file != TGSI_FILE_IMMEDIATE) {
+      return FALSE;
+   }
+
+   return chan_info->u.value == value;
+}
+
+
+/**
+ * Record a texture opcode in the analysis results.
+ *
+ * Fills the next free entry of info->tex[] with the texture target, sampler
+ * unit, modifier and the origin of every coordinate channel the opcode
+ * reads.  info->indirect_textures is set when any read coordinate does not
+ * come straight from a TGSI_FILE_INPUT register, when explicit derivatives
+ * are used, or when info->tex[] is already full.
+ *
+ * - ctx: analysis state.
+ * - inst: the texture instruction.
+ * - modifier: which flavour of texture opcode this is.
+ */
+static void
+analyse_tex(struct analysis_context *ctx,
+            const struct tgsi_full_instruction *inst,
+            enum lp_build_tex_modifier modifier)
+{
+   struct lp_tgsi_info *info = ctx->info;
+   unsigned chan;
+
+   if (info->num_texs < Elements(info->tex)) {
+      struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
+      boolean indirect = FALSE;
+      unsigned readmask = 0;
+
+      tex_info->target = inst->Texture.Texture;
+      switch (inst->Texture.Texture) {
+      case TGSI_TEXTURE_1D:
+         readmask = TGSI_WRITEMASK_X;
+         break;
+      case TGSI_TEXTURE_2D:
+      case TGSI_TEXTURE_RECT:
+         readmask = TGSI_WRITEMASK_XY;
+         break;
+      case TGSI_TEXTURE_SHADOW1D:
+      case TGSI_TEXTURE_SHADOW2D:
+      case TGSI_TEXTURE_SHADOWRECT:
+      case TGSI_TEXTURE_3D:
+      case TGSI_TEXTURE_CUBE:
+         readmask = TGSI_WRITEMASK_XYZ;
+         break;
+      default:
+         assert(0);
+         /*
+          * Unknown target: the opcode is not recorded, so be conservative
+          * and report the shader as having untracked texture accesses.
+          */
+         info->indirect_textures = TRUE;
+         return;
+      }
+
+      /* Remember which kind of texture opcode this was */
+      tex_info->modifier = modifier;
+
+      if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+         /* We don't track explicit derivatives, although we could */
+         indirect = TRUE;
+         tex_info->unit = inst->Src[3].Register.Index;
+      } else {
+         if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED ||
+             modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
+             modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+            /* These opcodes also read the W channel of the coordinate */
+            readmask |= TGSI_WRITEMASK_W;
+         }
+         tex_info->unit = inst->Src[1].Register.Index;
+      }
+
+      for (chan = 0; chan < 4; ++chan) {
+         struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan];
+         if (readmask & (1 << chan)) {
+            analyse_src(ctx, chan_info, &inst->Src[0].Register, chan);
+            if (chan_info->file != TGSI_FILE_INPUT) {
+               indirect = TRUE;
+            }
+         } else {
+            memset(chan_info, 0, sizeof *chan_info);
+         }
+      }
+
+      if (indirect) {
+         info->indirect_textures = TRUE;
+      }
+
+      ++info->num_texs;
+   } else {
+      /* No room left to describe this opcode */
+      info->indirect_textures = TRUE;
+   }
+}
+
+
+/**
+ * Process an instruction, and update the register values accordingly.
+ *
+ * For every destination register of the instruction this:
+ * - records texture opcodes through analyse_tex();
+ * - updates the tracked description of the written TEMP/OUT channels: only
+ *   unpredicated, unsaturated MOVs and MULs by an immediate 0.0 or 1.0
+ *   produce a known value, anything else marks the written channels as
+ *   undetermined.
+ *
+ * After that, any control flow opcode discards everything known about the
+ * temporaries and the outputs.
+ */
+static void
+analyse_instruction(struct analysis_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct lp_tgsi_info *info = ctx->info;
+ struct lp_tgsi_channel_info (*regs)[4];
+ unsigned max_regs;
+ unsigned i;
+ unsigned index;
+ unsigned chan;
+
+ for (i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+ const struct tgsi_dst_register *dst = &inst->Dst[i].Register;
+
+ /*
+ * Get the lp_tgsi_channel_info array corresponding to the destination
+ * register file.
+ *
+ * Address and predicate registers are not tracked; any other file is
+ * unexpected here.
+ */
+
+ if (dst->File == TGSI_FILE_TEMPORARY) {
+ regs = ctx->temp;
+ max_regs = Elements(ctx->temp);
+ } else if (dst->File == TGSI_FILE_OUTPUT) {
+ regs = info->output;
+ max_regs = Elements(info->output);
+ } else if (dst->File == TGSI_FILE_ADDRESS ||
+ dst->File == TGSI_FILE_PREDICATE) {
+ continue;
+ } else {
+ assert(0);
+ continue;
+ }
+
+ /*
+ * Detect direct TEX instructions
+ *
+ * Note this sits inside the destination loop, so it runs once per
+ * TEMP/OUT destination register of the instruction.
+ */
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_TEX:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
+ break;
+ case TGSI_OPCODE_TXD:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
+ break;
+ case TGSI_OPCODE_TXB:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
+ break;
+ case TGSI_OPCODE_TXL:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
+ break;
+ case TGSI_OPCODE_TXP:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Keep track of assignments and writes
+ */
+
+ if (dst->Indirect) {
+ /*
+ * It could be any register index so clear all register indices.
+ */
+
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ for (index = 0; index < max_regs; ++index) {
+ regs[index][chan].file = TGSI_FILE_NULL;
+ }
+ }
+ }
+ } else if (dst->Index < max_regs) {
+ /*
+ * Update this destination register value.
+ */
+
+ struct lp_tgsi_channel_info res[4];
+
+ /* Start from "undetermined"; only recognised opcodes override it */
+ memset(res, 0, sizeof res);
+
+ if (!inst->Instruction.Predicate &&
+ !inst->Instruction.Saturate) {
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
+ analyse_src(ctx, &res[chan],
+ &inst->Src[0].Register, chan);
+ } else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
+ /*
+ * Propagate values across 1.0 and 0.0 multiplications.
+ */
+
+ struct lp_tgsi_channel_info src0;
+ struct lp_tgsi_channel_info src1;
+
+ analyse_src(ctx, &src0, &inst->Src[0].Register, chan);
+ analyse_src(ctx, &src1, &inst->Src[1].Register, chan);
+
+ /* x * 0 is the zero immediate, x * 1 is x */
+ if (is_immediate(&src0, 0.0f)) {
+ res[chan] = src0;
+ } else if (is_immediate(&src1, 0.0f)) {
+ res[chan] = src1;
+ } else if (is_immediate(&src0, 1.0f)) {
+ res[chan] = src1;
+ } else if (is_immediate(&src1, 1.0f)) {
+ res[chan] = src0;
+ }
+ }
+ }
+ }
+ }
+
+ /* Commit the result for the channels this instruction writes */
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ regs[dst->Index][chan] = res[chan];
+ }
+ }
+ }
+ }
+
+ /*
+ * Clear all temporaries information in presence of a control flow opcode.
+ *
+ * The outputs description is cleared as well.
+ */
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_IFC:
+ case TGSI_OPCODE_ELSE:
+ case TGSI_OPCODE_ENDIF:
+ case TGSI_OPCODE_BGNLOOP:
+ case TGSI_OPCODE_BRK:
+ case TGSI_OPCODE_BREAKC:
+ case TGSI_OPCODE_CONT:
+ case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_CALLNZ:
+ case TGSI_OPCODE_CAL:
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ case TGSI_OPCODE_SWITCH:
+ case TGSI_OPCODE_CASE:
+ case TGSI_OPCODE_DEFAULT:
+ case TGSI_OPCODE_ENDSWITCH:
+ case TGSI_OPCODE_RET:
+ case TGSI_OPCODE_END:
+ /* XXX: Are there more cases? */
+ memset(&ctx->temp, 0, sizeof ctx->temp);
+ memset(&info->output, 0, sizeof info->output);
+ /* fall-through */
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Debug helper: print the shader tokens, followed by the description of
+ * every recorded texture opcode and of every output channel whose origin is
+ * known.
+ */
+static INLINE void
+dump_info(const struct tgsi_token *tokens,
+          struct lp_tgsi_info *info)
+{
+   unsigned i;
+   unsigned c;
+
+   tgsi_dump(tokens, 0);
+
+   /* Texture opcodes: one line each, listing the four coordinate sources */
+   for (i = 0; i < info->num_texs; ++i) {
+      const struct lp_tgsi_texture_info *tex = &info->tex[i];
+
+      debug_printf("TEX[%u] =", i);
+      for (c = 0; c < 4; ++c) {
+         const struct lp_tgsi_channel_info *coord = &tex->coord[c];
+
+         if (coord->file == TGSI_FILE_NULL) {
+            debug_printf(" _");
+            continue;
+         }
+
+         debug_printf(" %s[%u].%c",
+                      tgsi_file_names[coord->file],
+                      coord->u.index,
+                      "xyzw01"[coord->swizzle]);
+      }
+      debug_printf(", SAMP[%u], %s\n",
+                   tex->unit,
+                   tgsi_texture_names[tex->target]);
+   }
+
+   /* Outputs: one line per channel with a known origin */
+   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
+      for (c = 0; c < 4; ++c) {
+         const struct lp_tgsi_channel_info *out = &info->output[i][c];
+         const char *file_name;
+
+         if (out->file == TGSI_FILE_NULL) {
+            continue;
+         }
+
+         debug_printf("OUT[%u].%c = ", i, "xyzw"[c]);
+
+         if (out->file == TGSI_FILE_IMMEDIATE) {
+            debug_printf("%f", out->u.value);
+         } else {
+            if (out->file == TGSI_FILE_CONSTANT) {
+               file_name = "CONST";
+            } else if (out->file == TGSI_FILE_INPUT) {
+               file_name = "IN";
+            } else {
+               file_name = "???";
+            }
+            debug_printf("%s[%u].%c",
+                         file_name,
+                         out->u.index,
+                         "xyzw01"[out->swizzle]);
+         }
+
+         debug_printf("\n");
+      }
+   }
+}
+
+
+/**
+ * Detect any direct relationship between the output color and the shader
+ * inputs, constants and immediates.
+ *
+ * Walks the main function body of the shader and fills in info with:
+ * - the generic tgsi_scan_shader() results (info->base);
+ * - the description of the texture opcodes (info->tex, info->num_texs,
+ *   info->indirect_textures);
+ * - the description of every output channel (info->output), and shortcuts
+ *   to the color outputs (info->cbuf).
+ *
+ * - tokens: the TGSI shader to analyse.
+ * - info: output; cleared and then fully written by this function.
+ */
+void
+lp_build_tgsi_info(const struct tgsi_token *tokens,
+                   struct lp_tgsi_info *info)
+{
+   /*
+    * Placeholder for color buffers the shader does not write.  It must have
+    * static storage duration since info->cbuf[] keeps pointing at it after
+    * we return; being static it is also zero-initialized, i.e. every
+    * channel reads as TGSI_FILE_NULL.
+    */
+   static const struct lp_tgsi_channel_info null_output[4];
+   struct tgsi_parse_context parse;
+   struct analysis_context ctx;
+   unsigned index;
+   unsigned chan;
+
+   memset(info, 0, sizeof *info);
+
+   tgsi_scan_shader(tokens, &info->base);
+
+   memset(&ctx, 0, sizeof ctx);
+   ctx.info = info;
+
+   tgsi_parse_init(&parse, tokens);
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *inst =
+                  &parse.FullToken.FullInstruction;
+
+            if (inst->Instruction.Opcode == TGSI_OPCODE_END ||
+                inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+               /* We reached the end of main function body. */
+               goto finished;
+            }
+
+            analyse_instruction(&ctx, inst);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         {
+            const unsigned size =
+                  parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+            assert(size <= 4);
+            /* Immediates beyond the table capacity are not recorded */
+            if (ctx.num_imms < Elements(ctx.imm)) {
+               /* Never write past the 4 channels of an imm[] row */
+               for (chan = 0; chan < size && chan < 4; ++chan) {
+                  ctx.imm[ctx.num_imms][chan] =
+                        parse.FullToken.FullImmediate.u[chan].Float;
+               }
+               ++ctx.num_imms;
+            }
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         break;
+
+      default:
+         assert(0);
+         break;
+      }
+   }
+finished:
+
+   tgsi_parse_free(&parse);
+
+
+   /*
+    * Link the output color values.
+    */
+
+   for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) {
+      info->cbuf[index] = null_output;
+   }
+
+   for (index = 0; index < info->base.num_outputs; ++index) {
+      unsigned semantic_name = info->base.output_semantic_name[index];
+      unsigned semantic_index = info->base.output_semantic_index[index];
+      if (semantic_name == TGSI_SEMANTIC_COLOR &&
+          semantic_index < PIPE_MAX_COLOR_BUFS) {
+         info->cbuf[semantic_index] = info->output[index];
+      }
+   }
+
+   if (gallivm_debug & GALLIVM_DEBUG_TGSI) {
+      dump_info(tokens, info);
+   }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 441aebae29..3c318cc8c8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
}
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
for (i = 0; i < num_coords; i++) {
- ddx[i] = emit_fetch( bld, inst, 1, i );
- ddy[i] = emit_fetch( bld, inst, 2, i );
+ LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
+ LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
+ ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
+ ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
}
unit = inst->Src[3].Register.Index;
} else {
for (i = 0; i < num_coords; i++) {
- ddx[i] = lp_build_ddx( &bld->base, coords[i] );
- ddy[i] = lp_build_ddy( &bld->base, coords[i] );
+ ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
+ ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
}
unit = inst->Src[1].Register.Index;
}
for (i = num_coords; i < 3; i++) {
- ddx[i] = bld->base.undef;
- ddy[i] = bld->base.undef;
+ ddx[i] = LLVMGetUndef(bld->base.elem_type);
+ ddy[i] = LLVMGetUndef(bld->base.elem_type);
}
bld->sampler->emit_fetch_texel(bld->sampler,
@@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
texel);
}
+/**
+ * Look ahead from instruction index 'pc' to judge whether the shader is
+ * about to finish without doing anything notable first.
+ *
+ * Inspects up to five instructions starting at 'pc' (inclusive):
+ * - returns TRUE if the instruction list runs out or TGSI_OPCODE_END is
+ *   found inside that window;
+ * - returns FALSE as soon as a texture opcode, a subroutine call, or an
+ *   IF/IFC/BGNLOOP/SWITCH opener is found first;
+ * - returns TRUE if all five instructions are scanned without a match.
+ *
+ * The kill emitters only call lp_build_mask_check() when this returns FALSE.
+ */
+static boolean
+near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
+ int pc)
+{
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ unsigned opcode;
+
+ /* Ran past the last recorded instruction. */
+ if (pc + i >= bld->info->num_instructions)
+ return TRUE;
+
+ opcode = bld->instructions[pc + i].Instruction.Opcode;
+
+ if (opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ /* Texture fetches, calls and control-flow openers within the window. */
+ if (opcode == TGSI_OPCODE_TEX ||
+ opcode == TGSI_OPCODE_TXP ||
+ opcode == TGSI_OPCODE_TXD ||
+ opcode == TGSI_OPCODE_TXB ||
+ opcode == TGSI_OPCODE_TXL ||
+ opcode == TGSI_OPCODE_TXF ||
+ opcode == TGSI_OPCODE_TXQ ||
+ opcode == TGSI_OPCODE_CAL ||
+ opcode == TGSI_OPCODE_CALLNZ ||
+ opcode == TGSI_OPCODE_IF ||
+ opcode == TGSI_OPCODE_IFC ||
+ opcode == TGSI_OPCODE_BGNLOOP ||
+ opcode == TGSI_OPCODE_SWITCH)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
/**
* Kill fragment if any of the src register values are negative.
@@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
static void
emit_kil(
struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst )
+ const struct tgsi_full_instruction *inst,
+ int pc)
{
const struct tgsi_full_src_register *reg = &inst->Src[0];
LLVMValueRef terms[NUM_CHANNELS];
@@ -959,8 +1001,12 @@ emit_kil(
}
}
- if(mask)
+ if(mask) {
lp_build_mask_update(bld->mask, mask);
+
+ if (!near_end_of_shader(bld, pc))
+ lp_build_mask_check(bld->mask);
+ }
}
@@ -972,7 +1018,8 @@ emit_kil(
*/
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst)
+ const struct tgsi_full_instruction *inst,
+ int pc)
{
LLVMValueRef mask;
@@ -983,10 +1030,14 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
}
else {
- mask = bld->base.zero;
+ LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
+ mask = zero;
}
lp_build_mask_update(bld->mask, mask);
+
+ if (!near_end_of_shader(bld, pc))
+ lp_build_mask_check(bld->mask);
}
static void
@@ -1535,12 +1586,12 @@ emit_instruction(
case TGSI_OPCODE_KILP:
/* predicated kill */
- emit_kilp( bld, inst );
+ emit_kilp( bld, inst, (*pc)-1 );
break;
case TGSI_OPCODE_KIL:
/* conditional kill */
- emit_kil( bld, inst );
+ emit_kil( bld, inst, (*pc)-1 );
break;
case TGSI_OPCODE_PK2H:
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index b4d8107372..a6eb403962 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -222,7 +222,7 @@ pb_cache_buffer_vtbl = {
};
-static INLINE boolean
+static INLINE int
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
@@ -230,26 +230,26 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
void *map;
if(buf->base.base.size < size)
- return FALSE;
+ return 0;
/* be lenient with size */
if(buf->base.base.size >= 2*size)
- return FALSE;
+ return 0;
if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
- return FALSE;
+ return 0;
if(!pb_check_usage(desc->usage, buf->base.base.usage))
- return FALSE;
+ return 0;
map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
if (!map) {
- return FALSE;
+ return -1;
}
pb_unmap(buf->buffer);
- return TRUE;
+ return 1;
}
@@ -263,7 +263,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
struct pb_cache_buffer *curr_buf;
struct list_head *curr, *next;
int64_t now;
-
+ int ret = 0;
+
pipe_mutex_lock(mgr->mutex);
buf = NULL;
@@ -274,25 +275,30 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
now = os_time_get();
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
- if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
- buf = curr_buf;
+ /* Parenthesise the assignment so ret keeps the -1/0/1 result itself,
+ * not the outcome of the '> 0' comparison. */
+ if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc)) > 0)
+ buf = curr_buf;
else if(os_time_timeout(curr_buf->start, curr_buf->end, now))
- _pb_cache_buffer_destroy(curr_buf);
+ _pb_cache_buffer_destroy(curr_buf);
else
/* This buffer (and all hereafter) are still hot in cache */
break;
+ if (ret == -1)
+ break;
curr = next;
next = curr->next;
}
/* keep searching in the hot buffers */
- if(!buf) {
+ if(!buf && ret != -1) {
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
- if(pb_cache_is_buffer_compat(curr_buf, size, desc)) {
+ ret = pb_cache_is_buffer_compat(curr_buf, size, desc);
+ if (ret > 0) {
buf = curr_buf;
break;
}
+ if (ret == -1)
+ break;
/* no need to check the timeout here */
curr = next;
next = curr->next;
@@ -301,6 +307,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
if(buf) {
LIST_DEL(&buf->head);
+ --mgr->numDelayed;
pipe_mutex_unlock(mgr->mutex);
/* Increase refcount */
pipe_reference_init(&buf->base.base.reference, 1);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index 65d5ce795b..fbde1d191a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -58,7 +58,6 @@
#include <unistd.h>
#include <sys/mman.h>
-#include "os/os_thread.h"
#include "util/u_mm.h"
#define EXEC_HEAP_SIZE (10*1024*1024)
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index 036c1ee48a..34bfa527db 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -23,26 +23,13 @@
#include "cell/ppu/cell_public.h"
#endif
+
static INLINE struct pipe_screen *
-sw_screen_create(struct sw_winsys *winsys)
+sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
{
- const char *default_driver;
- const char *driver;
struct pipe_screen *screen = NULL;
#if defined(GALLIUM_CELL)
- default_driver = "cell";
-#elif defined(GALLIUM_LLVMPIPE)
- default_driver = "llvmpipe";
-#elif defined(GALLIUM_SOFTPIPE)
- default_driver = "softpipe";
-#else
- default_driver = "";
-#endif
-
- driver = debug_get_option("GALLIUM_DRIVER", default_driver);
-
-#if defined(GALLIUM_CELL)
if (screen == NULL && strcmp(driver, "cell") == 0)
screen = cell_create_screen(winsys);
#endif
@@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys)
return screen;
}
+
+static INLINE struct pipe_screen *
+sw_screen_create(struct sw_winsys *winsys)
+{
+ const char *default_driver;
+ const char *driver;
+
+#if defined(GALLIUM_CELL)
+ default_driver = "cell";
+#elif defined(GALLIUM_LLVMPIPE)
+ default_driver = "llvmpipe";
+#elif defined(GALLIUM_SOFTPIPE)
+ default_driver = "softpipe";
+#else
+ default_driver = "";
+#endif
+
+ driver = debug_get_option("GALLIUM_DRIVER", default_driver);
+ return sw_screen_create_named(winsys, driver);
+}
+
+
#endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
index 0b4e740403..e4effa713e 100644
--- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
@@ -13,22 +13,28 @@ static INLINE struct pipe_screen *
sw_screen_wrap(struct pipe_screen *screen)
{
struct sw_winsys *sws;
- struct pipe_screen *sw_screen;
+ struct pipe_screen *sw_screen = NULL;
+ const char *driver;
- sws = wrapper_sw_winsys_warp_pipe_screen(screen);
+ driver = debug_get_option("GALLIUM_DRIVER", "native");
+ if (strcmp(driver, "native") == 0)
+ return screen;
+
+ sws = wrapper_sw_winsys_wrap_pipe_screen(screen);
if (!sws)
goto err;
- sw_screen = sw_screen_create(sws);
- if (sw_screen == screen)
+ sw_screen = sw_screen_create_named(sws, driver);
+
+ if (!sw_screen)
goto err_winsys;
return sw_screen;
err_winsys:
- sws->destroy(sws);
+ return wrapper_sw_winsys_dewrap_pipe_screen(sws);
err:
- return screen;
+ return screen;
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index f71ffb7030..77bde86684 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -90,7 +90,8 @@ static const char *processor_type_names[] =
"GEOM"
};
-static const char *file_names[TGSI_FILE_COUNT] =
+const char *
+tgsi_file_names[TGSI_FILE_COUNT] =
{
"NULL",
"CONST",
@@ -125,7 +126,8 @@ static const char *semantic_names[] =
"FACE",
"EDGEFLAG",
"PRIM_ID",
- "INSTANCEID"
+ "INSTANCEID",
+ "STENCIL"
};
static const char *immediate_type_names[] =
@@ -135,7 +137,8 @@ static const char *immediate_type_names[] =
"INT32"
};
-static const char *swizzle_names[] =
+const char *
+tgsi_swizzle_names[] =
{
"x",
"y",
@@ -143,7 +146,8 @@ static const char *swizzle_names[] =
"w"
};
-static const char *texture_names[] =
+const char *
+tgsi_texture_names[] =
{
"UNKNOWN",
"1D",
@@ -201,15 +205,15 @@ _dump_register_src(
struct dump_ctx *ctx,
const struct tgsi_full_src_register *src )
{
- ENM(src->Register.File, file_names);
+ ENM(src->Register.File, tgsi_file_names);
if (src->Register.Dimension) {
if (src->Dimension.Indirect) {
CHR( '[' );
- ENM( src->DimIndirect.File, file_names );
+ ENM( src->DimIndirect.File, tgsi_file_names );
CHR( '[' );
SID( src->DimIndirect.Index );
TXT( "]." );
- ENM( src->DimIndirect.SwizzleX, swizzle_names );
+ ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names );
if (src->Dimension.Index != 0) {
if (src->Dimension.Index > 0)
CHR( '+' );
@@ -224,11 +228,11 @@ _dump_register_src(
}
if (src->Register.Indirect) {
CHR( '[' );
- ENM( src->Indirect.File, file_names );
+ ENM( src->Indirect.File, tgsi_file_names );
CHR( '[' );
SID( src->Indirect.Index );
TXT( "]." );
- ENM( src->Indirect.SwizzleX, swizzle_names );
+ ENM( src->Indirect.SwizzleX, tgsi_swizzle_names );
if (src->Register.Index != 0) {
if (src->Register.Index > 0)
CHR( '+' );
@@ -248,15 +252,15 @@ _dump_register_dst(
struct dump_ctx *ctx,
const struct tgsi_full_dst_register *dst )
{
- ENM(dst->Register.File, file_names);
+ ENM(dst->Register.File, tgsi_file_names);
if (dst->Register.Dimension) {
if (dst->Dimension.Indirect) {
CHR( '[' );
- ENM( dst->DimIndirect.File, file_names );
+ ENM( dst->DimIndirect.File, tgsi_file_names );
CHR( '[' );
SID( dst->DimIndirect.Index );
TXT( "]." );
- ENM( dst->DimIndirect.SwizzleX, swizzle_names );
+ ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names );
if (dst->Dimension.Index != 0) {
if (dst->Dimension.Index > 0)
CHR( '+' );
@@ -271,11 +275,11 @@ _dump_register_dst(
}
if (dst->Register.Indirect) {
CHR( '[' );
- ENM( dst->Indirect.File, file_names );
+ ENM( dst->Indirect.File, tgsi_file_names );
CHR( '[' );
SID( dst->Indirect.Index );
TXT( "]." );
- ENM( dst->Indirect.SwizzleX, swizzle_names );
+ ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names );
if (dst->Register.Index != 0) {
if (dst->Register.Index > 0)
CHR( '+' );
@@ -351,7 +355,7 @@ iter_declaration(
TXT( "DCL " );
- ENM(decl->Declaration.File, file_names);
+ ENM(decl->Declaration.File, tgsi_file_names);
/* all geometry shader inputs are two dimensional */
if (decl->Declaration.File == TGSI_FILE_INPUT &&
@@ -585,10 +589,10 @@ iter_instruction(
inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z ||
inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) {
CHR( '.' );
- ENM( inst->Predicate.SwizzleX, swizzle_names );
- ENM( inst->Predicate.SwizzleY, swizzle_names );
- ENM( inst->Predicate.SwizzleZ, swizzle_names );
- ENM( inst->Predicate.SwizzleW, swizzle_names );
+ ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names );
+ ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names );
+ ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names );
+ ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names );
}
TXT( ") " );
@@ -641,10 +645,10 @@ iter_instruction(
src->Register.SwizzleZ != TGSI_SWIZZLE_Z ||
src->Register.SwizzleW != TGSI_SWIZZLE_W) {
CHR( '.' );
- ENM( src->Register.SwizzleX, swizzle_names );
- ENM( src->Register.SwizzleY, swizzle_names );
- ENM( src->Register.SwizzleZ, swizzle_names );
- ENM( src->Register.SwizzleW, swizzle_names );
+ ENM( src->Register.SwizzleX, tgsi_swizzle_names );
+ ENM( src->Register.SwizzleY, tgsi_swizzle_names );
+ ENM( src->Register.SwizzleZ, tgsi_swizzle_names );
+ ENM( src->Register.SwizzleW, tgsi_swizzle_names );
}
if (src->Register.Absolute)
@@ -655,7 +659,7 @@ iter_instruction(
if (inst->Instruction.Texture) {
TXT( ", " );
- ENM( inst->Texture.Texture, texture_names );
+ ENM( inst->Texture.Texture, tgsi_texture_names );
}
switch (inst->Instruction.Opcode) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index dd78b36100..fc0429ad8d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -35,6 +35,15 @@
extern "C" {
#endif
+extern const char *
+tgsi_file_names[TGSI_FILE_COUNT];
+
+extern const char *
+tgsi_swizzle_names[];
+
+extern const char *
+tgsi_texture_names[];
+
void
tgsi_dump_str(
const struct tgsi_token *tokens,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 90198a4f60..6585da3e83 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
+ info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid;
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap;
info->num_inputs++;
}
@@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
/* extra info for special outputs */
if (procType == TGSI_PROCESSOR_FRAGMENT &&
- fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
- info->writes_z = TRUE;
- }
+ fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
+ info->writes_z = TRUE;
+ if (procType == TGSI_PROCESSOR_FRAGMENT &&
+ fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL)
+ info->writes_stencil = TRUE;
if (procType == TGSI_PROCESSOR_VERTEX &&
fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) {
info->writes_edgeflag = TRUE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index f8aa90cf06..104097fbc0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -45,6 +45,7 @@ struct tgsi_shader_info
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
+ ubyte input_centroid[PIPE_MAX_SHADER_INPUTS];
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
@@ -60,6 +61,7 @@ struct tgsi_shader_info
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
boolean writes_z; /**< does fragment shader write Z value? */
+ boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */
diff --git a/src/gallium/auxiliary/util/u_atomic.h b/src/gallium/auxiliary/util/u_atomic.h
index a156823390..8434491a42 100644
--- a/src/gallium/auxiliary/util/u_atomic.h
+++ b/src/gallium/auxiliary/util/u_atomic.h
@@ -29,6 +29,8 @@
#define PIPE_ATOMIC_ASM_MSVC_X86
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86))
#define PIPE_ATOMIC_ASM_GCC_X86
+#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64))
+#define PIPE_ATOMIC_ASM_GCC_X86_64
#elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401)
#define PIPE_ATOMIC_GCC_INTRINSIC
#else
@@ -36,6 +38,51 @@
#endif
+#if defined(PIPE_ATOMIC_ASM_GCC_X86_64)
+#define PIPE_ATOMIC "GCC x86_64 assembly"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define p_atomic_set(_v, _i) (*(_v) = (_i))
+#define p_atomic_read(_v) (*(_v))
+
+static INLINE boolean
+p_atomic_dec_zero(int32_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c)
+ ::"memory");
+
+ return c != 0;
+}
+
+static INLINE void
+p_atomic_inc(int32_t *v)
+{
+ __asm__ __volatile__("lock; incl %0":"+m"(*v));
+}
+
+static INLINE void
+p_atomic_dec(int32_t *v)
+{
+ __asm__ __volatile__("lock; decl %0":"+m"(*v));
+}
+
+static INLINE int32_t
+p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new)
+{
+ return __sync_val_compare_and_swap(v, old, _new);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */
+
#if defined(PIPE_ATOMIC_ASM_GCC_X86)
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 016e73c4a1..1fbd83841c 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___,
PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs
PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs
PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs
+PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs
+PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs
PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs
PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs
PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs
+PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs
# YUV formats
# http://www.fourcc.org/yuv.php#UYVY
diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c
index 792d69c214..80081e22f7 100644
--- a/src/gallium/auxiliary/util/u_format_zs.c
+++ b/src/gallium/auxiliary/util/u_format_zs.c
@@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d
}
}
+
+void
+util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+
+}
+
+void
+util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h
index 650db4b95f..1604cc3eee 100644
--- a/src/gallium/auxiliary/util/u_format_zs.h
+++ b/src/gallium/auxiliary/util/u_format_zs.h
@@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned
void
util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+void
+util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+void
+util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
#endif /* U_FORMAT_ZS_H_ */
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 69a7681494..37294b7203 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val)
#endif
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237309504880
+#endif
+
+
#if defined(_MSC_VER)
#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index c90b0fdbc3..5378f2d782 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -434,8 +434,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
/* Integer versions of util_pack_z and util_pack_z_stencil - useful for
* constructing clear masks.
*/
-static INLINE uint
-util_pack_uint_z(enum pipe_format format, unsigned z)
+static INLINE uint32_t
+util_pack_mask_z(enum pipe_format format, uint32_t z)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
@@ -452,29 +452,32 @@ util_pack_uint_z(enum pipe_format format, unsigned z)
case PIPE_FORMAT_S8_USCALED:
return 0;
default:
- debug_print_format("gallium: unhandled format in util_pack_z()", format);
+ debug_print_format("gallium: unhandled format in util_pack_mask_z()", format);
assert(0);
return 0;
}
}
-static INLINE uint
-util_pack_uint_z_stencil(enum pipe_format format, double z, uint s)
+static INLINE uint32_t
+util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
{
- unsigned packed = util_pack_uint_z(format, z);
-
- s &= 0xff;
+ uint32_t packed = util_pack_mask_z(format, z);
switch (format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return packed | (s << 24);
+ packed |= (uint32_t)s << 24;
+ break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- return packed | s;
+ packed |= s;
+ break;
case PIPE_FORMAT_S8_USCALED:
- return packed | s;
+ packed |= s;
+ break;
default:
- return packed;
+ break;
}
+
+ return packed;
}
@@ -482,9 +485,11 @@ util_pack_uint_z_stencil(enum pipe_format format, double z, uint s)
/**
* Note: it's assumed that z is in [0,1]
*/
-static INLINE uint
+static INLINE uint32_t
util_pack_z(enum pipe_format format, double z)
{
+ union fi fui;
+
if (z == 0.0)
return 0;
@@ -492,24 +497,25 @@ util_pack_z(enum pipe_format format, double z)
case PIPE_FORMAT_Z16_UNORM:
if (z == 1.0)
return 0xffff;
- return (uint) (z * 0xffff);
+ return (uint32_t) (z * 0xffff);
case PIPE_FORMAT_Z32_UNORM:
/* special-case to avoid overflow */
if (z == 1.0)
return 0xffffffff;
- return (uint) (z * 0xffffffff);
+ return (uint32_t) (z * 0xffffffff);
case PIPE_FORMAT_Z32_FLOAT:
- return (uint)z;
+ fui.f = (float)z;
+ return fui.ui;
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
if (z == 1.0)
return 0xffffff;
- return (uint) (z * 0xffffff);
+ return (uint32_t) (z * 0xffffff);
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
if (z == 1.0)
return 0xffffff00;
- return ((uint) (z * 0xffffff)) << 8;
+ return ((uint32_t) (z * 0xffffff)) << 8;
case PIPE_FORMAT_S8_USCALED:
/* this case can get it via util_pack_z_stencil() */
return 0;
@@ -525,14 +531,14 @@ util_pack_z(enum pipe_format format, double z)
* Pack Z and/or stencil values into a 32-bit value described by format.
* Note: it's assumed that z is in [0,1] and s in [0,255]
*/
-static INLINE uint
-util_pack_z_stencil(enum pipe_format format, double z, uint s)
+static INLINE uint32_t
+util_pack_z_stencil(enum pipe_format format, double z, uint8_t s)
{
- unsigned packed = util_pack_z(format, z);
+ uint32_t packed = util_pack_z(format, z);
switch (format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- packed |= s << 24;
+ packed |= (uint32_t)s << 24;
break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
packed |= s;
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index 87959ab0aa..1df6c87267 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a)
#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+union m128i {
+ __m128i m;
+ ubyte ub[16];
+ ushort us[8];
+ uint ui[4];
+};
+
+static INLINE void u_print_epi8(const char *name, __m128i r)
+{
+ union { __m128i m; ubyte ub[16]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x\n",
+ name,
+ u.ub[0], u.ub[1], u.ub[2], u.ub[3],
+ u.ub[4], u.ub[5], u.ub[6], u.ub[7],
+ u.ub[8], u.ub[9], u.ub[10], u.ub[11],
+ u.ub[12], u.ub[13], u.ub[14], u.ub[15]);
+}
+
+static INLINE void u_print_epi16(const char *name, __m128i r)
+{
+ union { __m128i m; ushort us[8]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x\n",
+ name,
+ u.us[0], u.us[1], u.us[2], u.us[3],
+ u.us[4], u.us[5], u.us[6], u.us[7]);
+}
+
+static INLINE void u_print_epi32(const char *name, __m128i r)
+{
+ union { __m128i m; uint ui[4]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%08x/"
+ "%08x/"
+ "%08x/"
+ "%08x\n",
+ name,
+ u.ui[0], u.ui[1], u.ui[2], u.ui[3]);
+}
+
+static INLINE void u_print_ps(const char *name, __m128 r)
+{
+ union { __m128 m; float f[4]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%f/"
+ "%f/"
+ "%f/"
+ "%f\n",
+ name,
+ u.f[0], u.f[1], u.f[2], u.f[3]);
+}
+
+
+#define U_DUMP_EPI32(a) u_print_epi32(#a, a)
+#define U_DUMP_EPI16(a) u_print_epi16(#a, a)
+#define U_DUMP_EPI8(a) u_print_epi8(#a, a)
+#define U_DUMP_PS(a) u_print_ps(#a, a)
+
+
#if defined(PIPE_ARCH_SSSE3)
@@ -78,8 +168,6 @@ _mm_castps_si128(__m128 a)
#else /* !PIPE_ARCH_SSSE3 */
-#include <emmintrin.h>
-
/**
* Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
* where -mssse3 is not supported/enabled.
@@ -100,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
#endif /* !PIPE_ARCH_SSSE3 */
-#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
+
+
+/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of
+ * _mm_mul_epu32().
+ *
+ * I suspect this works fine for us because one of our operands is
+ * always positive, but not sure that this can be used for general
+ * signed integer multiplication.
+ *
+ * NOTE(review): only the low 32 bits of each 64-bit product are kept
+ * below; in two's complement those bits do not depend on operand
+ * signedness, so this presumably holds for signed inputs too -- confirm.
+ *
+ * This seems close enough to the speed of SSE4 and the real
+ * _mm_mullo_epi32() intrinsic as to not justify adding an sse4
+ * dependency at this point.
+ */
+static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
+{
+ __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */
+ __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */
+ __m128i ba = _mm_mul_epu32(b, a); /* multiply dwords 0, 2 */
+ __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */
+
+ /* Interleave the results, either with shuffles or (slightly
+ * faster) direct bit operations:
+ */
+#if 0
+ __m128i ba8 = _mm_shuffle_epi32(ba, 8);
+ __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8);
+ __m128i result = _mm_unpacklo_epi32(ba8, b4a48);
+#else
+ /* Keep the low dword of each even-lane product, then move the low
+ * dword of each odd-lane product up into the odd lanes. */
+ __m128i mask = _mm_setr_epi32(~0,0,~0,0);
+ __m128i ba_mask = _mm_and_si128(ba, mask);
+ __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32);
+ __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift);
+#endif
+
+ return result;
+}
+
+
+static INLINE void
+transpose4_epi32(const __m128i * restrict a,
+ const __m128i * restrict b,
+ const __m128i * restrict c,
+ const __m128i * restrict d,
+ __m128i * restrict o,
+ __m128i * restrict p,
+ __m128i * restrict q,
+ __m128i * restrict r)
+{
+ __m128i t0 = _mm_unpacklo_epi32(*a, *b);
+ __m128i t1 = _mm_unpacklo_epi32(*c, *d);
+ __m128i t2 = _mm_unpackhi_epi32(*a, *b);
+ __m128i t3 = _mm_unpackhi_epi32(*c, *d);
+
+ *o = _mm_unpacklo_epi64(t0, t1);
+ *p = _mm_unpackhi_epi64(t0, t1);
+ *q = _mm_unpacklo_epi64(t2, t3);
+ *r = _mm_unpackhi_epi64(t2, t3);
+}
+
+#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i))
+
+
+#endif /* PIPE_ARCH_SSE */
#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index f7aa1403d0..44cadbfcdd 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src,
}
}
+/*** PIPE_FORMAT_X24S8_USCALED ***/
+
+/**
+ * Return S component as four floats in [0..255]. The remaining bits are ignored.
+ */
+static void
+s8x24_get_tile_rgba(const unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)((*src++ >> 24) & 0xff);
+ }
+
+ p += dst_stride;
+ }
+}
+
+/*** PIPE_FORMAT_S8X24_USCALED ***/
+
+/**
+ * Return S component as four floats in [0..255]. The remaining bits are ignored.
+ */
+static void
+x24s8_get_tile_rgba(const unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)(*src++ & 0xff);
+ }
+ p += dst_stride;
+ }
+}
+
+
+/**
+ * Return S component as four floats in [0..255].
+ */
+static void
+s8_get_tile_rgba(const unsigned char *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)(*src++ & 0xff);
+ }
+ p += dst_stride;
+ }
+}
/*** PIPE_FORMAT_Z32_FLOAT ***/
@@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
case PIPE_FORMAT_Z24X8_UNORM:
s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_S8_USCALED:
+ s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride);
+ break;
+ case PIPE_FORMAT_X24S8_USCALED:
+ s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_S8X24_USCALED:
+ x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_Z32_FLOAT:
z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride);
break;
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 9e02d43ab7..d99ed7c6d6 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1415,6 +1415,12 @@ Edge flags are used to control which lines or points are actually
drawn when the polygon mode converts triangles/quads/polygons into
points or lines.
+TGSI_SEMANTIC_STENCIL
+""""""""""""""""""""""
+
+For fragment shaders, this semantic label indicates that an output
+is a writable stencil reference value. Only the Y component is writable.
+This allows the fragment shader to change the fragment's stencil reference value.
Properties
@@ -1493,6 +1499,8 @@ well.
| Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) |
| | | [#depth-tex-mode]_ | |
+--------------------+--------------+--------------------+--------------+
+| S | (s, s, s, s) | unknown | unknown |
++--------------------+--------------+--------------------+--------------+
.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 7bb7893d93..bd059d5716 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -30,7 +30,6 @@
#include "i915_context.h"
#include "i915_batch.h"
#include "i915_debug.h"
-#include "i915_reg.h"
#include "i915_resource.h"
#include "pipe/p_context.h"
diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
index bd8b9174a8..36c04a3165 100644
--- a/src/gallium/drivers/i965/intel_decode.c
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -40,6 +40,7 @@
#include <stdint.h>
#include <string.h>
+#include "util/u_memory.h"
#include "util/u_string.h"
#include "intel_decode.h"
@@ -116,8 +117,7 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
};
- for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]);
- opcode++) {
+ for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) {
if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) {
unsigned int len = 1, i;
@@ -275,8 +275,7 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
return len;
}
- for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]);
- opcode++) {
+ for (opcode = 0; opcode < Elements(opcodes_2d); opcode++) {
if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) {
unsigned int i;
@@ -1037,9 +1036,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
return len;
}
- for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
- opcode++)
- {
+ for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) {
if (opcodes_3d_1d[opcode].i830_only && !i830)
continue;
@@ -1291,8 +1288,7 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
- opcode++) {
+ for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
unsigned int len = 1, i;
@@ -1637,8 +1633,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
return len;
}
- for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
- opcode++) {
+ for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
unsigned int i;
len = 1;
@@ -1705,8 +1700,7 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
- opcode++) {
+ for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
unsigned int len = 1, i;
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 55b877b4ab..08da2286b0 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -28,8 +28,6 @@ C_SOURCES = \
lp_scene_queue.c \
lp_screen.c \
lp_setup.c \
- lp_setup_coef.c \
- lp_setup_coef_intrin.c \
lp_setup_line.c \
lp_setup_point.c \
lp_setup_tri.c \
@@ -38,6 +36,7 @@ C_SOURCES = \
lp_state_clip.c \
lp_state_derived.c \
lp_state_fs.c \
+ lp_state_setup.c \
lp_state_gs.c \
lp_state_rasterizer.c \
lp_state_sampler.c \
@@ -63,12 +62,12 @@ PROGS := lp_test_format \
# Need this for the lp_test_*.o files
CLEAN_EXTRA = *.o
+include ../../Makefile.template
+
lp_test_sincos.o : sse_mathfun.h
PROGS_DEPS := ../../auxiliary/libgallium.a
-include ../../Makefile.template
-
lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv
python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 650435f0f1..49950153a4 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [
])
-# Only enable SSSE3 for lp_tile_soa_sse3.c
-ssse3_env = env.Clone()
-if env['gcc'] \
- and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \
- and env['machine'] in ('x86', 'x86_64') :
- ssse3_env.Append(CCFLAGS = ['-mssse3'])
-lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c')
+lp_tile_soa_os = env.SharedObject('lp_tile_soa.c')
llvmpipe = env.ConvenienceLibrary(
@@ -64,13 +58,12 @@ llvmpipe = env.ConvenienceLibrary(
'lp_setup_line.c',
'lp_setup_point.c',
'lp_setup_tri.c',
- 'lp_setup_coef.c',
- 'lp_setup_coef_intrin.c',
'lp_setup_vbuf.c',
'lp_state_blend.c',
'lp_state_clip.c',
'lp_state_derived.c',
'lp_state_fs.c',
+ 'lp_state_setup.c',
'lp_state_gs.c',
'lp_state_rasterizer.c',
'lp_state_sampler.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index e28efe778f..e50643790c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
- LLVMValueRef ref)
+ LLVMValueRef ref,
+ boolean do_branch)
{
struct lp_build_context bld;
LLVMValueRef test;
@@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder,
lp_build_name(test, "alpha_mask");
lp_build_mask_update(mask, test);
+
+ if (do_branch)
+ lp_build_mask_check(mask);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 44603b418c..27ca8aad4d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
- LLVMValueRef ref);
+ LLVMValueRef ref,
+ boolean do_branch);
#endif /* !LP_BLD_ALPHA_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 7561899a74..7eb76d4fb3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -53,15 +53,8 @@
* ... ... ... ... ... ... ... ... ...
*
*
- * Stencil test:
- * Two-sided stencil test is supported but probably not as efficient as
- * it could be. Currently, we use if/then/else constructs to do the
- * operations for front vs. back-facing polygons. We could probably do
- * both the front and back arithmetic then use a Select() instruction to
- * choose the result depending on polyon orientation. We'd have to
- * measure performance both ways and see which is better.
- *
* @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <brianp@vmware.com>
*/
#include "pipe/p_state.h"
@@ -71,6 +64,7 @@
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_intr.h"
@@ -128,57 +122,32 @@ lp_build_stencil_test_single(struct lp_build_context *bld,
/**
* Do the one or two-sided stencil test comparison.
* \sa lp_build_stencil_test_single
- * \param face an integer indicating front (+) or back (-) facing polygon.
- * If NULL, assume front-facing.
+ * \param front_facing an integer vector mask, indicating front (~0) or back
+ * (0) facing polygon. If NULL, assume front-facing.
*/
static LLVMValueRef
lp_build_stencil_test(struct lp_build_context *bld,
const struct pipe_stencil_state stencil[2],
LLVMValueRef stencilRefs[2],
LLVMValueRef stencilVals,
- LLVMValueRef face)
+ LLVMValueRef front_facing)
{
LLVMValueRef res;
assert(stencil[0].enabled);
- if (stencil[1].enabled && face) {
- /* do two-sided test */
- struct lp_build_flow_context *flow_ctx;
- struct lp_build_if_state if_ctx;
- LLVMValueRef front_facing;
- LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
- LLVMValueRef result = bld->undef;
-
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
+ /* do front face test */
+ res = lp_build_stencil_test_single(bld, &stencil[0],
+ stencilRefs[0], stencilVals);
- lp_build_flow_scope_declare(flow_ctx, &result);
+ if (stencil[1].enabled && front_facing) {
+ /* do back face test */
+ LLVMValueRef back_res;
- /* front_facing = face > 0.0 */
- front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
- {
- result = lp_build_stencil_test_single(bld, &stencil[0],
- stencilRefs[0], stencilVals);
- }
- lp_build_else(&if_ctx);
- {
- result = lp_build_stencil_test_single(bld, &stencil[1],
- stencilRefs[1], stencilVals);
- }
- lp_build_endif(&if_ctx);
+ back_res = lp_build_stencil_test_single(bld, &stencil[1],
+ stencilRefs[1], stencilVals);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
-
- res = result;
- }
- else {
- /* do single-side test */
- res = lp_build_stencil_test_single(bld, &stencil[0],
- stencilRefs[0], stencilVals);
+ res = lp_build_select(bld, front_facing, res, back_res);
}
return res;
@@ -195,14 +164,12 @@ lp_build_stencil_op_single(struct lp_build_context *bld,
const struct pipe_stencil_state *stencil,
enum stencil_op op,
LLVMValueRef stencilRef,
- LLVMValueRef stencilVals,
- LLVMValueRef mask)
+ LLVMValueRef stencilVals)
{
- const unsigned stencilMax = 255; /* XXX fix */
struct lp_type type = bld->type;
LLVMValueRef res;
- LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
+ LLVMValueRef max = lp_build_const_int_vec(type, 0xff);
unsigned stencil_op;
assert(type.sign);
@@ -255,19 +222,7 @@ lp_build_stencil_op_single(struct lp_build_context *bld,
break;
default:
assert(0 && "bad stencil op mode");
- res = NULL;
- }
-
- if (stencil->writemask != stencilMax) {
- /* mask &= stencil->writemask */
- LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask);
- mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
- /* res = (res & mask) | (stencilVals & ~mask) */
- res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
- }
- else {
- /* res = mask ? res : stencilVals */
- res = lp_build_select(bld, mask, res, stencilVals);
+ res = bld->undef;
}
return res;
@@ -284,49 +239,40 @@ lp_build_stencil_op(struct lp_build_context *bld,
LLVMValueRef stencilRefs[2],
LLVMValueRef stencilVals,
LLVMValueRef mask,
- LLVMValueRef face)
+ LLVMValueRef front_facing)
{
- assert(stencil[0].enabled);
-
- if (stencil[1].enabled && face) {
- /* do two-sided op */
- struct lp_build_flow_context *flow_ctx;
- struct lp_build_if_state if_ctx;
- LLVMValueRef front_facing;
- LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
- LLVMValueRef result = bld->undef;
+ LLVMValueRef res;
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
+ assert(stencil[0].enabled);
- lp_build_flow_scope_declare(flow_ctx, &result);
+ /* do front face op */
+ res = lp_build_stencil_op_single(bld, &stencil[0], op,
+ stencilRefs[0], stencilVals);
- /* front_facing = face > 0.0 */
- front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
+ if (stencil[1].enabled && front_facing) {
+ /* do back face op */
+ LLVMValueRef back_res;
- lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
- {
- result = lp_build_stencil_op_single(bld, &stencil[0], op,
- stencilRefs[0], stencilVals, mask);
- }
- lp_build_else(&if_ctx);
- {
- result = lp_build_stencil_op_single(bld, &stencil[1], op,
- stencilRefs[1], stencilVals, mask);
- }
- lp_build_endif(&if_ctx);
+ back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
+ stencilRefs[1], stencilVals);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
+ res = lp_build_select(bld, front_facing, res, back_res);
+ }
- return result;
+ if (stencil->writemask != 0xff) {
+ /* mask &= stencil->writemask */
+ LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask);
+ mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
+ /* res = (res & mask) | (stencilVals & ~mask) */
+ res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
}
else {
- /* do single-sided op */
- return lp_build_stencil_op_single(bld, &stencil[0], op,
- stencilRefs[0], stencilVals, mask);
+ /* res = mask ? res : stencilVals */
+ res = lp_build_select(bld, mask, res, stencilVals);
}
+
+ return res;
}
@@ -358,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc,
}
else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
assert(format_desc->block.bits <= 32);
- if(format_desc->channel[swizzle].normalized)
- type.norm = TRUE;
+ assert(format_desc->channel[swizzle].normalized);
+ if (format_desc->channel[swizzle].size < format_desc->block.bits) {
+ /* Prefer signed integers when possible, as SSE has less support
+ * for unsigned comparisons.
+ */
+ type.sign = TRUE;
+ }
}
else
assert(0);
@@ -381,7 +332,7 @@ lp_depth_type(const struct util_format_description *format_desc,
*/
static boolean
get_z_shift_and_mask(const struct util_format_description *format_desc,
- unsigned *shift, unsigned *mask)
+ unsigned *shift, unsigned *width, unsigned *mask)
{
const unsigned total_bits = format_desc->block.bits;
unsigned z_swizzle;
@@ -397,12 +348,14 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
return FALSE;
+ *width = format_desc->channel[z_swizzle].size;
+
padding_right = 0;
for (chan = 0; chan < z_swizzle; ++chan)
padding_right += format_desc->channel[chan].size;
padding_left =
- total_bits - (padding_right + format_desc->channel[z_swizzle].size);
+ total_bits - (padding_right + *width);
if (padding_left || padding_right) {
unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
@@ -413,7 +366,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
*mask = 0xffffffff;
}
- *shift = padding_left;
+ *shift = padding_right;
return TRUE;
}
@@ -457,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
* \param maskvalue is the depth test mask.
* \param counter is a pointer of the uint32 counter.
*/
-static void
+void
lp_build_occlusion_count(LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef maskvalue,
@@ -494,31 +447,57 @@ lp_build_occlusion_count(LLVMBuilderRef builder,
* \param format_desc description of the depth/stencil surface
* \param mask the alive/dead pixel mask for the quad (vector)
* \param stencil_refs the front/back stencil ref values (scalar)
- * \param z_src the incoming depth/stencil values (a 2x2 quad)
+ * \param z_src the incoming depth/stencil values (a 2x2 quad, float32)
* \param zs_dst_ptr pointer to depth/stencil values in framebuffer
- * \param facing contains float value indicating front/back facing polygon
+ * \param facing contains boolean value indicating front/back facing polygon
*/
void
lp_build_depth_stencil_test(LLVMBuilderRef builder,
const struct pipe_depth_state *depth,
const struct pipe_stencil_state stencil[2],
- struct lp_type type,
+ struct lp_type z_src_type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
LLVMValueRef stencil_refs[2],
LLVMValueRef z_src,
LLVMValueRef zs_dst_ptr,
LLVMValueRef face,
- LLVMValueRef counter)
+ LLVMValueRef *zs_value,
+ boolean do_branch)
{
- struct lp_build_context bld;
- struct lp_build_context sbld;
+ struct lp_type z_type;
+ struct lp_build_context z_bld;
+ struct lp_build_context s_bld;
struct lp_type s_type;
+ unsigned z_shift = 0, z_width = 0, z_mask = 0;
LLVMValueRef zs_dst, z_dst = NULL;
LLVMValueRef stencil_vals = NULL;
LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
- LLVMValueRef orig_mask = mask->value;
+ LLVMValueRef orig_mask = lp_build_mask_value(mask);
+ LLVMValueRef front_facing = NULL;
+
+
+ /*
+ * Depths are expected to be between 0 and 1, even if they are stored in
+ * floats. Setting these bits here will ensure that the lp_build_conv() call
+ * below won't try to unnecessarily clamp the incoming values.
+ */
+ if(z_src_type.floating) {
+ z_src_type.sign = FALSE;
+ z_src_type.norm = TRUE;
+ }
+ else {
+ assert(!z_src_type.sign);
+ assert(z_src_type.norm);
+ }
+
+ /* Pick the depth type. */
+ z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
+
+ /* FIXME: Cope with a depth test type with a different bit width. */
+ assert(z_type.width == z_src_type.width);
+ assert(z_type.length == z_src_type.length);
/* Sanity checking */
{
@@ -540,8 +519,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
}
assert(z_swizzle < 4);
- assert(format_desc->block.bits == type.width);
- if (type.floating) {
+ assert(format_desc->block.bits == z_type.width);
+ if (z_type.floating) {
assert(z_swizzle == 0);
assert(format_desc->channel[z_swizzle].type ==
UTIL_FORMAT_TYPE_FLOAT);
@@ -552,54 +531,56 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
assert(format_desc->channel[z_swizzle].type ==
UTIL_FORMAT_TYPE_UNSIGNED);
assert(format_desc->channel[z_swizzle].normalized);
- assert(!type.fixed);
- assert(!type.sign);
- assert(type.norm);
+ assert(!z_type.fixed);
}
}
/* Setup build context for Z vals */
- lp_build_context_init(&bld, builder, type);
+ lp_build_context_init(&z_bld, builder, z_type);
/* Setup build context for stencil vals */
- s_type = lp_type_int_vec(type.width);
- lp_build_context_init(&sbld, builder, s_type);
+ s_type = lp_type_int_vec(z_type.width);
+ lp_build_context_init(&s_bld, builder, s_type);
/* Load current z/stencil value from z/stencil buffer */
+ zs_dst_ptr = LLVMBuildBitCast(builder,
+ zs_dst_ptr,
+ LLVMPointerType(z_bld.vec_type, 0), "");
zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
- lp_build_name(zs_dst, "zsbufval");
+ lp_build_name(zs_dst, "zs_dst");
/* Compute and apply the Z/stencil bitmasks and shifts.
*/
{
- unsigned z_shift, z_mask;
unsigned s_shift, s_mask;
- if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
- if (z_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
- z_src = LLVMBuildLShr(builder, z_src, shift, "");
- }
-
+ if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
if (z_mask != 0xffffffff) {
- LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
- z_src = LLVMBuildAnd(builder, z_src, mask, "");
- z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
- z_bitmask = mask; /* used below */
+ z_bitmask = lp_build_const_int_vec(z_type, z_mask);
}
- else {
+
+ /*
+ * Align the framebuffer Z's LSB to the right.
+ */
+ if (z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+ z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
+ } else if (z_bitmask) {
+ /* TODO: Instead of loading a mask from memory and ANDing, it's
+ * probably faster to just shake the bits with two shifts. */
+ z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
+ } else {
z_dst = zs_dst;
+ lp_build_name(z_dst, "z_dst");
}
-
- lp_build_name(z_dst, "zsbuf.z");
}
if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
if (s_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
+ LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift);
stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
stencil_shift = shift; /* used below */
}
@@ -608,35 +589,85 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
}
if (s_mask != 0xffffffff) {
- LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
+ LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask);
stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
}
- lp_build_name(stencil_vals, "stencil");
+ lp_build_name(stencil_vals, "s_dst");
}
}
-
if (stencil[0].enabled) {
+
+ if (face) {
+ LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
+ front_facing = LLVMBuildSExt(builder, front_facing,
+ LLVMIntType(s_bld.type.length*s_bld.type.width),
+ "");
+ front_facing = LLVMBuildBitCast(builder, front_facing,
+ s_bld.int_vec_type, "");
+ }
+
/* convert scalar stencil refs into vectors */
- stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
- stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);
+ stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
+ stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
- s_pass_mask = lp_build_stencil_test(&sbld, stencil,
- stencil_refs, stencil_vals, face);
+ s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
+ stencil_refs, stencil_vals,
+ front_facing);
/* apply stencil-fail operator */
{
- LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask);
- stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
+ LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
stencil_refs, stencil_vals,
- s_fail_mask, face);
+ s_fail_mask, front_facing);
}
}
if (depth->enabled) {
+ /*
+ * Convert fragment Z to the desired type, aligning the LSB to the right.
+ */
+
+ assert(z_type.width == z_src_type.width);
+ assert(z_type.length == z_src_type.length);
+ assert(lp_check_value(z_src_type, z_src));
+ if (z_src_type.floating) {
+ /*
+ * Convert from floating point values
+ */
+
+ if (!z_type.floating) {
+ z_src = lp_build_clamped_float_to_unsigned_norm(builder,
+ z_src_type,
+ z_width,
+ z_src);
+ }
+ } else {
+ /*
+ * Convert from unsigned normalized values.
+ */
+
+ assert(!z_src_type.sign);
+ assert(!z_src_type.fixed);
+ assert(z_src_type.norm);
+ assert(!z_type.floating);
+ if (z_src_type.width > z_width) {
+ LLVMValueRef shift = lp_build_const_int_vec(z_src_type,
+ z_src_type.width - z_width);
+ z_src = LLVMBuildLShr(builder, z_src, shift, "");
+ }
+ }
+ assert(lp_check_value(z_type, z_src));
+
+ lp_build_name(z_src, "z_src");
+
/* compare src Z to dst Z, returning 'pass' mask */
- z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
+ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
if (!stencil[0].enabled) {
/* We can potentially skip all remaining operations here, but only
@@ -644,28 +675,28 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
* buffer values. Don't need to update Z buffer values.
*/
lp_build_mask_update(mask, z_pass);
+
+ if (do_branch) {
+ lp_build_mask_check(mask);
+ do_branch = FALSE;
+ }
}
if (depth->writemask) {
- LLVMValueRef zselectmask = mask->value;
+ LLVMValueRef zselectmask;
/* mask off bits that failed Z test */
- zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");
+ zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
/* mask off bits that failed stencil test */
if (s_pass_mask) {
zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
}
- /* if combined Z/stencil format, mask off the stencil bits */
- if (z_bitmask) {
- zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
- }
-
/* Mix the old and new Z buffer values.
- * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i])
+ * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
*/
- z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst);
+ z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
}
if (stencil[0].enabled) {
@@ -673,33 +704,35 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef z_fail_mask, z_pass_mask;
/* apply Z-fail operator */
- z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass);
- stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
+ z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
stencil_refs, stencil_vals,
- z_fail_mask, face);
+ z_fail_mask, front_facing);
/* apply Z-pass operator */
- z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
- stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+ z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
- z_pass_mask, face);
+ z_pass_mask, front_facing);
}
}
else {
/* No depth test: apply Z-pass operator to stencil buffer values which
* passed the stencil test.
*/
- s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
- stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+ s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
- s_pass_mask, face);
+ s_pass_mask, front_facing);
}
- /* The Z bits are already in the right place but we may need to shift the
- * stencil bits before ORing Z with Stencil to make the final pixel value.
- */
+ /* Put Z and stencil bits in the right place */
+ if (z_dst && z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+ z_dst = LLVMBuildShl(builder, z_dst, shift, "");
+ }
if (stencil_vals && stencil_shift)
- stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
+ stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals,
stencil_shift, "");
/* Finally, merge/store the z/stencil values */
@@ -707,13 +740,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
(stencil[0].enabled && stencil[0].writemask)) {
if (z_dst && stencil_vals)
- zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
+ zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, "");
else if (z_dst)
zs_dst = z_dst;
else
zs_dst = stencil_vals;
- LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
+ *zs_value = zs_dst;
}
if (s_pass_mask)
@@ -722,6 +755,47 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
if (depth->enabled && stencil[0].enabled)
lp_build_mask_update(mask, z_pass);
- if (counter)
- lp_build_occlusion_count(builder, type, mask->value, counter);
+ if (do_branch)
+ lp_build_mask_check(mask);
+
+}
+
+
+void
+lp_build_depth_write(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value)
+{
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
+ LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
+
+ LLVMBuildStore(builder, zs_value, zs_dst_ptr);
+}
+
+
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value)
+{
+ struct lp_type z_type;
+ struct lp_build_context z_bld;
+ LLVMValueRef z_dst;
+
+ /* XXX: pointlessly redo type logic:
+ */
+ z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
+ lp_build_context_init(&z_bld, builder, z_type);
+
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
+ LLVMPointerType(z_bld.vec_type, 0), "");
+
+ z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
+ z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
+
+ LLVMBuildStore(builder, z_dst, zs_dst_ptr);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index e257a5bd7d..a54ef3a711 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -61,7 +61,27 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef zs_src,
LLVMValueRef zs_dst_ptr,
LLVMValueRef facing,
- LLVMValueRef counter);
+ LLVMValueRef *zs_value,
+ boolean do_branch);
+void
+lp_build_depth_write(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value);
+
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value);
+
+void
+lp_build_occlusion_count(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef maskvalue,
+ LLVMValueRef counter);
#endif /* !LP_BLD_DEPTH_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 2a374f8c39..c9da8900d0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
/*
- * a = a0 + x * dadx + y * dady
+ * a = a0 + (x * dadx + y * dady)
*/
if (attrib == 0 && chan == 0) {
@@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
a = a0;
if (interp != LP_INTERP_CONSTANT &&
interp != LP_INTERP_FACING) {
- LLVMValueRef tmp;
- tmp = LLVMBuildFMul(builder, bld->x, dadx, "");
- a = LLVMBuildFAdd(builder, a, tmp, "");
- tmp = LLVMBuildFMul(builder, bld->y, dady, "");
- a = LLVMBuildFAdd(builder, a, tmp, "");
+ LLVMValueRef ax, ay, axy;
+ ax = LLVMBuildFMul(builder, bld->x, dadx, "");
+ ay = LLVMBuildFMul(builder, bld->y, dady, "");
+ axy = LLVMBuildFAdd(builder, ax, ay, "");
+ a = LLVMBuildFAdd(builder, a, axy, "");
}
}
@@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* This is called when we move from one quad to the next.
*/
static void
-attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
+attribs_update(struct lp_build_interp_soa_context *bld,
+ int quad_index,
+ int start,
+ int end)
{
struct lp_build_context *coeff_bld = &bld->coeff_bld;
LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
@@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
assert(quad_index < 4);
- for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+ for(attrib = start; attrib < end; ++attrib) {
const unsigned mask = bld->mask[attrib];
const unsigned interp = bld->interp[attrib];
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -350,6 +353,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
}
#endif
+ if (attrib == 0 && chan == 2) {
+ /* FIXME: Depth values can exceed 1.0, due to the fact that
+ * setup interpolation coefficients refer to (0,0) which causes
+ * precision loss. So we must clamp to 1.0 here to avoid artifacts
+ */
+ a = lp_build_min(coeff_bld, a, coeff_bld->one);
+ }
+
attrib_name(a, attrib, chan, "");
}
bld->attribs[attrib][chan] = a;
@@ -434,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
pos_init(bld, x0, y0);
coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
-
- attribs_update(bld, 0);
}
@@ -443,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
* Advance the position and inputs to the given quad within the block.
*/
void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
- int quad_index)
+lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
+ int quad_index)
+{
+ assert(quad_index < 4);
+
+ attribs_update(bld, quad_index, 1, bld->num_attribs);
+}
+
+void
+lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
+ int quad_index)
{
assert(quad_index < 4);
- attribs_update(bld, quad_index);
+ attribs_update(bld, quad_index, 0, 1);
}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 3054030f73..a7ebdd1bfa 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -46,7 +46,31 @@
#include "tgsi/tgsi_exec.h"
-#include "lp_setup.h"
+/**
+ * Describes how to compute the interpolation coefficients (a0, dadx, dady)
+ * from the vertices passed into our triangle/line/point functions by the
+ * draw module.
+ *
+ * Vertices are treated as an array of float[4] values, indexed by
+ * src_index.
+ *
+ * LP_INTERP_COLOR is translated to either LP_INTERP_CONSTANT or
+ * LINEAR depending on flatshade state.
+ */
+enum lp_interp {
+ LP_INTERP_CONSTANT,
+ LP_INTERP_COLOR,
+ LP_INTERP_LINEAR,
+ LP_INTERP_PERSPECTIVE,
+ LP_INTERP_POSITION,
+ LP_INTERP_FACING
+};
+
+/* One record per shader input; the three bit-fields total 16 bits. */
+struct lp_shader_input {
+ ushort interp:4; /* enum lp_interp */
+ ushort usage_mask:4; /* bitmask of TGSI_WRITEMASK_x flags */
+ ushort src_index:8; /* where to find values in incoming vertices */
+};
struct lp_build_interp_soa_context
@@ -89,7 +113,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef y);
void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
+ int quad_index);
+
+void
+lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
int quad_index);
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 39f2c6085e..763432ed71 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -82,6 +82,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
}
}
+ lp_delete_setup_variants(llvmpipe);
+
align_free( llvmpipe );
}
@@ -108,6 +110,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
memset(llvmpipe, 0, sizeof *llvmpipe);
make_empty_list(&llvmpipe->fs_variants_list);
+ make_empty_list(&llvmpipe->setup_variants_list);
llvmpipe->pipe.winsys = screen->winsys;
llvmpipe->pipe.screen = screen;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 34fa20e204..db09c95b27 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -39,6 +39,7 @@
#include "lp_jit.h"
#include "lp_setup.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
struct llvmpipe_vbuf_render;
@@ -48,6 +49,7 @@ struct lp_fragment_shader;
struct lp_vertex_shader;
struct lp_blend_state;
struct lp_setup_context;
+struct lp_setup_variant;
struct lp_velems_state;
struct llvmpipe_context {
@@ -105,12 +107,9 @@ struct llvmpipe_context {
/** Which vertex shader output slot contains point size */
int psize_slot;
- /** Fragment shader input interpolation info */
- unsigned num_inputs;
- struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
-
/** The tiling engine */
struct lp_setup_context *setup;
+ struct lp_setup_variant setup_variant;
/** The primitive drawing context */
struct draw_context *draw;
@@ -120,6 +119,9 @@ struct llvmpipe_context {
struct lp_fs_variant_list_item fs_variants_list;
unsigned nr_fs_variants;
+
+ struct lp_setup_variant_list_item setup_variants_list;
+ unsigned nr_setup_variants;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h
index bb538b2bd8..3626ce4a86 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.h
+++ b/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -32,6 +32,7 @@
struct pipe_context;
struct pipe_fence_handle;
+struct pipe_resource;
void
llvmpipe_flush(struct pipe_context *pipe,
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 04b12dedcc..e09ec504ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -162,9 +162,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
void
lp_jit_screen_cleanup(struct llvmpipe_screen *screen)
{
- if(screen->engine)
- LLVMDisposeExecutionEngine(screen->engine);
-
if(screen->pass)
LLVMDisposePassManager(screen->pass);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 16e04fce0c..114f21f2d1 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -144,7 +144,7 @@ typedef void
(*lp_jit_frag_func)(const struct lp_jit_context *context,
uint32_t x,
uint32_t y,
- float facing,
+ uint32_t facing,
const void *a0,
const void *dadx,
const void *dady,
diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h
index d1c431475d..2538164ffa 100644
--- a/src/gallium/drivers/llvmpipe/lp_limits.h
+++ b/src/gallium/drivers/llvmpipe/lp_limits.h
@@ -72,4 +72,14 @@
*/
#define LP_MAX_SHADER_VARIANTS 1024
+/**
+ * Max number of setup variants that will be kept around.
+ *
+ * These are determined by the combination of the fragment shader
+ * input signature and a small amount of rasterization state (eg
+ * flatshading). It is likely that many active fragment shaders will
+ * share the same setup variant.
+ */
+#define LP_MAX_SETUP_VARIANTS 64
+
#endif /* LP_LIMITS_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index d7e6415e13..d358a98394 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -211,8 +211,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_scene *scene = task->scene;
- unsigned clear_value = arg.clear_zstencil.value;
- unsigned clear_mask = arg.clear_zstencil.mask;
+ uint32_t clear_value = arg.clear_zstencil.value;
+ uint32_t clear_mask = arg.clear_zstencil.mask;
const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
const unsigned block_size = scene->zsbuf.blocksize;
@@ -220,7 +220,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
uint8_t *dst;
unsigned i, j;
- LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask);
+ LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
+ __FUNCTION__, clear_value, clear_mask);
/*
* Clear the aera of the swizzled depth/depth buffer matching this tile, in
@@ -232,16 +233,31 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
dst = task->depth_tile;
+ clear_value &= clear_mask;
+
switch (block_size) {
case 1:
+ assert(clear_mask == 0xff);
memset(dst, (uint8_t) clear_value, height * width);
break;
case 2:
- for (i = 0; i < height; i++) {
- uint16_t *row = (uint16_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = (uint16_t) clear_value;
- dst += dst_stride;
+ if (clear_mask == 0xffff) {
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = (uint16_t) clear_value;
+ dst += dst_stride;
+ }
+ }
+ else {
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++) {
+ uint16_t tmp = ~clear_mask & *row;
+ *row++ = clear_value | tmp;
+ }
+ dst += dst_stride;
+ }
}
break;
case 4:
@@ -258,7 +274,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
uint32_t *row = (uint32_t *)dst;
for (j = 0; j < width; j++) {
uint32_t tmp = ~clear_mask & *row;
- *row++ = (clear_value & clear_mask) | tmp;
+ *row++ = clear_value | tmp;
}
dst += dst_stride;
}
@@ -318,7 +334,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
{
const struct lp_scene *scene = task->scene;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
- const struct lp_rast_state *state = inputs->state;
+ const struct lp_rast_state *state = task->state;
struct lp_fragment_shader_variant *variant = state->variant;
const unsigned tile_x = task->x, tile_y = task->y;
unsigned x, y;
@@ -349,10 +365,10 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
BEGIN_JIT_CALL(state);
variant->jit_function[RAST_WHOLE]( &state->jit_context,
tile_x + x, tile_y + y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
color,
depth,
0xffff,
@@ -398,7 +414,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
unsigned x, unsigned y,
unsigned mask)
{
- const struct lp_rast_state *state = inputs->state;
+ const struct lp_rast_state *state = task->state;
struct lp_fragment_shader_variant *variant = state->variant;
const struct lp_scene *scene = task->scene;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
@@ -430,10 +446,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
BEGIN_JIT_CALL(state);
variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
color,
depth,
mask,
@@ -474,6 +490,14 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
}
+/**
+ * Rasterizer command handler: record arg.state as the task's current
+ * state. lp_rast_shade_tile() and lp_rast_shade_quads_mask() read
+ * task->state when shading.
+ */
+void
+lp_rast_set_state(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ task->state = arg.state;
+}
+
+
/**
* Set top row and left column of the tile's pixels to white. For debugging.
@@ -581,10 +605,12 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
lp_rast_triangle_8,
lp_rast_triangle_3_4,
lp_rast_triangle_3_16,
+ lp_rast_triangle_4_16,
lp_rast_shade_tile,
lp_rast_shade_tile_opaque,
lp_rast_begin_query,
lp_rast_end_query,
+ lp_rast_set_state,
};
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index c55b97a9d1..a64c152cf8 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -78,30 +78,28 @@ struct lp_rast_state {
* These pointers point into the bin data buffer.
*/
struct lp_rast_shader_inputs {
- float facing; /** Positive for front-facing, negative for back-facing */
- unsigned disable:1; /** Partially binned, disable this command */
- unsigned opaque:1; /** Is opaque */
-
- float (*a0)[4];
- float (*dadx)[4];
- float (*dady)[4];
-
- const struct lp_rast_state *state;
+ unsigned frontfacing:1; /** True for front-facing */
+ unsigned disable:1; /** Partially binned, disable this command */
+ unsigned opaque:1; /** Is opaque */
+ unsigned pad0:29; /* wasted space */
+ unsigned stride; /* how much to advance data between a0, dadx, dady */
+ unsigned pad2; /* wasted space */
+ unsigned pad3; /* wasted space */
+ /* followed by a0, dadx, dady and planes[] */
};
-
+/* Note: the order of these values is important as they are loaded by
+ * sse code in rasterization:
+ */
struct lp_rast_plane {
- /* one-pixel sized trivial accept offsets for each plane */
- int ei;
-
- /* one-pixel sized trivial reject offsets for each plane */
- int eo;
-
/* edge function values at minx,miny ?? */
int c;
int dcdx;
int dcdy;
+
+ /* one-pixel sized trivial reject offsets for each plane */
+ int eo;
};
/**
@@ -111,17 +109,24 @@ struct lp_rast_plane {
* Objects of this type are put into the lp_setup_context::data buffer.
*/
struct lp_rast_triangle {
- /* inputs for the shader */
- struct lp_rast_shader_inputs inputs;
-
#ifdef DEBUG
float v[3][2];
+ float pad0;
+ float pad1;
#endif
- struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */
+ /* inputs for the shader */
+ struct lp_rast_shader_inputs inputs;
+ /* planes are also allocated here */
};
+/*
+ * Accessors for the data stored directly after struct
+ * lp_rast_shader_inputs. a0 begins right after the struct; dadx and dady
+ * begin one and two times inputs->stride bytes further on; the plane
+ * array begins at three times the stride. GET_A0/GET_DADX/GET_DADY take
+ * a pointer to the inputs, GET_PLANES takes the enclosing
+ * lp_rast_triangle. The macros evaluate their argument more than once.
+ */
+#define GET_A0(inputs) ((float (*)[4])((inputs)+1))
+#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride))
+#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride))
+#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride))
+
+
struct lp_rasterizer *
lp_rast_create( unsigned num_threads );
@@ -149,9 +154,10 @@ union lp_rast_cmd_arg {
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
struct {
- unsigned value;
- unsigned mask;
+ uint32_t value;
+ uint32_t mask;
} clear_zstencil;
+ const struct lp_rast_state *state;
struct lp_fence *fence;
struct llvmpipe_query *query_obj;
};
@@ -238,12 +244,14 @@ lp_rast_arg_null( void )
#define LP_RAST_OP_TRIANGLE_8 0x9
#define LP_RAST_OP_TRIANGLE_3_4 0xa
#define LP_RAST_OP_TRIANGLE_3_16 0xb
-#define LP_RAST_OP_SHADE_TILE 0xc
-#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd
-#define LP_RAST_OP_BEGIN_QUERY 0xe
-#define LP_RAST_OP_END_QUERY 0xf
-
-#define LP_RAST_OP_MAX 0x10
+#define LP_RAST_OP_TRIANGLE_4_16 0xc
+#define LP_RAST_OP_SHADE_TILE 0xd
+#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe
+#define LP_RAST_OP_BEGIN_QUERY 0xf
+#define LP_RAST_OP_END_QUERY 0x10
+#define LP_RAST_OP_SET_STATE 0x11
+
+#define LP_RAST_OP_MAX 0x12
#define LP_RAST_OP_MASK 0xff
void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
index 9fc78645a3..64ac616f62 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
@@ -12,6 +12,7 @@ static INLINE int u_bit_scan(unsigned *mask)
struct tile {
int coverage;
int overdraw;
+ const struct lp_rast_state *state;
char data[TILE_SIZE][TILE_SIZE];
};
@@ -42,10 +43,12 @@ static const char *cmd_names[LP_RAST_OP_MAX] =
"triangle_8",
"triangle_3_4",
"triangle_3_16",
+ "triangle_4_16",
"shade_tile",
"shade_tile_opaque",
"begin_query",
"end_query",
+ "set_state",
};
static const char *cmd_name(unsigned cmd)
@@ -55,31 +58,31 @@ static const char *cmd_name(unsigned cmd)
}
static const struct lp_fragment_shader_variant *
-get_variant( const struct cmd_block *block,
- int k )
+get_variant( const struct lp_rast_state *state,
+ const struct cmd_block *block,
+ int k )
{
if (block->cmd[k] == LP_RAST_OP_SHADE_TILE ||
- block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE)
- return block->arg[k].shade_tile->state->variant;
-
- if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 ||
+ block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_1 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_2 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_3 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_4 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_5 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_6 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_7)
- return block->arg[k].triangle.tri->inputs.state->variant;
+ return state->variant;
return NULL;
}
static boolean
-is_blend( const struct cmd_block *block,
+is_blend( const struct lp_rast_state *state,
+ const struct cmd_block *block,
int k )
{
- const struct lp_fragment_shader_variant *variant = get_variant(block, k);
+ const struct lp_fragment_shader_variant *variant = get_variant(state, block, k);
if (variant)
return variant->key.blend.rt[0].blend_enable;
@@ -92,6 +95,7 @@ is_blend( const struct cmd_block *block,
static void
debug_bin( const struct cmd_bin *bin )
{
+ const struct lp_rast_state *state = NULL;
const struct cmd_block *head = bin->head;
int i, j = 0;
@@ -99,9 +103,12 @@ debug_bin( const struct cmd_bin *bin )
while (head) {
for (i = 0; i < head->count; i++, j++) {
+ if (head->cmd[i] == LP_RAST_OP_SET_STATE)
+ state = head->arg[i].state;
+
debug_printf("%d: %s %s\n", j,
cmd_name(head->cmd[i]),
- is_blend(head, i) ? "blended" : "");
+ is_blend(state, head, i) ? "blended" : "");
}
head = head->next;
}
@@ -133,7 +140,7 @@ debug_shade_tile(int x, int y,
char val)
{
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
- boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable;
+ boolean blend = tile->state->variant->key.blend.rt[0].blend_enable;
unsigned i,j;
if (inputs->disable)
@@ -171,11 +178,12 @@ debug_triangle(int tilex, int tiley,
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
unsigned plane_mask = arg.triangle.plane_mask;
+ const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
struct lp_rast_plane plane[8];
int x, y;
int count = 0;
unsigned i, nr_planes = 0;
- boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable;
+ boolean blend = tile->state->variant->key.blend.rt[0].blend_enable;
if (tri->inputs.disable) {
/* This triangle was partially binned and has been disabled */
@@ -183,7 +191,7 @@ debug_triangle(int tilex, int tiley,
}
while (plane_mask) {
- plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)];
+ plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)];
plane[nr_planes].c = (plane[nr_planes].c +
plane[nr_planes].dcdy * tiley -
plane[nr_planes].dcdx * tilex);
@@ -232,15 +240,19 @@ do_debug_bin( struct tile *tile,
memset(tile->data, ' ', sizeof tile->data);
tile->coverage = 0;
tile->overdraw = 0;
+ tile->state = NULL;
for (block = bin->head; block; block = block->next) {
for (k = 0; k < block->count; k++, j++) {
- boolean blend = is_blend(block, k);
+ boolean blend = is_blend(tile->state, block, k);
char val = get_label(j);
int count = 0;
if (print_cmds)
debug_printf("%c: %15s", val, cmd_name(block->cmd[k]));
+
+ if (block->cmd[k] == LP_RAST_OP_SET_STATE)
+ tile->state = block->arg[k].state;
if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR ||
block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL)
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 7370119e96..b30408f097 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -77,6 +77,7 @@ struct cmd_bin;
struct lp_rasterizer_task
{
const struct cmd_bin *bin;
+ const struct lp_rast_state *state;
struct lp_scene *scene;
unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
@@ -244,7 +245,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
unsigned x, unsigned y )
{
const struct lp_scene *scene = task->scene;
- const struct lp_rast_state *state = inputs->state;
+ const struct lp_rast_state *state = task->state;
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
void *depth;
@@ -260,10 +261,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
BEGIN_JIT_CALL(state);
variant->jit_function[RAST_WHOLE]( &state->jit_context,
x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
color,
depth,
0xffff,
@@ -293,6 +294,14 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *,
void lp_rast_triangle_3_16( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_4_16( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void
+lp_rast_set_state(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg);
+
void
lp_debug_bin( const struct cmd_bin *bin );
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index a1f309d4b0..042c315635 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -123,6 +123,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
}
void
+lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
+                      const union lp_rast_cmd_arg arg)
+{
+   /* Non-SIMD version of the 4-plane 16x16 command (presumably only
+    * built when the SSE variant is not): ignore the incoming plane_mask,
+    * which the SSE variants decode as a block offset, and rasterize with
+    * the generic code using a mask that selects planes 0..3.
+    *
+    * The mask has four bits set, so this must dispatch to the 4-plane
+    * variant. The generic code copies one plane per set bit into a
+    * plane[NR_PLANES] array; the 3-plane variant has room for only
+    * three entries and would be overrun by the fourth.
+    */
+   union lp_rast_cmd_arg arg2;
+   arg2.triangle.tri = arg.triangle.tri;
+   arg2.triangle.plane_mask = (1<<4)-1;
+   lp_rast_triangle_4(task, arg2);
+}
+
+void
lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
@@ -230,144 +240,207 @@ sign_bits4(const __m128i *cstep, int cdiff)
}
-/* Special case for 3 plane triangle which is contained entirely
- * within a 16x16 block.
- */
+#define NR_PLANES 3
+
+
+
+
+
+
+
void
lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
- const struct lp_rast_plane *plane = tri->plane;
- unsigned mask = arg.triangle.plane_mask;
- const int x = task->x + (mask & 0xff);
- const int y = task->y + (mask >> 8);
- unsigned outmask, inmask, partmask, partial_mask;
- unsigned j;
- __m128i cstep4[3][4];
-
- outmask = 0; /* outside one or more trivial reject planes */
- partmask = 0; /* outside one or more trivial accept planes */
-
- for (j = 0; j < 3; j++) {
- const int dcdx = -plane[j].dcdx * 4;
- const int dcdy = plane[j].dcdy * 4;
- __m128i xdcdy = _mm_set1_epi32(dcdy);
-
- cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
- cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
- cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
- cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
-
- {
- const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
- const int cox = plane[j].eo * 4;
- const int cio = plane[j].ei * 4 - 1;
-
- outmask |= sign_bits4(cstep4[j], c + cox);
- partmask |= sign_bits4(cstep4[j], c + cio);
- }
- }
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ int x = (arg.triangle.plane_mask & 0xff) + task->x;
+ int y = (arg.triangle.plane_mask >> 8) + task->y;
+ unsigned i, j;
+
+ struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
+ unsigned nr = 0;
+
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */
+ __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */
+ __m128i zero = _mm_setzero_si128();
+
+ __m128i c;
+ __m128i dcdx;
+ __m128i dcdy;
+ __m128i rej4;
+
+ __m128i dcdx2;
+ __m128i dcdx3;
+
+ __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
+ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
+ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
+ __m128i unused;
+
+ transpose4_epi32(&p0, &p1, &p2, &zero,
+ &c, &dcdx, &dcdy, &rej4);
+
+ /* Adjust dcdx;
+ */
+ dcdx = _mm_sub_epi32(zero, dcdx);
- if (outmask == 0xffff)
- return;
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
+ rej4 = _mm_slli_epi32(rej4, 2);
- /* Mask of sub-blocks which are inside all trivial accept planes:
- */
- inmask = ~partmask & 0xffff;
+ dcdx2 = _mm_add_epi32(dcdx, dcdx);
+ dcdx3 = _mm_add_epi32(dcdx2, dcdx);
- /* Mask of sub-blocks which are inside all trivial reject planes,
- * but outside at least one trivial accept plane:
- */
- partial_mask = partmask & ~outmask;
+ transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
+ &span_0, &span_1, &span_2, &unused);
- assert((partial_mask & inmask) == 0);
+ for (i = 0; i < 4; i++) {
+ __m128i cx = c;
- /* Iterate over partials:
- */
- while (partial_mask) {
- int i = ffs(partial_mask) - 1;
- int ix = (i & 3) * 4;
- int iy = (i >> 2) * 4;
- int px = x + ix;
- int py = y + iy;
- unsigned mask = 0xffff;
-
- partial_mask &= ~(1 << i);
-
- for (j = 0; j < 3; j++) {
- const int cx = (plane[j].c
- - plane[j].dcdx * px
- + plane[j].dcdy * py) * 4;
-
- mask &= ~sign_bits4(cstep4[j], cx);
- }
+ for (j = 0; j < 4; j++) {
+ __m128i c4rej = _mm_add_epi32(cx, rej4);
+ __m128i rej_masks = _mm_srai_epi32(c4rej, 31);
- if (mask)
- lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
- }
+ /* if (is_zero(rej_masks)) */
+ if (_mm_movemask_epi8(rej_masks) == 0) {
+ __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0);
+ __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1);
+ __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2);
- /* Iterate over fulls:
- */
- while (inmask) {
- int i = ffs(inmask) - 1;
- int ix = (i & 3) * 4;
- int iy = (i >> 2) * 4;
- int px = x + ix;
- int py = y + iy;
+ __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
+
+ __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
+ __m128i c_01 = _mm_packs_epi32(c_0, c_1);
+
+ __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
- inmask &= ~(1 << i);
+ __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
- block_full_4(task, tri, px, py);
+ __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
+ __m128i c_23 = _mm_packs_epi32(c_2, c_3);
+ __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
+
+ unsigned mask = _mm_movemask_epi8(c_0123);
+
+ out[nr].i = i;
+ out[nr].j = j;
+ out[nr].mask = mask;
+ if (mask != 0xffff)
+ nr++;
+ }
+ cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2));
+ }
+
+ c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2));
}
+
+ for (i = 0; i < nr; i++)
+ lp_rast_shade_quads_mask(task,
+ &tri->inputs,
+ x + 4 * out[i].j,
+ y + 4 * out[i].i,
+ 0xffff & ~out[i].mask);
}
+
+
+
void
lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
- const struct lp_rast_plane *plane = tri->plane;
- unsigned mask = arg.triangle.plane_mask;
- const int x = task->x + (mask & 0xff);
- const int y = task->y + (mask >> 8);
- unsigned j;
-
- /* Iterate over partials:
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ int x = (arg.triangle.plane_mask & 0xff) + task->x;
+ int y = (arg.triangle.plane_mask >> 8) + task->y;
+
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */
+ __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */
+ __m128i zero = _mm_setzero_si128();
+
+ __m128i c;
+ __m128i dcdx;
+ __m128i dcdy;
+
+ __m128i dcdx2;
+ __m128i dcdx3;
+
+ __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
+ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
+ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
+ __m128i unused;
+
+ transpose4_epi32(&p0, &p1, &p2, &zero,
+ &c, &dcdx, &dcdy, &unused);
+
+ /* Adjust dcdx;
*/
+ dcdx = _mm_sub_epi32(zero, dcdx);
+
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
+
+ dcdx2 = _mm_add_epi32(dcdx, dcdx);
+ dcdx3 = _mm_add_epi32(dcdx2, dcdx);
+
+ transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
+ &span_0, &span_1, &span_2, &unused);
+
+
{
- unsigned mask = 0xffff;
+ __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0);
+ __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1);
+ __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2);
+
+ __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
- for (j = 0; j < 3; j++) {
- const int cx = (plane[j].c
- - plane[j].dcdx * x
- + plane[j].dcdy * y);
+ __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
- const int dcdx = -plane[j].dcdx;
- const int dcdy = plane[j].dcdy;
- __m128i xdcdy = _mm_set1_epi32(dcdy);
+ __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
+ __m128i c_01 = _mm_packs_epi32(c_0, c_1);
- __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
- __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
- __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
- __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+ __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
- __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
- __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
- __m128i result = _mm_packs_epi16(cstep01, cstep23);
+ __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
- /* Extract the sign bits
- */
- mask &= ~_mm_movemask_epi8(result);
- }
+ __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
- if (mask)
- lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+ __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
+ __m128i c_23 = _mm_packs_epi32(c_2, c_3);
+ __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
+
+ unsigned mask = _mm_movemask_epi8(c_0123);
+
+ if (mask != 0xffff)
+ lp_rast_shade_quads_mask(task,
+ &tri->inputs,
+ x,
+ y,
+ 0xffff & ~mask);
}
}
-
+#undef NR_PLANES
#endif
@@ -383,10 +456,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
#define TAG(x) x##_3
#define NR_PLANES 3
+/*#define TRI_4 lp_rast_triangle_3_4*/
+/*#define TRI_16 lp_rast_triangle_3_16*/
#include "lp_rast_tri_tmp.h"
#define TAG(x) x##_4
#define NR_PLANES 4
+#define TRI_16 lp_rast_triangle_4_16
#include "lp_rast_tri_tmp.h"
#define TAG(x) x##_5
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 9830a43ba5..4825d651c0 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -82,7 +82,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int dcdx = -plane[j].dcdx * 4;
const int dcdy = plane[j].dcdy * 4;
const int cox = plane[j].eo * 4;
- const int cio = plane[j].ei * 4 - 1;
+ const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int cio = ei * 4 - 1;
build_masks(c[j] + cox,
cio - cox,
@@ -156,6 +157,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
unsigned plane_mask = arg.triangle.plane_mask;
+ const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
const int x = task->x, y = task->y;
struct lp_rast_plane plane[NR_PLANES];
int c[NR_PLANES];
@@ -172,7 +174,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
while (plane_mask) {
int i = ffs(plane_mask) - 1;
- plane[j] = tri->plane[i];
+ plane[j] = tri_plane[i];
plane_mask &= ~(1 << i);
c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
@@ -180,7 +182,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
const int dcdx = -plane[j].dcdx * 16;
const int dcdy = plane[j].dcdy * 16;
const int cox = plane[j].eo * 16;
- const int cio = plane[j].ei * 16 - 1;
+ const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int cio = ei * 16 - 1;
build_masks(c[j] + cox,
cio - cox,
@@ -245,6 +248,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
}
}
+#if defined(PIPE_ARCH_SSE) && defined(TRI_16)
+/* Rasterize a 4x4 grid of sub-blocks spaced 4 pixels apart (a 16x16 pixel
+ * area) of a triangle described by NR_PLANES edge planes, using SSE
+ * integer vectors to evaluate 16 plane values per plane at a time.
+ *
+ * arg.triangle.plane_mask is not used as a plane bitmask here: its low
+ * 8 bits hold the area's x offset and the bits above hold its y offset,
+ * both relative to task->x / task->y. All NR_PLANES planes are tested.
+ *
+ * XXX: special case this when intersection is not required.
+ * - tile completely within bbox,
+ * - bbox completely within tile.
+ */
+void
+TRI_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ unsigned mask = arg.triangle.plane_mask;
+ unsigned outmask, partial_mask;
+ unsigned j;
+ __m128i cstep4[NR_PLANES][4]; /* per plane: 4 rows of 4 offsets, columns step by dcdx*4, rows by dcdy*4 */
+
+ int x = (mask & 0xff); /* x offset packed in the low 8 bits */
+ int y = (mask >> 8); /* y offset packed in the remaining bits */
+
+ outmask = 0; /* outside one or more trivial reject planes */
+
+ x += task->x;
+ y += task->y;
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int dcdx = -plane[j].dcdx * 4; /* change in plane value across 4 pixels in x */
+ const int dcdy = plane[j].dcdy * 4; /* change in plane value across 4 pixels in y */
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
+ cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
+ cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
+ cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
+
+ {
+ const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; /* plane value at the area origin */
+ const int cox = plane[j].eo * 4;
+
+ outmask |= sign_bits4(cstep4[j], c + cox); /* presumably one bit per sub-block whose value is negative -- confirm in sign_bits4 */
+ }
+ }
+
+ if (outmask == 0xffff) /* every sub-block was rejected by at least one plane */
+ return;
+
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ *
+ * NOTE: no trivial-accept test is done in this function, so this is
+ * simply every sub-block that was not rejected above; each of them
+ * goes through the per-pixel test below.
+ */
+ partial_mask = 0xffff & ~outmask;
+
+ /* Iterate over partials:
+ * bit i selects the sub-block in column (i & 3), row (i >> 2).
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ unsigned mask = 0xffff; /* shadows the outer 'mask'; per-pixel coverage of this sub-block */
+
+ partial_mask &= ~(1 << i);
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int cx = (plane[j].c - 1
+ - plane[j].dcdx * px
+ + plane[j].dcdy * py) * 4; /* scaled by 4, apparently to match the 4x steps stored in cstep4 */
+
+ mask &= ~sign_bits4(cstep4[j], cx);
+ }
+
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
+ }
+}
+#endif
+
+#if defined(PIPE_ARCH_SSE) && defined(TRI_4)
+void
+TRI_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{ /* Rasterize one 4x4 pixel block against all NR_PLANES edge planes
+ * using SSE integer intrinsics. The block origin is task->x / task->y
+ * plus the offsets packed into arg.triangle.plane_mask (x in the low
+ * 8 bits, y in the bits above).
+ */
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ unsigned mask = arg.triangle.plane_mask;
+ const int x = task->x + (mask & 0xff);
+ const int y = task->y + (mask >> 8);
+ unsigned j;
+
+ /* Test the single 4x4 block against every plane (there is only one
+ * block here, so nothing is actually iterated besides the planes):
+ */
+ {
+ unsigned mask = 0xffff; /* shadows the outer 'mask'; one coverage bit per pixel */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int cx = (plane[j].c
+ - plane[j].dcdx * x
+ + plane[j].dcdy * y); /* plane value at the block origin */
+
+ const int dcdx = -plane[j].dcdx;
+ const int dcdy = plane[j].dcdy;
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); /* row 0: four pixels stepping in x */
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); /* rows 1..3: previous row plus one y step */
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* Extract the sign bits
+ *
+ * The packs above saturate, so each value keeps its sign while
+ * being narrowed to 8 bits; movemask then collects the 16 sign
+ * bits, and pixels with a negative plane value are cleared.
+ */
+ mask &= ~_mm_movemask_epi8(result);
+ }
+
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+ }
+}
+#endif
+
+
+
#undef TAG
+#undef TRI_4
+#undef TRI_16
#undef NR_PLANES
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 8b504f23a3..a4fdf7cff3 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -203,7 +203,9 @@ lp_scene_end_rasterization(struct lp_scene *scene )
for (i = 0; i < scene->tiles_x; i++) {
for (j = 0; j < scene->tiles_y; j++) {
struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- bin->head = bin->tail = NULL;
+ bin->head = NULL;
+ bin->tail = NULL;
+ bin->last_state = NULL;
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index dbef7692e4..622c522f11 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -41,6 +41,7 @@
#include "lp_debug.h"
struct lp_scene_queue;
+struct lp_rast_state;
/* We're limited to 2K by 2K for 32bit fixed point rasterization.
* Will need a 64-bit version for larger framebuffers.
@@ -94,6 +95,7 @@ struct data_block {
struct cmd_bin {
ushort x;
ushort y;
+ const struct lp_rast_state *last_state; /* most recent state set in bin; lp_scene_bin_cmd_with_state() compares against it to skip redundant LP_RAST_OP_SET_STATE commands, and lp_scene_end_rasterization() resets it to NULL */
struct cmd_block *head; /* reset to NULL by lp_scene_end_rasterization() */
struct cmd_block *tail; /* reset to NULL by lp_scene_end_rasterization() */
};
@@ -297,7 +299,7 @@ lp_scene_bin_command( struct lp_scene *scene,
assert(x < scene->tiles_x);
assert(y < scene->tiles_y);
- assert(cmd <= LP_RAST_OP_END_QUERY);
+ assert(cmd < LP_RAST_OP_MAX);
if (tail == NULL || tail->count == CMD_BLOCK_MAX) {
tail = lp_scene_new_cmd_block( scene, bin );
@@ -318,6 +320,30 @@ lp_scene_bin_command( struct lp_scene *scene,
}
+static INLINE boolean
+lp_scene_bin_cmd_with_state( struct lp_scene *scene,
+ unsigned x, unsigned y,
+ const struct lp_rast_state *state,
+ unsigned cmd,
+ union lp_rast_cmd_arg arg )
+{ /* Append 'cmd' with 'arg' to the bin at (x, y), first binning an
+ * LP_RAST_OP_SET_STATE command when 'state' differs from the state
+ * last recorded for that bin. Returns FALSE as soon as either
+ * lp_scene_bin_command() call fails, TRUE otherwise.
+ */
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+
+ if (state != bin->last_state) { /* pointer comparison: same state object means no re-emit */
+ bin->last_state = state; /* NOTE(review): recorded before the SET_STATE command is known to be binned; if the call below fails the bin claims a state it never received -- confirm callers always reset/restart the scene on FALSE */
+ if (!lp_scene_bin_command(scene, x, y,
+ LP_RAST_OP_SET_STATE,
+ lp_rast_arg_state(state)))
+ return FALSE;
+ }
+
+ if (!lp_scene_bin_command( scene, x, y, cmd, arg ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
/* Add a command to all active bins.
*/
static INLINE boolean
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index eade400087..6118434d3d 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -56,7 +56,7 @@
#include "draw/draw_vbuf.h"
-static void set_scene_state( struct lp_setup_context *, enum setup_state,
+static boolean set_scene_state( struct lp_setup_context *, enum setup_state,
const char *reason);
static boolean try_update_scene_state( struct lp_setup_context *setup );
@@ -167,7 +167,7 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup )
-static void
+static boolean
begin_binning( struct lp_setup_context *setup )
{
struct lp_scene *scene = setup->scene;
@@ -181,6 +181,8 @@ begin_binning( struct lp_setup_context *setup )
/* Always create a fence:
*/
scene->fence = lp_fence_create(MAX2(1, setup->num_threads));
+ if (!scene->fence)
+ return FALSE;
/* Initialize the bin flags and x/y coords:
*/
@@ -192,7 +194,8 @@ begin_binning( struct lp_setup_context *setup )
}
ok = try_update_scene_state(setup);
- assert(ok);
+ if (!ok)
+ return FALSE;
if (setup->fb.zsbuf &&
((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
@@ -208,7 +211,8 @@ begin_binning( struct lp_setup_context *setup )
ok = lp_scene_bin_everywhere( scene,
LP_RAST_OP_CLEAR_COLOR,
setup->clear.color );
- assert(ok);
+ if (!ok)
+ return FALSE;
}
}
@@ -216,12 +220,14 @@ begin_binning( struct lp_setup_context *setup )
if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
if (!need_zsload)
scene->has_depthstencil_clear = TRUE;
+
ok = lp_scene_bin_everywhere( scene,
LP_RAST_OP_CLEAR_ZSTENCIL,
lp_rast_arg_clearzs(
setup->clear.zsvalue,
setup->clear.zsmask));
- assert(ok);
+ if (!ok)
+ return FALSE;
}
}
@@ -229,15 +235,16 @@ begin_binning( struct lp_setup_context *setup )
ok = lp_scene_bin_everywhere( scene,
LP_RAST_OP_BEGIN_QUERY,
lp_rast_arg_query(setup->active_query) );
- assert(ok);
+ if (!ok)
+ return FALSE;
}
-
setup->clear.flags = 0;
setup->clear.zsmask = 0;
setup->clear.zsvalue = 0;
LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__);
+ return TRUE;
}
@@ -246,12 +253,12 @@ begin_binning( struct lp_setup_context *setup )
*
* TODO: fast path for fullscreen clears and no triangles.
*/
-static void
+static boolean
execute_clears( struct lp_setup_context *setup )
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
- begin_binning( setup );
+ return begin_binning( setup );
}
const char *states[] = {
@@ -262,7 +269,7 @@ const char *states[] = {
};
-static void
+static boolean
set_scene_state( struct lp_setup_context *setup,
enum setup_state new_state,
const char *reason)
@@ -270,7 +277,7 @@ set_scene_state( struct lp_setup_context *setup,
unsigned old_state = setup->state;
if (old_state == new_state)
- return;
+ return TRUE;
if (LP_DEBUG & DEBUG_SCENE) {
debug_printf("%s old %s new %s%s%s\n",
@@ -294,12 +301,14 @@ set_scene_state( struct lp_setup_context *setup,
break;
case SETUP_ACTIVE:
- begin_binning( setup );
+ if (!begin_binning( setup ))
+ goto fail;
break;
case SETUP_FLUSHED:
if (old_state == SETUP_CLEARED)
- execute_clears( setup );
+ if (!execute_clears( setup ))
+ goto fail;
lp_setup_rasterize_scene( setup );
assert(setup->scene == NULL);
@@ -307,9 +316,21 @@ set_scene_state( struct lp_setup_context *setup,
default:
assert(0 && "invalid setup state mode");
+ goto fail;
}
setup->state = new_state;
+ return TRUE;
+
+fail:
+ if (setup->scene) {
+ lp_scene_end_rasterization(setup->scene);
+ setup->scene = NULL;
+ }
+
+ setup->state = SETUP_FLUSHED;
+ lp_setup_reset( setup );
+ return FALSE;
}
@@ -377,16 +398,19 @@ lp_setup_try_clear( struct lp_setup_context *setup,
}
if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
- unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
- unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+ uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
+ uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format,
depth,
stencil);
- zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format,
+
+ zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format,
zmask,
smask);
+
+ zsvalue &= zsmask;
}
if (setup->state == SETUP_ACTIVE) {
@@ -431,7 +455,7 @@ lp_setup_try_clear( struct lp_setup_context *setup,
if (flags & PIPE_CLEAR_COLOR) {
memcpy(setup->clear.color.clear_color,
&color_arg,
- sizeof color_arg);
+ sizeof setup->clear.color.clear_color);
}
}
@@ -502,14 +526,12 @@ lp_setup_set_point_state( struct lp_setup_context *setup,
}
void
-lp_setup_set_fs_inputs( struct lp_setup_context *setup,
- const struct lp_shader_input *input,
- unsigned nr )
+lp_setup_set_setup_variant( struct lp_setup_context *setup,
+ const struct lp_setup_variant *variant)
{
- LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr);
-
- memcpy( setup->fs.input, input, nr * sizeof input[0] );
- setup->fs.nr_inputs = nr;
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->setup.variant = variant;
}
void
@@ -617,8 +639,7 @@ lp_setup_set_vertex_info( struct lp_setup_context *setup,
void
lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
unsigned num,
- struct pipe_sampler_view **views,
- const struct pipe_sampler_state **samplers)
+ struct pipe_sampler_view **views)
{
unsigned i;
@@ -629,7 +650,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
struct pipe_sampler_view *view = i < num ? views[i] : NULL;
- if(view) {
+ if (view) {
struct pipe_resource *tex = view->texture;
struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex);
struct lp_jit_texture *jit_tex;
@@ -639,12 +660,6 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->depth = tex->depth0;
jit_tex->last_level = tex->last_level;
- /* sampler state */
- jit_tex->min_lod = samplers[i]->min_lod;
- jit_tex->max_lod = samplers[i]->max_lod;
- jit_tex->lod_bias = samplers[i]->lod_bias;
- COPY_4V(jit_tex->border_color, samplers[i]->border_color);
-
/* We're referencing the texture's internal data, so save a
* reference to it.
*/
@@ -694,6 +709,38 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
/**
+ * Called during state validation when LP_NEW_SAMPLER is set.
+ *
+ * Copies min_lod, max_lod, lod_bias and border_color from each non-NULL
+ * entry of samplers[0..num) into the matching
+ * setup->fs.current.jit_context.textures[i] slot, then marks
+ * LP_SETUP_NEW_FS dirty.
+ *
+ * Slots with i >= num, or whose sampler pointer is NULL, are left
+ * untouched (their previous values are not cleared).
+ *
+ * \param setup     setup context whose current JIT texture state is updated
+ * \param num       number of valid entries in samplers; must be
+ *                  <= PIPE_MAX_SAMPLERS (asserted)
+ * \param samplers  array of sampler state pointers; entries may be NULL
+ */
+void
+lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup,
+ unsigned num,
+ const struct pipe_sampler_state **samplers)
+{
+ unsigned i;
+
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL; /* never reads past samplers[num - 1] */
+
+ if (sampler) {
+ struct lp_jit_texture *jit_tex;
+ jit_tex = &setup->fs.current.jit_context.textures[i];
+
+ jit_tex->min_lod = sampler->min_lod;
+ jit_tex->max_lod = sampler->max_lod;
+ jit_tex->lod_bias = sampler->lod_bias;
+ COPY_4V(jit_tex->border_color, sampler->border_color);
+ }
+ }
+
+ setup->dirty |= LP_SETUP_NEW_FS; /* set unconditionally, even if no sampler was copied */
+}
+
+
+/**
* Is the given texture referenced by any scene?
* Note: we have to check all scenes including any scenes currently
* being rendered and the current scene being built.
@@ -850,7 +897,7 @@ try_update_scene_state( struct lp_setup_context *setup )
return TRUE;
}
-void
+boolean
lp_setup_update_state( struct lp_setup_context *setup,
boolean update_scene )
{
@@ -872,22 +919,47 @@ lp_setup_update_state( struct lp_setup_context *setup,
setup->psize = lp->psize_slot;
assert(lp->dirty == 0);
+
+ assert(lp->setup_variant.key.size ==
+ setup->setup.variant->key.size);
+
+ assert(memcmp(&lp->setup_variant.key,
+ &setup->setup.variant->key,
+ setup->setup.variant->key.size) == 0);
}
- if (update_scene)
- set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ );
+ if (update_scene) {
+ if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ))
+ return FALSE;
+ }
/* Only call into update_scene_state() if we already have a
* scene:
*/
if (update_scene && setup->scene) {
assert(setup->state == SETUP_ACTIVE);
- if (!try_update_scene_state(setup)) {
- lp_setup_flush_and_restart(setup);
- if (!try_update_scene_state(setup))
- assert(0);
- }
+
+ if (try_update_scene_state(setup))
+ return TRUE;
+
+ /* Update failed, try to restart the scene.
+ *
+ * Cannot call lp_setup_flush_and_restart() directly here
+ * because of potential recursion.
+ */
+ if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
+ return FALSE;
+
+ if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__))
+ return FALSE;
+
+ if (!setup->scene)
+ return FALSE;
+
+ return try_update_scene_state(setup);
}
+
+ return TRUE;
}
@@ -991,12 +1063,12 @@ lp_setup_begin_query(struct lp_setup_context *setup,
LP_RAST_OP_BEGIN_QUERY,
lp_rast_arg_query(pq))) {
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
if (!lp_scene_bin_everywhere(setup->scene,
LP_RAST_OP_BEGIN_QUERY,
lp_rast_arg_query(pq))) {
- assert(0);
return;
}
}
@@ -1040,14 +1112,20 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
}
-void
+boolean
lp_setup_flush_and_restart(struct lp_setup_context *setup)
{
if (0) debug_printf("%s\n", __FUNCTION__);
assert(setup->state == SETUP_ACTIVE);
- set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__);
- lp_setup_update_state(setup, TRUE);
+
+ if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
+ return FALSE;
+
+ if (!lp_setup_update_state(setup, TRUE))
+ return FALSE;
+
+ return TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 868bd3ad2f..ebb18f8134 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -33,28 +33,6 @@
struct draw_context;
struct vertex_info;
-enum lp_interp {
- LP_INTERP_CONSTANT,
- LP_INTERP_LINEAR,
- LP_INTERP_PERSPECTIVE,
- LP_INTERP_POSITION,
- LP_INTERP_FACING
-};
-
-
-/**
- * Describes how to compute the interpolation coefficients (a0, dadx, dady)
- * from the vertices passed into our triangle/line/point functions by the
- * draw module.
- *
- * Vertices are treated as an array of float[4] values, indexed by
- * src_index.
- */
-struct lp_shader_input {
- enum lp_interp interp; /* how to interpolate values */
- unsigned src_index; /* where to find values in incoming vertices */
- unsigned usage_mask; /* bitmask of TGSI_WRITEMASK_x flags */
-};
struct pipe_resource;
struct pipe_query;
@@ -66,7 +44,7 @@ struct lp_fragment_shader_variant;
struct lp_jit_context;
struct llvmpipe_query;
struct pipe_fence_handle;
-
+struct lp_setup_variant;
struct lp_setup_context *
lp_setup_create( struct pipe_context *pipe,
@@ -111,9 +89,8 @@ lp_setup_set_point_state( struct lp_setup_context *setup,
uint sprite_coord_origin);
void
-lp_setup_set_fs_inputs( struct lp_setup_context *setup,
- const struct lp_shader_input *interp,
- unsigned nr );
+lp_setup_set_setup_variant( struct lp_setup_context *setup,
+ const struct lp_setup_variant *variant );
void
lp_setup_set_fs_variant( struct lp_setup_context *setup,
@@ -143,7 +120,11 @@ lp_setup_set_scissor( struct lp_setup_context *setup,
void
lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
unsigned num,
- struct pipe_sampler_view **views,
+ struct pipe_sampler_view **views);
+
+void
+lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup,
+ unsigned num,
const struct pipe_sampler_state **samplers);
unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
deleted file mode 100644
index 8dc2688ddb..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010, VMware.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * Binning code for triangles
- */
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "lp_perf.h"
-#include "lp_setup_context.h"
-#include "lp_setup_coef.h"
-#include "lp_rast.h"
-#include "lp_state_fs.h"
-
-#if !defined(PIPE_ARCH_SSE)
-
-/**
- * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- */
-static void constant_coef( struct lp_rast_shader_inputs *inputs,
- unsigned slot,
- const float value,
- unsigned i )
-{
- inputs->a0[slot][i] = value;
- inputs->dadx[slot][i] = 0.0f;
- inputs->dady[slot][i] = 0.0f;
-}
-
-
-
-static void linear_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- float a0 = info->v0[vert_attr][i];
- float a1 = info->v1[vert_attr][i];
- float a2 = info->v2[vert_attr][i];
-
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
- float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
-
- inputs->dadx[slot][i] = dadx;
- inputs->dady[slot][i] = dady;
-
- /* calculate a0 as the value which would be sampled for the
- * fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
- *
- * this is neat but unfortunately not a good way to do things for
- * triangles with very large values of dadx or dady as it will
- * result in the subtraction and re-addition from a0 of a very
- * large number, which means we'll end up loosing a lot of the
- * fractional bits and precision from a0. the way to fix this is
- * to define a0 as the sample at a pixel center somewhere near vmin
- * instead - i'll switch to this later.
- */
- inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
- dady * info->y0_center);
-}
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void perspective_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- /* premultiply by 1/w (v[0][3] is always 1/w):
- */
- float a0 = info->v0[vert_attr][i] * info->v0[0][3];
- float a1 = info->v1[vert_attr][i] * info->v1[0][3];
- float a2 = info->v2[vert_attr][i] * info->v2[0][3];
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
- float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
-
- inputs->dadx[slot][i] = dadx;
- inputs->dady[slot][i] = dady;
- inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
- dady * info->y0_center);
-}
-
-
-/**
- * Special coefficient setup for gl_FragCoord.
- * X and Y are trivial
- * Z and W are copied from position_coef which should have already been computed.
- * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
- */
-static void
-setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned usage_mask)
-{
- /*X*/
- if (usage_mask & TGSI_WRITEMASK_X) {
- inputs->a0[slot][0] = 0.0;
- inputs->dadx[slot][0] = 1.0;
- inputs->dady[slot][0] = 0.0;
- }
-
- /*Y*/
- if (usage_mask & TGSI_WRITEMASK_Y) {
- inputs->a0[slot][1] = 0.0;
- inputs->dadx[slot][1] = 0.0;
- inputs->dady[slot][1] = 1.0;
- }
-
- /*Z*/
- if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(inputs, info, slot, 0, 2);
- }
-
- /*W*/
- if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(inputs, info, slot, 0, 3);
- }
-}
-
-
-/**
- * Setup the fragment input attribute with the front-facing value.
- * \param frontface is the triangle front facing?
- */
-static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
- unsigned slot,
- boolean frontface,
- unsigned usage_mask)
-{
- /* convert TRUE to 1.0 and FALSE to -1.0 */
- if (usage_mask & TGSI_WRITEMASK_X)
- constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
-
- if (usage_mask & TGSI_WRITEMASK_Y)
- constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_Z)
- constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_W)
- constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
-}
-
-
-/**
- * Compute the tri->coef[] array dadx, dady, a0 values.
- */
-void lp_setup_tri_coef( struct lp_setup_context *setup,
- struct lp_rast_shader_inputs *inputs,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4],
- boolean frontfacing)
-{
- unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
- unsigned slot;
- unsigned i;
- struct lp_tri_info info;
- float dx01 = v0[0][0] - v1[0][0];
- float dy01 = v0[0][1] - v1[0][1];
- float dx20 = v2[0][0] - v0[0][0];
- float dy20 = v2[0][1] - v0[0][1];
- float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
-
- info.v0 = v0;
- info.v1 = v1;
- info.v2 = v2;
- info.frontfacing = frontfacing;
- info.x0_center = v0[0][0] - setup->pixel_offset;
- info.y0_center = v0[0][1] - setup->pixel_offset;
- info.dx01_ooa = dx01 * oneoverarea;
- info.dx20_ooa = dx20 * oneoverarea;
- info.dy01_ooa = dy01 * oneoverarea;
- info.dy20_ooa = dy20 * oneoverarea;
-
-
- /* setup interpolation for all the remaining attributes:
- */
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
-
- switch (setup->fs.input[slot].interp) {
- case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(inputs, slot+1, info.v0[vert_attr][i], i);
- }
- else {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(inputs, slot+1, info.v2[vert_attr][i], i);
- }
- break;
-
- case LP_INTERP_LINEAR:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- linear_coef(inputs, &info, slot+1, vert_attr, i);
- break;
-
- case LP_INTERP_PERSPECTIVE:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- perspective_coef(inputs, &info, slot+1, vert_attr, i);
- fragcoord_usage_mask |= TGSI_WRITEMASK_W;
- break;
-
- case LP_INTERP_POSITION:
- /*
- * The generated pixel interpolators will pick up the coeffs from
- * slot 0, so all need to ensure that the usage mask is covers all
- * usages.
- */
- fragcoord_usage_mask |= usage_mask;
- break;
-
- case LP_INTERP_FACING:
- setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask);
- break;
-
- default:
- assert(0);
- }
- }
-
- /* The internal position input is in slot zero:
- */
- setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask);
-}
-
-#else
-extern void lp_setup_coef_dummy(void);
-void lp_setup_coef_dummy(void)
-{
-}
-
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
deleted file mode 100644
index 87a3255ccc..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * The setup code is concerned with point/line/triangle setup and
- * putting commands/data into the bins.
- */
-
-
-#ifndef LP_SETUP_COEF_H
-#define LP_SETUP_COEF_H
-
-
-struct lp_tri_info {
-
- float x0_center;
- float y0_center;
-
- /* turn these into an aligned float[4] */
- float dy01_ooa;
- float dy20_ooa;
- float dx01_ooa;
- float dx20_ooa;
-
- const float (*v0)[4];
- const float (*v1)[4];
- const float (*v2)[4];
-
- boolean frontfacing; /* remove eventually */
-};
-
-void lp_setup_tri_coef( struct lp_setup_context *setup,
- struct lp_rast_shader_inputs *inputs,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4],
- boolean frontfacing);
-
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
deleted file mode 100644
index 3742fd672b..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 VMware.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * Binning code for triangles
- */
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "lp_perf.h"
-#include "lp_setup_context.h"
-#include "lp_setup_coef.h"
-#include "lp_rast.h"
-
-#if defined(PIPE_ARCH_SSE)
-#include <emmintrin.h>
-
-
-static void constant_coef4( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- const float *attr)
-{
- *(__m128 *)inputs->a0[slot] = *(__m128 *)attr;
- *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
- *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
-}
-
-
-
-/**
- * Setup the fragment input attribute with the front-facing value.
- * \param frontface is the triangle front facing?
- */
-static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot )
-{
- /* XXX: just pass frontface directly to the shader, don't bother
- * treating it as an input.
- */
- __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0,
- 0, 0, 0);
-
- *(__m128 *)inputs->a0[slot] = a0;
- *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
- *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
-}
-
-
-
-static void calc_coef4( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- __m128 a0,
- __m128 a1,
- __m128 a2)
-{
- __m128 da01 = _mm_sub_ps(a0, a1);
- __m128 da20 = _mm_sub_ps(a2, a0);
-
- __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa));
- __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa));
- __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa);
-
- __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa));
- __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa));
- __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa);
-
- __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center));
- __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center));
- __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0);
- __m128 attr_0 = _mm_sub_ps(a0, attr_v0);
-
- *(__m128 *)inputs->a0[slot] = attr_0;
- *(__m128 *)inputs->dadx[slot] = dadx;
- *(__m128 *)inputs->dady[slot] = dady;
-}
-
-
-static void linear_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr)
-{
- __m128 a0 = *(const __m128 *)info->v0[vert_attr];
- __m128 a1 = *(const __m128 *)info->v1[vert_attr];
- __m128 a2 = *(const __m128 *)info->v2[vert_attr];
-
- calc_coef4(inputs, info, slot, a0, a1, a2);
-}
-
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void perspective_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr)
-{
- /* premultiply by 1/w (v[0][3] is always 1/w):
- */
- __m128 a0 = *(const __m128 *)info->v0[vert_attr];
- __m128 a1 = *(const __m128 *)info->v1[vert_attr];
- __m128 a2 = *(const __m128 *)info->v2[vert_attr];
-
- __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3]));
- __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3]));
- __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3]));
-
- calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow);
-}
-
-
-
-
-
-/**
- * Compute the inputs-> dadx, dady, a0 values.
- */
-void lp_setup_tri_coef( struct lp_setup_context *setup,
- struct lp_rast_shader_inputs *inputs,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4],
- boolean frontfacing)
-{
- unsigned slot;
- struct lp_tri_info info;
- float dx01 = v0[0][0] - v1[0][0];
- float dy01 = v0[0][1] - v1[0][1];
- float dx20 = v2[0][0] - v0[0][0];
- float dy20 = v2[0][1] - v0[0][1];
- float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
-
- info.v0 = v0;
- info.v1 = v1;
- info.v2 = v2;
- info.frontfacing = frontfacing;
- info.x0_center = v0[0][0] - setup->pixel_offset;
- info.y0_center = v0[0][1] - setup->pixel_offset;
- info.dx01_ooa = dx01 * oneoverarea;
- info.dx20_ooa = dx20 * oneoverarea;
- info.dy01_ooa = dy01 * oneoverarea;
- info.dy20_ooa = dy20 * oneoverarea;
-
-
- /* The internal position input is in slot zero:
- */
- linear_coef(inputs, &info, 0, 0);
-
- /* setup interpolation for all the remaining attributes:
- */
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
-
- switch (setup->fs.input[slot].interp) {
- case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
- constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]);
- }
- else {
- constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]);
- }
- break;
-
- case LP_INTERP_LINEAR:
- linear_coef(inputs, &info, slot+1, vert_attr);
- break;
-
- case LP_INTERP_PERSPECTIVE:
- perspective_coef(inputs, &info, slot+1, vert_attr);
- break;
-
- case LP_INTERP_POSITION:
- /*
- * The generated pixel interpolators will pick up the coeffs from
- * slot 0.
- */
- break;
-
- case LP_INTERP_FACING:
- setup_facing_coef(inputs, &info, slot+1);
- break;
-
- default:
- assert(0);
- }
- }
-}
-
-#else
-extern void lp_setup_coef_dummy(void);
-void lp_setup_coef_dummy(void)
-{
-}
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 8506ed2dc9..dc2533bedc 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -39,6 +39,7 @@
#include "lp_rast.h"
#include "lp_tile_soa.h" /* for TILE_SIZE */
#include "lp_scene.h"
+#include "lp_bld_interp.h" /* for struct lp_shader_input */
#include "draw/draw_vbuf.h"
#include "util/u_rect.h"
@@ -49,6 +50,8 @@
#define LP_SETUP_NEW_SCISSOR 0x08
+struct lp_setup_variant;
+
/** Max number of scenes */
#define MAX_SCENES 2
@@ -118,9 +121,6 @@ struct lp_setup_context
} state;
struct {
- struct lp_shader_input input[PIPE_MAX_ATTRIBS];
- unsigned nr_inputs;
-
const struct lp_rast_state *stored; /**< what's in the scene */
struct lp_rast_state current; /**< currently set state */
struct pipe_resource *current_tex[PIPE_MAX_SAMPLERS];
@@ -139,6 +139,10 @@ struct lp_setup_context
} blend_color;
+ struct {
+ const struct lp_setup_variant *variant;
+ } setup;
+
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
void (*point)( struct lp_setup_context *,
@@ -160,12 +164,12 @@ void lp_setup_choose_point( struct lp_setup_context *setup );
void lp_setup_init_vbuf(struct lp_setup_context *setup);
-void lp_setup_update_state( struct lp_setup_context *setup,
+boolean lp_setup_update_state( struct lp_setup_context *setup,
boolean update_scene);
void lp_setup_destroy( struct lp_setup_context *setup );
-void lp_setup_flush_and_restart(struct lp_setup_context *setup);
+boolean lp_setup_flush_and_restart(struct lp_setup_context *setup);
void
lp_setup_print_triangle(struct lp_setup_context *setup,
@@ -181,7 +185,7 @@ lp_setup_print_vertex(struct lp_setup_context *setup,
struct lp_rast_triangle *
lp_setup_alloc_triangle(struct lp_scene *scene,
- unsigned nr_inputs,
+ unsigned num_inputs,
unsigned nr_planes,
unsigned *tri_size);
@@ -191,6 +195,4 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
const struct u_rect *bbox,
int nr_planes );
-void lp_setup_flush_and_restart(struct lp_setup_context *setup);
-
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 156bd63375..827413bb33 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -35,6 +35,7 @@
#include "lp_setup_context.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
#define NUM_CHANNELS 4
@@ -46,6 +47,10 @@ struct lp_line_info {
const float (*v1)[4];
const float (*v2)[4];
+
+ float (*a0)[4];
+ float (*dadx)[4];
+ float (*dady)[4];
};
@@ -53,14 +58,14 @@ struct lp_line_info {
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
static void constant_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
unsigned slot,
const float value,
unsigned i )
{
- tri->inputs.a0[slot][i] = value;
- tri->inputs.dadx[slot][i] = 0.0f;
- tri->inputs.dady[slot][i] = 0.0f;
+ info->a0[slot][i] = value;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
}
@@ -69,7 +74,6 @@ static void constant_coef( struct lp_setup_context *setup,
* for a triangle.
*/
static void linear_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info,
unsigned slot,
unsigned vert_attr,
@@ -82,10 +86,10 @@ static void linear_coef( struct lp_setup_context *setup,
float dadx = da21 * info->dx * info->oneoverarea;
float dady = da21 * info->dy * info->oneoverarea;
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
+ info->dadx[slot][i] = dadx;
+ info->dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a1 -
+ info->a0[slot][i] = (a1 -
(dadx * (info->v1[0][0] - setup->pixel_offset) +
dady * (info->v1[0][1] - setup->pixel_offset)));
}
@@ -100,7 +104,6 @@ static void linear_coef( struct lp_setup_context *setup,
* divide the interpolated value by the interpolated W at that fragment.
*/
static void perspective_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info,
unsigned slot,
unsigned vert_attr,
@@ -115,43 +118,42 @@ static void perspective_coef( struct lp_setup_context *setup,
float dadx = da21 * info->dx * info->oneoverarea;
float dady = da21 * info->dy * info->oneoverarea;
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
+ info->dadx[slot][i] = dadx;
+ info->dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (info->v1[0][0] - setup->pixel_offset) +
- dady * (info->v1[0][1] - setup->pixel_offset)));
+ info->a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
}
static void
setup_fragcoord_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info,
unsigned slot,
unsigned usage_mask)
{
/*X*/
if (usage_mask & TGSI_WRITEMASK_X) {
- tri->inputs.a0[slot][0] = 0.0;
- tri->inputs.dadx[slot][0] = 1.0;
- tri->inputs.dady[slot][0] = 0.0;
+ info->a0[slot][0] = 0.0;
+ info->dadx[slot][0] = 1.0;
+ info->dady[slot][0] = 0.0;
}
/*Y*/
if (usage_mask & TGSI_WRITEMASK_Y) {
- tri->inputs.a0[slot][1] = 0.0;
- tri->inputs.dadx[slot][1] = 0.0;
- tri->inputs.dady[slot][1] = 1.0;
+ info->a0[slot][1] = 0.0;
+ info->dadx[slot][1] = 0.0;
+ info->dady[slot][1] = 1.0;
}
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(setup, tri, info, slot, 0, 2);
+ linear_coef(setup, info, slot, 0, 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(setup, tri, info, slot, 0, 3);
+ linear_coef(setup, info, slot, 0, 3);
}
}
@@ -159,43 +161,43 @@ setup_fragcoord_coef( struct lp_setup_context *setup,
* Compute the tri->coef[] array dadx, dady, a0 values.
*/
static void setup_line_coefficients( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info)
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
/* setup interpolation for all the remaining attributes:
*/
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned vert_attr = key->inputs[slot].src_index;
+ unsigned usage_mask = key->inputs[slot].usage_mask;
unsigned i;
- switch (setup->fs.input[slot].interp) {
+ switch (key->inputs[slot].interp) {
case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
+ if (key->flatshade_first) {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i);
+ constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i);
}
else {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i);
+ constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i);
}
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- linear_coef(setup, tri, info, slot+1, vert_attr, i);
+ linear_coef(setup, info, slot+1, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- perspective_coef(setup, tri, info, slot+1, vert_attr, i);
+ perspective_coef(setup, info, slot+1, vert_attr, i);
fragcoord_usage_mask |= TGSI_WRITEMASK_W;
break;
@@ -211,7 +213,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
case LP_INTERP_FACING:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, 1.0, i);
+ constant_coef(setup, info, slot+1, 1.0, i);
break;
default:
@@ -221,7 +223,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_fragcoord_coef(setup, tri, info, 0,
+ setup_fragcoord_coef(setup, info, 0,
fragcoord_usage_mask);
}
@@ -241,14 +243,15 @@ print_line(struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4])
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
uint i;
debug_printf("llvmpipe line\n");
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + key->num_inputs; i++) {
debug_printf(" v1[%d]: %f %f %f %f\n", i,
v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
}
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + key->num_inputs; i++) {
debug_printf(" v2[%d]: %f %f %f %f\n", i,
v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
}
@@ -275,7 +278,9 @@ try_setup_line( struct lp_setup_context *setup,
const float (*v2)[4])
{
struct lp_scene *scene = setup->scene;
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
struct lp_rast_triangle *line;
+ struct lp_rast_plane *plane;
struct lp_line_info info;
float width = MAX2(1.0, setup->line_width);
struct u_rect bbox;
@@ -475,7 +480,7 @@ try_setup_line( struct lp_setup_context *setup,
else {
/* do intersection test */
float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
- draw_end = (xintersect < 1.0 && xintersect > 0.0);
+ draw_end = (xintersect < 1.0 && xintersect >= 0.0);
}
/* Are we already drawing start/end?
@@ -513,7 +518,7 @@ try_setup_line( struct lp_setup_context *setup,
x_offset_end = y_offset_end * dxdy;
}
}
-
+
/* x/y positions in fixed point */
x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2;
x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
@@ -567,7 +572,7 @@ try_setup_line( struct lp_setup_context *setup,
u_rect_find_intersection(&setup->draw_region, &bbox);
line = lp_setup_alloc_triangle(scene,
- setup->fs.nr_inputs,
+ key->num_inputs,
nr_planes,
&tri_bytes);
if (!line)
@@ -581,33 +586,35 @@ try_setup_line( struct lp_setup_context *setup,
#endif
/* calculate the deltas */
- line->plane[0].dcdy = x[0] - x[1];
- line->plane[1].dcdy = x[1] - x[2];
- line->plane[2].dcdy = x[2] - x[3];
- line->plane[3].dcdy = x[3] - x[0];
+ plane = GET_PLANES(line);
+ plane[0].dcdy = x[0] - x[1];
+ plane[1].dcdy = x[1] - x[2];
+ plane[2].dcdy = x[2] - x[3];
+ plane[3].dcdy = x[3] - x[0];
- line->plane[0].dcdx = y[0] - y[1];
- line->plane[1].dcdx = y[1] - y[2];
- line->plane[2].dcdx = y[2] - y[3];
- line->plane[3].dcdx = y[3] - y[0];
+ plane[0].dcdx = y[0] - y[1];
+ plane[1].dcdx = y[1] - y[2];
+ plane[2].dcdx = y[2] - y[3];
+ plane[3].dcdx = y[3] - y[0];
/* Setup parameter interpolants:
*/
- setup_line_coefficients( setup, line, &info);
+ info.a0 = GET_A0(&line->inputs);
+ info.dadx = GET_DADX(&line->inputs);
+ info.dady = GET_DADY(&line->inputs);
+ setup_line_coefficients(setup, &info);
- line->inputs.facing = 1.0F;
- line->inputs.state = setup->fs.stored;
+ line->inputs.frontfacing = TRUE;
line->inputs.disable = FALSE;
line->inputs.opaque = FALSE;
for (i = 0; i < 4; i++) {
- struct lp_rast_plane *plane = &line->plane[i];
/* half-edge constants, will be interated over the whole render
* target.
*/
- plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+ plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i];
/* correct for top-left vs. bottom-left fill convention.
@@ -623,38 +630,34 @@ try_setup_line( struct lp_setup_context *setup,
* to its usual method, in which case it will probably want
* to use the opposite, top-left convention.
*/
- if (plane->dcdx < 0) {
+ if (plane[i].dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
- plane->c++;
+ plane[i].c++;
}
- else if (plane->dcdx == 0) {
+ else if (plane[i].dcdx == 0) {
if (setup->pixel_offset == 0) {
/* correct for top-left fill convention:
*/
- if (plane->dcdy > 0) plane->c++;
+ if (plane[i].dcdy > 0) plane[i].c++;
}
else {
/* correct for bottom-left fill convention:
*/
- if (plane->dcdy < 0) plane->c++;
+ if (plane[i].dcdy < 0) plane[i].c++;
}
}
- plane->dcdx *= FIXED_ONE;
- plane->dcdy *= FIXED_ONE;
+ plane[i].dcdx *= FIXED_ONE;
+ plane[i].dcdy *= FIXED_ONE;
/* find trivial reject offsets for each edge for a single-pixel
* sized block. These will be scaled up at each recursive level to
* match the active blocksize. Scaling in this way works best if
* the blocks are square.
*/
- plane->eo = 0;
- if (plane->dcdx < 0) plane->eo -= plane->dcdx;
- if (plane->dcdy > 0) plane->eo += plane->dcdy;
-
- /* Calculate trivial accept offsets from the above.
- */
- plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ plane[i].eo = 0;
+ if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+ if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
}
@@ -677,29 +680,25 @@ try_setup_line( struct lp_setup_context *setup,
* these planes elsewhere.
*/
if (nr_planes == 8) {
- line->plane[4].dcdx = -1;
- line->plane[4].dcdy = 0;
- line->plane[4].c = 1-bbox.x0;
- line->plane[4].ei = 0;
- line->plane[4].eo = 1;
-
- line->plane[5].dcdx = 1;
- line->plane[5].dcdy = 0;
- line->plane[5].c = bbox.x1+1;
- line->plane[5].ei = -1;
- line->plane[5].eo = 0;
-
- line->plane[6].dcdx = 0;
- line->plane[6].dcdy = 1;
- line->plane[6].c = 1-bbox.y0;
- line->plane[6].ei = 0;
- line->plane[6].eo = 1;
-
- line->plane[7].dcdx = 0;
- line->plane[7].dcdy = -1;
- line->plane[7].c = bbox.y1+1;
- line->plane[7].ei = -1;
- line->plane[7].eo = 0;
+ plane[4].dcdx = -1;
+ plane[4].dcdy = 0;
+ plane[4].c = 1-bbox.x0;
+ plane[4].eo = 1;
+
+ plane[5].dcdx = 1;
+ plane[5].dcdy = 0;
+ plane[5].c = bbox.x1+1;
+ plane[5].eo = 0;
+
+ plane[6].dcdx = 0;
+ plane[6].dcdy = 1;
+ plane[6].c = 1-bbox.y0;
+ plane[6].eo = 1;
+
+ plane[7].dcdx = 0;
+ plane[7].dcdy = -1;
+ plane[7].c = bbox.y1+1;
+ plane[7].eo = 0;
}
return lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
@@ -712,10 +711,11 @@ static void lp_setup_line( struct lp_setup_context *setup,
{
if (!try_setup_line( setup, v0, v1 ))
{
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
if (!try_setup_line( setup, v0, v1 ))
- assert(0);
+ return;
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index a95c15751c..146f1bd07c 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -33,9 +33,9 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "lp_perf.h"
-#include "lp_setup_context.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
#include "tgsi/tgsi_scan.h"
#define NUM_CHANNELS 4
@@ -46,6 +46,10 @@ struct point_info {
int dx01, dx12;
const float (*v0)[4];
+
+ float (*a0)[4];
+ float (*dadx)[4];
+ float (*dady)[4];
};
@@ -54,14 +58,37 @@ struct point_info {
*/
static void
constant_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
+ struct point_info *info,
unsigned slot,
const float value,
unsigned i)
{
- point->inputs.a0[slot][i] = value;
- point->inputs.dadx[slot][i] = 0.0f;
- point->inputs.dady[slot][i] = 0.0f;
+ info->a0[slot][i] = value;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
+}
+
+
+static void
+point_persp_coeff(struct lp_setup_context *setup,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned i)
+{
+ /*
+ * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A
+ * better strategy would be to take the primitive into consideration when
+ * generating the fragment shader key, and therefore avoid the per-fragment
+ * perspective divide.
+ */
+
+ float w0 = info->v0[0][3];
+
+ assert(i < 4);
+
+ info->a0[slot][i] = info->v0[slot][i]*w0;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
}
@@ -70,17 +97,19 @@ constant_coef(struct lp_setup_context *setup,
* \param slot the vertex attribute slot to setup
* \param i the attribute channel in [0,3]
* \param sprite_coord_origin one of PIPE_SPRITE_COORD_x
- * \param perspective_proj will the TEX instruction do a divide by Q?
+ * \param perspective does the shader expect pre-multiplied w, i.e.,
+ * LP_INTERP_PERSPECTIVE is specified in the shader key
*/
static void
texcoord_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
const struct point_info *info,
unsigned slot,
unsigned i,
unsigned sprite_coord_origin,
- boolean perspective_proj)
+ boolean perspective)
{
+ float w0 = info->v0[0][3];
+
assert(i < 4);
if (i == 0) {
@@ -89,21 +118,14 @@ texcoord_coef(struct lp_setup_context *setup,
float x0 = info->v0[0][0] - setup->pixel_offset;
float y0 = info->v0[0][1] - setup->pixel_offset;
- point->inputs.dadx[slot][0] = dadx;
- point->inputs.dady[slot][0] = dady;
- point->inputs.a0[slot][0] = 0.5 - (dadx * x0 + dady * y0);
+ info->dadx[slot][0] = dadx;
+ info->dady[slot][0] = dady;
+ info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0);
- if (!perspective_proj) {
- /* Divide coefficients by vertex.w here.
- *
- * It would be clearer to always multiply by w0 above and
- * then divide it out for perspective projection here, but
- * doing it this way involves less algebra.
- */
- float w0 = info->v0[0][3];
- point->inputs.dadx[slot][0] *= w0;
- point->inputs.dady[slot][0] *= w0;
- point->inputs.a0[slot][0] *= w0;
+ if (perspective) {
+ info->dadx[slot][0] *= w0;
+ info->dady[slot][0] *= w0;
+ info->a0[slot][0] *= w0;
}
}
else if (i == 1) {
@@ -116,26 +138,25 @@ texcoord_coef(struct lp_setup_context *setup,
dady = -dady;
}
- point->inputs.dadx[slot][1] = dadx;
- point->inputs.dady[slot][1] = dady;
- point->inputs.a0[slot][1] = 0.5 - (dadx * x0 + dady * y0);
+ info->dadx[slot][1] = dadx;
+ info->dady[slot][1] = dady;
+ info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0);
- if (!perspective_proj) {
- float w0 = info->v0[0][3];
- point->inputs.dadx[slot][1] *= w0;
- point->inputs.dady[slot][1] *= w0;
- point->inputs.a0[slot][1] *= w0;
+ if (perspective) {
+ info->dadx[slot][1] *= w0;
+ info->dady[slot][1] *= w0;
+ info->a0[slot][1] *= w0;
}
}
else if (i == 2) {
- point->inputs.a0[slot][2] = 0.0f;
- point->inputs.dadx[slot][2] = 0.0f;
- point->inputs.dady[slot][2] = 0.0f;
+ info->a0[slot][2] = 0.0f;
+ info->dadx[slot][2] = 0.0f;
+ info->dady[slot][2] = 0.0f;
}
else {
- point->inputs.a0[slot][3] = 1.0f;
- point->inputs.dadx[slot][3] = 0.0f;
- point->inputs.dady[slot][3] = 0.0f;
+ info->a0[slot][3] = perspective ? w0 : 1.0f;
+ info->dadx[slot][3] = 0.0f;
+ info->dady[slot][3] = 0.0f;
}
}
@@ -148,33 +169,32 @@ texcoord_coef(struct lp_setup_context *setup,
*/
static void
setup_point_fragcoord_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
- const struct point_info *info,
+ struct point_info *info,
unsigned slot,
unsigned usage_mask)
{
/*X*/
if (usage_mask & TGSI_WRITEMASK_X) {
- point->inputs.a0[slot][0] = 0.0;
- point->inputs.dadx[slot][0] = 1.0;
- point->inputs.dady[slot][0] = 0.0;
+ info->a0[slot][0] = 0.0;
+ info->dadx[slot][0] = 1.0;
+ info->dady[slot][0] = 0.0;
}
/*Y*/
if (usage_mask & TGSI_WRITEMASK_Y) {
- point->inputs.a0[slot][1] = 0.0;
- point->inputs.dadx[slot][1] = 0.0;
- point->inputs.dady[slot][1] = 1.0;
+ info->a0[slot][1] = 0.0;
+ info->dadx[slot][1] = 0.0;
+ info->dady[slot][1] = 1.0;
}
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
- constant_coef(setup, point, slot, info->v0[0][2], 2);
+ constant_coef(setup, info, slot, info->v0[0][2], 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
- constant_coef(setup, point, slot, info->v0[0][3], 3);
+ constant_coef(setup, info, slot, info->v0[0][3], 3);
}
}
@@ -184,21 +204,27 @@ setup_point_fragcoord_coef(struct lp_setup_context *setup,
*/
static void
setup_point_coefficients( struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
- const struct point_info *info)
+ struct point_info *info)
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
const struct lp_fragment_shader *shader = setup->fs.current.variant->shader;
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
/* setup interpolation for all the remaining attributes:
*/
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned vert_attr = key->inputs[slot].src_index;
+ unsigned usage_mask = key->inputs[slot].usage_mask;
+ enum lp_interp interp = key->inputs[slot].interp;
+ boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE);
unsigned i;
+
+ if (perspective & usage_mask) {
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ }
- switch (setup->fs.input[slot].interp) {
+ switch (interp) {
case LP_INTERP_POSITION:
/*
* The generated pixel interpolators will pick up the coeffs from
@@ -211,39 +237,45 @@ setup_point_coefficients( struct lp_setup_context *setup,
case LP_INTERP_LINEAR:
/* Sprite tex coords may use linear interpolation someday */
/* fall-through */
-
case LP_INTERP_PERSPECTIVE:
/* check if the sprite coord flag is set for this attribute.
* If so, set it up so it up so x and y vary from 0 to 1.
*/
- if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) {
- const int index = shader->info.input_semantic_index[slot];
+ if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) {
+ unsigned semantic_index = shader->info.base.input_semantic_index[slot];
/* Note that sprite_coord enable is a bitfield of
* PIPE_MAX_SHADER_OUTPUTS bits.
*/
- if (index < PIPE_MAX_SHADER_OUTPUTS &&
- (setup->sprite_coord_enable & (1 << index))) {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- texcoord_coef(setup, point, info, slot + 1, i,
+ if (semantic_index < PIPE_MAX_SHADER_OUTPUTS &&
+ (setup->sprite_coord_enable & (1 << semantic_index))) {
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ texcoord_coef(setup, info, slot + 1, i,
setup->sprite_coord_origin,
- (usage_mask & TGSI_WRITEMASK_W));
- fragcoord_usage_mask |= TGSI_WRITEMASK_W;
- break;
+ perspective);
+ }
+ }
+ break;
}
}
- /* FALLTHROUGH */
+ /* fall-through */
case LP_INTERP_CONSTANT:
for (i = 0; i < NUM_CHANNELS; i++) {
- if (usage_mask & (1 << i))
- constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i);
+ if (usage_mask & (1 << i)) {
+ if (perspective) {
+ point_persp_coeff(setup, info, slot+1, i);
+ }
+ else {
+ constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i);
+ }
+ }
}
break;
case LP_INTERP_FACING:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, point, slot+1, 1.0, i);
+ constant_coef(setup, info, slot+1, 1.0, i);
break;
default:
@@ -254,7 +286,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_point_fragcoord_coef(setup, point, info, 0,
+ setup_point_fragcoord_coef(setup, info, 0,
fragcoord_usage_mask);
}
@@ -271,6 +303,7 @@ try_setup_point( struct lp_setup_context *setup,
const float (*v0)[4] )
{
/* x/y positions in fixed point */
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
const int sizeAttr = setup->psize;
const float size
= (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
@@ -322,7 +355,7 @@ try_setup_point( struct lp_setup_context *setup,
u_rect_find_intersection(&setup->draw_region, &bbox);
point = lp_setup_alloc_triangle(scene,
- setup->fs.nr_inputs,
+ key->num_inputs,
nr_planes,
&bytes);
if (!point)
@@ -338,40 +371,40 @@ try_setup_point( struct lp_setup_context *setup,
info.dx12 = fixed_width;
info.dy01 = fixed_width;
info.dy12 = 0;
+ info.a0 = GET_A0(&point->inputs);
+ info.dadx = GET_DADX(&point->inputs);
+ info.dady = GET_DADY(&point->inputs);
/* Setup parameter interpolants:
*/
- setup_point_coefficients(setup, point, &info);
+ setup_point_coefficients(setup, &info);
- point->inputs.facing = 1.0F;
- point->inputs.state = setup->fs.stored;
+ point->inputs.frontfacing = TRUE;
point->inputs.disable = FALSE;
point->inputs.opaque = FALSE;
{
- point->plane[0].dcdx = -1;
- point->plane[0].dcdy = 0;
- point->plane[0].c = 1-bbox.x0;
- point->plane[0].ei = 0;
- point->plane[0].eo = 1;
-
- point->plane[1].dcdx = 1;
- point->plane[1].dcdy = 0;
- point->plane[1].c = bbox.x1+1;
- point->plane[1].ei = -1;
- point->plane[1].eo = 0;
-
- point->plane[2].dcdx = 0;
- point->plane[2].dcdy = 1;
- point->plane[2].c = 1-bbox.y0;
- point->plane[2].ei = 0;
- point->plane[2].eo = 1;
-
- point->plane[3].dcdx = 0;
- point->plane[3].dcdy = -1;
- point->plane[3].c = bbox.y1+1;
- point->plane[3].ei = -1;
- point->plane[3].eo = 0;
+ struct lp_rast_plane *plane = GET_PLANES(point);
+
+ plane[0].dcdx = -1;
+ plane[0].dcdy = 0;
+ plane[0].c = 1-bbox.x0;
+ plane[0].eo = 1;
+
+ plane[1].dcdx = 1;
+ plane[1].dcdy = 0;
+ plane[1].c = bbox.x1+1;
+ plane[1].eo = 0;
+
+ plane[2].dcdx = 0;
+ plane[2].dcdy = 1;
+ plane[2].c = 1-bbox.y0;
+ plane[2].eo = 1;
+
+ plane[3].dcdx = 0;
+ plane[3].dcdy = -1;
+ plane[3].c = bbox.y1+1;
+ plane[3].eo = 0;
}
return lp_setup_bin_triangle(setup, point, &bbox, nr_planes);
@@ -384,10 +417,11 @@ lp_setup_point(struct lp_setup_context *setup,
{
if (!try_setup_point( setup, v0 ))
{
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
if (!try_setup_point( setup, v0 ))
- assert(0);
+ return;
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 9016bb8e24..4ab0b72a57 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -32,15 +32,18 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_rect.h"
+#include "util/u_sse.h"
#include "lp_perf.h"
#include "lp_setup_context.h"
-#include "lp_setup_coef.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
#define NUM_CHANNELS 4
-
+#if defined(PIPE_ARCH_SSE)
+#include <emmintrin.h>
+#endif
static INLINE int
subpixel_snap(float a)
@@ -65,7 +68,7 @@ fixed_to_float(int a)
* immediately after it.
* The memory is allocated from the per-scene pool, not per-tile.
* \param tri_size returns number of bytes allocated
- * \param nr_inputs number of fragment shader inputs
+ * \param num_inputs number of fragment shader inputs
* \return pointer to triangle space
*/
struct lp_rast_triangle *
@@ -75,22 +78,23 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
+ unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
struct lp_rast_triangle *tri;
- unsigned tri_bytes, bytes;
- char *inputs;
- tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16);
- bytes = tri_bytes + (3 * input_array_sz);
+ *tri_size = (sizeof(struct lp_rast_triangle) +
+ 3 * input_array_sz +
+ plane_sz);
- tri = lp_scene_alloc_aligned( scene, bytes, 16 );
+ tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
+ if (tri == NULL)
+ return NULL;
- if (tri) {
- inputs = ((char *)tri) + tri_bytes;
- tri->inputs.a0 = (float (*)[4]) inputs;
- tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
- tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
+ tri->inputs.stride = input_array_sz;
- *tri_size = bytes;
+ {
+ char *a = (char *)tri;
+ char *b = (char *)&GET_PLANES(tri)[nr_planes];
+ assert(b - a == *tri_size);
}
return tri;
@@ -101,25 +105,26 @@ lp_setup_print_vertex(struct lp_setup_context *setup,
const char *name,
const float (*v)[4])
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
int i, j;
debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
name,
v[0][0], v[0][1], v[0][2], v[0][3]);
- for (i = 0; i < setup->fs.nr_inputs; i++) {
- const float *in = v[setup->fs.input[i].src_index];
+ for (i = 0; i < key->num_inputs; i++) {
+ const float *in = v[key->inputs[i].src_index];
debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
i,
- name, setup->fs.input[i].src_index,
- (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ",
- (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ",
- (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ",
- (setup->fs.input[i].usage_mask & 0x8) ? "w" : " ");
+ name, key->inputs[i].src_index,
+ (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
+ (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
+ (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
+ (key->inputs[i].usage_mask & 0x8) ? "w" : " ");
for (j = 0; j < 4; j++)
- if (setup->fs.input[i].usage_mask & (1<<j))
+ if (key->inputs[i].usage_mask & (1<<j))
debug_printf("%.5f ", in[j]);
debug_printf("\n");
@@ -200,14 +205,16 @@ lp_setup_whole_tile(struct lp_setup_context *setup,
}
LP_COUNT(nr_shade_opaque_64);
- return lp_scene_bin_command( scene, tx, ty,
- LP_RAST_OP_SHADE_TILE_OPAQUE,
- lp_rast_arg_inputs(inputs) );
+ return lp_scene_bin_cmd_with_state( scene, tx, ty,
+ setup->fs.stored,
+ LP_RAST_OP_SHADE_TILE_OPAQUE,
+ lp_rast_arg_inputs(inputs) );
} else {
LP_COUNT(nr_shade_64);
- return lp_scene_bin_command( scene, tx, ty,
- LP_RAST_OP_SHADE_TILE,
- lp_rast_arg_inputs(inputs) );
+ return lp_scene_bin_cmd_with_state( scene, tx, ty,
+ setup->fs.stored,
+ LP_RAST_OP_SHADE_TILE,
+ lp_rast_arg_inputs(inputs) );
}
}
@@ -225,13 +232,13 @@ do_triangle_ccw(struct lp_setup_context *setup,
boolean frontfacing )
{
struct lp_scene *scene = setup->scene;
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
struct lp_rast_triangle *tri;
- int x[3];
- int y[3];
- int area;
+ struct lp_rast_plane *plane;
+ int x[4];
+ int y[4];
struct u_rect bbox;
unsigned tri_bytes;
- int i;
int nr_planes = 3;
if (0)
@@ -248,10 +255,12 @@ do_triangle_ccw(struct lp_setup_context *setup,
x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ x[3] = 0;
y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
-
+ y[3] = 0;
+
/* Bounding rectangle (in pixels) */
{
@@ -289,13 +298,13 @@ do_triangle_ccw(struct lp_setup_context *setup,
u_rect_find_intersection(&setup->draw_region, &bbox);
tri = lp_setup_alloc_triangle(scene,
- setup->fs.nr_inputs,
+ key->num_inputs,
nr_planes,
&tri_bytes);
if (!tri)
return FALSE;
-#ifdef DEBUG
+#if 0
tri->v[0][0] = v0[0][0];
tri->v[1][0] = v1[0][0];
tri->v[2][0] = v2[0][0];
@@ -304,92 +313,172 @@ do_triangle_ccw(struct lp_setup_context *setup,
tri->v[2][1] = v2[0][1];
#endif
- tri->plane[0].dcdy = x[0] - x[1];
- tri->plane[1].dcdy = x[1] - x[2];
- tri->plane[2].dcdy = x[2] - x[0];
-
- tri->plane[0].dcdx = y[0] - y[1];
- tri->plane[1].dcdx = y[1] - y[2];
- tri->plane[2].dcdx = y[2] - y[0];
-
- area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
- tri->plane[2].dcdy * tri->plane[0].dcdx);
-
LP_COUNT(nr_tris);
- /* Cull non-ccw and zero-sized triangles.
- *
- * XXX: subject to overflow??
- */
- if (area <= 0) {
- lp_scene_putback_data( scene, tri_bytes );
- LP_COUNT(nr_culled_tris);
- return TRUE;
- }
-
/* Setup parameter interpolants:
*/
- lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing );
-
- tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
+ setup->setup.variant->jit_function( v0,
+ v1,
+ v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs) );
+
+ tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
tri->inputs.opaque = setup->fs.current.variant->opaque;
- tri->inputs.state = setup->fs.stored;
-
- for (i = 0; i < 3; i++) {
- struct lp_rast_plane *plane = &tri->plane[i];
+ if (0)
+ lp_dump_setup_coef(&setup->setup.variant->key,
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
+
+ plane = GET_PLANES(tri);
+
+#if defined(PIPE_ARCH_SSE)
+ {
+ __m128i vertx, verty;
+ __m128i shufx, shufy;
+ __m128i dcdx, dcdy, c;
+ __m128i unused;
+ __m128i dcdx_neg_mask;
+ __m128i dcdy_neg_mask;
+ __m128i dcdx_zero_mask;
+ __m128i top_left_flag;
+ __m128i c_inc_mask, c_inc;
+ __m128i eo, p0, p1, p2;
+ __m128i zero = _mm_setzero_si128();
+
+ vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */
+ verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */
+
+ shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
+ shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
+
+ dcdx = _mm_sub_epi32(verty, shufy);
+ dcdy = _mm_sub_epi32(vertx, shufx);
+
+ dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
+ dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
+ dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
+
+ top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0);
- /* half-edge constants, will be interated over the whole render
- * target.
+ c_inc_mask = _mm_or_si128(dcdx_neg_mask,
+ _mm_and_si128(dcdx_zero_mask,
+ _mm_xor_si128(dcdy_neg_mask,
+ top_left_flag)));
+
+ c_inc = _mm_srli_epi32(c_inc_mask, 31);
+
+ c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
+ mm_mullo_epi32(dcdy, verty));
+
+ c = _mm_add_epi32(c, c_inc);
+
+ /* Scale up to match c:
*/
- plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
-
- /* correct for top-left vs. bottom-left fill convention.
- *
- * note that we're overloading gl_rasterization_rules to mean
- * both (0.5,0.5) pixel centers *and* bottom-left filling
- * convention.
- *
- * GL actually has a top-left filling convention, but GL's
- * notion of "top" differs from gallium's...
- *
- * Also, sometimes (in FBO cases) GL will render upside down
- * to its usual method, in which case it will probably want
- * to use the opposite, top-left convention.
- */
- if (plane->dcdx < 0) {
- /* both fill conventions want this - adjust for left edges */
- plane->c++;
- }
- else if (plane->dcdx == 0) {
- if (setup->pixel_offset == 0) {
- /* correct for top-left fill convention:
- */
- if (plane->dcdy > 0) plane->c++;
+ dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
+ dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
+
+ /* Calculate trivial reject values:
+ */
+ eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
+ _mm_and_si128(dcdx_neg_mask, dcdx));
+
+ /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+
+ /* Pointless transpose which gets undone immediately in
+ * rasterization:
+ */
+ transpose4_epi32(&c, &dcdx, &dcdy, &eo,
+ &p0, &p1, &p2, &unused);
+
+ _mm_store_si128((__m128i *)&plane[0], p0);
+ _mm_store_si128((__m128i *)&plane[1], p1);
+ _mm_store_si128((__m128i *)&plane[2], p2);
+ }
+#else
+ {
+ int i;
+ plane[0].dcdy = x[0] - x[1];
+ plane[1].dcdy = x[1] - x[2];
+ plane[2].dcdy = x[2] - x[0];
+ plane[0].dcdx = y[0] - y[1];
+ plane[1].dcdx = y[1] - y[2];
+ plane[2].dcdx = y[2] - y[0];
+
+ for (i = 0; i < 3; i++) {
+ /* half-edge constants, will be iterated over the whole render
+ * target.
+ */
+ plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i];
+
+ /* correct for top-left vs. bottom-left fill convention.
+ *
+ * note that we're overloading gl_rasterization_rules to mean
+ * both (0.5,0.5) pixel centers *and* bottom-left filling
+ * convention.
+ *
+ * GL actually has a top-left filling convention, but GL's
+ * notion of "top" differs from gallium's...
+ *
+ * Also, sometimes (in FBO cases) GL will render upside down
+ * to its usual method, in which case it will probably want
+ * to use the opposite, top-left convention.
+ */
+ if (plane[i].dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane[i].c++;
}
- else {
- /* correct for bottom-left fill convention:
- */
- if (plane->dcdy < 0) plane->c++;
+ else if (plane[i].dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane[i].dcdy > 0) plane[i].c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane[i].dcdy < 0) plane[i].c++;
+ }
}
- }
- plane->dcdx *= FIXED_ONE;
- plane->dcdy *= FIXED_ONE;
+ plane[i].dcdx *= FIXED_ONE;
+ plane[i].dcdy *= FIXED_ONE;
- /* find trivial reject offsets for each edge for a single-pixel
- * sized block. These will be scaled up at each recursive level to
- * match the active blocksize. Scaling in this way works best if
- * the blocks are square.
- */
- plane->eo = 0;
- if (plane->dcdx < 0) plane->eo -= plane->dcdx;
- if (plane->dcdy > 0) plane->eo += plane->dcdy;
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane[i].eo = 0;
+ if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+ if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
+ }
+ }
+#endif
- /* Calculate trivial accept offsets from the above.
- */
- plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ if (0) {
+ debug_printf("p0: %08x/%08x/%08x/%08x\n",
+ plane[0].c,
+ plane[0].dcdx,
+ plane[0].dcdy,
+ plane[0].eo);
+
+ debug_printf("p1: %08x/%08x/%08x/%08x\n",
+ plane[1].c,
+ plane[1].dcdx,
+ plane[1].dcdy,
+ plane[1].eo);
+
+ debug_printf("p0: %08x/%08x/%08x/%08x\n",
+ plane[2].c,
+ plane[2].dcdx,
+ plane[2].dcdy,
+ plane[2].eo);
}
@@ -412,29 +501,25 @@ do_triangle_ccw(struct lp_setup_context *setup,
* these planes elsewhere.
*/
if (nr_planes == 7) {
- tri->plane[3].dcdx = -1;
- tri->plane[3].dcdy = 0;
- tri->plane[3].c = 1-bbox.x0;
- tri->plane[3].ei = 0;
- tri->plane[3].eo = 1;
-
- tri->plane[4].dcdx = 1;
- tri->plane[4].dcdy = 0;
- tri->plane[4].c = bbox.x1+1;
- tri->plane[4].ei = -1;
- tri->plane[4].eo = 0;
-
- tri->plane[5].dcdx = 0;
- tri->plane[5].dcdy = 1;
- tri->plane[5].c = 1-bbox.y0;
- tri->plane[5].ei = 0;
- tri->plane[5].eo = 1;
-
- tri->plane[6].dcdx = 0;
- tri->plane[6].dcdy = -1;
- tri->plane[6].c = bbox.y1+1;
- tri->plane[6].ei = -1;
- tri->plane[6].eo = 0;
+ plane[3].dcdx = -1;
+ plane[3].dcdy = 0;
+ plane[3].c = 1-bbox.x0;
+ plane[3].eo = 1;
+
+ plane[4].dcdx = 1;
+ plane[4].dcdy = 0;
+ plane[4].c = bbox.x1+1;
+ plane[4].eo = 0;
+
+ plane[5].dcdx = 0;
+ plane[5].dcdy = 1;
+ plane[5].c = 1-bbox.y0;
+ plane[5].eo = 1;
+
+ plane[6].dcdx = 0;
+ plane[6].dcdy = -1;
+ plane[6].c = bbox.y1+1;
+ plane[6].eo = 0;
}
return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
@@ -487,51 +572,58 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) |
(bbox->y1 - (bbox->y0 & ~3)));
- if (nr_planes == 3) {
- if (sz < 4 && dx < 64)
- {
- /* Triangle is contained in a single 4x4 stamp:
- */
- int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8);
-
- return lp_scene_bin_command( scene,
- bbox->x0/64, bbox->y0/64,
- LP_RAST_OP_TRIANGLE_3_4,
- lp_rast_arg_triangle(tri, mask) );
- }
-
- if (sz < 16 && dx < 64)
- {
- int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8);
-
- /* Triangle is contained in a single 16x16 block:
- */
- return lp_scene_bin_command( scene,
- bbox->x0/64, bbox->y0/64,
- LP_RAST_OP_TRIANGLE_3_16,
- lp_rast_arg_triangle(tri, mask) );
- }
- }
-
-
/* Determine which tile(s) intersect the triangle's bounding box
*/
if (dx < TILE_SIZE)
{
int ix0 = bbox->x0 / TILE_SIZE;
int iy0 = bbox->y0 / TILE_SIZE;
+ int px = bbox->x0 & 63 & ~3;
+ int py = bbox->y0 & 63 & ~3;
+ int mask = px | (py << 8);
assert(iy0 == bbox->y1 / TILE_SIZE &&
ix0 == bbox->x1 / TILE_SIZE);
+ if (nr_planes == 3) {
+ if (sz < 4)
+ {
+ /* Triangle is contained in a single 4x4 stamp:
+ */
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
+ setup->fs.stored,
+ LP_RAST_OP_TRIANGLE_3_4,
+ lp_rast_arg_triangle(tri, mask) );
+ }
+
+ if (sz < 16)
+ {
+ /* Triangle is contained in a single 16x16 block:
+ */
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
+ setup->fs.stored,
+ LP_RAST_OP_TRIANGLE_3_16,
+ lp_rast_arg_triangle(tri, mask) );
+ }
+ }
+ else if (nr_planes == 4 && sz < 16)
+ {
+ return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
+ setup->fs.stored,
+ LP_RAST_OP_TRIANGLE_4_16,
+ lp_rast_arg_triangle(tri, mask) );
+ }
+
+
/* Triangle is contained in a single tile:
*/
- return lp_scene_bin_command( scene, ix0, iy0,
- lp_rast_tri_tab[nr_planes],
- lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored,
+ lp_rast_tri_tab[nr_planes],
+ lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
}
else
{
+ struct lp_rast_plane *plane = GET_PLANES(tri);
int c[MAX_PLANES];
int ei[MAX_PLANES];
int eo[MAX_PLANES];
@@ -545,14 +637,17 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
int iy1 = bbox->y1 / TILE_SIZE;
for (i = 0; i < nr_planes; i++) {
- c[i] = (tri->plane[i].c +
- tri->plane[i].dcdy * iy0 * TILE_SIZE -
- tri->plane[i].dcdx * ix0 * TILE_SIZE);
-
- ei[i] = tri->plane[i].ei << TILE_ORDER;
- eo[i] = tri->plane[i].eo << TILE_ORDER;
- xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER);
- ystep[i] = tri->plane[i].dcdy << TILE_ORDER;
+ c[i] = (plane[i].c +
+ plane[i].dcdy * iy0 * TILE_SIZE -
+ plane[i].dcdx * ix0 * TILE_SIZE);
+
+ ei[i] = (plane[i].dcdy -
+ plane[i].dcdx -
+ plane[i].eo) << TILE_ORDER;
+
+ eo[i] = plane[i].eo << TILE_ORDER;
+ xstep[i] = -(plane[i].dcdx << TILE_ORDER);
+ ystep[i] = plane[i].dcdy << TILE_ORDER;
}
@@ -594,9 +689,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
*/
int count = util_bitcount(partial);
in = TRUE;
- if (!lp_scene_bin_command( scene, x, y,
- lp_rast_tri_tab[count],
- lp_rast_arg_triangle(tri, partial) ))
+
+ if (!lp_scene_bin_cmd_with_state( scene, x, y,
+ setup->fs.stored,
+ lp_rast_tri_tab[count],
+ lp_rast_arg_triangle(tri, partial) ))
goto fail;
LP_COUNT(nr_partially_covered_64);
@@ -635,40 +732,62 @@ fail:
/**
- * Draw triangle if it's CW, cull otherwise.
+ * Try to draw the triangle, restart the scene on failure.
*/
-static void triangle_cw( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4] )
+static void retry_triangle_ccw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front)
{
- if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ))
+ if (!do_triangle_ccw( setup, v0, v1, v2, front ))
{
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
- if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ))
- assert(0);
+ if (!do_triangle_ccw( setup, v0, v1, v2, front ))
+ return;
}
}
+static INLINE float
+calc_area(const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ float dx01 = v0[0][0] - v1[0][0];
+ float dy01 = v0[0][1] - v1[0][1];
+ float dx20 = v2[0][0] - v0[0][0];
+ float dy20 = v2[0][1] - v0[0][1];
+ return dx01 * dy20 - dx20 * dy01;
+}
+
/**
- * Draw triangle if it's CCW, cull otherwise.
+ * Draw triangle if it's CW, cull otherwise.
*/
-static void triangle_ccw( struct lp_setup_context *setup,
+static void triangle_cw( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
{
- if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ))
- {
- lp_setup_flush_and_restart(setup);
- if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ))
- assert(0);
- }
+ float area = calc_area(v0, v1, v2);
+
+ if (area < 0.0f)
+ retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface);
}
+static void triangle_ccw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ float area = calc_area(v0, v1, v2);
+
+ if (area > 0.0f)
+ retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface);
+}
/**
* Draw triangle whether it's CW or CCW.
@@ -678,18 +797,12 @@ static void triangle_both( struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4] )
{
- /* edge vectors e = v0 - v2, f = v1 - v2 */
- const float ex = v0[0][0] - v2[0][0];
- const float ey = v0[0][1] - v2[0][1];
- const float fx = v1[0][0] - v2[0][0];
- const float fy = v1[0][1] - v2[0][1];
-
- /* det = cross(e,f).z */
- const float det = ex * fy - ey * fx;
- if (det < 0.0f)
- triangle_ccw( setup, v0, v1, v2 );
- else if (det > 0.0f)
- triangle_cw( setup, v0, v1, v2 );
+ float area = calc_area(v0, v1, v2);
+
+ if (area > 0.0f)
+ retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
+ else if (area < 0.0f)
+ retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 6308561f24..9c1f0fe793 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -141,7 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr)
const boolean flatshade_first = setup->flatshade_first;
unsigned i;
- lp_setup_update_state(setup, TRUE);
+ if (!lp_setup_update_state(setup, TRUE))
+ return;
switch (setup->prim) {
case PIPE_PRIM_POINTS:
@@ -338,7 +339,8 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
const boolean flatshade_first = setup->flatshade_first;
unsigned i;
- lp_setup_update_state(setup, TRUE);
+ if (!lp_setup_update_state(setup, TRUE))
+ return;
switch (setup->prim) {
case PIPE_PRIM_POINTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 86313e1c48..7893e9cdc0 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -97,6 +97,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *,
void
llvmpipe_update_fs(struct llvmpipe_context *lp);
+void
+llvmpipe_update_setup(struct llvmpipe_context *lp);
+
void
llvmpipe_update_derived(struct llvmpipe_context *llvmpipe);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index d2be22d7fc..0f5f7369e0 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -50,12 +50,13 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
{
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
struct vertex_info *vinfo = &llvmpipe->vertex_info;
- struct lp_shader_input *inputs = llvmpipe->inputs;
unsigned vs_index;
uint i;
/*
- * Match FS inputs against VS outputs, emitting the necessary attributes.
+ * Match FS inputs against VS outputs, emitting the necessary
+ * attributes. Could cache these structs and look them up with a
+ * combination of fragment shader, vertex shader ids.
*/
vinfo->num_attribs = 0;
@@ -66,72 +67,18 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
- for (i = 0; i < lpfs->info.num_inputs; i++) {
+ for (i = 0; i < lpfs->info.base.num_inputs; i++) {
/*
* Search for each input in current vs output:
*/
vs_index = draw_find_shader_output(llvmpipe->draw,
- lpfs->info.input_semantic_name[i],
- lpfs->info.input_semantic_index[i]);
- if (vs_index < 0) {
- /*
- * This can happen with sprite coordinates - the vertex
- * shader doesn't need to provide an output as we generate
- * them internally. However, lets keep pretending that there
- * is something there to not confuse other code.
- */
- vs_index = 0;
- }
-
- /* This can be pre-computed, except for flatshade:
- */
- inputs[i].usage_mask = lpfs->info.input_usage_mask[i];
-
- switch (lpfs->info.input_interpolate[i]) {
- case TGSI_INTERPOLATE_CONSTANT:
- inputs[i].interp = LP_INTERP_CONSTANT;
- break;
- case TGSI_INTERPOLATE_LINEAR:
- inputs[i].interp = LP_INTERP_LINEAR;
- break;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- inputs[i].interp = LP_INTERP_PERSPECTIVE;
- break;
- default:
- assert(0);
- break;
- }
-
- switch (lpfs->info.input_semantic_name[i]) {
- case TGSI_SEMANTIC_FACE:
- inputs[i].interp = LP_INTERP_FACING;
- break;
- case TGSI_SEMANTIC_POSITION:
- /* Position was already emitted above
- */
- inputs[i].interp = LP_INTERP_POSITION;
- inputs[i].src_index = 0;
- continue;
- case TGSI_SEMANTIC_COLOR:
- /* Colors are linearly inputs[i].interpolated in the fragment shader
- * even when flatshading is active. This just tells the
- * setup module to use coefficients with ddx==0 and
- * ddy==0.
- */
- if (llvmpipe->rasterizer->flatshade)
- inputs[i].interp = LP_INTERP_CONSTANT;
- break;
-
- default:
- break;
- }
+ lpfs->info.base.input_semantic_name[i],
+ lpfs->info.base.input_semantic_index[i]);
/*
* Emit the requested fs attribute for all but position.
*/
-
- inputs[i].src_index = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
}
@@ -145,15 +92,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
}
- llvmpipe->num_inputs = lpfs->info.num_inputs;
-
draw_compute_vertex_size(vinfo);
-
lp_setup_set_vertex_info(llvmpipe->setup, vinfo);
-
- lp_setup_set_fs_inputs(llvmpipe->setup,
- inputs,
- lpfs->info.num_inputs);
}
@@ -190,6 +130,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
LP_NEW_QUERY))
llvmpipe_update_fs( llvmpipe );
+ if (llvmpipe->dirty & (LP_NEW_FS |
+ LP_NEW_RASTERIZER))
+ llvmpipe_update_setup( llvmpipe );
+
if (llvmpipe->dirty & LP_NEW_BLEND_COLOR)
lp_setup_set_blend_color(llvmpipe->setup,
&llvmpipe->blend_color);
@@ -208,11 +152,14 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
lp_setup_set_fs_constants(llvmpipe->setup,
llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]);
- if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW |
- LP_NEW_SAMPLER))
+ if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW))
lp_setup_set_fragment_sampler_views(llvmpipe->setup,
llvmpipe->num_fragment_sampler_views,
- llvmpipe->fragment_sampler_views,
+ llvmpipe->fragment_sampler_views);
+
+ if (llvmpipe->dirty & (LP_NEW_SAMPLER))
+ lp_setup_set_fragment_sampler_state(llvmpipe->setup,
+ llvmpipe->num_samplers,
llvmpipe->sampler);
llvmpipe->dirty = 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index f0a15e11b9..9fbedac165 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -99,74 +99,12 @@
#include <llvm-c/Analysis.h>
+#include <llvm-c/BitWriter.h>
static unsigned fs_no = 0;
-/**
- * Generate the depth /stencil test code.
- */
-static void
-generate_depth_stencil(LLVMBuilderRef builder,
- const struct lp_fragment_shader_variant_key *key,
- struct lp_type src_type,
- struct lp_build_mask_context *mask,
- LLVMValueRef stencil_refs[2],
- LLVMValueRef src,
- LLVMValueRef dst_ptr,
- LLVMValueRef facing,
- LLVMValueRef counter)
-{
- const struct util_format_description *format_desc;
- struct lp_type dst_type;
-
- if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled)
- return;
-
- format_desc = util_format_description(key->zsbuf_format);
- assert(format_desc);
-
- /*
- * Depths are expected to be between 0 and 1, even if they are stored in
- * floats. Setting these bits here will ensure that the lp_build_conv() call
- * below won't try to unnecessarily clamp the incoming values.
- */
- if(src_type.floating) {
- src_type.sign = FALSE;
- src_type.norm = TRUE;
- }
- else {
- assert(!src_type.sign);
- assert(src_type.norm);
- }
-
- /* Pick the depth type. */
- dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
-
- /* FIXME: Cope with a depth test type with a different bit width. */
- assert(dst_type.width == src_type.width);
- assert(dst_type.length == src_type.length);
-
- /* Convert fragment Z from float to integer */
- lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
-
- dst_ptr = LLVMBuildBitCast(builder,
- dst_ptr,
- LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
- lp_build_depth_stencil_test(builder,
- &key->depth,
- key->stencil,
- dst_type,
- format_desc,
- mask,
- stencil_refs,
- src,
- dst_ptr,
- facing,
- counter);
-}
-
/**
* Expand the relevent bits of mask_input to a 4-dword mask for the
@@ -248,6 +186,26 @@ generate_quad_mask(LLVMBuilderRef builder,
}
+#define EARLY_DEPTH_TEST 0x1
+#define LATE_DEPTH_TEST 0x2
+#define EARLY_DEPTH_WRITE 0x4
+#define LATE_DEPTH_WRITE 0x8
+
+static int
+find_output_by_semantic( const struct tgsi_shader_info *info,
+ unsigned semantic,
+ unsigned index )
+{
+ int i;
+
+ for (i = 0; i < info->num_outputs; i++)
+ if (info->output_semantic_name[i] == semantic &&
+ info->output_semantic_index[i] == index)
+ return i;
+
+ return -1;
+}
+
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
@@ -255,14 +213,13 @@ generate_quad_mask(LLVMBuilderRef builder,
* \param partial_mask if 1, do mask_input testing
*/
static void
-generate_fs(struct llvmpipe_context *lp,
- struct lp_fragment_shader *shader,
+generate_fs(struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key,
LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef context_ptr,
unsigned i,
- const struct lp_build_interp_soa_context *interp,
+ struct lp_build_interp_soa_context *interp,
struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef (*color)[4],
@@ -272,18 +229,52 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef mask_input,
LLVMValueRef counter)
{
+ const struct util_format_description *zs_format_desc = NULL;
const struct tgsi_token *tokens = shader->base.tokens;
LLVMTypeRef vec_type;
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
- LLVMValueRef z = interp->pos[2];
+ LLVMValueRef z;
+ LLVMValueRef zs_value = NULL;
LLVMValueRef stencil_refs[2];
- struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
- boolean early_depth_stencil_test;
+ boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
+ shader->info.base.num_inputs < 3 &&
+ shader->info.base.num_instructions < 8);
unsigned attrib;
unsigned chan;
unsigned cbuf;
+ unsigned depth_mode;
+
+ if (key->depth.enabled ||
+ key->stencil[0].enabled ||
+ key->stencil[1].enabled) {
+
+ zs_format_desc = util_format_description(key->zsbuf_format);
+ assert(zs_format_desc);
+
+ if (!shader->info.base.writes_z) {
+ if (key->alpha.enabled || shader->info.base.uses_kill)
+ /* With alpha test and kill, can do the depth test early
+ * and hopefully eliminate some quads. But need to do a
+ * special deferred depth write once the final mask value
+ * is known.
+ */
+ depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+ else
+ depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+ }
+ else {
+ depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+ }
+
+ if (!(key->depth.enabled && key->depth.writemask) &&
+ !(key->stencil[0].enabled && key->stencil[0].writemask))
+ depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
+ }
+ else {
+ depth_mode = 0;
+ }
assert(i < 4);
@@ -294,20 +285,14 @@ generate_fs(struct llvmpipe_context *lp,
consts_ptr = lp_jit_context_constants(builder, context_ptr);
- flow = lp_build_flow_create(builder);
-
memset(outputs, 0, sizeof outputs);
- lp_build_flow_scope_begin(flow);
-
/* Declare the color and z variables */
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- color[cbuf][chan] = LLVMGetUndef(vec_type);
- lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
+ color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color");
}
}
- lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
if (partial_mask) {
@@ -319,74 +304,126 @@ generate_fs(struct llvmpipe_context *lp,
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
- lp_build_mask_begin(&mask, flow, type, *pmask);
-
- early_depth_stencil_test =
- (key->depth.enabled || key->stencil[0].enabled) &&
- !key->alpha.enabled &&
- !shader->info.uses_kill &&
- !shader->info.writes_z;
-
- if (early_depth_stencil_test)
- generate_depth_stencil(builder, key,
- type, &mask,
- stencil_refs, z, depth_ptr, facing, counter);
+ lp_build_mask_begin(&mask, builder, type, *pmask);
+
+ if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
+ lp_build_mask_check(&mask);
+
+ lp_build_interp_soa_update_pos(interp, i);
+ z = interp->pos[2];
+
+ if (depth_mode & EARLY_DEPTH_TEST) {
+ lp_build_depth_stencil_test(builder,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr, facing,
+ &zs_value,
+ !simple_shader);
+
+ if (depth_mode & EARLY_DEPTH_WRITE) {
+ lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+ }
+ }
+ lp_build_interp_soa_update_inputs(interp, i);
+
+ /* Build the actual shader */
lp_build_tgsi_soa(builder, tokens, type, &mask,
consts_ptr, interp->pos, interp->inputs,
- outputs, sampler, &shader->info);
+ outputs, sampler, &shader->info.base);
- /* loop over fragment shader outputs/results */
- for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
- for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- if(outputs[attrib][chan]) {
- LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
- lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]);
-
- switch (shader->info.output_semantic_name[attrib]) {
- case TGSI_SEMANTIC_COLOR:
- {
- unsigned cbuf = shader->info.output_semantic_index[attrib];
-
- lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
-
- /* Alpha test */
- /* XXX: should only test the final assignment to alpha */
- if (cbuf == 0 && chan == 3 && key->alpha.enabled) {
- LLVMValueRef alpha = out;
- LLVMValueRef alpha_ref_value;
- alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
- alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
- lp_build_alpha_test(builder, key->alpha.func, type,
- &mask, alpha, alpha_ref_value);
- }
-
- color[cbuf][chan] = out;
- break;
- }
-
- case TGSI_SEMANTIC_POSITION:
- if(chan == 2)
- z = out;
- break;
- }
- }
+
+ /* Alpha test */
+ if (key->alpha.enabled) {
+ int color0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_COLOR,
+ 0);
+
+ if (color0 != -1 && outputs[color0][3]) {
+ LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+ LLVMValueRef alpha_ref_value;
+
+ alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
+ alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
+
+ lp_build_alpha_test(builder, key->alpha.func, type,
+ &mask, alpha, alpha_ref_value,
+ (depth_mode & LATE_DEPTH_TEST) != 0);
}
}
- if (!early_depth_stencil_test)
- generate_depth_stencil(builder, key,
- type, &mask,
- stencil_refs, z, depth_ptr, facing, counter);
+ /* Late Z test */
+ if (depth_mode & LATE_DEPTH_TEST) {
+ int pos0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+
+ if (pos0 != -1 && outputs[pos0][2]) {
+ z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
+ }
- lp_build_mask_end(&mask);
+ lp_build_depth_stencil_test(builder,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr, facing,
+ &zs_value,
+ !simple_shader);
+ /* Late Z write */
+ if (depth_mode & LATE_DEPTH_WRITE) {
+ lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+ }
+ }
+ else if ((depth_mode & EARLY_DEPTH_TEST) &&
+ (depth_mode & LATE_DEPTH_WRITE))
+ {
+ /* Need to apply a reduced mask to the depth write. Reload the
+ * depth value, update from zs_value with the new mask value and
+ * write that out.
+ */
+ lp_build_deferred_depth_write(builder,
+ type,
+ zs_format_desc,
+ &mask,
+ depth_ptr,
+ zs_value);
+ }
- lp_build_flow_scope_end(flow);
- lp_build_flow_destroy(flow);
+ /* Color write */
+ for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+ {
+ if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
+ shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
+ {
+ unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ if(outputs[attrib][chan]) {
+ /* XXX: just initialize outputs to point at colors[] and
+ * skip this.
+ */
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+ lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
+ LLVMBuildStore(builder, out, color[cbuf][chan]);
+ }
+ }
+ }
+ }
- *pmask = mask.value;
+ if (counter)
+ lp_build_occlusion_count(builder, type,
+ lp_build_mask_value(&mask), counter);
+ *pmask = lp_build_mask_end(&mask);
}
@@ -407,10 +444,10 @@ generate_blend(const struct pipe_blend_state *blend,
LLVMValueRef context_ptr,
LLVMValueRef mask,
LLVMValueRef *src,
- LLVMValueRef dst_ptr)
+ LLVMValueRef dst_ptr,
+ boolean do_branch)
{
struct lp_build_context bld;
- struct lp_build_flow_context *flow;
struct lp_build_mask_context mask_ctx;
LLVMTypeRef vec_type;
LLVMValueRef const_ptr;
@@ -421,10 +458,9 @@ generate_blend(const struct pipe_blend_state *blend,
lp_build_context_init(&bld, builder, type);
- flow = lp_build_flow_create(builder);
-
- /* we'll use this mask context to skip blending if all pixels are dead */
- lp_build_mask_begin(&mask_ctx, flow, type, mask);
+ lp_build_mask_begin(&mask_ctx, builder, type, mask);
+ if (do_branch)
+ lp_build_mask_check(&mask_ctx);
vec_type = lp_build_vec_type(type);
@@ -457,7 +493,6 @@ generate_blend(const struct pipe_blend_state *blend,
}
lp_build_mask_end(&mask_ctx);
- lp_build_flow_destroy(flow);
}
@@ -468,13 +503,13 @@ generate_blend(const struct pipe_blend_state *blend,
* 2x2 pixels.
*/
static void
-generate_fragment(struct llvmpipe_context *lp,
+generate_fragment(struct llvmpipe_screen *screen,
struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant *variant,
unsigned partial_mask)
{
- struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
char func_name[256];
struct lp_type fs_type;
struct lp_type blend_type;
@@ -502,11 +537,24 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef blend_mask;
LLVMValueRef function;
LLVMValueRef facing;
+ const struct util_format_description *zs_format_desc;
unsigned num_fs;
unsigned i;
unsigned chan;
unsigned cbuf;
+ /* Adjust color input interpolation according to flatshade state:
+ */
+ memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]);
+ for (i = 0; i < shader->info.base.num_inputs; i++) {
+ if (inputs[i].interp == LP_INTERP_COLOR) {
+ if (key->flatshade)
+ inputs[i].interp = LP_INTERP_CONSTANT;
+ else
+ inputs[i].interp = LP_INTERP_LINEAR;
+ }
+ }
+
/* TODO: actually pick these based on the fs and color buffer
* characteristics. */
@@ -542,12 +590,12 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[0] = screen->context_ptr_type; /* context */
arg_types[1] = LLVMInt32Type(); /* x */
arg_types[2] = LLVMInt32Type(); /* y */
- arg_types[3] = LLVMFloatType(); /* facing */
+ arg_types[3] = LLVMInt32Type(); /* facing */
arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */
arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
- arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+ arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */
arg_types[9] = LLVMInt32Type(); /* mask_input */
arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
@@ -558,7 +606,6 @@ generate_fragment(struct llvmpipe_context *lp,
variant->function[partial_mask] = function;
-
/* XXX: need to propagate noalias down into color param now we are
* passing a pointer-to-pointer?
*/
@@ -606,8 +653,8 @@ generate_fragment(struct llvmpipe_context *lp,
* already included in the shader key.
*/
lp_build_interp_soa_init(&interp,
- lp->num_inputs,
- lp->inputs,
+ shader->info.base.num_inputs,
+ inputs,
builder, fs_type,
a0_ptr, dadx_ptr, dady_ptr,
x, y);
@@ -616,17 +663,18 @@ generate_fragment(struct llvmpipe_context *lp,
sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
/* loop over quads in the block */
+ zs_format_desc = util_format_description(key->zsbuf_format);
+
for(i = 0; i < num_fs; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(),
+ i*fs_type.length*zs_format_desc->block.bits/8,
+ 0);
LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
LLVMValueRef depth_ptr_i;
- if(i != 0)
- lp_build_interp_soa_update(&interp, i);
+ depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
- depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
-
- generate_fs(lp, shader, key,
+ generate_fs(shader, key,
builder,
fs_type,
context_ptr,
@@ -660,9 +708,18 @@ generate_fragment(struct llvmpipe_context *lp,
* Convert the fs's output color and mask to fit to the blending type.
*/
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
+
+ for (i = 0; i < num_fs; i++) {
+ fs_color_vals[i] =
+ LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals");
+ }
+
lp_build_conv(builder, fs_type, blend_type,
- fs_out_color[cbuf][chan], num_fs,
+ fs_color_vals,
+ num_fs,
&blend_in_color[chan], 1);
+
lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
}
@@ -685,14 +742,23 @@ generate_fragment(struct llvmpipe_context *lp,
/*
* Blending.
*/
- generate_blend(&key->blend,
- rt,
- builder,
- blend_type,
- context_ptr,
- blend_mask,
- blend_in_color,
- color_ptr);
+ {
+ /* Could the 4x4 have been killed?
+ */
+ boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) &&
+ !key->alpha.enabled &&
+ !shader->info.base.uses_kill);
+
+ generate_blend(&key->blend,
+ rt,
+ builder,
+ blend_type,
+ context_ptr,
+ blend_mask,
+ blend_in_color,
+ color_ptr,
+ do_branch);
+ }
}
#ifdef PIPE_ARCH_X86
@@ -717,12 +783,17 @@ generate_fragment(struct llvmpipe_context *lp,
/* Apply optimizations to LLVM IR */
LLVMRunFunctionPassManager(screen->pass, function);
- if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) {
/* Print the LLVM IR to stderr */
lp_debug_dump_value(function);
debug_printf("\n");
}
+ /* Dump byte code to a file */
+ if (0) {
+ LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc");
+ }
+
/*
* Translate the LLVM IR into machine code.
*/
@@ -731,7 +802,7 @@ generate_fragment(struct llvmpipe_context *lp,
variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
- if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+ if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) {
lp_disassemble(f);
}
lp_func_delete_body(function);
@@ -746,6 +817,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
debug_printf("fs variant %p:\n", (void *) key);
+ if (key->flatshade) {
+ debug_printf("flatshade = 1\n");
+ }
for (i = 0; i < key->nr_cbufs; ++i) {
debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
}
@@ -770,6 +844,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
}
+ if (key->occlusion_count) {
+ debug_printf("occlusion_count = 1\n");
+ }
+
if (key->blend.logicop_enable) {
debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
}
@@ -782,31 +860,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
}
debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
- for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
- if (key->sampler[i].format) {
- debug_printf("sampler[%u] = \n", i);
- debug_printf(" .format = %s\n",
- util_format_name(key->sampler[i].format));
- debug_printf(" .target = %s\n",
- util_dump_tex_target(key->sampler[i].target, TRUE));
- debug_printf(" .pot = %u %u %u\n",
- key->sampler[i].pot_width,
- key->sampler[i].pot_height,
- key->sampler[i].pot_depth);
- debug_printf(" .wrap = %s %s %s\n",
- util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
- util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
- util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
- debug_printf(" .min_img_filter = %s\n",
- util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
- debug_printf(" .min_mip_filter = %s\n",
- util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
- debug_printf(" .mag_img_filter = %s\n",
- util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
- if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
- debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
- debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
- }
+ for (i = 0; i < key->nr_samplers; ++i) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ util_format_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ util_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+ debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ debug_printf(" .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal);
+ debug_printf(" .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero);
+ debug_printf(" .apply_min_lod = %u\n", key->sampler[i].apply_min_lod);
+ debug_printf(" .apply_max_lod = %u\n", key->sampler[i].apply_max_lod);
}
}
@@ -823,7 +903,7 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
}
static struct lp_fragment_shader_variant *
-generate_variant(struct llvmpipe_context *lp,
+generate_variant(struct llvmpipe_screen *screen,
struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key)
{
@@ -861,7 +941,7 @@ generate_variant(struct llvmpipe_context *lp,
!key->stencil[0].enabled &&
!key->alpha.enabled &&
!key->depth.enabled &&
- !shader->info.uses_kill
+ !shader->info.base.uses_kill
? TRUE : FALSE;
@@ -869,11 +949,11 @@ generate_variant(struct llvmpipe_context *lp,
lp_debug_fs_variant(variant);
}
- generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
+ generate_fragment(screen, shader, variant, RAST_EDGE_TEST);
if (variant->opaque) {
/* Specialized shader, which doesn't need to read the color buffer. */
- generate_fragment(lp, shader, variant, RAST_WHOLE);
+ generate_fragment(screen, shader, variant, RAST_WHOLE);
} else {
variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
}
@@ -889,6 +969,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
struct lp_fragment_shader *shader;
int nr_samplers;
+ int i;
shader = CALLOC_STRUCT(lp_fragment_shader);
if (!shader)
@@ -898,7 +979,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
make_empty_list(&shader->variants);
/* get/save the summary info for this shader */
- tgsi_scan_shader(templ->tokens, &shader->info);
+ lp_build_tgsi_info(templ->tokens, &shader->info);
/* we need to keep a local copy of the tokens */
shader->base.tokens = tgsi_dup_tokens(templ->tokens);
@@ -910,18 +991,58 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
return NULL;
}
- nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+ nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
sampler[nr_samplers]);
+ for (i = 0; i < shader->info.base.num_inputs; i++) {
+ shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
+
+ switch (shader->info.base.input_interpolate[i]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ shader->inputs[i].interp = LP_INTERP_CONSTANT;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ shader->inputs[i].interp = LP_INTERP_LINEAR;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (shader->info.base.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ /* Colors may be either linearly or constant interpolated in
+ * the fragment shader, but that information isn't available
+ * here. Mark color inputs and fix them up later.
+ */
+ shader->inputs[i].interp = LP_INTERP_COLOR;
+ break;
+ case TGSI_SEMANTIC_FACE:
+ shader->inputs[i].interp = LP_INTERP_FACING;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ /* Position was already emitted above
+ */
+ shader->inputs[i].interp = LP_INTERP_POSITION;
+ shader->inputs[i].src_index = 0;
+ continue;
+ }
+
+ shader->inputs[i].src_index = i+1;
+ }
+
if (LP_DEBUG & DEBUG_TGSI) {
unsigned attrib;
debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader);
tgsi_dump(templ->tokens, 0);
debug_printf("usage masks:\n");
- for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) {
- unsigned usage_mask = shader->info.input_usage_mask[attrib];
+ for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) {
+ unsigned usage_mask = shader->info.base.input_usage_mask[attrib];
debug_printf(" IN[%u].%s%s%s%s\n",
attrib,
usage_mask & TGSI_WRITEMASK_X ? "x" : "",
@@ -1150,10 +1271,10 @@ make_variant_key(struct llvmpipe_context *lp,
/* This value will be the same for all the variants of a given shader:
*/
- key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+ key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
for(i = 0; i < key->nr_samplers; ++i) {
- if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
lp_sampler_static_state(&key->sampler[i],
lp->fragment_sampler_views[i],
lp->sampler[i]);
@@ -1168,6 +1289,7 @@ make_variant_key(struct llvmpipe_context *lp,
void
llvmpipe_update_fs(struct llvmpipe_context *lp)
{
+ struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
struct lp_fragment_shader *shader = lp->fs;
struct lp_fragment_shader_variant_key key;
struct lp_fragment_shader_variant *variant = NULL;
@@ -1208,7 +1330,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
}
t0 = os_time_get();
- variant = generate_variant(lp, shader, &key);
+ variant = generate_variant(screen, shader, &key);
t1 = os_time_get();
dt = t1 - t0;
@@ -1228,6 +1350,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
+
+
+
+
void
llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 4999b8dca1..7d58c4936c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -34,6 +34,8 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */
#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */
+#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */
+#include "lp_bld_interp.h" /* for struct lp_shader_input */
struct tgsi_token;
@@ -96,7 +98,7 @@ struct lp_fragment_shader
{
struct pipe_shader_state base;
- struct tgsi_shader_info info;
+ struct lp_tgsi_info info;
struct lp_fs_variant_list_item variants;
@@ -107,6 +109,9 @@ struct lp_fragment_shader
unsigned no;
unsigned variants_created;
unsigned variants_cached;
+
+ /** Fragment shader input interpolation info */
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
};
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 17a4a0ed02..1dd866195d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -246,9 +246,9 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
struct pipe_sampler_view **views)
{
unsigned i;
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
- const void *data[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
+ const void *data[PIPE_MAX_TEXTURE_LEVELS];
assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
if (!num)
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
new file mode 100644
index 0000000000..2c8b8b9a92
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -0,0 +1,759 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "os/os_time.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_intr.h"
+#include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */
+
+#include "lp_perf.h"
+#include "lp_debug.h"
+#include "lp_flush.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state.h"
+#include "lp_state_fs.h"
+#include "lp_state_setup.h"
+
+
+
+/* currently organized to interpolate full float[4] attributes even
+ * when some elements are unused. Later, can pack vertex data more
+ * closely.
+ */
+
+
+/* Values shared by the coefficient emitters while the body of a
+ * generated triangle setup function is being built.
+ */
+struct lp_setup_args
+{
+ /* Function arguments:
+ * (the parameters of the generated function, in declaration order)
+ */
+ LLVMValueRef v0;
+ LLVMValueRef v1;
+ LLVMValueRef v2;
+ LLVMValueRef facing; /* boolean */
+ LLVMValueRef a0;
+ LLVMValueRef dadx;
+ LLVMValueRef dady;
+
+ /* Derived:
+ * (filled in by init_args(), each splatted to a 4-float vector)
+ */
+ LLVMValueRef x0_center;
+ LLVMValueRef y0_center;
+ LLVMValueRef dy20_ooa;
+ LLVMValueRef dy01_ooa;
+ LLVMValueRef dx20_ooa;
+ LLVMValueRef dx01_ooa;
+};
+
+/* LLVM type of a vector of four floats.
+ */
+static LLVMTypeRef
+type4f(void)
+{
+   LLVMTypeRef elem_type = LLVMFloatType();
+
+   return LLVMVectorType(elem_type, 4);
+}
+
+
+/* Equivalent of _mm_setr_ps(a,b,c,d): build a 4-float vector whose
+ * elements 0..3 are a, b, c, d.  Only the final insert carries \p name.
+ */
+static LLVMValueRef
+vec4f(LLVMBuilderRef bld,
+      LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
+      const char *name)
+{
+   LLVMValueRef lanes[4];
+   LLVMValueRef vec = LLVMGetUndef(type4f());
+   unsigned lane;
+
+   lanes[0] = a;
+   lanes[1] = b;
+   lanes[2] = c;
+   lanes[3] = d;
+
+   for (lane = 0; lane < 4; lane++) {
+      LLVMValueRef lane_idx = LLVMConstInt(LLVMInt32Type(), lane, 0);
+
+      vec = LLVMBuildInsertElement(bld, vec, lanes[lane], lane_idx,
+                                   lane == 3 ? name : "");
+   }
+
+   return vec;
+}
+
+/* Equivalent of _mm_set1_ps(a): replicate scalar \p a into all four
+ * elements of a float vector.
+ */
+static LLVMValueRef
+vec4f_from_scalar(LLVMBuilderRef bld,
+                  LLVMValueRef a,
+                  const char *name)
+{
+   /* Same insert sequence as vec4f() with every lane set to a. */
+   return vec4f(bld, a, a, a, a, name);
+}
+
+/* Store one attribute's plane coefficients into element \p slot of the
+ * args->a0, args->dadx and args->dady output arrays.
+ */
+static void
+store_coef(LLVMBuilderRef builder,
+           struct lp_setup_args *args,
+           unsigned slot,
+           LLVMValueRef a0,
+           LLVMValueRef dadx,
+           LLVMValueRef dady)
+{
+   LLVMValueRef slot_idx = LLVMConstInt(LLVMInt32Type(), slot, 0);
+   LLVMValueRef dst;
+
+   dst = LLVMBuildGEP(builder, args->a0, &slot_idx, 1, "");
+   LLVMBuildStore(builder, a0, dst);
+
+   dst = LLVMBuildGEP(builder, args->dadx, &slot_idx, 1, "");
+   LLVMBuildStore(builder, dadx, dst);
+
+   dst = LLVMBuildGEP(builder, args->dady, &slot_idx, 1, "");
+   LLVMBuildStore(builder, dady, dst);
+}
+
+
+
+/* Emit coefficients for a constant attribute: a0 is attribute \p attr
+ * loaded from \p vert, dadx and dady are zero vectors.
+ */
+static void
+emit_constant_coef4(LLVMBuilderRef builder,
+                    struct lp_setup_args *args,
+                    unsigned slot,
+                    LLVMValueRef vert,
+                    unsigned attr)
+{
+   LLVMValueRef no_slope =
+      vec4f_from_scalar(builder, LLVMConstReal(LLVMFloatType(), 0.0), "zero");
+   LLVMValueRef attr_idx = LLVMConstInt(LLVMInt32Type(), attr, 0);
+   LLVMValueRef src_ptr = LLVMBuildGEP(builder, vert, &attr_idx, 1, "attr_ptr");
+   LLVMValueRef value = LLVMBuildLoad(builder, src_ptr, "vert_attr");
+
+   store_coef(builder, args, slot, value, no_slope, no_slope);
+}
+
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ *
+ * Element 0 of a0 is args->facing converted from signed integer to
+ * float; the other elements of a0, and all of dadx and dady, are zero.
+ */
+static void
+emit_facing_coef(LLVMBuilderRef builder,
+                 struct lp_setup_args *args,
+                 unsigned slot)
+{
+   LLVMValueRef facing_f =
+      LLVMBuildSIToFP(builder, args->facing, LLVMFloatType(), "");
+   LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+   LLVMValueRef facing_vec = vec4f(builder, facing_f, zero, zero, zero, "facing");
+   LLVMValueRef no_slope = vec4f_from_scalar(builder, zero, "zero");
+
+   store_coef(builder, args, slot, facing_vec, no_slope, no_slope);
+}
+
+
+/* Load scalar element \p elem of attribute \p attr from the vertex
+ * pointed to by \p vert; the loaded value is given \p name.
+ */
+static LLVMValueRef
+vert_attrib(LLVMBuilderRef b,
+            LLVMValueRef vert,
+            int attr,
+            int elem,
+            const char *name)
+{
+   LLVMValueRef indices[2];
+   LLVMValueRef elem_ptr;
+
+   indices[0] = LLVMConstInt(LLVMInt32Type(), attr, 0);
+   indices[1] = LLVMConstInt(LLVMInt32Type(), elem, 0);
+
+   elem_ptr = LLVMBuildGEP(b, vert, indices, 2, "");
+
+   return LLVMBuildLoad(b, elem_ptr, name);
+}
+
+
+
+/* Compute and store the plane coefficients (a0, dadx, dady) of one
+ * 4-float attribute from its values a0, a1, a2 at the three vertices,
+ * using the pre-scaled edge deltas and vertex-0 position held in args.
+ */
+static void
+emit_coef4(LLVMBuilderRef b,
+           struct lp_setup_args *args,
+           unsigned slot,
+           LLVMValueRef a0,
+           LLVMValueRef a1,
+           LLVMValueRef a2)
+{
+   /* XXX: using fsub, fmul on vector types -- does this work??
+    */
+   LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
+   LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
+
+   /* dadx = da01 * dy20_ooa - da20 * dy01_ooa
+    */
+   LLVMValueRef dadx_lhs = LLVMBuildFMul(b, da01, args->dy20_ooa, "da01_dy20_ooa");
+   LLVMValueRef dadx_rhs = LLVMBuildFMul(b, da20, args->dy01_ooa, "da20_dy01_ooa");
+   LLVMValueRef dadx = LLVMBuildFSub(b, dadx_lhs, dadx_rhs, "dadx");
+
+   /* dady = da20 * dx01_ooa - da01 * dx20_ooa
+    */
+   LLVMValueRef dady_rhs = LLVMBuildFMul(b, da01, args->dx20_ooa, "da01_dx20_ooa");
+   LLVMValueRef dady_lhs = LLVMBuildFMul(b, da20, args->dx01_ooa, "da20_dx01_ooa");
+   LLVMValueRef dady = LLVMBuildFSub(b, dady_lhs, dady_rhs, "dady");
+
+   /* Attribute value at the origin:
+    * a0 - (dadx * x0_center + dady * y0_center)
+    */
+   LLVMValueRef x_term = LLVMBuildFMul(b, dadx, args->x0_center, "dadx_x0");
+   LLVMValueRef y_term = LLVMBuildFMul(b, dady, args->y0_center, "dady_y0");
+   LLVMValueRef offset = LLVMBuildFAdd(b, x_term, y_term, "attr_v0");
+   LLVMValueRef origin = LLVMBuildFSub(b, a0, offset, "attr_0");
+
+   store_coef(b, args, slot, origin, dadx, dady);
+}
+
+
+/* Emit coefficients for a linearly interpolated attribute: load
+ * attribute \p vert_attr from each of the three vertices and hand the
+ * values to emit_coef4().
+ */
+static void
+emit_linear_coef(LLVMBuilderRef b,
+                 struct lp_setup_args *args,
+                 unsigned slot,
+                 unsigned vert_attr)
+{
+   LLVMValueRef attr_idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
+
+   LLVMValueRef p0 = LLVMBuildGEP(b, args->v0, &attr_idx, 1, "");
+   LLVMValueRef val0 = LLVMBuildLoad(b, p0, "v0a");
+   LLVMValueRef p1 = LLVMBuildGEP(b, args->v1, &attr_idx, 1, "");
+   LLVMValueRef val1 = LLVMBuildLoad(b, p1, "v1a");
+   LLVMValueRef p2 = LLVMBuildGEP(b, args->v2, &attr_idx, 1, "");
+   LLVMValueRef val2 = LLVMBuildLoad(b, p2, "v2a");
+
+   emit_coef4(b, args, slot, val0, val1, val2);
+}
+
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ *
+ * Each vertex value is multiplied by that vertex's 1/w (element 3 of
+ * attribute 0) before the plane coefficients are computed.  The
+ * division by the interpolated W is not done here.
+ */
+static void
+emit_perspective_coef(LLVMBuilderRef b,
+                      struct lp_setup_args *args,
+                      unsigned slot,
+                      unsigned vert_attr)
+{
+   LLVMValueRef attr_idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
+
+   /* Attribute values at the three vertices:
+    */
+   LLVMValueRef p0 = LLVMBuildGEP(b, args->v0, &attr_idx, 1, "");
+   LLVMValueRef val0 = LLVMBuildLoad(b, p0, "v0a");
+   LLVMValueRef p1 = LLVMBuildGEP(b, args->v1, &attr_idx, 1, "");
+   LLVMValueRef val1 = LLVMBuildLoad(b, p1, "v1a");
+   LLVMValueRef p2 = LLVMBuildGEP(b, args->v2, &attr_idx, 1, "");
+   LLVMValueRef val2 = LLVMBuildLoad(b, p2, "v2a");
+
+   /* 1/w of each vertex (v[0][3] is always 1/w), splatted to a vector:
+    */
+   LLVMValueRef oow0 = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow");
+   LLVMValueRef oow1 = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow");
+   LLVMValueRef oow2 = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow");
+
+   /* Premultiplied values:
+    */
+   LLVMValueRef scaled0 = LLVMBuildFMul(b, val0, oow0, "v0_oow_v0a");
+   LLVMValueRef scaled1 = LLVMBuildFMul(b, val1, oow1, "v1_oow_v1a");
+   LLVMValueRef scaled2 = LLVMBuildFMul(b, val2, oow2, "v2_oow_v2a");
+
+   emit_coef4(b, args, slot, scaled0, scaled1, scaled2);
+}
+
+
+/* Emit coefficients for the position input.  This simply forwards to
+ * emit_linear_coef() for vertex attribute \p attrib and output \p slot.
+ */
+static void
+emit_position_coef( LLVMBuilderRef builder,
+ struct lp_setup_args *args,
+ int slot, int attrib )
+{
+ emit_linear_coef(builder, args, slot, attrib);
+}
+
+
+
+
+/**
+ * Compute the inputs-> dadx, dady, a0 values.
+ *
+ * Position goes to output slot 0; shader input i of the key goes to
+ * output slot i + 1, using the interpolation mode recorded in the key.
+ */
+static void
+emit_tri_coef(LLVMBuilderRef builder,
+              const struct lp_setup_variant_key *key,
+              struct lp_setup_args *args)
+{
+   /* Vertex that supplies the value of constant attributes: */
+   LLVMValueRef const_vert = key->flatshade_first ? args->v0 : args->v2;
+   unsigned input;
+
+   /* The internal position input is in slot zero:
+    */
+   emit_position_coef(builder, args, 0, 0);
+
+   /* setup interpolation for all the remaining attributes:
+    */
+   for (input = 0; input < key->num_inputs; input++) {
+      const unsigned dst_slot = input + 1;
+      const unsigned src_attr = key->inputs[input].src_index;
+
+      switch (key->inputs[input].interp) {
+      case LP_INTERP_CONSTANT:
+         emit_constant_coef4(builder, args, dst_slot, const_vert, src_attr);
+         break;
+
+      case LP_INTERP_LINEAR:
+         emit_linear_coef(builder, args, dst_slot, src_attr);
+         break;
+
+      case LP_INTERP_PERSPECTIVE:
+         emit_perspective_coef(builder, args, dst_slot, src_attr);
+         break;
+
+      case LP_INTERP_POSITION:
+         /*
+          * The generated pixel interpolators will pick up the coeffs from
+          * slot 0.
+          */
+         break;
+
+      case LP_INTERP_FACING:
+         emit_facing_coef(builder, args, dst_slot);
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+}
+
+
+/* Verify (debug builds only), optimize and JIT-compile \p function,
+ * then drop its IR body.  Returns the address of the generated code.
+ * \p builder is not used.
+ *
+ * XXX: This is generic code, share with fs/vs codegen:
+ */
+static lp_jit_setup_triangle
+finalize_function(struct llvmpipe_screen *screen,
+                  LLVMBuilderRef builder,
+                  LLVMValueRef function)
+{
+   void *code;
+
+#ifdef DEBUG
+   /* Verify the LLVM IR.  If invalid, dump and abort */
+   if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
+      lp_debug_dump_value(function);
+      abort();
+   }
+#endif
+
+   /* Apply optimizations to LLVM IR */
+   LLVMRunFunctionPassManager(screen->pass, function);
+
+   if (gallivm_debug & GALLIVM_DEBUG_IR) {
+      /* Print the LLVM IR to stderr */
+      lp_debug_dump_value(function);
+      debug_printf("\n");
+   }
+
+   /*
+    * Translate the LLVM IR into machine code.
+    */
+   code = LLVMGetPointerToGlobal(screen->engine, function);
+
+   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+      lp_disassemble(code);
+   }
+
+   lp_func_delete_body(function);
+
+   return code;
+}
+
+/* XXX: Generic code:
+ *
+ * Emit a call to the llvm.x86.mmx.emms intrinsic when built with
+ * PIPE_ARCH_X86 defined; emits nothing otherwise.
+ */
+static void
+lp_emit_emms(LLVMBuilderRef builder)
+{
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+}
+
+
+/**
+ * Tag every pointer-typed parameter of \p function with the LLVM
+ * noalias attribute.
+ *
+ * \param builder    unused
+ * \param function   function whose parameters are tagged
+ * \param arg_types  type of each parameter of \p function
+ * \param nr_args    number of entries in \p arg_types
+ *
+ * XXX: generic code:
+ */
+static void
+set_noalias(LLVMBuilderRef builder,
+            LLVMValueRef function,
+            const LLVMTypeRef *arg_types,
+            int nr_args)
+{
+   int i;
+
+   (void) builder;
+
+   /* arg_types is a pointer parameter, so Elements(arg_types) would be
+    * sizeof(pointer) / sizeof(element) rather than the caller's array
+    * length.  nr_args carries the real parameter count.
+    */
+   for (i = 0; i < nr_args; ++i) {
+      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
+         LLVMAddAttribute(LLVMGetParam(function, i),
+                          LLVMNoAliasAttribute);
+      }
+   }
+}
+
+/* Fill in the derived members of \p args: the pixel-center adjusted
+ * position of vertex 0 and the four edge deltas, each multiplied by
+ * 1 / (dx01 * dy20 - dx20 * dy01) and splatted to a 4-float vector.
+ * x and y are read from elements 0 and 1 of attribute 0.
+ */
+static void
+init_args(LLVMBuilderRef b,
+          struct lp_setup_args *args,
+          const struct lp_setup_variant *variant)
+{
+   LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
+   LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
+   LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
+   LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
+   LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
+   LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
+
+   /* Offset subtracted from vertex 0 to reach the pixel center: */
+   LLVMValueRef center_bias = LLVMConstReal(LLVMFloatType(),
+                                            variant->key.pixel_center_half ? 0.5 : 0);
+   LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, center_bias, "x0_center" );
+   LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, center_bias, "y0_center" );
+
+   /* Edge deltas: */
+   LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01");
+   LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01");
+   LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20");
+   LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20");
+
+   /* ooa = 1 / (dx01 * dy20 - dx20 * dy01) */
+   LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
+   LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e");
+   LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f");
+   LLVMValueRef det = LLVMBuildFSub(b, e, f, "");
+   LLVMValueRef ooa = LLVMBuildFDiv(b, one, det, "ooa");
+
+   LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa");
+   LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa");
+   LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa");
+   LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa");
+
+   args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f");
+   args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f");
+
+   args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f");
+   args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f");
+
+   args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f");
+   args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f");
+}
+
+/**
+ * Generate the runtime callable function for the coefficient calculation.
+ *
+ * \param screen  supplies the LLVM module the function is added to, and
+ *                the pass manager / execution engine used to compile it
+ * \param key     variant key; key->size bytes of it are copied into the
+ *                new variant
+ * \return the new variant, or NULL if allocation or code generation failed
+ */
+static struct lp_setup_variant *
+generate_setup_variant(struct llvmpipe_screen *screen,
+                       struct lp_setup_variant_key *key)
+{
+   struct lp_setup_variant *variant = NULL;
+   struct lp_setup_args args;
+   char func_name[256];
+   LLVMTypeRef vec4f_type;
+   LLVMTypeRef func_type;
+   LLVMTypeRef arg_types[7];
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   int64_t t0 = 0, t1;
+
+   variant = CALLOC_STRUCT(lp_setup_variant);
+   if (variant == NULL)
+      goto fail;
+
+   if (LP_DEBUG & DEBUG_COUNTERS) {
+      t0 = os_time_get();
+   }
+
+   memcpy(&variant->key, key, key->size);
+   variant->list_item_global.base = variant;
+
+   util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u",
+                 0,
+                 variant->no);
+
+   /* Currently always deal with full 4-wide vertex attributes from
+    * the vertices.
+    */
+
+   vec4f_type = LLVMVectorType(LLVMFloatType(), 4);
+
+   arg_types[0] = LLVMPointerType(vec4f_type, 0);        /* v0 */
+   arg_types[1] = LLVMPointerType(vec4f_type, 0);        /* v1 */
+   arg_types[2] = LLVMPointerType(vec4f_type, 0);        /* v2 */
+   arg_types[3] = LLVMInt32Type();                       /* facing */
+   arg_types[4] = LLVMPointerType(vec4f_type, 0);        /* a0, aligned */
+   arg_types[5] = LLVMPointerType(vec4f_type, 0);        /* dadx, aligned */
+   arg_types[6] = LLVMPointerType(vec4f_type, 0);        /* dady, aligned */
+
+   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+   variant->function = LLVMAddFunction(screen->module, func_name, func_type);
+   if (!variant->function)
+      goto fail;
+
+   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
+
+   args.v0       = LLVMGetParam(variant->function, 0);
+   args.v1       = LLVMGetParam(variant->function, 1);
+   args.v2       = LLVMGetParam(variant->function, 2);
+   args.facing   = LLVMGetParam(variant->function, 3);
+   args.a0       = LLVMGetParam(variant->function, 4);
+   args.dadx     = LLVMGetParam(variant->function, 5);
+   args.dady     = LLVMGetParam(variant->function, 6);
+
+   lp_build_name(args.v0, "in_v0");
+   lp_build_name(args.v1, "in_v1");
+   lp_build_name(args.v2, "in_v2");
+   lp_build_name(args.facing, "in_facing");
+   lp_build_name(args.a0, "out_a0");
+   lp_build_name(args.dadx, "out_dadx");
+   lp_build_name(args.dady, "out_dady");
+
+   /*
+    * Function body
+    */
+   block = LLVMAppendBasicBlock(variant->function, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   set_noalias(builder, variant->function, arg_types, Elements(arg_types));
+   init_args(builder, &args, variant);
+   emit_tri_coef(builder, &variant->key, &args);
+
+   lp_emit_emms(builder);
+   LLVMBuildRetVoid(builder);
+
+   variant->jit_function = finalize_function(screen, builder,
+                                             variant->function);
+
+   /* Dispose of the builder only after its last use as an argument, so
+    * that a dangling handle is never passed around.
+    */
+   LLVMDisposeBuilder(builder);
+
+   if (!variant->jit_function)
+      goto fail;
+
+   /*
+    * Update timing information:
+    */
+   if (LP_DEBUG & DEBUG_COUNTERS) {
+      t1 = os_time_get();
+      LP_COUNT_ADD(llvm_compile_time, t1 - t0);
+      LP_COUNT_ADD(nr_llvm_compiles, 1);
+   }
+
+   return variant;
+
+fail:
+   if (variant) {
+      if (variant->function) {
+         if (variant->jit_function)
+            LLVMFreeMachineCodeForFunction(screen->engine,
+                                           variant->function);
+         LLVMDeleteFunction(variant->function);
+      }
+      FREE(variant);
+   }
+
+   return NULL;
+}
+
+
+
+/* Fill \p key from the bound fragment shader's inputs and the current
+ * rasterizer state.  Inputs marked LP_INTERP_COLOR are resolved to
+ * constant or linear interpolation according to the flatshade flag.
+ */
+static void
+lp_make_setup_variant_key(struct llvmpipe_context *lp,
+                          struct lp_setup_variant_key *key)
+{
+   const struct lp_fragment_shader *fs = lp->fs;
+   const unsigned color_interp =
+      lp->rasterizer->flatshade ? LP_INTERP_CONSTANT : LP_INTERP_LINEAR;
+   unsigned n;
+
+   assert(sizeof key->inputs[0] == sizeof(ushort));
+
+   key->num_inputs = fs->info.base.num_inputs;
+   key->flatshade_first = lp->rasterizer->flatshade_first;
+   key->pixel_center_half = lp->rasterizer->gl_rasterization_rules;
+   /* Only the inputs actually in use count towards the key size: */
+   key->size = Offset(struct lp_setup_variant_key,
+                      inputs[key->num_inputs]);
+   key->pad = 0;
+
+   memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
+
+   for (n = 0; n < key->num_inputs; n++) {
+      if (key->inputs[n].interp != LP_INTERP_COLOR)
+         continue;
+
+      key->inputs[n].interp = color_interp;
+   }
+}
+
+
+/* Release the generated code and IR of \p variant, unlink it from the
+ * context's variant list and free it.
+ */
+static void
+remove_setup_variant(struct llvmpipe_context *lp,
+                     struct lp_setup_variant *variant)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+   LLVMValueRef ir = variant->function;
+
+   if (gallivm_debug & GALLIVM_DEBUG_IR) {
+      debug_printf("llvmpipe: del setup_variant #%u total %u\n",
+                   variant->no, lp->nr_setup_variants);
+   }
+
+   if (ir) {
+      /* Machine code exists only if the JIT step succeeded. */
+      if (variant->jit_function) {
+         LLVMFreeMachineCodeForFunction(screen->engine, ir);
+      }
+      LLVMDeleteFunction(ir);
+   }
+
+   remove_from_list(&variant->list_item_global);
+   lp->nr_setup_variants--;
+   FREE(variant);
+}
+
+
+
+/* When the number of setup variants exceeds a threshold, cull a
+ * fraction (currently a quarter) of them, taken from the tail of the
+ * variant list.
+ */
+static void
+cull_setup_variants(struct llvmpipe_context *lp)
+{
+   int remaining = LP_MAX_SETUP_VARIANTS / 4;
+
+   /*
+    * XXX: we need to flush the context until we have some sort of reference
+    * counting in fragment shaders as they may still be binned
+    * Flushing alone might not be sufficient we need to wait on it too.
+    */
+   llvmpipe_finish(&lp->pipe, __FUNCTION__);
+
+   while (remaining-- > 0) {
+      struct lp_setup_variant_list_item *tail = last_elem(&lp->setup_variants_list);
+
+      remove_setup_variant(lp, tail->base);
+   }
+}
+
+
+/**
+ * Update fragment/vertex shader linkage state.  This is called just
+ * prior to drawing something when some fragment-related state has
+ * changed.
+ *
+ * Builds the setup variant key for the current state, reuses a cached
+ * variant with an identical key if there is one (moving it to the head
+ * of the list), otherwise generates a new variant, and finally hands
+ * the result to the setup module.
+ */
+void
+llvmpipe_update_setup(struct llvmpipe_context *lp)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+
+   struct lp_setup_variant_key *key = &lp->setup_variant.key;
+   struct lp_setup_variant *variant = NULL;
+   struct lp_setup_variant_list_item *li;
+
+   lp_make_setup_variant_key(lp, key);
+
+   foreach(li, &lp->setup_variants_list) {
+      if(li->base->key.size == key->size &&
+         memcmp(&li->base->key, key, key->size) == 0) {
+         variant = li->base;
+         break;
+      }
+   }
+
+   if (variant) {
+      move_to_head(&lp->setup_variants_list, &variant->list_item_global);
+   }
+   else {
+      if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
+         cull_setup_variants(lp);
+      }
+
+      variant = generate_setup_variant(screen, key);
+
+      /* generate_setup_variant() returns NULL on failure; only link and
+       * count variants that really exist.
+       */
+      if (variant) {
+         insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
+         lp->nr_setup_variants++;
+      }
+   }
+
+   /* NOTE(review): variant is NULL here if generation failed -- confirm
+    * that lp_setup_set_setup_variant() and its consumers tolerate that.
+    */
+   lp_setup_set_setup_variant(lp->setup,
+                              variant);
+}
+
+/* Destroy every setup variant cached on the context.
+ */
+void
+lp_delete_setup_variants(struct llvmpipe_context *lp)
+{
+   struct lp_setup_variant_list_item *list = &lp->setup_variants_list;
+   struct lp_setup_variant_list_item *li, *next;
+
+   for (li = first_elem(list); !at_end(list, li); li = next) {
+      /* Fetch the successor first: removing the variant frees the
+       * list item embedded in it.
+       */
+      next = next_elem(li);
+      remove_setup_variant(lp, li->base);
+   }
+}
+
+/**
+ * Print setup coefficients with debug_printf().
+ *
+ * Slot 0 of each array is printed as POS; slot 1 + n is printed as
+ * IN[n], restricted to the channels set in that input's usage_mask.
+ *
+ * \param key    supplies num_inputs and the per-input usage masks
+ * \param sa0    a0 coefficients, indexed [slot][channel]
+ * \param sdadx  dadx coefficients, indexed [slot][channel]
+ * \param sdady  dady coefficients, indexed [slot][channel]
+ */
+void
+lp_dump_setup_coef(const struct lp_setup_variant_key *key,
+                   const float (*sa0)[4],
+                   const float (*sdadx)[4],
+                   const float (*sdady)[4])
+{
+   /* Unsigned so that slot matches its %u conversion below. */
+   unsigned chan, slot;
+
+   for (chan = 0; chan < NUM_CHANNELS; chan++) {
+      float a0   = sa0  [0][chan];
+      float dadx = sdadx[0][chan];
+      float dady = sdady[0][chan];
+
+      debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
+                   "xyzw"[chan],
+                   a0, dadx, dady);
+   }
+
+   for (slot = 0; slot < key->num_inputs; slot++) {
+      unsigned usage_mask = key->inputs[slot].usage_mask;
+
+      for (chan = 0; chan < NUM_CHANNELS; chan++) {
+         float a0, dadx, dady;
+
+         if (!(usage_mask & (1u << chan)))
+            continue;
+
+         a0   = sa0  [1 + slot][chan];
+         dadx = sdadx[1 + slot][chan];
+         dady = sdady[1 + slot][chan];
+
+         debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
+                      slot,
+                      "xyzw"[chan],
+                      a0, dadx, dady);
+      }
+   }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h
new file mode 100644
index 0000000000..b0c81baa75
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h
@@ -0,0 +1,80 @@
+#ifndef LP_STATE_SETUP_H
+#define LP_STATE_SETUP_H
+
+#include "lp_bld_interp.h"
+
+
+struct llvmpipe_context;
+struct lp_setup_variant;
+
+/*
+ * Node of a list of setup variants.  `base` is the variant the node
+ * refers to; `next` and `prev` link the node to its neighbours.
+ */
+struct lp_setup_variant_list_item
+{
+   struct lp_setup_variant *base;
+   struct lp_setup_variant_list_item *next;
+   struct lp_setup_variant_list_item *prev;
+};
+
+
+/*
+ * Key identifying a setup variant.  Keys are compared bytewise over
+ * their first `size` bytes, so the struct must not contain
+ * compiler-inserted padding bits with indeterminate contents.
+ *
+ * The bit-fields below add up to exactly 32 bits (8 + 1 + 1 + 6 + 16).
+ * With a wider `pad`, `size` would no longer fit in the same 32-bit
+ * unit and unnamed padding bits would appear.
+ */
+struct lp_setup_variant_key {
+   unsigned num_inputs:8;
+   unsigned flatshade_first:1;
+   unsigned pixel_center_half:1;
+   unsigned pad:6;            /* explicit filler up to the 16-bit boundary */
+   unsigned size:16;          /* number of key bytes that are compared */
+   struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
+};
+
+
+/*
+ * Pointer to a triangle setup function.
+ *
+ * v0, v1, v2   - the three vertices, each an array of float[4] (read only).
+ * front_facing - facing flag of the triangle.
+ * a0, dadx, dady - arrays of float[4] that are not const-qualified;
+ * presumably the function writes the interpolants there -- confirm
+ * against the code generator.
+ */
+typedef void (*lp_jit_setup_triangle)( const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front_facing,
+ float (*a0)[4],
+ float (*dadx)[4],
+ float (*dady)[4] );
+
+
+
+
+/* At this stage, for a given variant key, we create a
+ * draw_vertex_info struct telling the draw module how to format the
+ * vertices, and an llvm-generated function which calculates the
+ * attribute interpolants (a0, dadx, dady) from three of those
+ * vertices.
+ */
+struct lp_setup_variant {
+ /* Key this variant is looked up by. */
+ struct lp_setup_variant_key key;
+
+ /* Node used to put the variant on a list of variants. */
+ struct lp_setup_variant_list_item list_item_global;
+
+ /* XXX: this is a pointer to the LLVM IR. Once jit_function is
+ * generated, we never need to use the IR again - need to find a
+ * way to release this data without destroying the generated
+ * assembly.
+ */
+ LLVMValueRef function;
+
+ /* The actual generated setup function:
+ */
+ lp_jit_setup_triangle jit_function;
+
+ /* NOTE(review): presumably a serial number of the variant; its use is
+ * not visible here -- confirm.
+ */
+ unsigned no;
+};
+
+/*
+ * Takes the same arguments as an lp_jit_setup_triangle function plus the
+ * variant key.  NOTE(review): presumably the non-generated counterpart of
+ * the jitted setup function; the definition is not visible here -- confirm.
+ */
+void lp_setup_tri_fallback( const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front_facing,
+ float (*a0)[4],
+ float (*dadx)[4],
+ float (*dady)[4],
+ const struct lp_setup_variant_key *key );
+
+/* Passes every variant on lp->setup_variants_list to the removal routine. */
+void lp_delete_setup_variants(struct llvmpipe_context *lp);
+
+/*
+ * Debug helper: prints the coefficient rows of sa0/sdadx/sdady for the
+ * position and for each input described by key.
+ */
+void
+lp_dump_setup_coef( const struct lp_setup_variant_key *key,
+ const float (*sa0)[4],
+ const float (*sdadx)[4],
+ const float (*sdady)[4]);
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c
index 57b0ee5776..816518e508 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_round.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_round.c
@@ -75,10 +75,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func)
LLVMValueRef ret;
struct lp_build_context bld;
- bld.builder = builder;
- bld.type.floating = 1;
- bld.type.width = 32;
- bld.type.length = 4;
+ lp_build_context_init(&bld, builder, lp_float32_vec4_type());
LLVMSetFunctionCallConv(func, LLVMCCallConv);
@@ -100,9 +97,10 @@ printv(char* string, v4sf value)
f[0], f[1], f[2], f[3]);
}
-static void
+static boolean
compare(v4sf x, v4sf y)
{
+ boolean success = TRUE;
float *xp = (float *) &x;
float *yp = (float *) &y;
if (xp[0] != yp[0] ||
@@ -110,7 +108,9 @@ compare(v4sf x, v4sf y)
xp[2] != yp[2] ||
xp[3] != yp[3]) {
printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n");
+ success = FALSE;
}
+ return success;
}
@@ -171,9 +171,12 @@ test_round(unsigned verbose, FILE *fp)
LLVMDumpModule(module);
for (i = 0; i < 3; i++) {
+ /* NOTE: There are several acceptable rules for x.5 rounding: ceiling,
+ * nearest even, etc. So we avoid testing such corner cases here.
+ */
v4sf xvals[3] = {
{-10.0, -1, 0, 12.0},
- {-1.5, -0.25, 1.25, 2.5},
+ {-1.49, -0.25, 1.25, 2.51},
{-0.99, -0.01, 0.01, 0.99}
};
v4sf x = xvals[i];
@@ -191,7 +194,7 @@ test_round(unsigned verbose, FILE *fp)
y = round_func(x);
printv("C round(x) ", ref);
printv("LLVM round(x)", y);
- compare(ref, y);
+ success = success && compare(ref, y);
refp[0] = trunc(xp[0]);
refp[1] = trunc(xp[1]);
@@ -200,7 +203,7 @@ test_round(unsigned verbose, FILE *fp)
y = trunc_func(x);
printv("C trunc(x) ", ref);
printv("LLVM trunc(x)", y);
- compare(ref, y);
+ success = success && compare(ref, y);
refp[0] = floor(xp[0]);
refp[1] = floor(xp[1]);
@@ -209,7 +212,7 @@ test_round(unsigned verbose, FILE *fp)
y = floor_func(x);
printv("C floor(x) ", ref);
printv("LLVM floor(x)", y);
- compare(ref, y);
+ success = success && compare(ref, y);
refp[0] = ceil(xp[0]);
refp[1] = ceil(xp[1]);
@@ -218,7 +221,7 @@ test_round(unsigned verbose, FILE *fp)
y = ceil_func(x);
printv("C ceil(x) ", ref);
printv("LLVM ceil(x) ", y);
- compare(ref, y);
+ success = success && compare(ref, y);
}
LLVMFreeMachineCodeForFunction(engine, test_round);
@@ -247,11 +250,7 @@ test_round(unsigned verbose, FILE *fp)
boolean
test_all(unsigned verbose, FILE *fp)
{
- boolean success = TRUE;
-
- test_round(verbose, fp);
-
- return success;
+ return test_round(verbose, fp);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
index 7ab357f162..79939b1a39 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
@@ -72,10 +72,7 @@ add_sincos_test(LLVMModuleRef module, boolean sin)
LLVMValueRef ret;
struct lp_build_context bld;
- bld.builder = builder;
- bld.type.floating = 1;
- bld.type.width = 32;
- bld.type.length = 4;
+ lp_build_context_init(&bld, builder, lp_float32_vec4_type());
LLVMSetFunctionCallConv(func, LLVMCCallConv);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 2ba39052ab..e49f9c62fe 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -289,172 +289,141 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
print
-def generate_ssse3():
+def generate_sse2():
print '''
#if defined(PIPE_ARCH_SSE)
#include "util/u_sse.h"
-static void
-lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
- const uint8_t *src, unsigned src_stride,
- unsigned x0, unsigned y0)
+static ALWAYS_INLINE void
+swz4( const __m128i * restrict x,
+ const __m128i * restrict y,
+ const __m128i * restrict z,
+ const __m128i * restrict w,
+ __m128i * restrict a,
+ __m128i * restrict b,
+ __m128i * restrict c,
+ __m128i * restrict d)
{
+ __m128i i, j, k, l;
+ __m128i m, n, o, p;
+ __m128i e, f, g, h;
+
+ m = _mm_unpacklo_epi8(*x,*y);
+ n = _mm_unpackhi_epi8(*x,*y);
+ o = _mm_unpacklo_epi8(*z,*w);
+ p = _mm_unpackhi_epi8(*z,*w);
+
+ i = _mm_unpacklo_epi16(m,n);
+ j = _mm_unpackhi_epi16(m,n);
+ k = _mm_unpacklo_epi16(o,p);
+ l = _mm_unpackhi_epi16(o,p);
+
+ e = _mm_unpacklo_epi8(i,j);
+ f = _mm_unpackhi_epi8(i,j);
+ g = _mm_unpacklo_epi8(k,l);
+ h = _mm_unpackhi_epi8(k,l);
+
+ *a = _mm_unpacklo_epi64(e,g);
+ *b = _mm_unpackhi_epi64(e,g);
+ *c = _mm_unpacklo_epi64(f,h);
+ *d = _mm_unpackhi_epi64(f,h);
+}
+
+static ALWAYS_INLINE void
+unswz4( const __m128i * restrict a,
+ const __m128i * restrict b,
+ const __m128i * restrict c,
+ const __m128i * restrict d,
+ __m128i * restrict x,
+ __m128i * restrict y,
+ __m128i * restrict z,
+ __m128i * restrict w)
+{
+ __m128i i, j, k, l;
+ __m128i m, n, o, p;
+
+ i = _mm_unpacklo_epi8(*a,*b);
+ j = _mm_unpackhi_epi8(*a,*b);
+ k = _mm_unpacklo_epi8(*c,*d);
+ l = _mm_unpackhi_epi8(*c,*d);
+
+ m = _mm_unpacklo_epi16(i,k);
+ n = _mm_unpackhi_epi16(i,k);
+ o = _mm_unpacklo_epi16(j,l);
+ p = _mm_unpackhi_epi16(j,l);
+
+ *x = _mm_unpacklo_epi64(m,n);
+ *y = _mm_unpackhi_epi64(m,n);
+ *z = _mm_unpacklo_epi64(o,p);
+ *w = _mm_unpackhi_epi64(o,p);
+}
+static void
+lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst,
+ const uint8_t * restrict src, unsigned src_stride,
+ unsigned x0, unsigned y0)
+{
+ __m128i *dst128 = (__m128i *) dst;
unsigned x, y;
- __m128i *pdst = (__m128i*) dst;
- const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t);
- unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH;
- unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT;
-
- const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-
- const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-
- const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff);
- const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff);
- const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff);
- const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff);
-
- const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e);
- const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d);
- const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c);
- const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f);
-
- for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
- __m128i line0 = *(__m128i*)ysrc0;
- const uint8_t *ysrc = ysrc0 + src_stride;
- ysrc0 += tile_stridey;
-
- for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
- __m128i r, g, b, a, line1;
- line1 = *(__m128i*)ysrc;
- PIPE_READ_WRITE_BARRIER();
- ysrc += src_stride;
- r = _mm_shuffle_epi8(line0, shuffle00);
- g = _mm_shuffle_epi8(line0, shuffle01);
- b = _mm_shuffle_epi8(line0, shuffle02);
- a = _mm_shuffle_epi8(line0, shuffle03);
-
- line0 = *(__m128i*)ysrc;
- PIPE_READ_WRITE_BARRIER();
- ysrc += src_stride;
- r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10));
- g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11));
- b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12));
- a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13));
-
- line1 = *(__m128i*)ysrc;
- PIPE_READ_WRITE_BARRIER();
- ysrc -= tile_stridex;
- r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20));
- g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21));
- b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22));
- a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23));
-
- if (x + 1 < TILE_SIZE) {
- line0 = *(__m128i*)ysrc;
- ysrc += src_stride;
- }
-
- PIPE_READ_WRITE_BARRIER();
- r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30));
- g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31));
- b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32));
- a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33));
-
- *pdst++ = r;
- *pdst++ = g;
- *pdst++ = b;
- *pdst++ = a;
+
+ src += y0 * src_stride;
+ src += x0 * sizeof(uint32_t);
+
+ for (y = 0; y < TILE_SIZE; y += 4) {
+ const uint8_t *src_row = src;
+
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ swz4((const __m128i *) (src_row + 0 * src_stride),
+ (const __m128i *) (src_row + 1 * src_stride),
+ (const __m128i *) (src_row + 2 * src_stride),
+ (const __m128i *) (src_row + 3 * src_stride),
+ dst128 + 2, /* b */
+ dst128 + 1, /* g */
+ dst128 + 0, /* r */
+ dst128 + 3); /* a */
+
+ dst128 += 4;
+ src_row += sizeof(__m128i);
}
- }
+ src += 4 * src_stride;
+ }
}
static void
-lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src,
- uint8_t *dst, unsigned dst_stride,
+lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src,
+ uint8_t * restrict dst, unsigned dst_stride,
unsigned x0, unsigned y0)
{
unsigned int x, y;
- const __m128i *psrc = (__m128i*) src;
- const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t));
- uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t);
- __m128i c0 = *psrc++;
- __m128i c1;
-
- const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff);
- const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff);
- const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff);
- const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff);
-
- const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff);
- const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff);
- const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff);
- const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff);
-
- const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff);
- const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff);
- const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff);
- const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff);
-
- const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05);
- const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07);
- const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d);
- const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f);
-
- for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
- __m128i *tile = (__m128i*) pdst;
- pdst += dst_stride * TILE_VECTOR_HEIGHT;
- for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
- uint8_t *linep = (uint8_t*) (tile++);
- __m128i line0, line1, line2, line3;
-
- c1 = *psrc++; /* r */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_shuffle_epi8(c0, shuffle00);
- line1 = _mm_shuffle_epi8(c0, shuffle01);
- line2 = _mm_shuffle_epi8(c0, shuffle02);
- line3 = _mm_shuffle_epi8(c0, shuffle03);
-
- c0 = *psrc++; /* g */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10));
- line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11));
- line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12));
- line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13));
-
- c1 = *psrc++; /* b */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20));
- line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21));
- line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22));
- line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23));
-
- if (psrc != end)
- c0 = *psrc++; /* a */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30));
- line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31));
- line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32));
- line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33));
-
- *(__m128i*) (linep) = line0;
- *(__m128i*) (((char*)linep) + dst_stride) = line1;
- *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2;
- *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3;
+ const __m128i *src128 = (const __m128i *) src;
+
+ dst += y0 * dst_stride;
+ dst += x0 * sizeof(uint32_t);
+
+ for (y = 0; y < TILE_SIZE; y += 4) {
+ const uint8_t *dst_row = dst;
+
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ unswz4( &src128[2], /* b */
+ &src128[1], /* g */
+ &src128[0], /* r */
+ &src128[3], /* a */
+ (__m128i *) (dst_row + 0 * dst_stride),
+ (__m128i *) (dst_row + 1 * dst_stride),
+ (__m128i *) (dst_row + 2 * dst_stride),
+ (__m128i *) (dst_row + 3 * dst_stride));
+
+ src128 += 4;
+ dst_row += sizeof(__m128i);;
}
+
+ dst += 4 * dst_stride;
}
}
-#endif /* PIPE_ARCH_SSSE3 */
+#endif /* PIPE_ARCH_SSE */
'''
@@ -479,7 +448,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix)
if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
print '#ifdef PIPE_ARCH_SSE'
- print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name)
print '#else'
print ' func = %s;' % (func_name,)
print '#endif'
@@ -517,7 +486,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix)
if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
print '#ifdef PIPE_ARCH_SSE'
- print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name)
print '#else'
print ' func = %s;' % (func_name,)
print '#endif'
@@ -577,7 +546,7 @@ def main():
print '};'
print
- generate_ssse3()
+ generate_sse2()
channel = Channel(UNSIGNED, True, 8)
native_type = 'uint8_t'
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index ebb21a6e5a..a9426df686 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -236,7 +236,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
int ret;
ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202,
- &screen->channel);
+ 512*1024, &screen->channel);
if (ret)
return ret;
screen->device = dev;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index ac69c7848e..bf6a577188 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -26,6 +26,9 @@
#define NOUVEAU_MSG(fmt, args...) \
fprintf(stderr, "nouveau: "fmt, ##args);
+#define nouveau_bo_tile_layout(nvbo) \
+ ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)
+
/* Constant buffer assignment */
#define NV50_CB_PMISC 0
#define NV50_CB_PVP 1
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 921ed15691..27eb3817bf 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -452,7 +452,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
if (nvi->opcode == NV_OP_SAT) {
mi = nvi->src[0]->value->insn;
- if (mi->opcode != NV_OP_ADD || mi->opcode != NV_OP_MAD)
+ if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD)
continue;
if (mi->flags_def || mi->def[0]->refc > 1)
continue;
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 3f3166261b..f70c138fe1 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -92,7 +92,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
return 1;
}
- if (!bo->tile_flags) {
+ if (!nouveau_bo_tile_layout(bo)) {
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index f973cf24b9..0cc2f4a837 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -45,7 +45,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
WAIT_RING (chan, 14);
- if (!src_bo->tile_flags) {
+ if (!nouveau_bo_tile_layout(src_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
OUT_RING (chan, 1);
@@ -64,7 +64,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, sz); /* copying only 1 zslice per call */
}
- if (!dst_bo->tile_flags) {
+ if (!nouveau_bo_tile_layout(dst_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
OUT_RING (chan, 1);
@@ -95,14 +95,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
- if (src_bo->tile_flags) {
+ if (nouveau_bo_tile_layout(src_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1);
OUT_RING (chan, (sy << 16) | (sx * cpp));
} else {
src_offset += (line_count * src_pitch);
}
- if (dst_bo->tile_flags) {
+ if (nouveau_bo_tile_layout(dst_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1);
OUT_RING (chan, (dy << 16) | (dx * cpp));
@@ -280,7 +280,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
- if (bo->tile_flags) {
+ if (nouveau_bo_tile_layout(bo)) {
BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
OUT_RING (chan, dst_format);
OUT_RING (chan, 0);
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index 23fdb0820a..13e8beed47 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -1558,7 +1558,7 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx,
struct nvfx_fragment_program_bo* next = fpbo->next;
nouveau_bo_unmap(fpbo->bo);
nouveau_bo_ref(0, &fpbo->bo);
- free(fpbo);
+ os_free_aligned(fpbo);
fpbo = next;
}
while(fpbo != fp->fpbo);
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 145a7985da..f78fe34790 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -29,6 +29,7 @@
static const struct debug_named_value debug_options[] = {
{ "fp", DBG_FP, "Log fragment program compilation" },
{ "vp", DBG_VP, "Log vertex program compilation" },
+ { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" },
{ "draw", DBG_DRAW, "Log draw calls" },
{ "swtcl", DBG_SWTCL, "Log SWTCL-specific info" },
{ "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" },
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index d9d4a9304d..c91532eb7b 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -378,7 +378,8 @@ static void r300_translate_fragment_shader(
/* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base);
- compiler.Base.Debug = DBG_ON(r300, DBG_FP);
+ DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0;
+ DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0;
compiler.code = &shader->code;
compiler.state = shader->compare_state;
@@ -395,7 +396,7 @@ static void r300_translate_fragment_shader(
find_output_registers(&compiler, shader);
- if (compiler.Base.Debug) {
+ if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
}
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 5b0121ce9e..5f34fcb274 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -158,7 +158,7 @@ static void r300_render_condition(struct pipe_context *pipe,
uint mode)
{
struct r300_context *r300 = r300_context(pipe);
- uint64_t result;
+ uint64_t result = 0;
boolean wait;
if (query) {
@@ -167,9 +167,9 @@ static void r300_render_condition(struct pipe_context *pipe,
if (!r300_get_query_result(pipe, query, wait, &result)) {
r300->skip_rendering = FALSE;
+ } else {
+ r300->skip_rendering = result == 0;
}
-
- r300->skip_rendering = result == 0;
} else {
r300->skip_rendering = FALSE;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 7f41ff0e2e..b448924f85 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -124,6 +124,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
return 0;
/* Texturing. */
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index dc2bc7e827..8b7f1fab61 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -102,6 +102,7 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_NO_CBZB (1 << 21)
/* Statistics. */
#define DBG_STATS (1 << 24)
+#define DBG_P_STAT (1 << 25)
/*@}*/
static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 03ec127ff7..7e501221b1 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -364,6 +364,7 @@ static INLINE uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
uint32_t result = 0;
const struct util_format_description *desc;
+ unsigned i;
desc = util_format_description(format);
@@ -371,10 +372,17 @@ r300_translate_vertex_data_type(enum pipe_format format) {
return R300_INVALID_FORMAT;
}
- switch (desc->channel[0].type) {
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ switch (desc->channel[i].type) {
/* Half-floats, floats, doubles */
case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 16:
/* Supported only on RV350 and later. */
if (desc->nr_channels > 2) {
@@ -394,7 +402,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
case UTIL_FORMAT_TYPE_UNSIGNED:
/* Signed ints */
case UTIL_FORMAT_TYPE_SIGNED:
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 8:
result = R300_DATA_TYPE_BYTE;
break;
@@ -413,10 +421,10 @@ r300_translate_vertex_data_type(enum pipe_format format) {
return R300_INVALID_FORMAT;
}
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
result |= R300_SIGNED;
}
- if (desc->channel[0].normalized) {
+ if (desc->channel[i].normalized) {
result |= R300_NORMALIZE;
}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index a7911c6fcc..cee56bccdc 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -260,16 +260,26 @@ uint32_t r300_translate_texformat(enum pipe_format format,
return ~0; /* Unsupported/unknown. */
}
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ if (i == 4)
+ return ~0; /* Unsupported/unknown. */
+
/* And finally, uniform formats. */
- switch (desc->channel[0].type) {
+ switch (desc->channel[i].type) {
case UTIL_FORMAT_TYPE_UNSIGNED:
case UTIL_FORMAT_TYPE_SIGNED:
- if (!desc->channel[0].normalized &&
+ if (!desc->channel[i].normalized &&
desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
return ~0;
}
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 4:
switch (desc->nr_channels) {
case 2:
@@ -303,7 +313,7 @@ uint32_t r300_translate_texformat(enum pipe_format format,
return ~0;
case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 16:
switch (desc->nr_channels) {
case 1:
@@ -359,6 +369,11 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
return R300_COLOR_FORMAT_I8;
/* 16-bit buffers. */
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ return R300_COLOR_FORMAT_UV88;
+
case PIPE_FORMAT_B5G6R5_UNORM:
return R300_COLOR_FORMAT_RGB565;
@@ -443,15 +458,25 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
desc = util_format_description(format);
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ if (i == 4)
+ return ~0; /* Unsupported/unknown. */
+
/* Specifies how the shader output is written to the fog unit. */
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
- if (desc->channel[0].size == 32) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (desc->channel[i].size == 32) {
modifier |= R300_US_OUT_FMT_C4_32_FP;
} else {
modifier |= R300_US_OUT_FMT_C4_16_FP;
}
} else {
- if (desc->channel[0].size == 16) {
+ if (desc->channel[i].size == 16) {
modifier |= R300_US_OUT_FMT_C4_16;
} else {
/* C4_8 seems to be used for the formats whose pixel size
@@ -468,7 +493,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* Add swizzles and return. */
switch (format) {
- /* 8-bit outputs.
+ /* 8-bit outputs, one channel.
* COLORFORMAT_I8 stores the C2 component. */
case PIPE_FORMAT_A8_UNORM:
return modifier | R300_C2_SEL_A;
@@ -478,6 +503,14 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
case PIPE_FORMAT_R8_SNORM:
return modifier | R300_C2_SEL_R;
+ /* 16-bit outputs, two channels.
+ * COLORFORMAT_UV88 stores C2 and C0. */
+ case PIPE_FORMAT_L8A8_UNORM:
+ return modifier | R300_C0_SEL_A | R300_C2_SEL_R;
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ return modifier | R300_C0_SEL_G | R300_C2_SEL_R;
+
/* BGRA outputs. */
case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_B5G5R5A1_UNORM:
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index a49029e1e9..543d0fdc15 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -44,7 +44,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
{{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */
{{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */
{{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */
- {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */
+ {{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */
{{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
},
{
@@ -53,7 +53,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
{{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */
{{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */
{{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */
- {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */
+ {{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */
{{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
}
};
@@ -368,11 +368,11 @@ static void r300_setup_tiling(struct r300_screen *screen,
switch (util_format_get_blocksize(format)) {
case 1:
case 4:
+ case 8:
desc->microtile = R300_BUFFER_TILED;
break;
case 2:
- case 8:
if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
desc->microtile = R300_BUFFER_SQUARETILED;
}
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index e2b9af9d01..65696555ac 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -202,7 +202,8 @@ void r300_translate_vertex_shader(struct r300_context *r300,
memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base);
- compiler.Base.Debug = DBG_ON(r300, DBG_VP);
+ DBG_ON(r300, DBG_VP) ? compiler.Base.Debug |= RC_DBG_LOG : 0;
+ DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0;
compiler.code = &vs->code;
compiler.UserData = vs;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
@@ -214,7 +215,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
compiler.Base.remove_unused_constants = TRUE;
- if (compiler.Base.Debug) {
+ if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_VP, "r300: Initial vertex program\n");
tgsi_dump(vs->state.tokens, 0);
}
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index 433b7044e5..ede0bb2ec4 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -7,23 +7,18 @@ LIBRARY_INCLUDES = \
$(shell pkg-config libdrm --cflags-only-I)
C_SOURCES = \
- r600_buffer.c \
- r600_state2.c \
- evergreen_state.c \
- r600_context.c \
- r600_shader.c \
- r600_draw.c \
+ r600_asm.c \
r600_blit.c \
+ r600_buffer.c \
r600_helper.c \
+ r600_pipe.c \
r600_query.c \
r600_resource.c \
- r600_screen.c \
+ r600_shader.c \
r600_state.c \
r600_texture.c \
- r600_asm.c \
r700_asm.c \
- r600_hw_states.c \
- eg_asm.c \
- eg_hw_states.c
+ evergreen_state.c \
+ eg_asm.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 99c8644e02..bf0ad8571b 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -16,19 +16,19 @@ env.Append(CPPPATH = [
r600 = env.ConvenienceLibrary(
target = 'r600',
source = [
+ 'r600_asm.c',
'r600_buffer.c',
- 'r600_context.c',
- 'r600_draw.c',
'r600_blit.c',
'r600_helper.c',
+ 'r600_pipe.c',
'r600_query.c',
'r600_resource.c',
- 'r600_screen.c',
+ 'r600_shader.c',
'r600_state.c',
'r600_texture.c',
- 'r600_shader.c',
- 'r600_asm.c',
'r700_asm.c',
+ 'evergreen_state.c',
+ 'eg_asm.c',
])
Export('r600')
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 769f550874..52b7189e9e 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -20,14 +20,13 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "radeon.h"
-#include "r600_asm.h"
-#include "r600_context.h"
+#include <stdio.h>
+#include <errno.h>
#include "util/u_memory.h"
+#include "r600_pipe.h"
+#include "r600_asm.h"
#include "eg_sq.h"
#include "r600_opcodes.h"
-#include <stdio.h>
-#include <errno.h>
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
@@ -73,8 +72,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COND(cf->cond) |
- S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+ S_SQ_CF_WORD1_COND(cf->cond) |
+ S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
break;
default:
diff --git a/src/gallium/drivers/r600/eg_hw_states.c b/src/gallium/drivers/r600/eg_hw_states.c
deleted file mode 100644
index ebbc9c3f37..0000000000
--- a/src/gallium/drivers/r600/eg_hw_states.c
+++ /dev/null
@@ -1,1088 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- * 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Dave Airlie
- */
-#include <util/u_inlines.h>
-#include <util/u_format.h>
-#include <util/u_memory.h>
-#include <util/u_blitter.h>
-#include "util/u_pack_color.h"
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_resource.h"
-#include "eg_state_inlines.h"
-#include "evergreend.h"
-
-#include "eg_states_inc.h"
-
-static void eg_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- int i;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0);
- rstate->states[EG_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]);
- rstate->states[EG_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]);
- rstate->states[EG_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]);
- rstate->states[EG_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]);
- rstate->states[EG_BLEND__CB_BLEND0_CONTROL] = 0x00000000;
- rstate->states[EG_BLEND__CB_BLEND1_CONTROL] = 0x00000000;
- rstate->states[EG_BLEND__CB_BLEND2_CONTROL] = 0x00000000;
- rstate->states[EG_BLEND__CB_BLEND3_CONTROL] = 0x00000000;
- rstate->states[EG_BLEND__CB_BLEND4_CONTROL] = 0x00000000;
- rstate->states[EG_BLEND__CB_BLEND5_CONTROL] = 0x00000000;
- rstate->states[EG_BLEND__CB_BLEND6_CONTROL] = 0x00000000;
- rstate->states[EG_BLEND__CB_BLEND7_CONTROL] = 0x00000000;
-
- for (i = 0; i < 8; i++) {
- unsigned eqRGB = state->rt[i].rgb_func;
- unsigned srcRGB = state->rt[i].rgb_src_factor;
- unsigned dstRGB = state->rt[i].rgb_dst_factor;
-
- unsigned eqA = state->rt[i].alpha_func;
- unsigned srcA = state->rt[i].alpha_src_factor;
- unsigned dstA = state->rt[i].alpha_dst_factor;
- uint32_t bc = 0;
-
- if (!state->rt[i].blend_enable)
- continue;
-
- bc |= S_028780_BLEND_CONTROL_ENABLE(1);
-
- bc |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB));
- bc |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB));
- bc |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB));
-
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
- bc |= S_028780_SEPARATE_ALPHA_BLEND(1);
- bc |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA));
- bc |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA));
- bc |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
- }
-
- rstate->states[EG_BLEND__CB_BLEND0_CONTROL + i] = bc;
- }
-
- radeon_state_pm4(rstate);
-}
-
-static void eg_ucp(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_clip_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0);
-
- for (int i = 0; i < state->nr; i++) {
- rstate->states[i * 4 + 0] = fui(state->ucp[i][0]);
- rstate->states[i * 4 + 1] = fui(state->ucp[i][1]);
- rstate->states[i * 4 + 2] = fui(state->ucp[i][2]);
- rstate->states[i * 4 + 3] = fui(state->ucp[i][3]);
- }
- radeon_state_pm4(rstate);
-}
-
-static void eg_cb(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_framebuffer_state *state, int cb)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- unsigned level = state->cbufs[cb]->level;
- unsigned pitch, slice;
- unsigned color_info;
- unsigned format, swap, ntype;
- const struct util_format_description *desc;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0, cb, 0);
- rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
- rbuffer = &rtex->resource;
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
- rstate->nbo = 1;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
-
- ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- ntype = V_028C70_NUMBER_SRGB;
-
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
-
- color_info = S_028C70_FORMAT(format) |
- S_028C70_COMP_SWAP(swap) |
- S_028C70_BLEND_CLAMP(1) |
- S_028C70_SOURCE_FORMAT(1) |
- S_028C70_NUMBER_TYPE(ntype);
-
- rstate->states[EG_CB__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8;
- rstate->states[EG_CB__CB_COLOR0_INFO] = color_info;
- rstate->states[EG_CB__CB_COLOR0_PITCH] = S_028C64_PITCH_TILE_MAX(pitch);
- rstate->states[EG_CB__CB_COLOR0_SLICE] = S_028C68_SLICE_TILE_MAX(slice);
- rstate->states[EG_CB__CB_COLOR0_VIEW] = 0x00000000;
- rstate->states[EG_CB__CB_COLOR0_ATTRIB] = S_028C74_NON_DISP_TILING_ORDER(1);
-
- radeon_state_pm4(rstate);
-}
-
-static void eg_db(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_framebuffer_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- unsigned level;
- unsigned pitch, slice, format;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0);
- if (state->zsbuf == NULL)
- return;
-
- rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
- rbuffer = &rtex->resource;
-
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- rstate->nbo = 1;
- rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
- level = state->zsbuf->level;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
- format = r600_translate_dbformat(state->zsbuf->texture->format);
- rstate->states[EG_DB__DB_HTILE_DATA_BASE] = state->zsbuf->offset >> 8;
- rstate->states[EG_DB__DB_Z_READ_BASE] = state->zsbuf->offset >> 8;
- rstate->states[EG_DB__DB_Z_WRITE_BASE] = state->zsbuf->offset >> 8;
- rstate->states[EG_DB__DB_STENCIL_READ_BASE] = state->zsbuf->offset >> 8;
- rstate->states[EG_DB__DB_STENCIL_WRITE_BASE] = state->zsbuf->offset >> 8;
- rstate->states[EG_DB__DB_Z_INFO] = S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format);
- rstate->states[EG_DB__DB_DEPTH_VIEW] = 0x00000000;
- rstate->states[EG_DB__DB_DEPTH_SIZE] = S_028058_PITCH_TILE_MAX(pitch);
- rstate->states[EG_DB__DB_DEPTH_SLICE] = S_02805C_SLICE_TILE_MAX(slice);
- radeon_state_pm4(rstate);
-}
-
-static void eg_rasterizer(struct r600_context *rctx, struct radeon_state *rstate)
-{
- const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer;
- const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- const struct pipe_clip_state *clip = NULL;
- struct r600_screen *rscreen = rctx->screen;
- float offset_units = 0, offset_scale = 0;
- char depth = 0;
- unsigned offset_db_fmt_cntl = 0;
- unsigned tmp;
- unsigned prov_vtx = 1;
- unsigned polygon_dual_mode;
-
- if (rctx->clip)
- clip = &rctx->clip->state.clip;
- if (fb->zsbuf) {
- offset_units = state->offset_units;
- offset_scale = state->offset_scale * 12.0f;
- switch (fb->zsbuf->texture->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- depth = -24;
- offset_units *= 2.0f;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- depth = -23;
- offset_units *= 1.0f;
- offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- depth = -16;
- offset_units *= 4.0f;
- break;
- default:
- R600_ERR("unsupported %d\n", fb->zsbuf->texture->format);
- return;
- }
- }
- offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
-
- if (state->flatshade_first)
- prov_vtx = 0;
-
- rctx->flat_shade = state->flatshade;
- radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0);
- rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000000;
- if (rctx->flat_shade)
- rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= S_0286D4_FLAT_SHADE_ENA(1);
- if (state->sprite_coord_enable) {
- rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |=
- S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(2) |
- S_0286D4_PNT_SPRITE_OVRD_Y(3) |
- S_0286D4_PNT_SPRITE_OVRD_Z(0) |
- S_0286D4_PNT_SPRITE_OVRD_W(1);
- if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
- rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |=
- S_0286D4_PNT_SPRITE_TOP_1(1);
- }
- }
- rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = 0;
- if (clip) {
- rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1);
- rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp);
- rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp);
- }
- polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
- state->fill_back != PIPE_POLYGON_MODE_FILL);
-
- rstate->states[EG_RASTERIZER__PA_SU_SC_MODE_CNTL] =
- S_028814_PROVOKING_VTX_LAST(prov_vtx) |
- S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
- S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
- S_028814_FACE(!state->front_ccw) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
- S_028814_POLY_MODE(polygon_dual_mode) |
- S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
- S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back));
- rstate->states[EG_RASTERIZER__PA_CL_VS_OUT_CNTL] =
- S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
- S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
- rstate->states[EG_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000;
- /* point size 12.4 fixed point */
- tmp = (unsigned)(state->point_size * 8.0);
- rstate->states[EG_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp);
- rstate->states[EG_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000;
- rstate->states[EG_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008;
- rstate->states[EG_RASTERIZER__PA_SU_VTX_CNTL] = 0x00000005;
-
- rstate->states[EG_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000;
- rstate->states[EG_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400;
- rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000;
- rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000;
- rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000;
- rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000;
- rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl;
- rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000;
- rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale);
- rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units);
- rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale);
- rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units);
- radeon_state_pm4(rstate);
-}
-
-static void eg_scissor(struct r600_context *rctx, struct radeon_state *rstate)
-{
- const struct pipe_scissor_state *state = &rctx->scissor->state.scissor;
- const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- struct r600_screen *rscreen = rctx->screen;
- unsigned minx, maxx, miny, maxy;
- u32 tl, br;
-
- if (state == NULL) {
- minx = 0;
- miny = 0;
- maxx = fb->cbufs[0]->width;
- maxy = fb->cbufs[0]->height;
- } else {
- minx = state->minx;
- miny = state->miny;
- maxx = state->maxx;
- maxy = state->maxy;
- }
- tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny);
- br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy);
- radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0);
- /* screen scissor has no WINDOW OFFSET */
- rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl;
- rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br;
- rstate->states[EG_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000;
- rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl | S_028204_WINDOW_OFFSET_DISABLE(1);
- rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_BR] = br;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_BR] = br;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_BR] = br;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl;
- rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_BR] = br;
- rstate->states[EG_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA;
- rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1);
- rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br;
- rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1);
- rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br;
- radeon_state_pm4(rstate);
-}
-
-static void eg_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0);
- rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000;
- rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000;
- rstate->states[EG_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]);
- rstate->states[EG_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]);
- rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]);
- rstate->states[EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]);
- rstate->states[EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]);
- rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]);
- rstate->states[EG_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F;
- radeon_state_pm4(rstate);
-}
-
-static void eg_dsa(struct r600_context *rctx, struct radeon_state *rstate)
-{
- const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa;
- const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref;
- struct r600_screen *rscreen = rctx->screen;
- unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
- unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control;
- unsigned db_count_control = 0;
- struct r600_shader *rshader;
- struct r600_query *rquery = NULL;
- boolean query_running;
- int i;
-
- if (rctx->ps_shader == NULL) {
- return;
- }
- radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0);
-
- db_shader_control = 0;
- db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1);
- db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
-
- rshader = &rctx->ps_shader->shader;
- if (rshader->uses_kill)
- db_shader_control |= S_02880C_KILL_ENABLE(1);
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
- }
- stencil_ref_mask = 0;
- stencil_ref_mask_bf = 0;
- db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
- S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
- S_028800_ZFUNC(state->depth.func);
- /* set stencil enable */
-
- if (state->stencil[0].enabled) {
- db_depth_control |= S_028800_STENCIL_ENABLE(1);
- db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func));
- db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op));
- db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op));
- db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op));
-
- stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) |
- S_028430_STENCILWRITEMASK(state->stencil[0].writemask);
- stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]);
- if (state->stencil[1].enabled) {
- db_depth_control |= S_028800_BACKFACE_ENABLE(1);
- db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func));
- db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op));
- db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op));
- db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op));
- stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) |
- S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask);
- stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]);
- }
- }
-
- alpha_test_control = 0;
- alpha_ref = 0;
- if (state->alpha.enabled) {
- alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
- alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
- alpha_ref = fui(state->alpha.ref_value);
- }
-
- db_render_control = 0;
-/// db_render_control = S_028D0C_STENCIL_COMPRESS_DISABLE(1) |
-/// S_028D0C_DEPTH_COMPRESS_DISABLE(1);
- db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
-
- query_running = FALSE;
-
- LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
- if (rquery->state & R600_QUERY_STATE_STARTED) {
- query_running = TRUE;
- }
- }
-
- if (query_running) {
- db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
- db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
- }
-
- rstate->states[EG_DSA__DB_STENCIL_CLEAR] = 0x00000000;
- rstate->states[EG_DSA__DB_DEPTH_CLEAR] = 0x3F800000;
- rstate->states[EG_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control;
- rstate->states[EG_DSA__DB_STENCILREFMASK] = stencil_ref_mask;
- rstate->states[EG_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf;
- rstate->states[EG_DSA__SX_ALPHA_REF] = alpha_ref;
- // rstate->states[EG_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000;
- // rstate->states[EG_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000;
- rstate->states[EG_DSA__SPI_FOG_CNTL] = 0x00000000;
- rstate->states[EG_DSA__DB_DEPTH_CONTROL] = db_depth_control;
- rstate->states[EG_DSA__DB_SHADER_CONTROL] = db_shader_control;
- rstate->states[EG_DSA__DB_RENDER_CONTROL] = db_render_control;
- rstate->states[EG_DSA__DB_RENDER_OVERRIDE] = db_render_override;
- rstate->states[EG_DSA__DB_COUNT_CONTROL] = db_count_control;
- rstate->states[EG_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000;
- rstate->states[EG_DSA__DB_PRELOAD_CONTROL] = 0x00000000;
- rstate->states[EG_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00;
- radeon_state_pm4(rstate);
-}
-
-
-static INLINE u32 S_FIXED(float value, u32 frac_bits)
-{
- return value * (1 << frac_bits);
-}
-
-static void eg_sampler_border(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_sampler_state *state, unsigned id)
-{
- struct r600_screen *rscreen = rctx->screen;
- union util_color uc;
-
- util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS);
- if (uc.ui) {
- rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX] = id;
- rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]);
- rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]);
- rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]);
- rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]);
- }
- radeon_state_pm4(rstate);
-}
-
-static void eg_sampler(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_sampler_state *state, unsigned id)
-{
- struct r600_screen *rscreen = rctx->screen;
- union util_color uc;
-
- util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS);
- rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] =
- S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
- S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
- S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) |
- S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
- S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
- /* FIXME LOD it depends on texture base level ... */
- rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] =
- S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
- S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6));
-
- rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] =
- S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) |
-S_03C008_TYPE(1);
- radeon_state_pm4(rstate);
-
-}
-
-
-static void eg_resource(struct pipe_context *ctx, struct radeon_state *rstate,
- const struct pipe_sampler_view *view, unsigned id)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_screen *rscreen = rctx->screen;
- const struct util_format_description *desc;
- struct r600_resource_texture *tmp;
- struct r600_resource *rbuffer;
- unsigned format;
- uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4];
-
- rstate->cpm4 = 0;
- swizzle[0] = view->swizzle_r;
- swizzle[1] = view->swizzle_g;
- swizzle[2] = view->swizzle_b;
- swizzle[3] = view->swizzle_a;
- format = r600_translate_texformat(view->texture->format,
- swizzle,
- &word4, &yuv_format);
- if (format == ~0) {
- return;
- }
- desc = util_format_description(view->texture->format);
- if (desc == NULL) {
- R600_ERR("unknow format %d\n", view->texture->format);
- return;
- }
- radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS);
- tmp = (struct r600_resource_texture*)view->texture;
- rbuffer = &tmp->resource;
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo);
-
- rstate->nbo = 2;
- rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
- rstate->placement[1] = RADEON_GEM_DOMAIN_GTT;
- rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
- rstate->placement[3] = RADEON_GEM_DOMAIN_GTT;
-
- pitch = align(tmp->pitch[0] / tmp->bpt, 8);
-
- /* FIXME properly handle first level != 0 */
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD0] =
- S_030000_DIM(r600_tex_dim(view->texture->target)) |
- S_030000_PITCH((pitch / 8) - 1) |
- S_030000_TEX_WIDTH(view->texture->width0 - 1);
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD1] =
- S_030004_TEX_HEIGHT(view->texture->height0 - 1) |
- S_030004_TEX_DEPTH(view->texture->depth0 - 1);
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8;
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8;
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD4] =
- word4 |
- S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
- S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) |
- S_030010_REQUEST_SIZE(1) |
- S_030010_BASE_LEVEL(view->first_level);
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD5] =
- S_030014_LAST_LEVEL(view->last_level) |
- S_030014_BASE_ARRAY(0) |
- S_030014_LAST_ARRAY(0);
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0;
- rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD7] =
- S_03001C_DATA_FORMAT(format) |
- S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE);
- radeon_state_pm4(rstate);
-}
-
-static void eg_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate)
-{
- struct r600_screen *rscreen = rctx->screen;
- const struct pipe_blend_state *pbs = &rctx->blend->state.blend;
- int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
- uint32_t color_control, target_mask, shader_mask;
- int i;
-
- target_mask = 0;
- shader_mask = 0;
- color_control = S_028808_MODE(1);
-
- for (i = 0; i < nr_cbufs; i++) {
- shader_mask |= 0xf << (i * 4);
- }
-
- if (pbs->logicop_enable) {
- color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20);
- } else {
- color_control |= (0xcc << 16);
- }
-
- if (pbs->independent_blend_enable) {
- for (i = 0; i < nr_cbufs; i++) {
- target_mask |= (pbs->rt[i].colormask << (4 * i));
- }
- } else {
- for (i = 0; i < nr_cbufs; i++) {
- target_mask |= (pbs->rt[0].colormask << (4 * i));
- }
- }
- radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0);
- rstate->states[EG_CB_CNTL__CB_SHADER_MASK] = shader_mask;
- rstate->states[EG_CB_CNTL__CB_TARGET_MASK] = target_mask;
- rstate->states[EG_CB_CNTL__CB_COLOR_CONTROL] = color_control;
- rstate->states[EG_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000;
- rstate->states[EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000;
- rstate->states[EG_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF;
- radeon_state_pm4(rstate);
-}
-
-
-static void eg_init_config(struct r600_context *rctx)
-{
- int ps_prio;
- int vs_prio;
- int gs_prio;
- int es_prio;
- int hs_prio, cs_prio, ls_prio;
- int num_ps_gprs;
- int num_vs_gprs;
- int num_gs_gprs;
- int num_es_gprs;
- int num_hs_gprs;
- int num_ls_gprs;
- int num_temp_gprs;
- int num_ps_threads;
- int num_vs_threads;
- int num_gs_threads;
- int num_es_threads;
- int num_hs_threads;
- int num_ls_threads;
- int num_ps_stack_entries;
- int num_vs_stack_entries;
- int num_gs_stack_entries;
- int num_es_stack_entries;
- int num_hs_stack_entries;
- int num_ls_stack_entries;
- enum radeon_family family;
-
- family = radeon_get_family(rctx->rw);
- ps_prio = 0;
- vs_prio = 1;
- gs_prio = 2;
- es_prio = 3;
- hs_prio = 0;
- ls_prio = 0;
- cs_prio = 0;
-
- switch (family) {
- case CHIP_CEDAR:
- default:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
- num_ps_threads = 96;
- num_vs_threads = 16;
- num_gs_threads = 16;
- num_es_threads = 16;
- num_hs_threads = 16;
- num_ls_threads = 16;
- num_ps_stack_entries = 42;
- num_vs_stack_entries = 42;
- num_gs_stack_entries = 42;
- num_es_stack_entries = 42;
- num_hs_stack_entries = 42;
- num_ls_stack_entries = 42;
- break;
- case CHIP_REDWOOD:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
- num_ps_threads = 128;
- num_vs_threads = 20;
- num_gs_threads = 20;
- num_es_threads = 20;
- num_hs_threads = 20;
- num_ls_threads = 20;
- num_ps_stack_entries = 42;
- num_vs_stack_entries = 42;
- num_gs_stack_entries = 42;
- num_es_stack_entries = 42;
- num_hs_stack_entries = 42;
- num_ls_stack_entries = 42;
- break;
- case CHIP_JUNIPER:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
- num_ps_threads = 128;
- num_vs_threads = 20;
- num_gs_threads = 20;
- num_es_threads = 20;
- num_hs_threads = 20;
- num_ls_threads = 20;
- num_ps_stack_entries = 85;
- num_vs_stack_entries = 85;
- num_gs_stack_entries = 85;
- num_es_stack_entries = 85;
- num_hs_stack_entries = 85;
- num_ls_stack_entries = 85;
- break;
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
- num_ps_threads = 128;
- num_vs_threads = 20;
- num_gs_threads = 20;
- num_es_threads = 20;
- num_hs_threads = 20;
- num_ls_threads = 20;
- num_ps_stack_entries = 85;
- num_vs_stack_entries = 85;
- num_gs_stack_entries = 85;
- num_es_stack_entries = 85;
- num_hs_stack_entries = 85;
- num_ls_stack_entries = 85;
- break;
- }
-
- radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0);
-
- rctx->config.states[EG_CONFIG__SQ_CONFIG] = 0x00000000;
- switch (family) {
- case CHIP_CEDAR:
- break;
- default:
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1);
- break;
- }
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_EXPORT_SRC_C(1);
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_CS_PRIO(cs_prio);
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_LS_PRIO(ls_prio);
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_HS_PRIO(hs_prio);
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio);
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio);
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio);
- rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio);
-
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0;
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0;
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] = 0;
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_HS_GPRS(num_hs_gprs);
- rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_LS_GPRS(num_ls_gprs);
-
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] = 0;
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_PS_THREADS(num_ps_threads);
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_VS_THREADS(num_vs_threads);
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_GS_THREADS(num_gs_threads);
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_ES_THREADS(num_es_threads);
-
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] = 0;
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_HS_THREADS(num_hs_threads);
- rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_LS_THREADS(num_ls_threads);
-
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0;
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
-
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0;
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
-
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] = 0;
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries);
- rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
-
- rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL] = 0x00000000;
- rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL_1] = S_00913C_VTX_DONE_DELAY(4);
-
- rctx->config.states[EG_CONFIG__SX_MISC] = 0x00000000;
-
- rctx->config.states[EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000;
- rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_0] = 0x0;
- rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_1] = 0x0;
-
- rctx->config.states[EG_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
-
- rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2] = 0x00000000;
- rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3] = 0x00000000;
-
- rctx->config.states[EG_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_HOS_CNTL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GROUP_DECR] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_GS_MODE] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_STRMOUT_CONFIG] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG] = 0x00000000;
- rctx->config.states[EG_CONFIG__VGT_REUSE_OFF] = 0x00000001;
- rctx->config.states[EG_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
-// rctx->config.states[EG_CONFIG__VGT_CACHE_INVALIDATION] = 0x2;
-// rctx->config.states[EG_CONFIG__VGT_GS_VERTEX_REUSE] = 0x16;
- rctx->config.states[EG_CONFIG__PA_CL_ENHANCE] = (3 << 1) | 1;
-
- radeon_state_pm4(&rctx->config);
-}
-
-static int eg_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset,
- uint32_t stride, uint32_t src_format)
-{
- struct radeon_state *vs_resource = &rctx->vs_resource[id];
- struct r600_screen *rscreen = rctx->screen;
- unsigned format, num_format = 0, format_comp = 0;
-
- format = r600_translate_colorformat(src_format);
-
- r600_translate_vertex_num_format(src_format, &num_format, &format_comp);
- format = S_030008_DATA_FORMAT(format) | S_030008_NUM_FORMAT_ALL(num_format) |
- S_030008_FORMAT_COMP_ALL(format_comp);
-
- radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS);
-
- radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo);
- vs_resource->nbo = 1;
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = offset;
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1;
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = S_030008_STRIDE(stride) | format;
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W);
-
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000;
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000;
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0x00000000;
- vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD7] = 0xC0000000;
- vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT;
- vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT;
- return radeon_state_pm4(vs_resource);
-}
-
-static int eg_draw_vgt_init(struct r600_draw *draw,
- int vgt_draw_initiator)
-{
- struct r600_context *rctx = r600_context(draw->ctx);
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer;
- radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0);
- draw->draw.states[EG_DRAW__VGT_NUM_INDICES] = draw->count;
- draw->draw.states[EG_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
- draw->draw.states[EG_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset;
- if (rbuffer) {
- radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo);
- draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT;
- draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT;
- draw->draw.nbo = 1;
- }
- return radeon_state_pm4(&draw->draw);
-}
-
-static int eg_draw_vgt_prim(struct r600_draw *draw,
- uint32_t prim, uint32_t vgt_dma_index_type)
-{
- struct r600_context *rctx = r600_context(draw->ctx);
- struct r600_screen *rscreen = rctx->screen;
- radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0);
- draw->vgt.states[EG_VGT__VGT_PRIMITIVE_TYPE] = prim;
- draw->vgt.states[EG_VGT__VGT_MAX_VTX_INDX] = draw->max_index;
- draw->vgt.states[EG_VGT__VGT_MIN_VTX_INDX] = draw->min_index;
- draw->vgt.states[EG_VGT__VGT_INDX_OFFSET] = draw->index_bias;
- draw->vgt.states[EG_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type;
- draw->vgt.states[EG_VGT__VGT_PRIMITIVEID_EN] = 0x00000000;
- draw->vgt.states[EG_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
- draw->vgt.states[EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000;
- draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000;
- draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000;
- return radeon_state_pm4(&draw->vgt);
-}
-
-
-static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader,
- struct radeon_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- const struct pipe_rasterizer_state *rasterizer;
- struct r600_shader *rshader = &rpshader->shader;
- unsigned i, tmp, exports_ps, num_cout;
- boolean have_pos = FALSE, have_face = FALSE;
-
- rasterizer = &rctx->rasterizer->state.rasterizer;
-
- radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(rctx, rshader, i));
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- have_pos = TRUE;
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
-
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- have_face = TRUE;
-
- if (rasterizer->sprite_coord_enable & (1 << i)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- state->states[EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
- }
-
- exports_ps = 0;
- num_cout = 0;
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- exports_ps |= 1;
- else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
- num_cout++;
- }
- }
- exports_ps |= (1 << num_cout);
- if (!exports_ps) {
- /* always at least export 1 component per pixel */
- exports_ps = 2;
- }
- state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
- if (have_pos) {
- state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1);
- state->states[EG_PS_SHADER__SPI_INPUT_Z] |= 1;
- }
-
- state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
- state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] |= S_0286D0_FRONT_FACE_ENA(have_face);
-
- state->states[EG_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) |
- S_028844_STACK_SIZE(rshader->bc.nstack);
- state->states[EG_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
- state->states[EG_PS_SHADER__SPI_BARYC_CNTL] = S_0286E0_PERSP_CENTROID_ENA(1) |
- S_0286E0_LINEAR_CENTROID_ENA(1);
- radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo);
- state->nbo = 1;
- state->placement[0] = RADEON_GEM_DOMAIN_GTT;
- return radeon_state_pm4(state);
-}
-
-static int eg_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader,
- struct radeon_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_shader *rshader = &rpshader->shader;
- unsigned i, tmp;
-
- radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
- for (i = 0; i < 10; i++) {
- state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
- }
- /* so far never got proper semantic id from tgsi */
- for (i = 0; i < 32; i++) {
- tmp = i << ((i & 3) * 8);
- state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
- }
- state->states[EG_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
- state->states[EG_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028860_NUM_GPRS(rshader->bc.ngpr) |
- S_028860_STACK_SIZE(rshader->bc.nstack);
- radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo);
- radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo);
- state->nbo = 2;
- state->placement[0] = RADEON_GEM_DOMAIN_GTT;
- state->placement[2] = RADEON_GEM_DOMAIN_GTT;
- return radeon_state_pm4(state);
-
-}
-
-struct r600_context_hw_state_vtbl eg_hw_state_vtbl = {
- .blend = eg_blend,
- .ucp = eg_ucp,
- .cb = eg_cb,
- .db = eg_db,
- .rasterizer = eg_rasterizer,
- .scissor = eg_scissor,
- .viewport = eg_viewport,
- .dsa = eg_dsa,
- .sampler_border = eg_sampler_border,
- .sampler = eg_sampler,
- .resource = eg_resource,
- .cb_cntl = eg_cb_cntl,
- .vs_resource = eg_vs_resource,
- .vgt_init = eg_draw_vgt_init,
- .vgt_prim = eg_draw_vgt_prim,
- .vs_shader = eg_vs_shader,
- .ps_shader = eg_ps_shader,
- .init_config = eg_init_config,
-};
-
-void eg_set_constant_buffer(struct pipe_context *ctx,
- uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_context *rctx = r600_context(ctx);
- unsigned nconstant = 0, type, shader_class, size;
- struct radeon_state *rstate, *rstates;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
-
- type = R600_STATE_CBUF;
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- shader_class = R600_SHADER_VS;
- rstates = rctx->vs_constant;
- break;
- case PIPE_SHADER_FRAGMENT:
- shader_class = R600_SHADER_PS;
- rstates = rctx->ps_constant;
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-
- rstate = &rstates[0];
-
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
- nconstant = buffer->width0 / 16;
- size = ALIGN_DIVUP(nconstant, 16);
-
- radeon_state_init(rstate, rscreen->rw, type, 0, shader_class);
- rstate->states[EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size;
- rstate->states[EG_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0;
-
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- rstate->nbo = 1;
- rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
- if (radeon_state_pm4(rstate))
- return;
- radeon_draw_bind(&rctx->draw, rstate);
-}
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index 497865a66d..be81c28b43 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -25,6 +25,7 @@
#include "util/u_format.h"
#include "evergreend.h"
+#include "r600_formats.h"
static INLINE uint32_t r600_translate_blend_function(int blend_func)
{
@@ -276,6 +277,14 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format)
}
}
+/* Returns 1 for PIPE_FORMAT_Z24_UNORM_S8_USCALED and 0 for every other format. */
+static inline uint32_t r600_translate_stencilformat(enum pipe_format format)
+{
+	return (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) ? 1 : 0;
+}
+
static inline uint32_t r600_translate_colorswap(enum pipe_format format)
{
switch (format) {
@@ -301,6 +310,12 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_Z16_UNORM:
return V_028C70_SWAP_STD;
+
+ case PIPE_FORMAT_R8G8_UNORM:
+ return V_028C70_SWAP_STD;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return V_028C70_SWAP_STD;
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
@@ -338,6 +353,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
return V_028C70_SWAP_STD_REV;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return V_028C70_SWAP_STD;
+
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
@@ -382,6 +400,12 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_Z16_UNORM:
return V_028C70_COLOR_16;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return V_028C70_COLOR_8_8;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return V_028C70_COLOR_16;
+
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
@@ -419,6 +443,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_16_16_FLOAT;
case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16_UNORM:
return V_028C70_COLOR_16_16;
/* 64-bit buffers. */
@@ -453,25 +478,6 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE void r600_translate_vertex_num_format(enum pipe_format format, uint32_t *num_format_p,
- uint32_t *format_comp_p)
-{
- uint32_t num_format = 0, format_comp = 0;
- switch (format) {
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- case PIPE_FORMAT_R16G16B16_SSCALED:
- case PIPE_FORMAT_R16G16_SSCALED:
- case PIPE_FORMAT_R32G32_SSCALED:
- num_format = V_030008_SQ_NUM_FORMAT_SCALED;
- format_comp = 1;
- break;
- default:
- break;
- }
- *num_format_p = num_format;
- *format_comp_p = format_comp;
-}
-
static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
{
return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
@@ -493,4 +499,173 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
return r600_translate_colorformat(format) != ~0;
}
+/**
+ * Translate a gallium vertex format into the packed S_030008_DATA_FORMAT,
+ * S_030008_FORMAT_COMP_ALL and S_030008_NUM_FORMAT_ALL fields.
+ *
+ * Only UTIL_FORMAT_LAYOUT_PLAIN formats whose first non-VOID channel is a
+ * 16/32-bit float or an 8/16/32-bit signed/unsigned integer are accepted.
+ *
+ * \param format  gallium format of the vertex element
+ * \return the packed fields, or ~0 (after reporting through R600_ERR) when
+ *         the format cannot be expressed
+ */
+static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
+{
+	uint32_t result = 0;
+	const struct util_format_description *desc;
+	unsigned i;
+
+	desc = util_format_description(format);
+	/* Defensive: do not dereference a missing description. */
+	if (desc == NULL || desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+		goto out_unknown;
+	}
+
+	/* Find the first non-VOID channel. */
+	for (i = 0; i < 4; i++) {
+		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+			break;
+		}
+	}
+	/* Every scanned channel was VOID: i is 4 here and must not be used
+	 * as an index into desc->channel[]. */
+	if (i == 4) {
+		goto out_unknown;
+	}
+
+	switch (desc->channel[i].type) {
+	/* Half-floats and floats (any other float size is rejected below). */
+	case UTIL_FORMAT_TYPE_FLOAT:
+		switch (desc->channel[i].size) {
+		case 16:
+			switch (desc->nr_channels) {
+			case 1:
+				result = FMT_16_FLOAT;
+				break;
+			case 2:
+				result = FMT_16_16_FLOAT;
+				break;
+			case 3:
+				result = FMT_16_16_16_FLOAT;
+				break;
+			case 4:
+				result = FMT_16_16_16_16_FLOAT;
+				break;
+			default:
+				goto out_unknown;
+			}
+			break;
+		case 32:
+			switch (desc->nr_channels) {
+			case 1:
+				result = FMT_32_FLOAT;
+				break;
+			case 2:
+				result = FMT_32_32_FLOAT;
+				break;
+			case 3:
+				result = FMT_32_32_32_FLOAT;
+				break;
+			case 4:
+				result = FMT_32_32_32_32_FLOAT;
+				break;
+			default:
+				goto out_unknown;
+			}
+			break;
+		default:
+			goto out_unknown;
+		}
+		break;
+	/* Unsigned and signed ints share the data format; signedness is
+	 * encoded separately through FORMAT_COMP_ALL further down. */
+	case UTIL_FORMAT_TYPE_UNSIGNED:
+	case UTIL_FORMAT_TYPE_SIGNED:
+		switch (desc->channel[i].size) {
+		case 8:
+			switch (desc->nr_channels) {
+			case 1:
+				result = FMT_8;
+				break;
+			case 2:
+				result = FMT_8_8;
+				break;
+			case 3:
+				/* Deliberately fetched as FMT_8_8_8_8: using
+				 * FMT_8_8_8 fails the piglit draw-vertices test. */
+				/* fallthrough */
+			case 4:
+				result = FMT_8_8_8_8;
+				break;
+			default:
+				goto out_unknown;
+			}
+			break;
+		case 16:
+			switch (desc->nr_channels) {
+			case 1:
+				result = FMT_16;
+				break;
+			case 2:
+				result = FMT_16_16;
+				break;
+			case 3:
+				/* Deliberately fetched as FMT_16_16_16_16: using
+				 * FMT_16_16_16 fails the piglit draw-vertices test. */
+				/* fallthrough */
+			case 4:
+				result = FMT_16_16_16_16;
+				break;
+			default:
+				goto out_unknown;
+			}
+			break;
+		case 32:
+			switch (desc->nr_channels) {
+			case 1:
+				result = FMT_32;
+				break;
+			case 2:
+				result = FMT_32_32;
+				break;
+			case 3:
+				result = FMT_32_32_32;
+				break;
+			case 4:
+				result = FMT_32_32_32_32;
+				break;
+			default:
+				goto out_unknown;
+			}
+			break;
+		default:
+			goto out_unknown;
+		}
+		break;
+	default:
+		goto out_unknown;
+	}
+
+	result = S_030008_DATA_FORMAT(result);
+
+	if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+		result |= S_030008_FORMAT_COMP_ALL(1);
+	}
+	/* NOTE(review): 0 and 2 are presumably the NORM and SCALED
+	 * SQ_NUM_FORMAT encodings -- confirm against evergreend.h. */
+	if (desc->channel[i].normalized) {
+		result |= S_030008_NUM_FORMAT_ALL(0);
+	} else {
+		result |= S_030008_NUM_FORMAT_ALL(2);
+	}
+	return result;
+out_unknown:
+	R600_ERR("unsupported vertex format %s\n", util_format_name(format));
+	return ~0;
+}
+
+/*
+ * Build the S_03000C_DST_SEL_X/Y/Z/W fields for a vertex format from the
+ * swizzle[] entries of its format description.  Only the first nr_channels
+ * selects are filled in; the remaining fields stay 0.  A layout other than
+ * UTIL_FORMAT_LAYOUT_PLAIN prints a diagnostic to stderr and yields 0.
+ */
+static INLINE uint32_t r600_translate_vertex_data_swizzle(enum pipe_format format)
+{
+	const struct util_format_description *desc = util_format_description(format);
+	uint32_t sel = 0;
+	unsigned nchan;
+
+	assert(format);
+
+	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+		fprintf(stderr, "r600: Bad format %s in %s:%d\n",
+			util_format_short_name(format), __FUNCTION__, __LINE__);
+		return 0;
+	}
+
+	/* One destination select per channel the format actually has. */
+	nchan = desc->nr_channels;
+	if (nchan > 0)
+		sel |= S_03000C_DST_SEL_X(desc->swizzle[0]);
+	if (nchan > 1)
+		sel |= S_03000C_DST_SEL_Y(desc->swizzle[1]);
+	if (nchan > 2)
+		sel |= S_03000C_DST_SEL_Z(desc->swizzle[2]);
+	if (nchan > 3)
+		sel |= S_03000C_DST_SEL_W(desc->swizzle[3]);
+
+	return sel;
+}
+
#endif
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0803a5768c..ce34ed4ad3 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -39,12 +39,10 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
+#include <util/u_framebuffer.h>
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "evergreend.h"
-struct radeon_state {
- unsigned dummy;
-};
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
@@ -60,10 +58,10 @@ static void evergreen_set_blend_color(struct pipe_context *ctx,
return;
rstate->id = R600_PIPE_STATE_BLEND_COLOR;
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]);
rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate;
@@ -104,9 +102,9 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
}
}
blend->cb_target_mask = target_mask;
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028808_CB_COLOR_CONTROL,
+ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
color_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
for (int i = 0; i < 8; i++) {
unsigned eqRGB = state->rt[i].rgb_func;
@@ -133,7 +131,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
}
}
for (int i = 0; i < 8; i++) {
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL);
}
return rstate;
@@ -214,25 +212,25 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
/* TODO db_render_override depends on query */
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
R_028430_DB_STENCILREFMASK, stencil_ref_mask,
0xFFFFFFFF & C_028430_STENCILREF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
+ r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
return rstate;
}
@@ -244,6 +242,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
struct r600_pipe_state *rstate;
unsigned tmp;
unsigned prov_vtx = 1, polygon_dual_mode;
+ unsigned clip_rule;
if (rs == NULL) {
return NULL;
@@ -253,6 +252,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
+ clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
+
/* offset */
rs->offset_units = state->offset_units;
rs->offset_scale = state->offset_scale * 12.0f;
@@ -271,11 +272,11 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
}
}
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
state->fill_back != PIPE_POLYGON_MODE_FILL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028814_PA_SU_SC_MODE_CNTL,
+ r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL,
S_028814_PROVOKING_VTX_LAST(prov_vtx) |
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
@@ -286,22 +287,23 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
S_028814_POLY_MODE(polygon_dual_mode) |
S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02881C_PA_CL_VS_OUT_CNTL,
+ r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL,
S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
/* point size 12.4 fixed point */
tmp = (unsigned)(state->point_size * 8.0);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
return rstate;
}
@@ -347,7 +349,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_SAMPLER, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
+ r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
@@ -357,17 +359,20 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
/* FIXME LOD it depends on texture base level ... */
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_SAMPLER, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
+ r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_SAMPLER, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
+ r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) |
S_03C008_TYPE(1),
0xFFFFFFFF, NULL);
if (uc.ui) {
- /* TODO border color */
+ r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
}
return rstate;
}
@@ -406,7 +411,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
unsigned format;
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
unsigned char swizzle[4];
- struct radeon_ws_bo *bo[2];
+ struct r600_bo *bo[2];
if (resource == NULL)
return NULL;
@@ -424,15 +429,15 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(texture->format,
+ format = r600_translate_texformat(state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
format = 0;
}
- desc = util_format_description(texture->format);
+ desc = util_format_description(state->format);
if (desc == NULL) {
- R600_ERR("unknow format %d\n", texture->format);
+ R600_ERR("unknow format %d\n", state->format);
}
tmp = (struct r600_resource_texture*)texture;
rbuffer = &tmp->resource;
@@ -440,41 +445,37 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
bo[1] = rbuffer->bo;
/* FIXME depth texture decompression */
if (tmp->depth) {
-#if 0
- r = evergreen_texture_from_depth(ctx, tmp, view->first_level);
- if (r) {
- return;
- }
- bo[0] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed);
- bo[1] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed);
-#endif
+ r600_texture_depth_flush(ctx, texture);
+ tmp = (struct r600_resource_texture*)texture;
+ rbuffer = &tmp->flushed_depth_texture->resource;
+ bo[0] = rbuffer->bo;
+ bo[1] = rbuffer->bo;
}
- pitch = align(tmp->pitch[0] / tmp->bpt, 8);
+ pitch = align(tmp->pitch_in_pixels[0], 8);
/* FIXME properly handle first level != 0 */
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030000_RESOURCE0_WORD0,
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
S_030000_DIM(r600_tex_dim(texture->target)) |
S_030000_PITCH((pitch / 8) - 1) |
S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030004_RESOURCE0_WORD1,
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
S_030004_TEX_HEIGHT(texture->height0 - 1) |
S_030004_TEX_DEPTH(texture->depth0 - 1),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030008_RESOURCE0_WORD2,
- tmp->offset[0] >> 8, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_03000C_RESOURCE0_WORD3,
- tmp->offset[1] >> 8, 0xFFFFFFFF, bo[1]);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030010_RESOURCE0_WORD4,
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+ (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+ (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) |
- S_030010_REQUEST_SIZE(1) |
S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030014_RESOURCE0_WORD5,
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
S_030014_LAST_LEVEL(state->last_level) |
S_030014_BASE_ARRAY(0) |
S_030014_LAST_ARRAY(0), 0xffffffff, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_03001C_RESOURCE0_WORD7,
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
S_03001C_DATA_FORMAT(format) |
S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
@@ -484,8 +485,14 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
struct pipe_sampler_view **views)
{
- /* TODO */
- assert(1);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
+
+ for (int i = 0; i < count; i++) {
+ if (resource[i]) {
+ evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS);
+ }
+ }
}
static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
@@ -493,12 +500,27 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
-
- for (int i = 0; i < count; i++) {
- if (resource[i]) {
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if (&rctx->ps_samplers.views[i]->base != views[i]) {
+ if (resource[i])
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ else
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
+ views[i]);
+ }
+ }
+ for (i = count; i < NUM_TEX_UNITS; i++) {
+ if (rctx->ps_samplers.views[i]) {
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
+ rctx->ps_samplers.n_views = count;
}
static void evergreen_bind_state(struct pipe_context *ctx, void *state)
@@ -517,6 +539,10 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count,
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
+
+ memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
+ rctx->ps_samplers.n_samplers = count;
+
for (int i = 0; i < count; i++) {
evergreen_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i);
}
@@ -527,7 +553,6 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count,
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
- /* TODO implement */
for (int i = 0; i < count; i++) {
evergreen_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i);
}
@@ -542,7 +567,7 @@ static void evergreen_delete_state(struct pipe_context *ctx, void *state)
rctx->states[rstate->id] = NULL;
}
for (int i = 0; i < rstate->nregs; i++) {
- radeon_ws_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
+ r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
}
free(rstate);
}
@@ -570,20 +595,20 @@ static void evergreen_set_clip_state(struct pipe_context *ctx,
rctx->clip = *state;
rstate->id = R600_PIPE_STATE_CLIP;
for (int i = 0; i < state->nr; i++) {
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_0285BC_PA_CL_UCP0_X + i * 4,
fui(state->ucp[i][0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_0285C0_PA_CL_UCP0_Y + i * 4,
fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_0285C4_PA_CL_UCP0_Z + i * 4,
fui(state->ucp[i][2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_0285C8_PA_CL_UCP0_W + i * 4,
fui(state->ucp[i][3]), 0xFFFFFFFF, NULL);
}
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL,
+ r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) |
S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) |
S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL);
@@ -628,51 +653,30 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_SCISSOR;
tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny);
br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028210_PA_SC_CLIPRECT_0_TL, tl,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028214_PA_SC_CLIPRECT_0_BR, br,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028218_PA_SC_CLIPRECT_1_TL, tl,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_02821C_PA_SC_CLIPRECT_1_BR, br,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028220_PA_SC_CLIPRECT_2_TL, tl,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028224_PA_SC_CLIPRECT_2_BR, br,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028228_PA_SC_CLIPRECT_3_TL, tl,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_02822C_PA_SC_CLIPRECT_3_BR, br,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
- 0xFFFFFFFF, NULL);
free(rctx->states[R600_PIPE_STATE_SCISSOR]);
rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
@@ -692,11 +696,11 @@ static void evergreen_set_stencil_ref(struct pipe_context *ctx,
rctx->stencil_ref = *state;
rstate->id = R600_PIPE_STATE_STENCIL_REF;
tmp = S_028430_STENCILREF(state->ref_value[0]);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028430_DB_STENCILREFMASK, tmp,
~C_028430_STENCILREF, NULL);
tmp = S_028434_STENCILREF_BF(state->ref_value[1]);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028434_DB_STENCILREFMASK_BF, tmp,
~C_028434_STENCILREF_BF, NULL);
@@ -716,15 +720,15 @@ static void evergreen_set_viewport_state(struct pipe_context *ctx,
rctx->viewport = *state;
rstate->id = R600_PIPE_STATE_VIEWPORT;
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
free(rctx->states[R600_PIPE_STATE_VIEWPORT]);
rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate;
@@ -741,7 +745,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
unsigned color_info;
unsigned format, swap, ntype;
const struct util_format_description *desc;
- struct radeon_ws_bo *bo[3];
+ struct r600_bo *bo[3];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
rbuffer = &rtex->resource;
@@ -749,8 +753,8 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1;
ntype = 0;
desc = util_format_description(rtex->resource.base.b.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
@@ -766,30 +770,30 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
color_info |= S_028C70_SOURCE_FORMAT(1);
/* FIXME handle enabling of CB beyond BASE8 which has different offset */
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028C60_CB_COLOR0_BASE + cb * 0x3C,
- state->cbufs[cb]->offset >> 8, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ r600_pipe_state_add_reg(rstate,
R_028C78_CB_COLOR0_DIM + cb * 0x3C,
0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028C70_CB_COLOR0_INFO + cb * 0x3C,
color_info, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028C64_CB_COLOR0_PITCH + cb * 0x3C,
S_028C64_PITCH_TILE_MAX(pitch),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028C68_CB_COLOR0_SLICE + cb * 0x3C,
S_028C68_SLICE_TILE_MAX(slice),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028C6C_CB_COLOR0_VIEW + cb * 0x3C,
0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C,
S_028C74_NON_DISP_TILING_ORDER(1),
- 0xFFFFFFFF, NULL);
+ 0xFFFFFFFF, bo[0]);
}
static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
@@ -798,7 +802,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
unsigned level;
- unsigned pitch, slice, format;
+ unsigned pitch, slice, format, stencil_format;
if (state->zsbuf == NULL)
return;
@@ -811,23 +815,37 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
rbuffer = &rtex->resource;
level = state->zsbuf->level;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
+ stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
+
+ r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
+ (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
+ (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+
+ if (stencil_format) {
+ uint32_t stencil_offset;
+
+ stencil_offset = ((state->zsbuf->height * rtex->pitch_in_bytes[level]) + 255) & ~255;
+ r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
+ (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
+ (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ }
+
+ r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
+ S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028048_DB_Z_READ_BASE,
- state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028050_DB_Z_WRITE_BASE,
- state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo);
-// r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028014_DB_HTILE_DATA_BASE, state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028040_DB_Z_INFO,
+ r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format),
0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028058_DB_DEPTH_SIZE,
+ r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
S_028058_PITCH_TILE_MAX(pitch),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02805C_DB_DEPTH_SLICE,
+ r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE,
S_02805C_SLICE_TILE_MAX(slice),
0xFFFFFFFF, NULL);
}
@@ -844,14 +862,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
/* unreference old buffer and reference new one */
rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL);
- }
- for (int i = 0; i < state->nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]);
- }
- pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf);
- rctx->framebuffer = *state;
+
+ util_copy_framebuffer_state(&rctx->framebuffer, state);
+
+ rctx->pframebuffer = &rctx->framebuffer;
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
@@ -871,26 +885,44 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
tl = S_028240_TL_X(0) | S_028240_TL_Y(0);
br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028244_PA_SC_GENERIC_SCISSOR_BR, br,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
+ 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028238_CB_TARGET_MASK,
+ r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK,
0x00000000, target_mask, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02823C_CB_SHADER_MASK,
+ r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
shader_mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C04_PA_SC_AA_CONFIG,
+ r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
+ r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
0x00000000, 0xFFFFFFFF, NULL);
free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
@@ -941,24 +973,24 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
switch (shader) {
case PIPE_SHADER_VERTEX:
rctx->vs_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vs_const_buffer, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
ALIGN_DIVUP(buffer->width0 >> 4, 16),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->vs_const_buffer, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
R_028980_ALU_CONST_CACHE_VS_0,
- 0, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
break;
case PIPE_SHADER_FRAGMENT:
rctx->ps_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->ps_const_buffer, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
ALIGN_DIVUP(buffer->width0 >> 4, 16),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->ps_const_buffer, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
R_028940_ALU_CONST_CACHE_PS_0,
- 0, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
break;
default:
@@ -973,8 +1005,7 @@ static void *evergreen_create_shader_state(struct pipe_context *ctx,
struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
int r;
- shader->shader.use_mem_constant = TRUE;
- r = r600_pipe_shader_create2(ctx, shader, state->tokens);
+ r = r600_pipe_shader_create(ctx, shader, state->tokens);
if (r) {
return NULL;
}
@@ -1021,7 +1052,7 @@ static void evergreen_delete_vs_shader(struct pipe_context *ctx, void *state)
free(shader);
}
-void evergreen_init_state_functions2(struct r600_pipe_context *rctx)
+void evergreen_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = evergreen_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
@@ -1062,7 +1093,7 @@ void evergreen_init_state_functions2(struct r600_pipe_context *rctx)
rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy;
}
-void evergreen_init_config2(struct r600_pipe_context *rctx)
+void evergreen_init_config(struct r600_pipe_context *rctx)
{
struct r600_pipe_state *rstate = &rctx->config;
int ps_prio;
@@ -1206,125 +1237,125 @@ void evergreen_init_config2(struct r600_pipe_context *rctx)
tmp |= S_008C00_VS_PRIO(vs_prio);
tmp |= S_008C00_GS_PRIO(gs_prio);
tmp |= S_008C00_ES_PRIO(es_prio);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs);
tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C18_NUM_PS_THREADS(num_ps_threads);
tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads);
tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
tmp = 0;
tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries);
tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL);
-
-// r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL);
-
-// r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
-
-r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL,
+ r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL);
+
+// r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL);
+
+// r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
+
+r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
0x0, 0xFFFFFFFF, NULL);
r600_context_pipe_state_set(&rctx->ctx, rstate);
@@ -1336,21 +1367,20 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate;
struct r600_resource *rbuffer;
- unsigned i, j, offset, format, prim;
+ unsigned i, j, offset, prim;
u32 vgt_dma_index_type, vgt_draw_initiator, mask;
struct pipe_vertex_buffer *vertex_buffer;
struct r600_draw rdraw;
struct r600_pipe_state vgt;
struct r600_drawl draw;
- assert(info->index_bias == 0);
-
if (rctx->any_user_vbs) {
- r600_upload_user_buffers2(rctx);
+ r600_upload_user_buffers(rctx);
rctx->any_user_vbs = FALSE;
}
memset(&draw, 0, sizeof(struct r600_drawl));
+ draw.ctx = ctx;
draw.mode = info->mode;
draw.start = info->start;
draw.count = info->count;
@@ -1360,16 +1390,16 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
draw.max_index = info->max_index;
draw.index_bias = info->index_bias;
- r600_translate_index_buffer2(rctx, &rctx->index_buffer.buffer,
+ r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
&rctx->index_buffer.index_size,
&draw.start,
info->count);
draw.index_size = rctx->index_buffer.index_size;
- draw.index_buffer = rctx->index_buffer.buffer;
+ pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
draw.index_buffer_offset = draw.start * draw.index_size;
draw.start = 0;
- r600_upload_index_buffer2(rctx, &draw);
+ r600_upload_index_buffer(rctx, &draw);
} else {
draw.index_size = 0;
draw.index_buffer = NULL;
@@ -1399,44 +1429,40 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
return;
/* rebuild vertex shader if input format changed */
- if (r600_pipe_shader_update2(&rctx->context, rctx->vs_shader))
+ if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader))
return;
- if (r600_pipe_shader_update2(&rctx->context, rctx->ps_shader))
+ if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
return;
for (i = 0 ; i < rctx->vertex_elements->count; i++) {
- unsigned num_format = 0, format_comp = 0;
-
+ uint32_t word3, word2;
+ uint32_t format;
rstate = &rctx->vs_resource[i];
+
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
j = rctx->vertex_elements->elements[i].vertex_buffer_index;
vertex_buffer = &rctx->vertex_buffer[j];
rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset;
- format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format);
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
+ offset = rctx->vertex_elements->elements[i].src_offset +
+ vertex_buffer->buffer_offset +
+ r600_bo_offset(rbuffer->bo);
- r600_translate_vertex_num_format(rctx->vertex_elements->elements[i].src_format, &num_format, &format_comp);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE,
- R_030008_RESOURCE0_WORD2,
- S_030008_STRIDE(vertex_buffer->stride) |
- S_030008_DATA_FORMAT(format) |
- S_030008_NUM_FORMAT_ALL(num_format) |
- S_030008_FORMAT_COMP_ALL(format_comp),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE,
- R_03000C_RESOURCE0_WORD3,
- S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL);
+ format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format);
+
+ word2 = format | S_030008_STRIDE(vertex_buffer->stride);
+
+ word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->elements[i].src_format);
+
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL);
evergreen_vs_resource_set(&rctx->ctx, rstate, i);
}
@@ -1447,11 +1473,13 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
vgt.id = R600_PIPE_STATE_VGT;
vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONFIG, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
float offset_units = rctx->rasterizer->offset_units;
@@ -1476,19 +1504,19 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
return;
}
offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&vgt,
R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&vgt,
R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&vgt,
R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&vgt,
R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(&vgt,
R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
}
@@ -1505,6 +1533,8 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
rdraw.indices_bo_offset = draw.index_buffer_offset;
}
evergreen_context_draw(&rctx->ctx, &rdraw);
+
+ pipe_resource_reference(&draw.index_buffer, NULL);
}
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
@@ -1512,40 +1542,63 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z;
- boolean have_pos = FALSE, have_face = FALSE;
+ unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ int pos_index = -1, face_index = -1;
+ int ninterp = 0;
+ boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
+ unsigned spi_baryc_cntl;
/* clear previous register */
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index2(&rctx->vs_shader->shader, rshader, i));
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ /* evergreen NUM_INTERP only contains values interpolated into the LDS,
+ POSITION goes via GPRs from the SC so isn't counted */
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- have_pos = TRUE;
+ pos_index = i;
+ else if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+ face_index = i;
+ else {
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR ||
+ rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ ninterp++;
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ have_linear = TRUE;
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ have_perspective = TRUE;
+ if (rshader->input[i].centroid)
+ have_centroid = TRUE;
+ }
if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
}
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- have_face = TRUE;
if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
tmp |= S_028644_PT_SPRITE_TEX(1);
}
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
for (i = 0; i < rshader->noutput; i++) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_Z_EXPORT_ENABLE(1),
- S_02880C_Z_EXPORT_ENABLE(1), NULL);
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ S_02880C_Z_EXPORT_ENABLE(1),
+ S_02880C_Z_EXPORT_ENABLE(1), NULL);
+ if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ S_02880C_STENCIL_EXPORT_ENABLE(1),
+ S_02880C_STENCIL_EXPORT_ENABLE(1), NULL);
}
exports_ps = 0;
num_cout = 0;
for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
+ rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
exports_ps |= 1;
else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
num_cout++;
@@ -1557,46 +1610,75 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
exports_ps = 2;
}
- spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
+ if (ninterp == 0) {
+ ninterp = 1;
+ have_perspective = TRUE;
+ }
+
+ spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) |
+ S_0286CC_PERSP_GRADIENT_ENA(have_perspective) |
+ S_0286CC_LINEAR_GRADIENT_ENA(have_linear);
spi_input_z = 0;
- if (have_pos) {
- spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1);
+ if (pos_index != -1) {
+ spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) |
+ S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+ S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
spi_input_z |= 1;
}
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286CC_SPI_PS_IN_CONTROL_0,
+
+ spi_ps_in_control_1 = 0;
+ if (face_index != -1) {
+ spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
+ }
+
+ spi_baryc_cntl = 0;
+ if (have_perspective)
+ spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) |
+ S_0286E0_PERSP_CENTROID_ENA(have_centroid);
+ if (have_linear)
+ spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
+ S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
+
+ r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0,
spi_ps_in_control_0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286D0_SPI_PS_IN_CONTROL_1,
- S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1,
+ spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2,
+ 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0286E0_SPI_BARYC_CNTL,
+ spi_baryc_cntl,
+ 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate,
R_028840_SQ_PGM_START_PS,
- 0x00000000, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+ r600_pipe_state_add_reg(rstate,
R_028844_SQ_PGM_RESOURCES_PS,
S_028844_NUM_GPRS(rshader->bc.ngpr) |
S_028844_PRIME_CACHE_ON_DRAW(1) |
S_028844_STACK_SIZE(rshader->bc.nstack),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028848_SQ_PGM_RESOURCES_2_PS,
0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_02884C_SQ_PGM_EXPORTS_PS,
exports_ps, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
- R_0286E0_SPI_BARYC_CNTL,
- S_0286E0_PERSP_CENTROID_ENA(1) |
- S_0286E0_LINEAR_CENTROID_ENA(1),
- 0xFFFFFFFF, NULL);
if (rshader->uses_kill) {
/* only set some bits here, the other bits are set in the dsa state */
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_02880C_DB_SHADER_CONTROL,
S_02880C_KILL_ENABLE(1),
S_02880C_KILL_ENABLE(1), NULL);
}
+
+ r600_pipe_state_add_reg(rstate,
+ R_03A200_SQ_LOOP_CONST_0, 0x01000FFF,
+ 0xFFFFFFFF, NULL);
}
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
@@ -1618,30 +1700,57 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
spi_vs_out_id[i / 4] |= tmp;
}
for (i = 0; i < 10; i++) {
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_02861C_SPI_VS_OUT_ID_0 + i * 4,
spi_vs_out_id[i], 0xFFFFFFFF, NULL);
}
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028860_SQ_PGM_RESOURCES_VS,
S_028860_NUM_GPRS(rshader->bc.ngpr) |
S_028860_STACK_SIZE(rshader->bc.nstack),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_028864_SQ_PGM_RESOURCES_2_VS,
0x0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_0288A8_SQ_PGM_RESOURCES_FS,
0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ r600_pipe_state_add_reg(rstate,
R_02885C_SQ_PGM_START_VS,
- 0x00000000, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT,
+ (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+ r600_pipe_state_add_reg(rstate,
R_0288A4_SQ_PGM_START_FS,
- 0x00000000, 0xFFFFFFFF, shader->bo);
+ (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+
+ r600_pipe_state_add_reg(rstate,
+ R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
+ 0xFFFFFFFF, NULL);
+}
+
+void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
+{
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct r600_pipe_state *rstate;
+
+ memset(&dsa, 0, sizeof(dsa));
+
+ rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ 0x0,
+ S_02880C_DUAL_EXPORT_ENABLE(1), NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028000_DB_RENDER_CONTROL,
+ S_028000_DEPTH_COPY_ENABLE(1) |
+ S_028000_STENCIL_COPY_ENABLE(1) |
+ S_028000_COPY_CENTROID(1),
+ S_028000_DEPTH_COPY_ENABLE(1) |
+ S_028000_STENCIL_COPY_ENABLE(1) |
+ S_028000_COPY_CENTROID(1), NULL);
+ return rstate;
}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 486cb29005..8e96f9355e 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -40,6 +40,9 @@
#define EVERGREEN_SAMPLER_OFFSET 0X0003C000
#define EVERGREEN_SAMPLER_END 0X0003CFF0
+#define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0
+#define EVERGREEN_CTL_CONST_END 0x0003E200
+
#define EVENT_TYPE_ZPASS_DONE 0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
@@ -683,6 +686,9 @@
#define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0)
#define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1)
#define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE
+#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x1) << 1)
+#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 1) & 0x1)
+#define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD
#define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4)
#define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3)
#define C_02880C_Z_ORDER 0xFFFFFCFF
@@ -981,9 +987,6 @@
#define S_030010_ENDIAN_SWAP(x) (((x) & 0x3) << 12)
#define G_030010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3)
#define C_030010_ENDIAN_SWAP 0xFFFFCFFF
-#define S_030010_REQUEST_SIZE(x) (((x) & 0x3) << 14)
-#define G_030010_REQUEST_SIZE(x) (((x) >> 14) & 0x3)
-#define C_030010_REQUEST_SIZE 0xFFFF3FFF
#define S_030010_DST_SEL_X(x) (((x) & 0x7) << 16)
#define G_030010_DST_SEL_X(x) (((x) >> 16) & 0x7)
#define C_030010_DST_SEL_X 0xFFF8FFFF
@@ -1047,40 +1050,6 @@
#define S_030008_DATA_FORMAT(x) (((x) & 0x3F) << 20)
#define G_030008_DATA_FORMAT(x) (((x) >> 20) & 0x3F)
#define C_030008_DATA_FORMAT 0xFC0FFFFF
-#define V_030008_COLOR_INVALID 0x00000000
-#define V_030008_COLOR_8 0x00000001
-#define V_030008_COLOR_4_4 0x00000002
-#define V_030008_COLOR_3_3_2 0x00000003
-#define V_030008_COLOR_16 0x00000005
-#define V_030008_COLOR_16_FLOAT 0x00000006
-#define V_030008_COLOR_8_8 0x00000007
-#define V_030008_COLOR_5_6_5 0x00000008
-#define V_030008_COLOR_6_5_5 0x00000009
-#define V_030008_COLOR_1_5_5_5 0x0000000A
-#define V_030008_COLOR_4_4_4_4 0x0000000B
-#define V_030008_COLOR_5_5_5_1 0x0000000C
-#define V_030008_COLOR_32 0x0000000D
-#define V_030008_COLOR_32_FLOAT 0x0000000E
-#define V_030008_COLOR_16_16 0x0000000F
-#define V_030008_COLOR_16_16_FLOAT 0x00000010
-#define V_030008_COLOR_8_24 0x00000011
-#define V_030008_COLOR_8_24_FLOAT 0x00000012
-#define V_030008_COLOR_24_8 0x00000013
-#define V_030008_COLOR_24_8_FLOAT 0x00000014
-#define V_030008_COLOR_10_11_11 0x00000015
-#define V_030008_COLOR_10_11_11_FLOAT 0x00000016
-#define V_030008_COLOR_11_11_10 0x00000017
-#define V_030008_COLOR_11_11_10_FLOAT 0x00000018
-#define V_030008_COLOR_2_10_10_10 0x00000019
-#define V_030008_COLOR_8_8_8_8 0x0000001A
-#define V_030008_COLOR_10_10_10_2 0x0000001B
-#define V_030008_COLOR_X24_8_32_FLOAT 0x0000001C
-#define V_030008_COLOR_32_32 0x0000001D
-#define V_030008_COLOR_32_32_FLOAT 0x0000001E
-#define V_030008_COLOR_16_16_16_16 0x0000001F
-#define V_030008_COLOR_16_16_16_16_FLOAT 0x00000020
-#define V_030008_COLOR_32_32_32_32 0x00000022
-#define V_030008_COLOR_32_32_32_32_FLOAT 0x00000023
#define S_030008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26)
#define G_030008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3)
#define C_030008_NUM_FORMAT_ALL 0xF3FFFFFF
@@ -1424,8 +1393,16 @@
#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x00008C0C
#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x00008D8C
#define R_028000_DB_RENDER_CONTROL 0x00028000
+#define S_028000_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0)
+#define S_028000_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1)
+#define S_028000_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2)
+#define S_028000_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3)
+#define S_028000_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4)
#define S_028000_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5)
#define S_028000_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6)
+#define S_028000_COPY_CENTROID(x) (((x) & 0x1) << 7)
+#define S_028000_COPY_SAMPLE(x) (((x) & 0x7) << 8)
+#define S_028000_COLOR_DISABLE(x) (((x) & 0x1) << 12)
#define R_028004_DB_COUNT_CONTROL 0x00028004
#define S_028004_ZPASS_INCREMENT_DISABLE (((x) & 0x1) << 0)
#define S_028004_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 1)
@@ -1724,7 +1701,7 @@
#define R_028CA8_CB_COLOR1_VIEW 0x00028CA8
#define R_028CAC_CB_COLOR1_INFO 0x00028CAC
#define R_028CB0_CB_COLOR1_ATTRIB 0x00028CB0
-#define R_028CB8_CB_COLOR1_DIM 0x00028CB8
+#define R_028CB4_CB_COLOR1_DIM 0x00028CB4
#define R_028CD8_CB_COLOR2_BASE 0x00028CD8
#define R_028CDC_CB_COLOR2_PITCH 0x00028CDC
#define R_028CE0_CB_COLOR2_SLICE 0x00028CE0
@@ -1849,9 +1826,18 @@
#define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14)
#define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1)
#define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF
-#define S_0085F0_CR_DEST_BASE_ENA(x) (((x) & 0x1) << 15)
-#define G_0085F0_CR_DEST_BASE_ENA(x) (((x) >> 15) & 0x1)
-#define C_0085F0_CR_DEST_BASE_ENA 0xFFFF7FFF
+#define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15)
+#define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1)
+
+#define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16)
+#define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1)
+
+#define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17)
+#define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1)
+
+#define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18)
+#define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1)
+
#define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23)
#define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1)
#define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF
@@ -1882,4 +1868,8 @@
#define R_008970_VGT_NUM_INDICES 0x008970
#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0
+#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0
+#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4
+
+#define R_03A200_SQ_LOOP_CONST_0 0x3A200
#endif
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index b8c74675e6..62d983269f 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -26,6 +26,7 @@
#ifndef R600_H
#define R600_H
+#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <util/u_double_list.h>
@@ -98,55 +99,42 @@ enum chip_class {
EVERGREEN,
};
+struct r600_tiling_info {
+ unsigned num_channels;
+ unsigned num_banks;
+ unsigned group_bytes;
+};
+
enum radeon_family r600_get_family(struct radeon *rw);
enum chip_class r600_get_family_class(struct radeon *radeon);
+struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
-/* lowlevel WS bo */
-struct radeon_ws_bo;
-struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon,
+/* r600_bo.c */
+struct r600_bo;
+struct r600_bo *r600_bo(struct radeon *radeon,
unsigned size, unsigned alignment, unsigned usage);
-struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon,
- unsigned handle);
-void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx);
-void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo);
-void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst,
- struct radeon_ws_bo *src);
+struct r600_bo *r600_bo_handle(struct radeon *radeon,
+ unsigned handle, unsigned *array_mode);
+void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
+void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
+void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
+ struct r600_bo *src);
+static INLINE unsigned r600_bo_offset(struct r600_bo *bo)
+{
+ return 0;
+}
+
/* R600/R700 STATES */
#define R600_GROUP_MAX 16
#define R600_BLOCK_MAX_BO 32
#define R600_BLOCK_MAX_REG 128
-enum r600_group_id {
- R600_GROUP_CONFIG = 0,
- R600_GROUP_CONTEXT,
- R600_GROUP_ALU_CONST,
- R600_GROUP_RESOURCE,
- R600_GROUP_SAMPLER,
- R600_GROUP_CTL_CONST,
- R600_GROUP_LOOP_CONST,
- R600_GROUP_BOOL_CONST,
- R600_NGROUPS
-};
-
-enum evergreen_group_id {
- EVERGREEN_GROUP_CONFIG = 0,
- EVERGREEN_GROUP_CONTEXT,
- EVERGREEN_GROUP_RESOURCE,
- EVERGREEN_GROUP_SAMPLER,
- EVERGREEN_GROUP_CTL_CONST,
- EVERGREEN_GROUP_LOOP_CONST,
- EVERGREEN_GROUP_BOOL_CONST,
- EVERGREEN_GROUP_SAMPLER_BORDER,
- EVERGREEN_NGROUPS
-};
-
struct r600_pipe_reg {
- unsigned group_id;
u32 offset;
u32 mask;
u32 value;
- struct radeon_ws_bo *bo;
+ struct r600_bo *bo;
};
struct r600_pipe_state {
@@ -156,11 +144,9 @@ struct r600_pipe_state {
};
static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state,
- unsigned group_id, u32 offset,
- u32 value, u32 mask,
- struct radeon_ws_bo *bo)
+ u32 offset, u32 value, u32 mask,
+ struct r600_bo *bo)
{
- state->regs[state->nregs].group_id = group_id;
state->regs[state->nregs].offset = offset;
state->regs[state->nregs].value = value;
state->regs[state->nregs].mask = mask;
@@ -173,15 +159,18 @@ static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state,
#define R600_BLOCK_STATUS_DIRTY (1 << 1)
struct r600_block_reloc {
- struct radeon_ws_bo *bo;
- unsigned nreloc;
- unsigned bo_pm4_index[R600_BLOCK_MAX_BO];
+ struct r600_bo *bo;
+ unsigned flush_flags;
+ unsigned flush_mask;
+ unsigned bo_pm4_index;
};
-struct r600_group_block {
+struct r600_block {
+ struct list_head list;
unsigned status;
unsigned start_offset;
unsigned pm4_ndwords;
+ unsigned pm4_flush_ndwords;
unsigned nbo;
unsigned nreg;
u32 *reg;
@@ -190,12 +179,10 @@ struct r600_group_block {
struct r600_block_reloc reloc[R600_BLOCK_MAX_BO];
};
-struct r600_group {
+struct r600_range {
unsigned start_offset;
unsigned end_offset;
- unsigned nblocks;
- struct r600_group_block *blocks;
- unsigned *offset_block_id;
+ struct r600_block **blocks;
};
/*
@@ -223,7 +210,7 @@ struct r600_query {
/* if we've flushed the query */
unsigned state;
/* The buffer where query results are stored. */
- struct radeon_ws_bo *buffer;
+ struct r600_bo *buffer;
unsigned buffer_size;
/* linked list of queries */
struct list_head list;
@@ -236,8 +223,12 @@ struct r600_query {
struct r600_context {
struct radeon *radeon;
- unsigned ngroups;
- struct r600_group groups[R600_GROUP_MAX];
+ unsigned hash_size;
+ unsigned hash_shift;
+ struct r600_range range[256];
+ unsigned nblocks;
+ struct r600_block **blocks;
+ struct list_head dirty;
unsigned pm4_ndwords;
unsigned pm4_cdwords;
unsigned pm4_dirty_cdwords;
@@ -249,6 +240,10 @@ struct r600_context {
u32 *pm4;
struct list_head query_list;
unsigned num_query_running;
+ unsigned fence;
+ struct list_head fenced_bo;
+ unsigned *cfence;
+ struct r600_bo *fence_bo;
};
struct r600_draw {
@@ -257,7 +252,7 @@ struct r600_draw {
u32 vgt_index_type;
u32 vgt_draw_initiator;
u32 indices_bo_offset;
- struct radeon_ws_bo *indices;
+ struct r600_bo *indices;
};
int r600_context_init(struct r600_context *ctx, struct radeon *radeon);
@@ -278,6 +273,8 @@ boolean r600_context_query_result(struct r600_context *ctx,
boolean wait, void *vresult);
void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
void r600_query_end(struct r600_context *ctx, struct r600_query *query);
+void r600_context_queries_suspend(struct r600_context *ctx);
+void r600_context_queries_resume(struct r600_context *ctx);
int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index fcdcad3edf..d13da0ef63 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -20,14 +20,13 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "radeon.h"
-#include "r600_context.h"
+#include <stdio.h>
+#include <errno.h>
#include "util/u_memory.h"
+#include "r600_pipe.h"
#include "r600_sq.h"
#include "r600_opcodes.h"
#include "r600_asm.h"
-#include <stdio.h>
-#include <errno.h>
static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
{
@@ -466,8 +465,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
bc->cf_last->ndw += 2;
bc->ndw += 2;
- if (bc->use_mem_constant)
- bc->cf_last->kcache0_mode = 2;
+ bc->cf_last->kcache0_mode = 2;
/* process cur ALU instructions for bank swizzle */
if (alu->last) {
@@ -602,7 +600,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
- S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) |
+ S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
+ S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
+ S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
+ S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
+ S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
bc->bytecode[id++] = 0;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 6aadf72957..bebc7c15b0 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -58,7 +58,7 @@ struct r600_bc_alu {
unsigned bank_swizzle;
unsigned bank_swizzle_force;
u32 value[4];
- int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
};
struct r600_bc_tex {
@@ -101,6 +101,11 @@ struct r600_bc_vtx {
unsigned dst_sel_y;
unsigned dst_sel_z;
unsigned dst_sel_w;
+ unsigned use_const_fields;
+ unsigned data_format;
+ unsigned num_format_all;
+ unsigned format_comp_all;
+ unsigned srf_mode_all;
};
struct r600_bc_output {
@@ -160,7 +165,6 @@ struct r600_cf_callstack {
struct r600_bc {
enum radeon_family family;
int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */
- unsigned use_mem_constant;
struct list_head cf;
struct r600_bc_cf *cf_last;
unsigned ndw;
@@ -176,6 +180,10 @@ struct r600_bc {
struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
};
+/* eg_asm.c */
+int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+
+/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
@@ -186,4 +194,7 @@ int r600_bc_build(struct r600_bc *bc);
int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
+/* r700_asm.c */
+int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+
#endif
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 357776c55e..50d47060c1 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,68 +19,105 @@
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Marek Olšák
*/
-#include <errno.h>
-#include <pipe/p_screen.h>
+#include <util/u_surface.h>
#include <util/u_blitter.h>
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include "util/u_surface.h"
-#include "r600_screen.h"
-#include "r600_context.h"
+#include <util/u_format.h>
+#include "r600_pipe.h"
+
+enum r600_blitter_op /* bitmask */
+{
+ R600_CLEAR = 1,
+ R600_CLEAR_SURFACE = 2,
+ R600_COPY = 4
+};
-static void r600_blitter_save_states(struct pipe_context *ctx)
+static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
- struct r600_context *rctx = r600_context(ctx);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- util_blitter_save_blend(rctx->blitter, rctx->blend);
- util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa);
- if (rctx->stencil_ref) {
- util_blitter_save_stencil_ref(rctx->blitter,
- &rctx->stencil_ref->state.stencil_ref);
+ r600_context_queries_suspend(&rctx->ctx);
+
+ util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
+ util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]);
+ if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) {
+ util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref);
}
- util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer);
+ util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]);
util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements);
- if (rctx->viewport) {
- util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport);
+ if (rctx->states[R600_PIPE_STATE_VIEWPORT]) {
+ util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
}
- if (rctx->clip) {
- util_blitter_save_clip(rctx->blitter, &rctx->clip->state.clip);
+ if (rctx->states[R600_PIPE_STATE_CLIP]) {
+ util_blitter_save_clip(rctx->blitter, &rctx->clip);
}
- util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer,
- rctx->vertex_buffer);
-
- /* remove ptr so they don't get deleted */
- rctx->blend = NULL;
- rctx->clip = NULL;
- rctx->vs_shader = NULL;
- rctx->ps_shader = NULL;
- rctx->rasterizer = NULL;
- rctx->dsa = NULL;
+ util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer);
+
rctx->vertex_elements = NULL;
- /* suspend queries */
- r600_queries_suspend(ctx);
+ if (op & (R600_CLEAR_SURFACE | R600_COPY))
+ util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);
+
+ if (op & R600_COPY) {
+ util_blitter_save_fragment_sampler_states(
+ rctx->blitter, rctx->ps_samplers.n_samplers,
+ (void**)rctx->ps_samplers.samplers);
+
+ util_blitter_save_fragment_sampler_views(
+ rctx->blitter, rctx->ps_samplers.n_views,
+ (struct pipe_sampler_view**)rctx->ps_samplers.views);
+ }
+
+}
+
+static void r600_blitter_end(struct pipe_context *ctx)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ r600_context_queries_resume(&rctx->ctx);
+}
+
+int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct pipe_surface *zsurf, *cbsurf;
+ int level = 0;
+ float depth = 1.0f;
+
+ zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0,
+ PIPE_BIND_DEPTH_STENCIL);
+
+ cbsurf = ctx->screen->get_tex_surface(ctx->screen,
+ (struct pipe_resource*)texture->flushed_depth_texture,
+ 0, level, 0, PIPE_BIND_RENDER_TARGET);
+
+ if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
+ rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
+ depth = 0.0f;
+
+ r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
+ util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth);
+ r600_blitter_end(ctx);
+
+ pipe_surface_reference(&zsurf, NULL);
+ pipe_surface_reference(&cbsurf, NULL);
+
+
+ return 0;
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
const float *rgba, double depth, unsigned stencil)
{
- struct r600_context *rctx = r600_context(ctx);
- struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct pipe_framebuffer_state *fb = &rctx->framebuffer;
- r600_blitter_save_states(ctx);
+ r600_blitter_begin(ctx, R600_CLEAR);
util_blitter_clear(rctx->blitter, fb->width, fb->height,
fb->nr_cbufs, buffers, rgba, depth,
stencil);
- /* resume queries */
- r600_queries_resume(ctx);
+ r600_blitter_end(ctx);
}
static void r600_clear_render_target(struct pipe_context *ctx,
@@ -89,16 +126,12 @@ static void r600_clear_render_target(struct pipe_context *ctx,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct r600_context *rctx = r600_context(ctx);
- struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
-
- r600_blitter_save_states(ctx);
- util_blitter_save_framebuffer(rctx->blitter, fb);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
util_blitter_clear_render_target(rctx->blitter, dst, rgba,
dstx, dsty, width, height);
- /* resume queries */
- r600_queries_resume(ctx);
+ r600_blitter_end(ctx);
}
static void r600_clear_depth_stencil(struct pipe_context *ctx,
@@ -109,19 +142,35 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct r600_context *rctx = r600_context(ctx);
- struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
-
- r600_blitter_save_states(ctx);
- util_blitter_save_framebuffer(rctx->blitter, fb);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
- /* resume queries */
- r600_queries_resume(ctx);
+ r600_blitter_end(ctx);
}
+
+/* Copy a block of pixels from one surface to another using HW. */
+static void r600_hw_copy_region(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ r600_blitter_begin(ctx, R600_COPY);
+ util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height,
+ TRUE);
+ r600_blitter_end(ctx);
+}
+
static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_subresource subdst,
@@ -131,79 +180,22 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
unsigned srcx, unsigned srcy, unsigned srcz,
unsigned width, unsigned height)
{
- util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
-}
+ boolean is_depth;
+ /* there is something wrong with depth resource copies at the moment so avoid them for now */
+ is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
+ if (is_depth)
+ util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height);
+ else
+ r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height);
-static void *r600_create_db_flush_dsa(struct r600_context *rctx)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct pipe_depth_stencil_alpha_state dsa;
- struct r600_context_state *state;
- boolean quirk = false;
- enum radeon_family family;
-
- family = radeon_get_family(rscreen->rw);
- if (family == CHIP_RV610 || family == CHIP_RV630 || family == CHIP_RV620 ||
- family == CHIP_RV635)
- quirk = true;
-
- memset(&dsa, 0, sizeof(dsa));
-
- if (quirk) {
- dsa.depth.enabled = 1;
- dsa.depth.func = PIPE_FUNC_LEQUAL;
- dsa.stencil[0].enabled = 1;
- dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
- dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP;
- dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR;
- dsa.stencil[0].writemask = 0xff;
- }
-
- state = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
- state->flags |= R600_STATE_FLAG_DSA_FLUSH;
- return state;
-
}
-void r600_init_blit_functions(struct r600_context *rctx)
+void r600_init_blit_functions(struct r600_pipe_context *rctx)
{
rctx->context.clear = r600_clear;
rctx->context.clear_render_target = r600_clear_render_target;
rctx->context.clear_depth_stencil = r600_clear_depth_stencil;
rctx->context.resource_copy_region = r600_resource_copy_region;
-
- /* create a custom depth stencil for DB flush */
- rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx);
-}
-
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct pipe_framebuffer_state *fb = rctx->pframebuffer;
- struct pipe_surface *zsurf, *cbsurf;
- int level = 0;
- float depth = 1.0f;
-
- zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0,
- PIPE_BIND_DEPTH_STENCIL);
-
- cbsurf = ctx->screen->get_tex_surface(ctx->screen, texture->flushed_depth_texture, 0, level, 0,
- PIPE_BIND_RENDER_TARGET);
-
- r600_blitter_save_states(ctx);
- util_blitter_save_framebuffer(rctx->blitter, fb);
-
- if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
- rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
- depth = 0.0f;
-
- util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth);
-
- pipe_surface_reference(&zsurf, NULL);
- pipe_surface_reference(&cbsurf, NULL);
-
- /* resume queries */
- r600_queries_resume(ctx);
- return 0;
}
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 1621b2ab63..455aa2e81f 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -31,9 +31,10 @@
#include <util/u_memory.h>
#include <util/u_upload_mgr.h>
#include "state_tracker/drm_driver.h"
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_resource.h"
+#include <xf86drm.h>
+#include "radeon_drm.h"
+#include "r600.h"
+#include "r600_pipe.h"
extern struct u_resource_vtbl r600_buffer_vtbl;
@@ -42,23 +43,23 @@ u32 r600_domain_from_usage(unsigned usage)
u32 domain = RADEON_GEM_DOMAIN_GTT;
if (usage & PIPE_BIND_RENDER_TARGET) {
- domain |= RADEON_GEM_DOMAIN_VRAM;
+ domain |= RADEON_GEM_DOMAIN_VRAM;
}
if (usage & PIPE_BIND_DEPTH_STENCIL) {
- domain |= RADEON_GEM_DOMAIN_VRAM;
+ domain |= RADEON_GEM_DOMAIN_VRAM;
}
if (usage & PIPE_BIND_SAMPLER_VIEW) {
- domain |= RADEON_GEM_DOMAIN_VRAM;
+ domain |= RADEON_GEM_DOMAIN_VRAM;
}
/* also need BIND_BLIT_SOURCE/DESTINATION ? */
if (usage & PIPE_BIND_VERTEX_BUFFER) {
- domain |= RADEON_GEM_DOMAIN_GTT;
+ domain |= RADEON_GEM_DOMAIN_GTT;
}
if (usage & PIPE_BIND_INDEX_BUFFER) {
- domain |= RADEON_GEM_DOMAIN_GTT;
+ domain |= RADEON_GEM_DOMAIN_GTT;
}
if (usage & PIPE_BIND_CONSTANT_BUFFER) {
- domain |= RADEON_GEM_DOMAIN_VRAM;
+ domain |= RADEON_GEM_DOMAIN_VRAM;
}
return domain;
@@ -67,9 +68,8 @@ u32 r600_domain_from_usage(unsigned usage)
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
- struct r600_screen *rscreen = r600_screen(screen);
struct r600_resource_buffer *rbuffer;
- struct radeon_ws_bo *bo;
+ struct r600_bo *bo;
/* XXX We probably want a different alignment for buffers and textures. */
unsigned alignment = 4096;
@@ -86,7 +86,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
rbuffer->r.base.vtbl = &r600_buffer_vtbl;
rbuffer->r.size = rbuffer->r.base.b.width0;
rbuffer->r.domain = r600_domain_from_usage(rbuffer->r.base.b.bind);
- bo = radeon_ws_bo(rscreen->rw, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind);
+ bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind);
if (bo == NULL) {
FREE(rbuffer);
return NULL;
@@ -127,10 +127,9 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
struct r600_resource_buffer *rbuffer = r600_buffer(buf);
- struct r600_screen *rscreen = r600_screen(screen);
if (rbuffer->r.bo) {
- radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL);
+ r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
}
FREE(rbuffer);
}
@@ -139,7 +138,6 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
- struct r600_screen *rscreen = r600_screen(pipe->screen);
int write = 0;
uint8_t *data;
int i;
@@ -155,9 +153,9 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
flush = TRUE;
if (flush) {
- radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL);
+ r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL);
rbuffer->num_ranges = 0;
- rbuffer->r.bo = radeon_ws_bo(rscreen->rw,
+ rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys,
rbuffer->r.base.b.width0, 0,
rbuffer->r.base.b.bind);
break;
@@ -170,7 +168,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
if (transfer->usage & PIPE_TRANSFER_WRITE) {
write = 1;
}
- data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, pipe);
+ data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe);
if (!data)
return NULL;
@@ -181,10 +179,9 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
- struct r600_screen *rscreen = r600_screen(pipe->screen);
if (rbuffer->r.bo)
- radeon_ws_bo_unmap(rscreen->rw, rbuffer->r.bo);
+ r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo);
}
static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
@@ -228,16 +225,16 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
{
struct radeon *rw = (struct radeon*)screen->winsys;
struct r600_resource *rbuffer;
- struct radeon_ws_bo *bo = NULL;
+ struct r600_bo *bo = NULL;
- bo = radeon_ws_bo_handle(rw, whandle->handle);
+ bo = r600_bo_handle(rw, whandle->handle, NULL);
if (bo == NULL) {
return NULL;
}
rbuffer = CALLOC_STRUCT(r600_resource);
if (rbuffer == NULL) {
- radeon_ws_bo_reference(rw, &bo, NULL);
+ r600_bo_reference(rw, &bo, NULL);
return NULL;
}
@@ -262,8 +259,7 @@ struct u_resource_vtbl r600_buffer_vtbl =
u_default_transfer_inline_write /* transfer_inline_write */
};
-int r600_upload_index_buffer(struct r600_context *rctx,
- struct r600_draw *draw)
+int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
{
struct pipe_resource *upload_buffer = NULL;
unsigned index_offset = draw->index_buffer_offset;
@@ -280,14 +276,17 @@ int r600_upload_index_buffer(struct r600_context *rctx,
goto done;
}
draw->index_buffer_offset = index_offset;
- draw->index_buffer = upload_buffer;
+
+ /* Transfer ownership. */
+ pipe_resource_reference(&draw->index_buffer, upload_buffer);
+ pipe_resource_reference(&upload_buffer, NULL);
}
done:
return ret;
}
-int r600_upload_user_buffers(struct r600_context *rctx)
+int r600_upload_user_buffers(struct r600_pipe_context *rctx)
{
enum pipe_error ret = PIPE_OK;
int i, nr;
diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c
deleted file mode 100644
index 091751e93a..0000000000
--- a/src/gallium/drivers/r600/r600_context.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Corbin Simpson
- */
-#include <stdio.h>
-#include <util/u_inlines.h>
-#include <util/u_format.h>
-#include <util/u_memory.h>
-#include <util/u_upload_mgr.h>
-#include <util/u_blitter.h>
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_resource.h"
-
-static void r600_destroy_context(struct pipe_context *context)
-{
- struct r600_context *rctx = r600_context(context);
-
- rctx->rasterizer = r600_context_state_decref(rctx->rasterizer);
- rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple);
- rctx->scissor = r600_context_state_decref(rctx->scissor);
- rctx->clip = r600_context_state_decref(rctx->clip);
- rctx->ps_shader = r600_context_state_decref(rctx->ps_shader);
- rctx->vs_shader = r600_context_state_decref(rctx->vs_shader);
- rctx->depth = r600_context_state_decref(rctx->depth);
- rctx->stencil = r600_context_state_decref(rctx->stencil);
- rctx->alpha = r600_context_state_decref(rctx->alpha);
- rctx->dsa = r600_context_state_decref(rctx->dsa);
- rctx->blend = r600_context_state_decref(rctx->blend);
- rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref);
- rctx->viewport = r600_context_state_decref(rctx->viewport);
- rctx->framebuffer = r600_context_state_decref(rctx->framebuffer);
-
- free(rctx->ps_constant);
- free(rctx->vs_constant);
- free(rctx->vs_resource);
-
- util_blitter_destroy(rctx->blitter);
-
- u_upload_destroy(rctx->upload_vb);
- u_upload_destroy(rctx->upload_ib);
-
- radeon_ctx_fini(rctx->ctx);
- FREE(rctx);
-}
-
-void r600_flush(struct pipe_context *ctx, unsigned flags,
- struct pipe_fence_handle **fence)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_query *rquery = NULL;
-#if 0
- static int dc = 0;
- char dname[256];
-#endif
-
- /* flush upload buffers */
- u_upload_flush(rctx->upload_vb);
- u_upload_flush(rctx->upload_ib);
-
- /* suspend queries */
- r600_queries_suspend(ctx);
-
-
-#if 0
- sprintf(dname, "gallium-%08d.bof", dc);
- if (dc < 20) {
- radeon_ctx_dump_bof(rctx->ctx, dname);
- R600_ERR("dumped %s\n", dname);
- }
- dc++;
-#endif
-
- radeon_ctx_submit(rctx->ctx);
-
- LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
- rquery->flushed = TRUE;
- }
-
- radeon_ctx_clear(rctx->ctx);
- /* resume queries */
- r600_queries_resume(ctx);
-}
-
-struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
-{
- struct r600_context *rctx = CALLOC_STRUCT(r600_context);
- struct r600_screen* rscreen = r600_screen(screen);
-
- if (rctx == NULL)
- return NULL;
- rctx->context.winsys = rscreen->screen.winsys;
- rctx->context.screen = screen;
- rctx->context.priv = priv;
- rctx->context.destroy = r600_destroy_context;
- rctx->context.draw_vbo = r600_draw_vbo;
- rctx->context.flush = r600_flush;
-
- /* Easy accessing of screen/winsys. */
- rctx->screen = rscreen;
- rctx->rw = rscreen->rw;
-
- if (radeon_get_family_class(rscreen->rw) == EVERGREEN)
- rctx->vtbl = &eg_hw_state_vtbl;
- else
- rctx->vtbl = &r600_hw_state_vtbl;
-
- r600_init_query_functions(rctx);
- r600_init_state_functions(rctx);
- r600_init_context_resource_functions(rctx);
-
- r600_init_blit_functions(rctx);
-
- rctx->blitter = util_blitter_create(&rctx->context);
- if (rctx->blitter == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->vtbl->init_config(rctx);
-
- rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
- if (rctx->upload_ib == NULL) {
- goto out_free;
- }
-
- rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (rctx->upload_vb == NULL) {
- goto out_free;
- }
-
- rctx->vs_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state));
- if (!rctx->vs_constant) {
- goto out_free;
- }
-
- rctx->ps_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state));
- if (!rctx->ps_constant) {
- goto out_free;
- }
-
- rctx->vs_resource = (struct radeon_state *)calloc(R600_MAX_RESOURCE, sizeof(struct radeon_state));
- if (!rctx->vs_resource) {
- goto out_free;
- }
-
- rctx->ctx = radeon_ctx_init(rscreen->rw);
- radeon_draw_init(&rctx->draw, rscreen->rw);
- r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth;
- return &rctx->context;
- out_free:
- FREE(rctx);
- return NULL;
-}
diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h
deleted file mode 100644
index d104531d36..0000000000
--- a/src/gallium/drivers/r600/r600_context.h
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef R600_CONTEXT_H
-#define R600_CONTEXT_H
-
-#include <stdio.h>
-#include <pipe/p_state.h>
-#include <pipe/p_context.h>
-#include <tgsi/tgsi_scan.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_util.h>
-#include <util/u_blitter.h>
-#include <util/u_double_list.h>
-#include "radeon.h"
-#include "r600_shader.h"
-
-struct u_upload_mgr;
-
-#define R600_QUERY_STATE_STARTED (1 << 0)
-#define R600_QUERY_STATE_ENDED (1 << 1)
-#define R600_QUERY_STATE_SUSPENDED (1 << 2)
-
-struct r600_query {
- u64 result;
- /* The kind of query. Currently only OQ is supported. */
- unsigned type;
- /* How many results have been written, in dwords. It's incremented
- * after end_query and flush. */
- unsigned num_results;
- /* if we've flushed the query */
- boolean flushed;
- unsigned state;
- /* The buffer where query results are stored. */
- struct radeon_ws_bo *buffer;
- unsigned buffer_size;
- /* linked list of queries */
- struct list_head list;
- struct radeon_state rstate;
-};
-
-/* XXX move this to a more appropriate place */
-union pipe_states {
- struct pipe_rasterizer_state rasterizer;
- struct pipe_poly_stipple poly_stipple;
- struct pipe_scissor_state scissor;
- struct pipe_clip_state clip;
- struct pipe_shader_state shader;
- struct pipe_depth_state depth;
- struct pipe_stencil_state stencil;
- struct pipe_alpha_state alpha;
- struct pipe_depth_stencil_alpha_state dsa;
- struct pipe_blend_state blend;
- struct pipe_blend_color blend_color;
- struct pipe_stencil_ref stencil_ref;
- struct pipe_framebuffer_state framebuffer;
- struct pipe_sampler_state sampler;
- struct pipe_sampler_view sampler_view;
- struct pipe_viewport_state viewport;
-};
-
-enum pipe_state_type {
- pipe_rasterizer_type = 1,
- pipe_poly_stipple_type,
- pipe_scissor_type,
- pipe_clip_type,
- pipe_shader_type,
- pipe_depth_type,
- pipe_stencil_type,
- pipe_alpha_type,
- pipe_dsa_type,
- pipe_blend_type,
- pipe_stencil_ref_type,
- pipe_framebuffer_type,
- pipe_sampler_type,
- pipe_sampler_view_type,
- pipe_viewport_type,
- pipe_type_count
-};
-
-#define R600_MAX_RSTATE 16
-#define R600_STATE_FLAG_DSA_FLUSH 1
-
-struct r600_context_state {
- union pipe_states state;
- unsigned refcount;
- unsigned type;
- struct radeon_state rstate[R600_MAX_RSTATE];
- struct r600_shader shader;
- struct radeon_ws_bo *bo;
- unsigned nrstate;
- unsigned flags;
-};
-
-struct r600_vertex_element
-{
- unsigned refcount;
- unsigned count;
- struct pipe_vertex_element elements[32];
-};
-
-struct r600_draw {
- struct pipe_context *ctx;
- struct radeon_state draw;
- struct radeon_state vgt;
- unsigned mode;
- unsigned start;
- unsigned count;
- unsigned index_size;
- struct pipe_resource *index_buffer;
- unsigned index_buffer_offset;
- unsigned min_index, max_index;
- unsigned index_bias;
-};
-
-struct r600_context_hw_states {
- struct radeon_state rasterizer;
- struct radeon_state scissor;
- struct radeon_state dsa;
- struct radeon_state cb_cntl;
-
- struct radeon_state db_flush;
- struct radeon_state cb_flush;
-};
-
-#define R600_MAX_CONSTANT 256 /* magic */
-#define R600_MAX_RESOURCE 160 /* magic */
-
-struct r600_shader_sampler_states {
- unsigned nsampler;
- unsigned nview;
- unsigned nborder;
- struct radeon_state *sampler[PIPE_MAX_ATTRIBS];
- struct radeon_state *view[PIPE_MAX_ATTRIBS];
- struct radeon_state *border[PIPE_MAX_ATTRIBS];
-};
-
-struct r600_context;
-struct r600_screen;
-struct r600_resource;
-struct r600_resource_texture;
-
-struct r600_context_hw_state_vtbl {
- void (*blend)(struct r600_context *rctx,
- struct radeon_state *rstate,
- const struct pipe_blend_state *state);
- void (*ucp)(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_clip_state *state);
- void (*cb)(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_framebuffer_state *state, int cb);
- void (*db)(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_framebuffer_state *state);
- void (*rasterizer)(struct r600_context *rctx, struct radeon_state *rstate);
- void (*scissor)(struct r600_context *rctx, struct radeon_state *rstate);
- void (*viewport)(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state);
- void (*dsa)(struct r600_context *rctx, struct radeon_state *rstate);
- void (*sampler_border)(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_sampler_state *state, unsigned id);
- void (*sampler)(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_sampler_state *state, unsigned id);
- void (*resource)(struct pipe_context *ctx, struct radeon_state *rstate,
- const struct pipe_sampler_view *view, unsigned id);
- void (*cb_cntl)(struct r600_context *rctx, struct radeon_state *rstate);
- int (*vs_resource)(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset,
- uint32_t stride, uint32_t format);
- int (*vgt_init)(struct r600_draw *draw,
- int vgt_draw_initiator);
- int (*vgt_prim)(struct r600_draw *draw,
- uint32_t prim, uint32_t vgt_dma_index_type);
-
- int (*ps_shader)(struct r600_context *rctx, struct r600_context_state *rshader,
- struct radeon_state *state);
- int (*vs_shader)(struct r600_context *rctx, struct r600_context_state *rpshader,
- struct radeon_state *state);
- void (*init_config)(struct r600_context *rctx);
-};
-extern struct r600_context_hw_state_vtbl r600_hw_state_vtbl;
-extern struct r600_context_hw_state_vtbl eg_hw_state_vtbl;
-
-struct r600_context {
- struct pipe_context context;
- struct blitter_context *blitter;
- struct pipe_framebuffer_state *pframebuffer;
- unsigned family;
- void *custom_dsa_flush;
- struct list_head query_list;
- struct r600_screen *screen;
- struct radeon *rw;
- struct radeon_ctx *ctx;
- struct radeon_draw draw;
- struct r600_context_hw_state_vtbl *vtbl;
- struct radeon_state config;
- boolean use_mem_constant;
- /* FIXME get rid of those vs_resource,vs/ps_constant */
- struct radeon_state *vs_resource;
- unsigned vs_nresource;
- struct radeon_state *vs_constant;
- struct radeon_state *ps_constant;
- /* hw states */
- struct r600_context_hw_states hw_states;
- /* pipe states */
- unsigned flat_shade;
-
- unsigned nvertex_buffer;
- struct r600_context_state *rasterizer;
- struct r600_context_state *poly_stipple;
- struct r600_context_state *scissor;
- struct r600_context_state *clip;
- struct r600_context_state *ps_shader;
- struct r600_context_state *vs_shader;
- struct r600_context_state *depth;
- struct r600_context_state *stencil;
- struct r600_context_state *alpha;
- struct r600_context_state *dsa;
- struct r600_context_state *blend;
- struct r600_context_state *stencil_ref;
- struct r600_context_state *viewport;
- struct r600_context_state *framebuffer;
- struct r600_shader_sampler_states vs_sampler;
- struct r600_shader_sampler_states ps_sampler;
- /* can add gs later */
- struct r600_vertex_element *vertex_elements;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- struct pipe_index_buffer index_buffer;
- struct pipe_blend_color blend_color;
-
- /* upload managers */
- struct u_upload_mgr *upload_vb;
- struct u_upload_mgr *upload_ib;
- bool any_user_vbs;
-};
-
-/* Convenience cast wrapper. */
-static INLINE struct r600_context *r600_context(struct pipe_context *pipe)
-{
- return (struct r600_context*)pipe;
-}
-
-static INLINE struct r600_query* r600_query(struct pipe_query* q)
-{
- return (struct r600_query*)q;
-}
-
-struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate);
-struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate);
-void r600_flush(struct pipe_context *ctx, unsigned flags,
- struct pipe_fence_handle **fence);
-
-int r600_context_hw_states(struct pipe_context *ctx);
-
-void r600_draw_vbo(struct pipe_context *ctx,
- const struct pipe_draw_info *info);
-
-void r600_init_blit_functions(struct r600_context *rctx);
-void r600_init_state_functions(struct r600_context *rctx);
-void r600_init_query_functions(struct r600_context* rctx);
-struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv);
-
-extern int r600_pipe_shader_create(struct pipe_context *ctx,
- struct r600_context_state *rstate,
- const struct tgsi_token *tokens);
-extern int r600_pipe_shader_update(struct pipe_context *ctx,
- struct r600_context_state *rstate);
-extern int r600_find_vs_semantic_index(struct r600_context *rctx, struct r600_shader *rshader, int id);
-
-#define R600_ERR(fmt, args...) \
- fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
-
-uint32_t r600_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view,
- uint32_t *word4_p, uint32_t *yuv_format_p);
-
-/* query */
-extern void r600_queries_resume(struct pipe_context *ctx);
-extern void r600_queries_suspend(struct pipe_context *ctx);
-
-int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
-
-void r600_set_constant_buffer_file(struct pipe_context *ctx,
- uint shader, uint index,
- struct pipe_resource *buffer);
-void r600_set_constant_buffer_mem(struct pipe_context *ctx,
- uint shader, uint index,
- struct pipe_resource *buffer);
-void eg_set_constant_buffer(struct pipe_context *ctx,
- uint shader, uint index,
- struct pipe_resource *buffer);
-
-int r600_upload_index_buffer(struct r600_context *rctx,
- struct r600_draw *draw);
-int r600_upload_user_buffers(struct r600_context *rctx);
-
-#endif
diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c
deleted file mode 100644
index afc3b7bba1..0000000000
--- a/src/gallium/drivers/r600/r600_draw.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Corbin Simpson
- */
-#include <stdio.h>
-#include <errno.h>
-#include <pipe/p_screen.h>
-#include <util/u_format.h>
-#include <util/u_math.h>
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include <util/u_index_modify.h>
-#include "radeon.h"
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_resource.h"
-#include "r600_state_inlines.h"
-
-static void r600_translate_index_buffer(struct r600_context *r600,
- struct pipe_resource **index_buffer,
- unsigned *index_size,
- unsigned *start, unsigned count)
-{
- switch (*index_size) {
- case 1:
- util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count);
- *index_size = 2;
- *start = 0;
- break;
-
- case 2:
- if (*start % 2 != 0) {
- util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count);
- *start = 0;
- }
- break;
-
- case 4:
- break;
- }
-}
-
-static int r600_draw_common(struct r600_draw *draw)
-{
- struct r600_context *rctx = r600_context(draw->ctx);
- /* FIXME vs_resource */
- struct radeon_state *vs_resource;
- struct r600_resource *rbuffer;
- unsigned i, j, offset, prim;
- u32 vgt_dma_index_type, vgt_draw_initiator;
- struct pipe_vertex_buffer *vertex_buffer;
- int r;
-
- r = r600_context_hw_states(draw->ctx);
- if (r)
- return r;
- switch (draw->index_size) {
- case 2:
- vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA);
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA);
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX);
- vgt_dma_index_type = 0;
- break;
- default:
- fprintf(stderr, "%s %d unsupported index size %d\n", __func__, __LINE__, draw->index_size);
- return -EINVAL;
- }
- r = r600_conv_pipe_prim(draw->mode, &prim);
- if (r)
- return r;
-
- /* rebuild vertex shader if input format changed */
- r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader);
- if (r)
- return r;
- r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader);
- if (r)
- return r;
- radeon_draw_bind(&rctx->draw, &rctx->vs_shader->rstate[0]);
- radeon_draw_bind(&rctx->draw, &rctx->ps_shader->rstate[0]);
-
- for (i = 0 ; i < rctx->vs_nresource; i++) {
- radeon_state_fini(&rctx->vs_resource[i]);
- }
- for (i = 0 ; i < rctx->vertex_elements->count; i++) {
- vs_resource = &rctx->vs_resource[i];
- j = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[j];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset;
-
- rctx->vtbl->vs_resource(rctx, i, rbuffer, offset, vertex_buffer->stride, rctx->vertex_elements->elements[i].src_format);
- radeon_draw_bind(&rctx->draw, vs_resource);
- }
- rctx->vs_nresource = rctx->vertex_elements->count;
- /* FIXME start need to change winsys */
- rctx->vtbl->vgt_init(draw, vgt_draw_initiator);
- radeon_draw_bind(&rctx->draw, &draw->draw);
-
- rctx->vtbl->vgt_prim(draw, prim, vgt_dma_index_type);
- radeon_draw_bind(&rctx->draw, &draw->vgt);
-
- r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw);
- if (r == -EBUSY) {
- r600_flush(draw->ctx, 0, NULL);
- r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw);
- }
-
- radeon_state_fini(&draw->draw);
-
- return r;
-}
-
-void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_draw draw;
- int r;
-
- memset(&draw, 0, sizeof(draw));
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- draw.index_buffer = rctx->index_buffer.buffer;
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer(rctx, &draw);
- }
- else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = 0;
- draw.max_index = 0xffffff;
- draw.index_buffer_offset = 0;
- draw.index_bias = draw.start;
- }
-
- r = r600_draw_common(&draw);
- if (r)
- fprintf(stderr,"draw common failed %d\n", r);
-}
diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h
new file mode 100644
index 0000000000..0c91a21238
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_formats.h
@@ -0,0 +1,56 @@
+#ifndef R600_FORMATS_H
+#define R600_FORMATS_H
+
+/* list of formats from R700 ISA document - apply across GPUs in different registers */
+#define FMT_INVALID 0x00000000
+#define FMT_8 0x00000001
+#define FMT_4_4 0x00000002
+#define FMT_3_3_2 0x00000003
+#define FMT_16 0x00000005
+#define FMT_16_FLOAT 0x00000006
+#define FMT_8_8 0x00000007
+#define FMT_5_6_5 0x00000008
+#define FMT_6_5_5 0x00000009
+#define FMT_1_5_5_5 0x0000000A
+#define FMT_4_4_4_4 0x0000000B
+#define FMT_5_5_5_1 0x0000000C
+#define FMT_32 0x0000000D
+#define FMT_32_FLOAT 0x0000000E
+#define FMT_16_16 0x0000000F
+#define FMT_16_16_FLOAT 0x00000010
+#define FMT_8_24 0x00000011
+#define FMT_8_24_FLOAT 0x00000012
+#define FMT_24_8 0x00000013
+#define FMT_24_8_FLOAT 0x00000014
+#define FMT_10_11_11 0x00000015
+#define FMT_10_11_11_FLOAT 0x00000016
+#define FMT_11_11_10 0x00000017
+#define FMT_11_11_10_FLOAT 0x00000018
+#define FMT_2_10_10_10 0x00000019
+#define FMT_8_8_8_8 0x0000001A
+#define FMT_10_10_10_2 0x0000001B
+#define FMT_X24_8_32_FLOAT 0x0000001C
+#define FMT_32_32 0x0000001D
+#define FMT_32_32_FLOAT 0x0000001E
+#define FMT_16_16_16_16 0x0000001F
+#define FMT_16_16_16_16_FLOAT 0x00000020
+#define FMT_32_32_32_32 0x00000022
+#define FMT_32_32_32_32_FLOAT 0x00000023
+#define FMT_1 0x00000025
+#define FMT_GB_GR 0x00000027
+#define FMT_BG_RG 0x00000028
+#define FMT_32_AS_8 0x00000029
+#define FMT_32_AS_8_8 0x0000002a
+#define FMT_5_9_9_9_SHAREDEXP 0x0000002b
+#define FMT_8_8_8 0x0000002c
+#define FMT_16_16_16 0x0000002d
+#define FMT_16_16_16_FLOAT 0x0000002e
+#define FMT_32_32_32 0x0000002f
+#define FMT_32_32_32_FLOAT 0x00000030
+#define FMT_BC1 0x00000031
+#define FMT_BC2 0x00000032
+#define FMT_BC3 0x00000033
+#define FMT_BC4 0x00000034
+#define FMT_BC5 0x00000035
+
+#endif
diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c
index 5e0e0aab57..7e13109306 100644
--- a/src/gallium/drivers/r600/r600_helper.c
+++ b/src/gallium/drivers/r600/r600_helper.c
@@ -26,8 +26,7 @@
#include <stdio.h>
#include <errno.h>
#include <util/u_inlines.h>
-#include "r600_screen.h"
-#include "r600_context.h"
+#include "r600_pipe.h"
#include "r600d.h"
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim)
diff --git a/src/gallium/drivers/r600/r600_hw_states.c b/src/gallium/drivers/r600/r600_hw_states.c
deleted file mode 100644
index b4d73a0fb1..0000000000
--- a/src/gallium/drivers/r600/r600_hw_states.c
+++ /dev/null
@@ -1,1215 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- * 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Dave Airlie
- */
-
-#include <util/u_inlines.h>
-#include <util/u_format.h>
-#include <util/u_memory.h>
-#include <util/u_blitter.h>
-#include "util/u_pack_color.h"
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_resource.h"
-#include "r600_state_inlines.h"
-#include "r600d.h"
-
-static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- int i;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0);
- rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]);
- rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]);
- rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]);
- rstate->states[R600_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]);
- rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND1_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND2_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND3_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND4_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND5_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND6_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND7_CONTROL] = 0x00000000;
- rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000;
-
- for (i = 0; i < 8; i++) {
- unsigned eqRGB = state->rt[i].rgb_func;
- unsigned srcRGB = state->rt[i].rgb_src_factor;
- unsigned dstRGB = state->rt[i].rgb_dst_factor;
-
- unsigned eqA = state->rt[i].alpha_func;
- unsigned srcA = state->rt[i].alpha_src_factor;
- unsigned dstA = state->rt[i].alpha_dst_factor;
- uint32_t bc = 0;
-
- if (!state->rt[i].blend_enable)
- continue;
-
- bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB));
- bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB));
- bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB));
-
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
- bc |= S_028804_SEPARATE_ALPHA_BLEND(1);
- bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA));
- bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA));
- bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
- }
-
- rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc;
- if (i == 0)
- rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc;
- }
-
- radeon_state_pm4(rstate);
-}
-
-static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_clip_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0);
-
- for (int i = 0; i < state->nr; i++) {
- rstate->states[i * 4 + 0] = fui(state->ucp[i][0]);
- rstate->states[i * 4 + 1] = fui(state->ucp[i][1]);
- rstate->states[i * 4 + 2] = fui(state->ucp[i][2]);
- rstate->states[i * 4 + 3] = fui(state->ucp[i][3]);
- }
- radeon_state_pm4(rstate);
-}
-
-static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_framebuffer_state *state, int cb)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- unsigned level = state->cbufs[cb]->level;
- unsigned pitch, slice;
- unsigned color_info;
- unsigned format, swap, ntype;
- const struct util_format_description *desc;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0);
- rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
- rbuffer = &rtex->resource;
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
- rstate->nbo = 1;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
-
- ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- ntype = V_0280A0_NUMBER_SRGB;
-
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
-
-
- color_info = S_0280A0_FORMAT(format) |
- S_0280A0_COMP_SWAP(swap) |
- S_0280A0_BLEND_CLAMP(1) |
- S_0280A0_NUMBER_TYPE(ntype);
-
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_0280A0_SOURCE_FORMAT(1);
- rstate->states[R600_CB0__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8;
- rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info;
- rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
- S_028060_SLICE_TILE_MAX(slice);
- rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000;
- rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000;
- rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000;
- rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000;
- radeon_state_pm4(rstate);
-}
-
-static void r600_db(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_framebuffer_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- unsigned level;
- unsigned pitch, slice, format;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0);
- if (state->zsbuf == NULL)
- return;
-
- rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
- rbuffer = &rtex->resource;
-
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- rstate->nbo = 1;
- rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
- level = state->zsbuf->level;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
- format = r600_translate_dbformat(state->zsbuf->texture->format);
- rstate->states[R600_DB__DB_DEPTH_BASE] = state->zsbuf->offset >> 8;
- rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) |
- S_028010_FORMAT(format);
- rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
- rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1;
- rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
- S_028000_SLICE_TILE_MAX(slice);
- radeon_state_pm4(rstate);
-}
-
-static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate)
-{
- const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer;
- const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- const struct pipe_clip_state *clip = NULL;
- struct r600_screen *rscreen = rctx->screen;
- float offset_units = 0, offset_scale = 0;
- char depth = 0;
- unsigned offset_db_fmt_cntl = 0;
- unsigned point_size;
- unsigned prov_vtx = 1;
- unsigned polygon_dual_mode;
-
- if (rctx->clip)
- clip = &rctx->clip->state.clip;
- if (fb->zsbuf) {
- offset_units = state->offset_units;
- offset_scale = state->offset_scale * 12.0f;
- switch (fb->zsbuf->texture->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- depth = -24;
- offset_units *= 2.0f;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- depth = -23;
- offset_units *= 1.0f;
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- depth = -16;
- offset_units *= 4.0f;
- break;
- default:
- R600_ERR("unsupported %d\n", fb->zsbuf->texture->format);
- return;
- }
- }
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
-
- if (state->flatshade_first)
- prov_vtx = 0;
-
- rctx->flat_shade = state->flatshade;
- radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0);
- rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] =
- S_0286D4_FLAT_SHADE_ENA(1);
- if (state->sprite_coord_enable) {
- rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
- S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(2) |
- S_0286D4_PNT_SPRITE_OVRD_Y(3) |
- S_0286D4_PNT_SPRITE_OVRD_Z(0) |
- S_0286D4_PNT_SPRITE_OVRD_W(1);
- if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
- rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
- S_0286D4_PNT_SPRITE_TOP_1(1);
- }
- }
- rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0;
- if (clip) {
- /* Clip plane enable bits are stashed in the lower six bits of
- * PA_CL_CLIP_CNTL, so just set all of the corresponding bits with a
- * pinch of bit twiddling.
- *
- * PS_UCP_MODE 3 is "expand and clip as trifan," which is the same
- * setting that we use on r300-r500. I believe that fglrx always uses
- * this mode as well. */
- rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] =
- ((1 << clip->nr) - 1) |
- S_028810_PS_UCP_MODE(3) |
- S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp) |
- S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp);
- }
- polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
- state->fill_back != PIPE_POLYGON_MODE_FILL);
-
- rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] =
- S_028814_PROVOKING_VTX_LAST(prov_vtx) |
- S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
- S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
- S_028814_FACE(!state->front_ccw) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
- S_028814_POLY_MODE(polygon_dual_mode) |
- S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
- S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back));
- rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] =
- S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
- S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
- rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000;
- /* Point size for PA_SU_POINT_SIZE and PA_SU_POINT_MINMAX is fixed-point,
- * 12.4.
- *
- * For some reason, maximum point size is set to 0x8000 (2048.0) instead
- * of the maximum value 0xFFF0 (4095.0). */
- point_size = (unsigned)(state->point_size * 8.0);
- rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] =
- S_028A00_HEIGHT(point_size) | S_028A00_WIDTH(point_size);
- rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] =
- S_028A04_MIN_SIZE(0) | S_028A04_MAX_SIZE(0x8000);
- rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = S_028A08_WIDTH(8);
- rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005;
- rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000;
- rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = S_028C00_LAST_PIXEL(1);
- rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = fui(1);
- rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = fui(1);
- rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = fui(1);
- rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = fui(1);
- rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl;
- rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000;
- rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale);
- rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units);
- rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale);
- rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units);
- radeon_state_pm4(rstate);
-}
-
-static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate)
-{
- const struct pipe_scissor_state *state = &rctx->scissor->state.scissor;
- const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- struct r600_screen *rscreen = rctx->screen;
- enum radeon_family family;
- unsigned minx, maxx, miny, maxy;
- u32 tl, br;
-
- family = radeon_get_family(rctx->rw);
-
- if (state == NULL) {
- minx = 0;
- miny = 0;
- maxx = fb->cbufs[0]->width;
- maxy = fb->cbufs[0]->height;
- } else {
- minx = state->minx;
- miny = state->miny;
- maxx = state->maxx;
- maxy = state->maxy;
- }
- tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
- br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy);
- radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0);
- rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br;
- rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000;
- rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] =
- S_02820C_CLIP_RULE(0xFFFF);
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br;
-
- if (family >= CHIP_RV770)
- rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA;
-
- rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br;
- rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl;
- rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br;
- radeon_state_pm4(rstate);
-}
-
-static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0);
- rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = fui(0);
- rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = fui(1);
- rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]);
- rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]);
- rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]);
- rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]);
- rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]);
- rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]);
- rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] =
- S_028818_VPORT_X_SCALE_ENA(1) |
- S_028818_VPORT_X_OFFSET_ENA(1) |
- S_028818_VPORT_Y_SCALE_ENA(1) |
- S_028818_VPORT_Y_OFFSET_ENA(1) |
- S_028818_VPORT_Z_SCALE_ENA(1) |
- S_028818_VPORT_Z_OFFSET_ENA(1) |
- S_028818_VTX_W0_FMT(1);
- radeon_state_pm4(rstate);
-}
-
-static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate)
-{
- const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa;
- const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref;
- struct r600_screen *rscreen = rctx->screen;
- unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
- unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control;
- struct r600_shader *rshader;
- struct r600_query *rquery = NULL;
- boolean query_running;
- int i;
- bool flush_db = FALSE;
-
- if (rctx->ps_shader == NULL) {
- return;
- }
- if (rctx->dsa->flags & R600_STATE_FLAG_DSA_FLUSH)
- flush_db = TRUE;
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0);
-
- db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
-
- if (!flush_db)
- db_shader_control = S_02880C_DUAL_EXPORT_ENABLE(1);
-
- rshader = &rctx->ps_shader->shader;
- if (rshader->uses_kill)
- db_shader_control |= S_02880C_KILL_ENABLE(1);
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
- }
- stencil_ref_mask = 0;
- stencil_ref_mask_bf = 0;
- db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
- S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
- S_028800_ZFUNC(state->depth.func);
-
- /* set stencil enable */
- if (state->stencil[0].enabled) {
- db_depth_control |= S_028800_STENCIL_ENABLE(1) |
- S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)) |
- S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)) |
- S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)) |
- S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op));
-
- stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) |
- S_028430_STENCILWRITEMASK(state->stencil[0].writemask) |
- S_028430_STENCILREF(stencil_ref->ref_value[0]);
-
- if (state->stencil[1].enabled) {
- db_depth_control |= S_028800_BACKFACE_ENABLE(1) |
- S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)) |
- S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)) |
- S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)) |
- S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op));
- stencil_ref_mask_bf =
- S_028434_STENCILMASK_BF(state->stencil[1].valuemask) |
- S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask) |
- S_028430_STENCILREF(stencil_ref->ref_value[1]);
- }
- }
-
- alpha_test_control = 0;
- alpha_ref = 0;
- if (state->alpha.enabled) {
- alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func) |
- S_028410_ALPHA_TEST_ENABLE(1);
- alpha_ref = fui(state->alpha.ref_value);
- }
-
- db_render_control = 0;
-
- if (flush_db)
- db_render_control = S_028D0C_DEPTH_COPY_ENABLE(1) |
- S_028D0C_STENCIL_COPY_ENABLE(1) |
- S_028D0C_COPY_CENTROID(1);
-
- db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
- S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
- S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
-
- query_running = FALSE;
-
- LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
- if (rquery->state & R600_QUERY_STATE_STARTED) {
- query_running = TRUE;
- }
- }
-
- if (query_running) {
- db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
- if (radeon_get_family_class(rscreen->rw) == R700)
- db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1);
- }
-
- rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000;
- rstate->states[R600_DSA__DB_DEPTH_CLEAR] = fui(1);
- rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control;
- rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask;
- rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf;
- rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref;
- rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000;
- rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000;
- rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000;
- rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control;
- rstate->states[R600_DSA__DB_SHADER_CONTROL] = db_shader_control;
- rstate->states[R600_DSA__DB_RENDER_CONTROL] = db_render_control;
- rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = db_render_override;
-
- rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000;
- rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000;
- rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00;
- radeon_state_pm4(rstate);
-}
-
-
-static INLINE u32 S_FIXED(float value, u32 frac_bits)
-{
- return value * (1 << frac_bits);
-}
-
-static void r600_sampler_border(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_sampler_state *state, unsigned id)
-{
- struct r600_screen *rscreen = rctx->screen;
- union util_color uc;
-
- util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS);
- if (uc.ui) {
- rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]);
- rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]);
- rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]);
- rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]);
- }
- radeon_state_pm4(rstate);
-}
-
-static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate,
- const struct pipe_sampler_state *state, unsigned id)
-{
- struct r600_screen *rscreen = rctx->screen;
- union util_color uc;
-
- util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
-
- radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS);
- rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] =
- S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
- S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
- S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) |
- S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
- S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
- /* FIXME LOD it depends on texture base level ... */
- rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] =
- S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
- S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
- S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6));
- rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1);
- radeon_state_pm4(rstate);
-
-}
-
-
-static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate,
- const struct pipe_sampler_view *view, unsigned id)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_screen *rscreen = rctx->screen;
- const struct util_format_description *desc;
- struct r600_resource_texture *texture;
- struct r600_resource *rbuffer;
- unsigned format;
- uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4], array_mode = 0, tile_type = 0;
-
- rstate->cpm4 = 0;
- swizzle[0] = view->swizzle_r;
- swizzle[1] = view->swizzle_g;
- swizzle[2] = view->swizzle_b;
- swizzle[3] = view->swizzle_a;
- format = r600_translate_texformat(view->texture->format,
- swizzle,
- &word4, &yuv_format);
- if (format == ~0) {
- return;
- }
- desc = util_format_description(view->texture->format);
- if (desc == NULL) {
- R600_ERR("unknow format %d\n", view->texture->format);
- return;
- }
- radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS);
- texture = (struct r600_resource_texture*)view->texture;
- rbuffer = &texture->resource;
-
- if (texture->depth) {
- r600_texture_depth_flush(ctx, view->texture);
- rbuffer = &texture->flushed_depth_texture->resource;
- }
-
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo);
-
- rstate->nbo = 2;
- rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
- rstate->placement[1] = RADEON_GEM_DOMAIN_GTT;
- rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
- rstate->placement[3] = RADEON_GEM_DOMAIN_GTT;
-
- pitch = align(texture->pitch[0] / texture->bpt, 8);
-
- /* FIXME properly handle first level != 0 */
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] =
- S_038000_DIM(r600_tex_dim(view->texture->target)) |
- S_038000_TILE_MODE(array_mode) |
- S_038000_TILE_TYPE(tile_type) |
- S_038000_PITCH((pitch / 8) - 1) |
- S_038000_TEX_WIDTH(view->texture->width0 - 1);
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] =
- S_038004_TEX_HEIGHT(view->texture->height0 - 1) |
- S_038004_TEX_DEPTH(view->texture->depth0 - 1) |
- S_038004_DATA_FORMAT(format);
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = texture->offset[0] >> 8;
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = texture->offset[1] >> 8;
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] =
- word4 |
- S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
- S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
- S_038010_REQUEST_SIZE(1) |
- S_038010_BASE_LEVEL(view->first_level);
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] =
- S_038014_LAST_LEVEL(view->last_level) |
- S_038014_BASE_ARRAY(0) |
- S_038014_LAST_ARRAY(0);
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] =
- S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE);
- radeon_state_pm4(rstate);
-}
-
-static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate)
-{
- struct r600_screen *rscreen = rctx->screen;
- const struct pipe_blend_state *pbs = &rctx->blend->state.blend;
- int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
- uint32_t color_control, target_mask, shader_mask, shader_control;
- int i;
-
- target_mask = 0;
- shader_mask = 0;
- shader_control = 0;
- color_control = S_028808_PER_MRT_BLEND(1);
-
- for (i = 0; i < nr_cbufs; i++) {
- shader_mask |= 0xf << (i * 4);
- shader_control |= (1 << i);
- }
-
- if (pbs->logicop_enable) {
- color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20);
- } else {
- color_control |= (0xcc << 16);
- }
-
- if (pbs->independent_blend_enable) {
- for (i = 0; i < nr_cbufs; i++) {
- if (pbs->rt[i].blend_enable) {
- color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
- }
- target_mask |= (pbs->rt[i].colormask << (4 * i));
- }
- } else {
- for (i = 0; i < nr_cbufs; i++) {
- if (pbs->rt[0].blend_enable) {
- color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
- }
- target_mask |= (pbs->rt[0].colormask << (4 * i));
- }
- }
- radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0);
- rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask;
- rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask;
- rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control;
- if (radeon_get_family_class(rscreen->rw) == R700)
- rstate->states[R600_CB_CNTL__CB_SHADER_CONTROL] = shader_control;
- rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000;
- rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000;
- rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000;
- rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000;
- rstate->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000;
- rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF;
- rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF;
- rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF;
- radeon_state_pm4(rstate);
-}
-
-static void r600_init_config(struct r600_context *rctx)
-{
- int ps_prio;
- int vs_prio;
- int gs_prio;
- int es_prio;
- int num_ps_gprs;
- int num_vs_gprs;
- int num_gs_gprs;
- int num_es_gprs;
- int num_temp_gprs;
- int num_ps_threads;
- int num_vs_threads;
- int num_gs_threads;
- int num_es_threads;
- int num_ps_stack_entries;
- int num_vs_stack_entries;
- int num_gs_stack_entries;
- int num_es_stack_entries;
- enum radeon_family family;
-
- family = radeon_get_family(rctx->rw);
- ps_prio = 0;
- vs_prio = 1;
- gs_prio = 2;
- es_prio = 3;
- switch (family) {
- case CHIP_R600:
- num_ps_gprs = 192;
- num_vs_gprs = 56;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 136;
- num_vs_threads = 48;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 128;
- num_vs_stack_entries = 128;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- case CHIP_RV630:
- case CHIP_RV635:
- num_ps_gprs = 84;
- num_vs_gprs = 36;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 144;
- num_vs_threads = 40;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 40;
- num_vs_stack_entries = 40;
- num_gs_stack_entries = 32;
- num_es_stack_entries = 16;
- break;
- case CHIP_RV610:
- case CHIP_RV620:
- case CHIP_RS780:
- case CHIP_RS880:
- default:
- num_ps_gprs = 84;
- num_vs_gprs = 36;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 136;
- num_vs_threads = 48;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 40;
- num_vs_stack_entries = 40;
- num_gs_stack_entries = 32;
- num_es_stack_entries = 16;
- break;
- case CHIP_RV670:
- num_ps_gprs = 144;
- num_vs_gprs = 40;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 136;
- num_vs_threads = 48;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 40;
- num_vs_stack_entries = 40;
- num_gs_stack_entries = 32;
- num_es_stack_entries = 16;
- break;
- case CHIP_RV770:
- num_ps_gprs = 192;
- num_vs_gprs = 56;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 188;
- num_vs_threads = 60;
- num_gs_threads = 0;
- num_es_threads = 0;
- num_ps_stack_entries = 256;
- num_vs_stack_entries = 256;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- case CHIP_RV730:
- case CHIP_RV740:
- num_ps_gprs = 84;
- num_vs_gprs = 36;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 188;
- num_vs_threads = 60;
- num_gs_threads = 0;
- num_es_threads = 0;
- num_ps_stack_entries = 128;
- num_vs_stack_entries = 128;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- case CHIP_RV710:
- num_ps_gprs = 192;
- num_vs_gprs = 56;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 144;
- num_vs_threads = 48;
- num_gs_threads = 0;
- num_es_threads = 0;
- num_ps_stack_entries = 128;
- num_vs_stack_entries = 128;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- }
- radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0);
-
- rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
- switch (family) {
- case CHIP_RV610:
- case CHIP_RV620:
- case CHIP_RS780:
- case CHIP_RS880:
- case CHIP_RV710:
- break;
- default:
- rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1);
- break;
- }
-
- if (!rctx->screen->use_mem_constant)
- rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1);
-
- rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1);
- rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio);
- rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio);
- rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio);
- rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio);
-
- rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0;
- rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
- rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-
- rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0;
- rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
- rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs);
-
- rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0;
- rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
- rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
- rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
- rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads);
-
- rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0;
- rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
- rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
-
- rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0;
- rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
- rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
-
- rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000;
-
- if (family >= CHIP_RV770) {
- rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] =
- S_008D8C_VS_PC_LIMIT_ENABLE(1);
- rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002;
- rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000;
- rctx->config.states[R600_CONFIG__DB_WATERMARKS] =
- S_009838_DEPTH_FREE(4) |
- S_009838_DEPTH_FLUSH(16) |
- S_009838_DEPTH_PENDING_FREE(4) |
- S_009838_DEPTH_CACHELINE_FREE(4);
- rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000;
- rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00500000 |
- S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
- S_028A4C_FORCE_EOV_REZ_ENABLE(1);
- } else {
- rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000;
- rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002 |
- S_009508_DISABLE_CUBE_WRAP(1);
- rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000;
- rctx->config.states[R600_CONFIG__DB_WATERMARKS] =
- S_009838_DEPTH_FREE(4) |
- S_009838_DEPTH_FLUSH(16) |
- S_009838_DEPTH_PENDING_FREE(4) |
- S_009838_DEPTH_CACHELINE_FREE(16);
- rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] =
- S_0286C8_PS_GROUPING(1);
- rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] =
- S_028A4C_WALK_ORDER_ENABLE(1) |
- S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1);
- }
- rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = S_028AB4_REUSE_OFF(1);
- rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
- rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000;
- radeon_state_pm4(&rctx->config);
-}
-
-static int r600_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset,
- uint32_t stride, uint32_t src_format)
-{
- struct radeon_state *vs_resource = &rctx->vs_resource[id];
- struct r600_screen *rscreen = rctx->screen;
- unsigned format, num_format = 0, format_comp = 0;
-
- format = r600_translate_colorformat(src_format);
-
- r600_translate_vertex_num_format(src_format, &num_format, &format_comp);
-
- format = S_038008_DATA_FORMAT(format) | S_038008_NUM_FORMAT_ALL(num_format) | S_038008_FORMAT_COMP_ALL(format_comp);
-
- radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS);
- radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo);
- vs_resource->nbo = 1;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(stride) | format;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000;
- vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT;
- vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT;
- return radeon_state_pm4(vs_resource);
-}
-
-static int r600_draw_vgt_init(struct r600_draw *draw,
- int vgt_draw_initiator)
-{
- struct r600_context *rctx = r600_context(draw->ctx);
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer;
- radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0);
- draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count;
- draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
- draw->draw.states[R600_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset;
- if (rbuffer) {
- radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo);
- draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT;
- draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT;
- draw->draw.nbo = 1;
- }
- return radeon_state_pm4(&draw->draw);
-}
-
-static int r600_draw_vgt_prim(struct r600_draw *draw,
- uint32_t prim, uint32_t vgt_dma_index_type)
-{
- struct r600_context *rctx = r600_context(draw->ctx);
- struct r600_screen *rscreen = rctx->screen;
- radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0);
- draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim;
- draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = draw->max_index;
- draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = draw->min_index;
- draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->index_bias;
- draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000;
- draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type;
- draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000;
- draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
- draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000;
- draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000;
- draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000;
- return radeon_state_pm4(&draw->vgt);
-}
-
-static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader,
- struct radeon_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- const struct pipe_rasterizer_state *rasterizer;
- struct r600_shader *rshader = &rpshader->shader;
- unsigned i, tmp, exports_ps, num_cout;
- boolean have_pos = FALSE, have_face = FALSE;
-
- rasterizer = &rctx->rasterizer->state.rasterizer;
-
- radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(rctx, rshader, i)) | S_028644_SEL_CENTROID(1);
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- have_pos = TRUE;
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
-
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- have_face = TRUE;
-
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rasterizer->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
- }
-
- exports_ps = 0;
- num_cout = 0;
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- exports_ps |= S_028854_EXPORT_Z(1);
- else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
- num_cout++;
- }
- }
- exports_ps |= S_028854_EXPORT_COLORS(num_cout);
- if (exports_ps == 0) {
- /* Always at least export 1 color component per pixel. */
- exports_ps = S_028854_EXPORT_COLORS(1);
- }
- state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] =
- S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
-
- if (have_pos) {
- state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |=
- S_0286CC_POSITION_ENA(1) |
- S_0286CC_BARYC_SAMPLE_CNTL(1);
- state->states[R600_PS_SHADER__SPI_INPUT_Z] |=
- S_0286D8_PROVIDE_Z_TO_SPI(1);
- }
-
- state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] =
- S_0286D0_FRONT_FACE_ENA(have_face);
-
- state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] =
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack);
- state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
- radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo);
- state->nbo = 1;
- state->placement[0] = RADEON_GEM_DOMAIN_GTT;
- return radeon_state_pm4(state);
-}
-
-static int r600_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader,
- struct radeon_state *state)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_shader *rshader = &rpshader->shader;
- unsigned i, tmp;
-
- radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
- for (i = 0; i < 10; i++) {
- state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
- }
- /* so far never got proper semantic id from tgsi */
- for (i = 0; i < 32; i++) {
- tmp = i << ((i & 3) * 8);
- state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
- }
- state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] =
- S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
- state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] =
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack);
- radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo);
- radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo);
- state->nbo = 2;
- state->placement[0] = RADEON_GEM_DOMAIN_GTT;
- state->placement[2] = RADEON_GEM_DOMAIN_GTT;
- return radeon_state_pm4(state);
-}
-
-struct r600_context_hw_state_vtbl r600_hw_state_vtbl = {
- .blend = r600_blend,
- .ucp = r600_ucp,
- .cb = r600_cb,
- .db = r600_db,
- .rasterizer = r600_rasterizer,
- .scissor = r600_scissor,
- .viewport = r600_viewport,
- .dsa = r600_dsa,
- .sampler_border = r600_sampler_border,
- .sampler = r600_sampler,
- .resource = r600_resource,
- .cb_cntl = r600_cb_cntl,
- .vs_resource = r600_vs_resource,
- .vgt_init = r600_draw_vgt_init,
- .vgt_prim = r600_draw_vgt_prim,
- .vs_shader = r600_vs_shader,
- .ps_shader = r600_ps_shader,
- .init_config = r600_init_config,
-};
-
-void r600_set_constant_buffer_file(struct pipe_context *ctx,
- uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_context *rctx = r600_context(ctx);
- unsigned nconstant = 0, i, type, shader_class;
- struct radeon_state *rstate, *rstates;
- struct pipe_transfer *transfer;
- u32 *ptr;
-
- type = R600_STATE_CONSTANT;
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- shader_class = R600_SHADER_VS;
- rstates = rctx->vs_constant;
- break;
- case PIPE_SHADER_FRAGMENT:
- shader_class = R600_SHADER_PS;
- rstates = rctx->ps_constant;
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
- if (buffer && buffer->width0 > 0) {
- nconstant = buffer->width0 / 16;
- ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer);
- if (ptr == NULL)
- return;
- for (i = 0; i < nconstant; i++) {
- rstate = &rstates[i];
- radeon_state_init(rstate, rscreen->rw, type, i, shader_class);
- rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0];
- rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1];
- rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2];
- rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3];
- if (radeon_state_pm4(rstate))
- return;
- radeon_draw_bind(&rctx->draw, rstate);
- }
- pipe_buffer_unmap(ctx, buffer, transfer);
- }
-}
-
-void r600_set_constant_buffer_mem(struct pipe_context *ctx,
- uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_context *rctx = r600_context(ctx);
- unsigned nconstant = 0, type, shader_class, size;
- struct radeon_state *rstate, *rstates;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
-
- type = R600_STATE_CBUF;
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- shader_class = R600_SHADER_VS;
- rstates = rctx->vs_constant;
- break;
- case PIPE_SHADER_FRAGMENT:
- shader_class = R600_SHADER_PS;
- rstates = rctx->ps_constant;
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-
- rstate = &rstates[0];
-
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
- nconstant = buffer->width0 / 16;
- size = ALIGN_DIVUP(nconstant, 16);
-
- radeon_state_init(rstate, rscreen->rw, type, 0, shader_class);
- rstate->states[R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size;
- rstate->states[R600_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0;
-
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo);
- rstate->nbo = 1;
- rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
- if (radeon_state_pm4(rstate))
- return;
- radeon_draw_bind(&rctx->draw, rstate);
-}
-
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index 0cf9c1c401..4f9b39a7fd 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -233,12 +233,6 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014
-/* same up to here */
-/*
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018
-*/
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000015
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000016
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000017
@@ -336,9 +330,11 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099
/* TODO Fill in more ALU */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW 0x000000D7
diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_pipe.c
index be8a78ca3d..dd8fa4fcd7 100644
--- a/src/gallium/drivers/r600/r600_screen.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -19,37 +19,221 @@
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Corbin Simpson
*/
#include <stdio.h>
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_memory.h"
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_public.h"
+#include <errno.h>
+#include <pipe/p_defines.h>
+#include <pipe/p_state.h>
+#include <pipe/p_context.h>
+#include <tgsi/tgsi_scan.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_util.h>
+#include <util/u_blitter.h>
+#include <util/u_double_list.h>
+#include <util/u_transfer.h>
+#include <util/u_surface.h>
+#include <util/u_pack_color.h>
+#include <util/u_memory.h>
+#include <util/u_inlines.h>
+#include <util/u_upload_mgr.h>
+#include <pipebuffer/pb_buffer.h>
+#include "r600.h"
+#include "r600d.h"
#include "r600_resource.h"
+#include "r600_shader.h"
+#include "r600_pipe.h"
#include "r600_state_inlines.h"
+/*
+ * pipe_context
+ */
+/*
+ * Flush hook installed as rctx->context.flush by r600_create_context().
+ * Flushes both upload managers and then hands the context to
+ * r600_context_flush(). The flags and fence arguments are not used here.
+ */
+static void r600_flush(struct pipe_context *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+#if 0
+ static int dc = 0;
+ char dname[256];
+#endif
+
+ /* NOTE(review): pm4_cdwords is presumably the count of queued command dwords; zero means nothing to submit - confirm. */
+ if (!rctx->ctx.pm4_cdwords)
+ return;
+
+ u_upload_flush(rctx->upload_vb);
+ u_upload_flush(rctx->upload_ib);
+
+ /* Disabled debug aid: dumps the first 20 flushes to gallium-NNNNNNNN.bof files. */
+#if 0
+ sprintf(dname, "gallium-%08d.bof", dc);
+ if (dc < 20) {
+ r600_context_dump_bof(&rctx->ctx, dname);
+ R600_ERR("dumped %s\n", dname);
+ }
+ dc++;
+#endif
+ r600_context_flush(&rctx->ctx);
+}
+
+/*
+ * Destroy hook installed as rctx->context.destroy. r600_create_context()
+ * also calls it to unwind a partially constructed context, so members that
+ * may not exist yet (blitter, upload managers) are checked before release.
+ */
+static void r600_destroy_context(struct pipe_context *context)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)context;
+
+ /* Release the blitter while the context it was created on is still intact. */
+ if (rctx->blitter)
+ util_blitter_destroy(rctx->blitter);
+
+ r600_context_fini(&rctx->ctx);
+ for (int i = 0; i < R600_PIPE_NSTATES; i++) {
+ free(rctx->states[i]);
+ }
+
+ /* Early failures in r600_create_context() reach here before these exist. */
+ if (rctx->upload_vb)
+ u_upload_destroy(rctx->upload_vb);
+ if (rctx->upload_ib)
+ u_upload_destroy(rctx->upload_ib);
+
+ FREE(rctx->ps_resource);
+ FREE(rctx->vs_resource);
+ FREE(rctx);
+}
+
+/*
+ * Context constructor installed as rscreen->screen.context_create.
+ * Picks the r600/r700 or evergreen code paths from the chip family, then
+ * creates the upload managers, the blitter and the resource state arrays.
+ * Returns the new pipe_context, or NULL on failure; every failure after the
+ * hw context is initialised unwinds through r600_destroy_context() so that
+ * nothing created so far is leaked.
+ */
+static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
+{
+ struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context);
+ struct r600_screen* rscreen = (struct r600_screen *)screen;
+ enum chip_class class;
+
+ if (rctx == NULL)
+ return NULL;
+ rctx->context.winsys = rscreen->screen.winsys;
+ rctx->context.screen = screen;
+ rctx->context.priv = priv;
+ rctx->context.destroy = r600_destroy_context;
+ rctx->context.flush = r600_flush;
+
+ /* Easy accessing of screen/winsys. */
+ rctx->screen = rscreen;
+ rctx->radeon = rscreen->radeon;
+ rctx->family = r600_get_family(rctx->radeon);
+
+ r600_init_blit_functions(rctx);
+ r600_init_query_functions(rctx);
+ r600_init_context_resource_functions(rctx);
+
+ switch (r600_get_family(rctx->radeon)) {
+ case CHIP_R600:
+ case CHIP_RV610:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ case CHIP_RV770:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ rctx->context.draw_vbo = r600_draw_vbo;
+ r600_init_state_functions(rctx);
+ if (r600_context_init(&rctx->ctx, rctx->radeon)) {
+ r600_destroy_context(&rctx->context);
+ return NULL;
+ }
+ r600_init_config(rctx);
+ break;
+ case CHIP_CEDAR:
+ case CHIP_REDWOOD:
+ case CHIP_JUNIPER:
+ case CHIP_CYPRESS:
+ case CHIP_HEMLOCK:
+ rctx->context.draw_vbo = evergreen_draw;
+ evergreen_init_state_functions(rctx);
+ if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
+ r600_destroy_context(&rctx->context);
+ return NULL;
+ }
+ evergreen_init_config(rctx);
+ break;
+ default:
+ R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon));
+ r600_destroy_context(&rctx->context);
+ return NULL;
+ }
+
+ rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16,
+ PIPE_BIND_INDEX_BUFFER);
+ if (rctx->upload_ib == NULL) {
+ r600_destroy_context(&rctx->context);
+ return NULL;
+ }
+
+ rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16,
+ PIPE_BIND_VERTEX_BUFFER);
+ if (rctx->upload_vb == NULL) {
+ r600_destroy_context(&rctx->context);
+ return NULL;
+ }
+
+ /* From here on the hw context and both upload managers exist, so a bare FREE(rctx) would leak them. */
+ rctx->blitter = util_blitter_create(&rctx->context);
+ if (rctx->blitter == NULL)
+ goto err_ctx;
+
+ rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
+ if (!rctx->vs_resource)
+ goto err_blitter;
+
+ rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
+ if (!rctx->ps_resource)
+ goto err_blitter;
+
+ class = r600_get_family_class(rctx->radeon);
+ if (class == R600 || class == R700)
+ rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx);
+ else
+ rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
+
+ r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth;
+
+ return &rctx->context;
+
+err_blitter:
+ /* Clear the pointer so the blitter cannot be released a second time further down the unwind. */
+ util_blitter_destroy(rctx->blitter);
+ rctx->blitter = NULL;
+err_ctx:
+ r600_destroy_context(&rctx->context);
+ return NULL;
+}
+
+/*
+ * pipe_screen
+ */
static const char* r600_get_vendor(struct pipe_screen* pscreen)
{
return "X.Org";
}
+/* Return the chip name string for a radeon_family value, or "unknown" when the value is not listed. */
+static const char *r600_get_family_name(enum radeon_family family)
+{
+ static const struct {
+ enum radeon_family id;
+ const char *label;
+ } chips[] = {
+ { CHIP_R600, "R600" },
+ { CHIP_RV610, "RV610" },
+ { CHIP_RV630, "RV630" },
+ { CHIP_RV670, "RV670" },
+ { CHIP_RV620, "RV620" },
+ { CHIP_RV635, "RV635" },
+ { CHIP_RS780, "RS780" },
+ { CHIP_RS880, "RS880" },
+ { CHIP_RV770, "RV770" },
+ { CHIP_RV730, "RV730" },
+ { CHIP_RV710, "RV710" },
+ { CHIP_RV740, "RV740" },
+ { CHIP_CEDAR, "CEDAR" },
+ { CHIP_REDWOOD, "REDWOOD" },
+ { CHIP_JUNIPER, "JUNIPER" },
+ { CHIP_CYPRESS, "CYPRESS" },
+ { CHIP_HEMLOCK, "HEMLOCK" },
+ };
+ unsigned idx;
+
+ /* Linear scan: the table is tiny and this only runs for name queries. */
+ for (idx = 0; idx < sizeof(chips) / sizeof(chips[0]); idx++) {
+ if (chips[idx].id == family)
+ return chips[idx].label;
+ }
+ return "unknown";
+}
+
+
static const char* r600_get_name(struct pipe_screen* pscreen)
{
- struct r600_screen *screen = r600_screen(pscreen);
- enum radeon_family family = radeon_get_family(screen->rw);
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
- if (family >= CHIP_R600 && family < CHIP_RV770)
- return "R600 (HD2XXX,HD3XXX)";
- else if (family < CHIP_CEDAR)
- return "R700 (HD4XXX)";
- else
- return "EVERGREEN";
+ return r600_get_family_name(family);
}
static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
@@ -72,6 +256,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_DEPTH_CLAMP:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
return 1;
/* Unsupported features (boolean caps). */
@@ -87,7 +272,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 14;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
/* FIXME allow this once infrastructure is there */
- return 0;
+ return 16;
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return 16;
@@ -104,15 +289,35 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
+
default:
R600_ERR("r600: unknown param %d\n", param);
return 0;
}
}
+/*
+ * Float-valued capability query. Line/point width limits report 8192,
+ * anisotropy and LOD bias limits report 16; any other cap is logged through
+ * R600_ERR and reported as 0.
+ */
+static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
+{
+ if (param == PIPE_CAP_MAX_LINE_WIDTH ||
+ param == PIPE_CAP_MAX_LINE_WIDTH_AA ||
+ param == PIPE_CAP_MAX_POINT_WIDTH ||
+ param == PIPE_CAP_MAX_POINT_WIDTH_AA)
+ return 8192.0f;
+
+ if (param == PIPE_CAP_MAX_TEXTURE_ANISOTROPY ||
+ param == PIPE_CAP_MAX_TEXTURE_LOD_BIAS)
+ return 16.0f;
+
+ R600_ERR("r600: unsupported paramf %d\n", param);
+ return 0.0f;
+}
+
+
static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
{
- switch(shader) {
+ switch(shader)
+ {
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
break;
@@ -155,24 +360,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
}
}
-static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
-{
- switch (param) {
- case PIPE_CAP_MAX_LINE_WIDTH:
- case PIPE_CAP_MAX_LINE_WIDTH_AA:
- case PIPE_CAP_MAX_POINT_WIDTH:
- case PIPE_CAP_MAX_POINT_WIDTH_AA:
- return 8192.0f;
- case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
- return 16.0f;
- case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
- return 16.0f;
- default:
- R600_ERR("r600: unsupported paramf %d\n", param);
- return 0.0f;
- }
-}
-
static boolean r600_is_format_supported(struct pipe_screen* screen,
enum pipe_format format,
enum pipe_texture_target target,
@@ -226,31 +413,25 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
static void r600_destroy_screen(struct pipe_screen* pscreen)
{
- struct r600_screen* rscreen = r600_screen(pscreen);
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
if (rscreen == NULL)
return;
FREE(rscreen);
}
-struct pipe_screen *r600_screen_create(struct radeon *rw)
+
+struct pipe_screen *r600_screen_create(struct radeon *radeon)
{
- struct r600_screen* rscreen;
+ struct r600_screen *rscreen;
rscreen = CALLOC_STRUCT(r600_screen);
if (rscreen == NULL) {
return NULL;
}
- /* don't enable mem constant for r600 yet */
- rscreen->use_mem_constant = FALSE;
- if (radeon_get_family_class(rw) == EVERGREEN) {
- rscreen->use_mem_constant = TRUE;
- }
-
- radeon_set_mem_constant(rw, rscreen->use_mem_constant);
- rscreen->rw = rw;
- rscreen->screen.winsys = (struct pipe_winsys*)rw;
+ rscreen->radeon = radeon;
+ rscreen->screen.winsys = (struct pipe_winsys*)radeon;
rscreen->screen.destroy = r600_destroy_screen;
rscreen->screen.get_name = r600_get_name;
rscreen->screen.get_vendor = r600_get_vendor;
@@ -261,5 +442,8 @@ struct pipe_screen *r600_screen_create(struct radeon *rw)
rscreen->screen.context_create = r600_create_context;
r600_init_screen_texture_functions(&rscreen->screen);
r600_init_screen_resource_functions(&rscreen->screen);
+
+ rscreen->tiling_info = r600_get_tiling_info(radeon);
+
return &rscreen->screen;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 5abf910c81..e7c4b60d00 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -26,6 +26,15 @@
#ifndef R600_PIPE_H
#define R600_PIPE_H
+#include <pipe/p_state.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+#include <util/u_math.h>
+#include "r600.h"
+#include "r600_public.h"
+#include "r600_shader.h"
+#include "r600_resource.h"
+
enum r600_pipe_state_id {
R600_PIPE_STATE_BLEND = 0,
R600_PIPE_STATE_BLEND_COLOR,
@@ -49,6 +58,7 @@ enum r600_pipe_state_id {
struct r600_screen {
struct pipe_screen screen;
struct radeon *radeon;
+ struct r600_tiling_info *tiling_info;
};
struct r600_pipe_sampler_view {
@@ -69,12 +79,6 @@ struct r600_pipe_blend {
unsigned cb_target_mask;
};
-struct r600_pipe_shader {
- struct r600_shader shader;
- struct r600_pipe_state rstate;
- struct radeon_ws_bo *bo;
-};
-
struct r600_vertex_element
{
unsigned count;
@@ -82,13 +86,32 @@ struct r600_vertex_element
struct pipe_vertex_element elements[32];
};
+struct r600_pipe_shader {
+ struct r600_shader shader;
+ struct r600_pipe_state rstate;
+ struct r600_bo *bo;
+ struct r600_vertex_element vertex_elements;
+};
+
+/* needed for blitter save */
+#define NUM_TEX_UNITS 16
+
+struct r600_textures_info {
+ struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
+ unsigned n_views;
+ void *samplers[NUM_TEX_UNITS];
+ unsigned n_samplers;
+};
+
+#define R600_CONSTANT_ARRAY_SIZE 256
+#define R600_RESOURCE_ARRAY_SIZE 160
+
struct r600_pipe_context {
struct pipe_context context;
struct blitter_context *blitter;
struct pipe_framebuffer_state *pframebuffer;
unsigned family;
void *custom_dsa_flush;
- struct list_head query_list; /* fake member for depth remove once merged */
struct r600_screen *screen;
struct radeon *radeon;
struct r600_pipe_state *states[R600_PIPE_NSTATES];
@@ -103,12 +126,8 @@ struct r600_pipe_context {
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
- unsigned vs_nconst;
- unsigned ps_nconst;
- struct r600_pipe_state vs_const[256];
- struct r600_pipe_state ps_const[256];
- struct r600_pipe_state vs_resource[160];
- struct r600_pipe_state ps_resource[160];
+ struct r600_pipe_state *vs_resource;
+ struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
@@ -121,6 +140,8 @@ struct r600_pipe_context {
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
unsigned any_user_vbs;
+ struct r600_textures_info ps_samplers;
+
};
struct r600_drawl {
@@ -136,29 +157,65 @@ struct r600_drawl {
struct pipe_resource *index_buffer;
};
-uint32_t r600_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view,
- uint32_t *word4_p, uint32_t *yuv_format_p);
+/* evergreen_state.c */
+void evergreen_init_state_functions(struct r600_pipe_context *rctx);
+void evergreen_init_config(struct r600_pipe_context *rctx);
+void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info);
+void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
+
+/* r600_blit.c */
+void r600_init_blit_functions(struct r600_pipe_context *rctx);
+int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+
+/* r600_buffer.c */
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ);
+struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
+ void *ptr, unsigned bytes,
+ unsigned bind);
+unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
+ struct pipe_resource *buf,
+ unsigned face, unsigned level);
+struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
+ struct winsys_handle *whandle);
+int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
+int r600_upload_user_buffers(struct r600_pipe_context *rctx);
-/* r600_state2.c */
-int r600_pipe_shader_update2(struct pipe_context *ctx, struct r600_pipe_shader *shader);
-int r600_pipe_shader_create2(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
-int r600_upload_index_buffer2(struct r600_pipe_context *rctx, struct r600_drawl *draw);
-int r600_upload_user_buffers2(struct r600_pipe_context *rctx);
-void r600_translate_index_buffer2(struct r600_pipe_context *r600,
+/* r600_query.c */
+void r600_init_query_functions(struct r600_pipe_context *rctx);
+
+/* r600_resource.c */
+void r600_init_context_resource_functions(struct r600_pipe_context *r600);
+
+/* r600_shader.c */
+int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
+int r600_find_vs_semantic_index(struct r600_shader *vs,
+ struct r600_shader *ps, int id);
+
+/* r600_state.c */
+void r600_init_state_functions(struct r600_pipe_context *rctx);
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
+void r600_init_config(struct r600_pipe_context *rctx);
+void r600_translate_index_buffer(struct r600_pipe_context *r600,
struct pipe_resource **index_buffer,
unsigned *index_size,
unsigned *start, unsigned count);
-int r600_find_vs_semantic_index2(struct r600_shader *vs,
- struct r600_shader *ps, int id);
+void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
+/* r600_helper.h */
+int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-/* evergreen_state.c */
-void evergreen_init_state_functions2(struct r600_pipe_context *rctx);
-void evergreen_init_config2(struct r600_pipe_context *rctx);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info);
-void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
-void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+/* r600_texture.c */
+void r600_init_screen_texture_functions(struct pipe_screen *screen);
+uint32_t r600_translate_texformat(enum pipe_format format,
+ const unsigned char *swizzle_view,
+ uint32_t *word4_p, uint32_t *yuv_format_p);
+/*
+ * common helpers
+ */
static INLINE u32 S_FIXED(float value, u32 frac_bits)
{
return value * (1 << frac_bits);
diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h
index 1d89c9f9f6..f1970201e8 100644
--- a/src/gallium/drivers/r600/r600_public.h
+++ b/src/gallium/drivers/r600/r600_public.h
@@ -1,9 +1,28 @@
-
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
#ifndef R600_PUBLIC_H
#define R600_PUBLIC_H
-struct radeon;
-
-struct pipe_screen* r600_screen_create(struct radeon *rw);
+struct pipe_screen *r600_screen_create(struct radeon *radeon);
#endif
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 6e50701de6..726668260c 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -19,230 +19,55 @@
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Corbin Simpson
*/
-#include <errno.h>
-#include <util/u_inlines.h>
-#include <util/u_format.h>
-#include <util/u_memory.h>
-#include "r600_screen.h"
-#include "r600_context.h"
-
-static void r600_query_begin(struct r600_context *rctx, struct r600_query *rquery)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate = &rquery->rstate;
-
- radeon_state_fini(rstate);
- radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_BEGIN, 0, 0);
- rstate->states[R600_QUERY__OFFSET] = rquery->num_results;
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer);
- rstate->nbo = 1;
- rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
- if (radeon_state_pm4(rstate)) {
- radeon_state_fini(rstate);
- }
-}
-
-static void r600_query_end(struct r600_context *rctx, struct r600_query *rquery)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate = &rquery->rstate;
-
- radeon_state_fini(rstate);
- radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_END, 0, 0);
- rstate->states[R600_QUERY__OFFSET] = rquery->num_results + 8;
- radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer);
- rstate->nbo = 1;
- rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
- if (radeon_state_pm4(rstate)) {
- radeon_state_fini(rstate);
- }
-}
+#include "r600_pipe.h"
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_context *rctx = r600_context(ctx);
- struct r600_query *q;
-
- if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
- return NULL;
-
- q = CALLOC_STRUCT(r600_query);
- if (!q)
- return NULL;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- q->type = query_type;
- q->buffer_size = 4096;
-
- q->buffer = radeon_ws_bo(rscreen->rw, q->buffer_size, 1, 0);
- if (!q->buffer) {
- FREE(q);
- return NULL;
- }
-
- LIST_ADDTAIL(&q->list, &rctx->query_list);
-
- return (struct pipe_query *)q;
-}
-
-static void r600_destroy_query(struct pipe_context *ctx,
- struct pipe_query *query)
-{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_query *q = r600_query(query);
-
- radeon_ws_bo_reference(rscreen->rw, &q->buffer, NULL);
- LIST_DEL(&q->list);
- FREE(query);
+ return (struct pipe_query*)r600_context_query_create(&rctx->ctx, query_type);
}
-static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquery)
+static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
- u64 start, end;
- u32 *results;
- int i;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, ctx);
- for (i = 0; i < rquery->num_results; i += 4) {
- start = (u64)results[i] | (u64)results[i + 1] << 32;
- end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
- if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
- rquery->result += end - start;
- }
- }
- radeon_ws_bo_unmap(rscreen->rw, rquery->buffer);
- rquery->num_results = 0;
-}
-
-static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquery)
-{
- struct r600_context *rctx = r600_context(ctx);
-
- if (rquery->num_results >= ((rquery->buffer_size >> 2) - 2)) {
- /* running out of space */
- if (!rquery->flushed) {
- ctx->flush(ctx, 0, NULL);
- }
- r600_query_result(ctx, rquery);
- }
- r600_query_begin(rctx, rquery);
- rquery->flushed = FALSE;
-}
-
-static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery)
-{
- struct r600_context *rctx = r600_context(ctx);
-
- r600_query_end(rctx, rquery);
- rquery->num_results += 16;
+ r600_context_query_destroy(&rctx->ctx, (struct r600_query *)query);
}
static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_query *rquery = r600_query(query);
- int r;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_query *rquery = (struct r600_query *)query;
- rquery->state = R600_QUERY_STATE_STARTED;
+ rquery->result = 0;
rquery->num_results = 0;
- rquery->flushed = FALSE;
- r600_query_resume(ctx, rquery);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- if (r == -EBUSY) {
- /* this shouldn't happen */
- R600_ERR("had to flush while emitting end query\n");
- ctx->flush(ctx, 0, NULL);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- }
+ r600_query_begin(&rctx->ctx, (struct r600_query *)query);
}
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_query *rquery = r600_query(query);
- int r;
-
- rquery->state &= ~R600_QUERY_STATE_STARTED;
- rquery->state |= R600_QUERY_STATE_ENDED;
- r600_query_suspend(ctx, rquery);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- if (r == -EBUSY) {
- /* this shouldn't happen */
- R600_ERR("had to flush while emitting end query\n");
- ctx->flush(ctx, 0, NULL);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- }
-}
-
-void r600_queries_suspend(struct pipe_context *ctx)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_query *rquery;
- int r;
-
- LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
- if (rquery->state & R600_QUERY_STATE_STARTED) {
- r600_query_suspend(ctx, rquery);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- if (r == -EBUSY) {
- /* this shouldn't happen */
- R600_ERR("had to flush while emitting end query\n");
- ctx->flush(ctx, 0, NULL);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- }
- }
- rquery->state |= R600_QUERY_STATE_SUSPENDED;
- }
-}
-
-void r600_queries_resume(struct pipe_context *ctx)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_query *rquery;
- int r;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
- if (rquery->state & R600_QUERY_STATE_STARTED) {
- r600_query_resume(ctx, rquery);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- if (r == -EBUSY) {
- /* this shouldn't happen */
- R600_ERR("had to flush while emitting end query\n");
- ctx->flush(ctx, 0, NULL);
- r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate);
- }
- }
- rquery->state &= ~R600_QUERY_STATE_SUSPENDED;
- }
+ r600_query_end(&rctx->ctx, (struct r600_query *)query);
}
static boolean r600_get_query_result(struct pipe_context *ctx,
struct pipe_query *query,
boolean wait, void *vresult)
{
- struct r600_query *rquery = r600_query(query);
- uint64_t *result = (uint64_t*)vresult;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_query *rquery = (struct r600_query *)query;
- if (!rquery->flushed) {
+ if (rquery->num_results) {
ctx->flush(ctx, 0, NULL);
- rquery->flushed = TRUE;
}
- r600_query_result(ctx, rquery);
- *result = rquery->result;
- rquery->result = 0;
- return TRUE;
+ return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult);
}
-void r600_init_query_functions(struct r600_context* rctx)
+void r600_init_query_functions(struct r600_pipe_context *rctx)
{
- LIST_INITHEAD(&rctx->query_list);
-
rctx->context.create_query = r600_create_query;
rctx->context.destroy_query = r600_destroy_query;
rctx->context.begin_query = r600_begin_query;
diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c
index 05707740da..207642ccfa 100644
--- a/src/gallium/drivers/r600/r600_resource.c
+++ b/src/gallium/drivers/r600/r600_resource.c
@@ -21,9 +21,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "r600_context.h"
-#include "r600_resource.h"
-#include "r600_screen.h"
+#include "r600_pipe.h"
static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
@@ -46,7 +44,16 @@ static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * scre
}
}
-void r600_init_context_resource_functions(struct r600_context *r600)
+void r600_init_screen_resource_functions(struct pipe_screen *screen)
+{
+ screen->resource_create = r600_resource_create;
+ screen->resource_from_handle = r600_resource_from_handle;
+ screen->resource_get_handle = u_resource_get_handle_vtbl;
+ screen->resource_destroy = u_resource_destroy_vtbl;
+ screen->user_buffer_create = r600_user_buffer_create;
+}
+
+void r600_init_context_resource_functions(struct r600_pipe_context *r600)
{
r600->context.get_transfer = u_get_transfer_vtbl;
r600->context.transfer_map = u_transfer_map_vtbl;
@@ -56,12 +63,3 @@ void r600_init_context_resource_functions(struct r600_context *r600)
r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
r600->context.is_resource_referenced = u_is_resource_referenced_vtbl;
}
-
-void r600_init_screen_resource_functions(struct pipe_screen *screen)
-{
- screen->resource_create = r600_resource_create;
- screen->resource_from_handle = r600_resource_from_handle;
- screen->resource_get_handle = u_resource_get_handle_vtbl;
- screen->resource_destroy = u_resource_destroy_vtbl;
- screen->user_buffer_create = r600_user_buffer_create;
-}
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index b0026e9578..ef484aba4a 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -25,8 +25,15 @@
#include "util/u_transfer.h"
-struct r600_context;
-struct r600_screen;
+/* Texture transfer. */
+struct r600_transfer {
+ /* Base class. */
+ struct pipe_transfer transfer;
+ /* Buffer transfer. */
+ struct pipe_transfer *buffer_transfer;
+ unsigned offset;
+ struct pipe_resource *linear_texture;
+};
/* This gets further specialized into either buffer or texture
* structures. Use the vtbl struct to choose between the two
@@ -34,7 +41,7 @@ struct r600_screen;
*/
struct r600_resource {
struct u_resource base;
- struct radeon_ws_bo *bo;
+ struct r600_bo *bo;
u32 domain;
u32 flink;
u32 size;
@@ -42,14 +49,12 @@ struct r600_resource {
struct r600_resource_texture {
struct r600_resource resource;
- unsigned long offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long width[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long height[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long pitch_override;
- unsigned long bpt;
- unsigned long size;
+ unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_override;
+ unsigned size;
unsigned tiled;
unsigned array_mode;
unsigned tile_type;
@@ -58,7 +63,6 @@ struct r600_resource_texture {
struct r600_resource_texture *flushed_depth_texture;
};
-void r600_init_context_resource_functions(struct r600_context *r600);
void r600_init_screen_resource_functions(struct pipe_screen *screen);
/* r600_buffer */
@@ -106,4 +110,18 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
struct pipe_resource *texture);
extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
+
+/* r600_texture.c texture transfer functions. */
+struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box);
+void r600_texture_transfer_destroy(struct pipe_context *ctx,
+ struct pipe_transfer *trans);
+void* r600_texture_transfer_map(struct pipe_context *ctx,
+ struct pipe_transfer* transfer);
+void r600_texture_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer* transfer);
+
#endif
diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h
deleted file mode 100644
index 4105bb7cf6..0000000000
--- a/src/gallium/drivers/r600/r600_screen.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef R600_SCREEN_H
-#define R600_SCREEN_H
-
-#include <pipe/p_state.h>
-#include <pipe/p_screen.h>
-#include <pipebuffer/pb_buffer.h>
-#include <xf86drm.h>
-#include <radeon_drm.h>
-#include "radeon.h"
-#include "util/u_transfer.h"
-#include "r600_resource.h"
-
-/* Texture transfer. */
-struct r600_transfer {
- /* Base class. */
- struct pipe_transfer transfer;
- /* Buffer transfer. */
- struct pipe_transfer *buffer_transfer;
- unsigned offset;
- struct pipe_resource *linear_texture;
-};
-
-struct r600_screen {
- struct pipe_screen screen;
- struct radeon *rw;
- boolean use_mem_constant;
-};
-
-static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen)
-{
- return (struct r600_screen*)screen;
-}
-
-/* Buffer functions. */
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ);
-struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
- void *ptr, unsigned bytes,
- unsigned bind);
-unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned face, unsigned level);
-struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
- struct winsys_handle *whandle);
-
-/* r600_texture.c texture transfer functions. */
-struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
- struct pipe_resource *texture,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box);
-void r600_texture_transfer_destroy(struct pipe_context *ctx,
- struct pipe_transfer *trans);
-void* r600_texture_transfer_map(struct pipe_context *ctx,
- struct pipe_transfer* transfer);
-void r600_texture_transfer_unmap(struct pipe_context *ctx,
- struct pipe_transfer* transfer);
-
-/* r600_blit.c */
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
-
-/* helpers */
-int r600_conv_pipe_format(unsigned pformat, unsigned *format);
-int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-
-void r600_init_screen_texture_functions(struct pipe_screen *screen);
-
-#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 0c27bb7d87..d1143985ea 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -25,9 +25,7 @@
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_format.h"
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_shader.h"
+#include "r600_pipe.h"
#include "r600_asm.h"
#include "r600_sq.h"
#include "r600_opcodes.h"
@@ -35,54 +33,227 @@
#include <stdio.h>
#include <errno.h>
+static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_pipe_state *rstate = &shader->rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned spi_vs_out_id[10];
+ unsigned i, tmp;
+
+ /* clear previous register */
+ rstate->nregs = 0;
+
+ /* so far never got proper semantic id from tgsi */
+ for (i = 0; i < 10; i++) {
+ spi_vs_out_id[i] = 0;
+ }
+ for (i = 0; i < 32; i++) {
+ tmp = i << ((i & 3) * 8);
+ spi_vs_out_id[i / 4] |= tmp;
+ }
+ for (i = 0; i < 10; i++) {
+ r600_pipe_state_add_reg(rstate,
+ R_028614_SPI_VS_OUT_ID_0 + i * 4,
+ spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+ }
+
+ r600_pipe_state_add_reg(rstate,
+ R_0286C4_SPI_VS_OUT_CONFIG,
+ S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028868_SQ_PGM_RESOURCES_VS,
+ S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288D0_SQ_PGM_CF_OFFSET_VS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028858_SQ_PGM_START_VS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+ r600_pipe_state_add_reg(rstate,
+ R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+
+ r600_pipe_state_add_reg(rstate,
+ R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
+ 0xFFFFFFFF, NULL);
-struct r600_shader_tgsi_instruction;
+}
-struct r600_shader_ctx {
- struct tgsi_shader_info info;
- struct tgsi_parse_context parse;
- const struct tgsi_token *tokens;
- unsigned type;
- unsigned file_offset[TGSI_FILE_COUNT];
- unsigned temp_reg;
- struct r600_shader_tgsi_instruction *inst_info;
- struct r600_bc *bc;
- struct r600_shader *shader;
- u32 value[4];
- u32 *literals;
- u32 nliterals;
- u32 max_driver_temp_used;
-};
+int r600_find_vs_semantic_index(struct r600_shader *vs,
+ struct r600_shader *ps, int id)
+{
+ struct r600_shader_io *input = &ps->input[id];
-struct r600_shader_tgsi_instruction {
- unsigned tgsi_opcode;
- unsigned is_op3;
- unsigned r600_opcode;
- int (*process)(struct r600_shader_ctx *ctx);
-};
+ for (int i = 0; i < vs->noutput; i++) {
+ if (input->name == vs->output[i].name &&
+ input->sid == vs->output[i].sid) {
+ return i - 1;
+ }
+ }
+ return 0;
+}
-static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
-static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
+static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = &shader->rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ int pos_index = -1, face_index = -1;
+
+ /* clear previous register */
+ rstate->nregs = 0;
+
+ for (i = 0; i < rshader->ninput; i++) {
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+
+ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+ pos_index = i;
+ if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
+ tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
+ }
+ if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+ face_index = i;
+ if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
+ rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
+ r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ }
+ for (i = 0; i < rshader->noutput; i++) {
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ S_02880C_Z_EXPORT_ENABLE(1),
+ S_02880C_Z_EXPORT_ENABLE(1), NULL);
+ if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
+ S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
+ }
+
+ exports_ps = 0;
+ num_cout = 0;
+ for (i = 0; i < rshader->noutput; i++) {
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ exports_ps |= 1;
+ else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
+ num_cout++;
+ }
+ }
+ exports_ps |= S_028854_EXPORT_COLORS(num_cout);
+ if (!exports_ps) {
+ /* always at least export 1 component per pixel */
+ exports_ps = 2;
+ }
+
+ spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
+ S_0286CC_PERSP_GRADIENT_ENA(1);
+ spi_input_z = 0;
+ if (pos_index != -1) {
+ spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
+ S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+ S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
+ S_0286CC_BARYC_SAMPLE_CNTL(1));
+ spi_input_z |= 1;
+ }
+
+ spi_ps_in_control_1 = 0;
+ if (face_index != -1) {
+ spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
+ }
+
+ r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028840_SQ_PGM_START_PS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+ r600_pipe_state_add_reg(rstate,
+ R_028850_SQ_PGM_RESOURCES_PS,
+ S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028854_SQ_PGM_EXPORTS_PS,
+ exports_ps, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288CC_SQ_PGM_CF_OFFSET_PS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+
+ if (rshader->uses_kill) {
+ /* only set some bits here, the other bits are set in the dsa state */
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ S_02880C_KILL_ENABLE(1),
+ S_02880C_KILL_ENABLE(1), NULL);
+ }
+ r600_pipe_state_add_reg(rstate,
+ R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
+ 0xFFFFFFFF, NULL);
+}
-/* called from hw states files to find VS->FS mapping */
-int r600_find_vs_semantic_index(struct r600_context *rctx, struct r600_shader *rshader, int id)
+static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
- int i;
- struct r600_shader *vs = &rctx->vs_shader->shader;
- struct r600_shader_io *input = &rshader->input[id];
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_shader *rshader = &shader->shader;
+ void *ptr;
- for (i = 0; i < vs->noutput; i++) {
- if (input->name == vs->output[i].name &&
- input->sid == vs->output[i].sid) {
- return i - 1;
+ /* copy new shader */
+ if (shader->bo == NULL) {
+ shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0);
+ if (shader->bo == NULL) {
+ return -ENOMEM;
}
+ ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
+ memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
+ r600_bo_unmap(rctx->radeon, shader->bo);
}
+ /* build state */
+ rshader->flat_shade = rctx->flatshade;
+ switch (rshader->processor_type) {
+ case TGSI_PROCESSOR_VERTEX:
+ if (rshader->family >= CHIP_CEDAR) {
+ evergreen_pipe_shader_vs(ctx, shader);
+ } else {
+ r600_pipe_shader_vs(ctx, shader);
+ }
+ break;
+ case TGSI_PROCESSOR_FRAGMENT:
+ if (rshader->family >= CHIP_CEDAR) {
+ evergreen_pipe_shader_ps(ctx, shader);
+ } else {
+ r600_pipe_shader_ps(ctx, shader);
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
return 0;
}
-static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
+static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
{
- struct r600_context *rctx = r600_context(ctx);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_shader *shader = &rshader->shader;
const struct util_format_description *desc;
enum pipe_format resource_format[160];
unsigned i, nresources = 0;
@@ -92,9 +263,16 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad
if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
return 0;
+ /* doing a full memcmp fell over the refcount */
+ if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
+ (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) {
+ return 0;
+ }
+ rshader->vertex_elements = *rctx->vertex_elements;
for (i = 0; i < rctx->vertex_elements->count; i++) {
resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
}
+ r600_bo_reference(rctx->radeon, &rshader->bo, NULL);
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
switch (cf->inst) {
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -118,25 +296,40 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad
return r600_bc_build(&shader->bc);
}
-int r600_pipe_shader_create(struct pipe_context *ctx,
- struct r600_context_state *rpshader,
- const struct tgsi_token *tokens)
+int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ int r;
+
+ if (shader == NULL)
+ return -EINVAL;
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, shader->shader.bc.nresource);
+ return -EINVAL;
+ }
+ r = r600_shader_update(ctx, shader);
+ if (r)
+ return r;
+ return r600_pipe_shader(ctx, shader);
+}
+
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
int r;
//fprintf(stderr, "--------------------------------------------------------------\n");
//tgsi_dump(tokens, 0);
- if (rpshader == NULL)
- return -ENOMEM;
- rpshader->shader.family = radeon_get_family(rscreen->rw);
- rpshader->shader.use_mem_constant = rscreen->use_mem_constant;
- r = r600_shader_from_tgsi(tokens, &rpshader->shader);
+ shader->shader.family = r600_get_family(rctx->radeon);
+ r = r600_shader_from_tgsi(tokens, &shader->shader);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
}
- r = r600_bc_build(&rpshader->shader.bc);
+ r = r600_bc_build(&shader->shader.bc);
if (r) {
R600_ERR("building bytecode failed !\n");
return r;
@@ -145,81 +338,41 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
return 0;
}
-static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct radeon_state *state;
-
- state = &rpshader->rstate[0];
- radeon_state_fini(&rpshader->rstate[0]);
-
- return rctx->vtbl->vs_shader(rctx, rpshader, state);
-}
-
-static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct radeon_state *state;
-
- state = &rpshader->rstate[0];
- radeon_state_fini(state);
-
- return rctx->vtbl->ps_shader(rctx, rpshader, state);
-}
-
-static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
-{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_context *rctx = r600_context(ctx);
- struct r600_shader *rshader = &rpshader->shader;
- int r;
- void *data;
+/*
+ * tgsi -> r600 shader
+ */
+struct r600_shader_tgsi_instruction;
- /* copy new shader */
- radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL);
- rpshader->bo = NULL;
- rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4,
- 4096, 0);
- if (rpshader->bo == NULL) {
- return -ENOMEM;
- }
- data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, ctx);
- memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4);
- radeon_ws_bo_unmap(rscreen->rw, rpshader->bo);
- /* build state */
- rshader->flat_shade = rctx->flat_shade;
- switch (rshader->processor_type) {
- case TGSI_PROCESSOR_VERTEX:
- r = r600_pipe_shader_vs(ctx, rpshader);
- break;
- case TGSI_PROCESSOR_FRAGMENT:
- r = r600_pipe_shader_ps(ctx, rpshader);
- break;
- default:
- r = -EINVAL;
- break;
- }
- return r;
-}
+struct r600_shader_ctx {
+ struct tgsi_shader_info info;
+ struct tgsi_parse_context parse;
+ const struct tgsi_token *tokens;
+ unsigned type;
+ unsigned file_offset[TGSI_FILE_COUNT];
+ unsigned temp_reg;
+ struct r600_shader_tgsi_instruction *inst_info;
+ struct r600_bc *bc;
+ struct r600_shader *shader;
+ u32 value[4];
+ u32 *literals;
+ u32 nliterals;
+ u32 max_driver_temp_used;
+ /* needed for evergreen interpolation */
+ boolean input_centroid;
+ boolean input_linear;
+ boolean input_perspective;
+ int num_interp_gpr;
+};
-int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
-{
- struct r600_context *rctx = r600_context(ctx);
- int r;
+struct r600_shader_tgsi_instruction {
+ unsigned tgsi_opcode;
+ unsigned is_op3;
+ unsigned r600_opcode;
+ int (*process)(struct r600_shader_ctx *ctx);
+};
- if (rpshader == NULL)
- return -EINVAL;
- /* there should be enough input */
- if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rpshader->shader.bc.nresource);
- return -EINVAL;
- }
- r = r600_shader_update(ctx, &rpshader->shader);
- if (r)
- return r;
- return r600_pipe_shader(ctx, rpshader);
-}
+static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
+static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
@@ -241,11 +394,9 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
}
#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
- if (i->Src[j].Register.Dimension ||
- i->Src[j].Register.Absolute) {
- R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
- i->Src[j].Register.Dimension,
- i->Src[j].Register.Absolute);
+ if (i->Src[j].Register.Dimension) {
+ R600_ERR("unsupported src %d (dimension %d)\n", j,
+ i->Src[j].Register.Dimension);
return -EINVAL;
}
}
@@ -258,10 +409,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
return 0;
}
-static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
+static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
int i, r;
struct r600_bc_alu alu;
+ int gpr = 0, base_chan = 0;
+ int ij_index = 0;
+
+ if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
+ ij_index = 0;
+ if (ctx->shader->input[input].centroid)
+ ij_index++;
+ } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
+ ij_index = 0;
+ /* if we have perspective add one */
+ if (ctx->input_perspective) {
+ ij_index++;
+ /* if we have perspective centroid */
+ if (ctx->input_centroid)
+ ij_index++;
+ }
+ if (ctx->shader->input[input].centroid)
+ ij_index++;
+ }
+
+ /* work out gpr and base_chan from index */
+ gpr = ij_index / 2;
+ base_chan = (2 * (ij_index % 2)) + 1;
for (i = 0; i < 8; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -272,13 +446,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
if ((i > 1) && (i < 6)) {
- alu.dst.sel = ctx->shader->input[gpr].gpr;
+ alu.dst.sel = ctx->shader->input[input].gpr;
alu.dst.write = 1;
}
alu.dst.chan = i % 4;
- alu.src[0].chan = (1 - (i % 2));
- alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr;
+
+ alu.src[0].sel = gpr;
+ alu.src[0].chan = (base_chan - (i % 2));
+
+ alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
alu.bank_swizzle_force = SQ_ALU_VEC_210;
if ((i % 4) == 3)
@@ -304,6 +481,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->input[i].name = d->Semantic.Name;
ctx->shader->input[i].sid = d->Semantic.Index;
ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
+ ctx->shader->input[i].centroid = d->Declaration.Centroid;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
/* turn input into fetch */
@@ -320,13 +498,19 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
vtx.dst_sel_y = 1;
vtx.dst_sel_z = 2;
vtx.dst_sel_w = 3;
+ vtx.use_const_fields = 1;
r = r600_bc_add_vtx(ctx->bc, &vtx);
if (r)
return r;
}
if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
/* turn input into interpolate on EG */
- evergreen_interp_alu(ctx, i);
+ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
+ if (ctx->shader->input[i].interpolate > 0) {
+ ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
+ evergreen_interp_alu(ctx, i);
+ }
+ }
}
break;
case TGSI_FILE_OUTPUT:
@@ -353,6 +537,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx)
return ctx->temp_reg + ctx->max_driver_temp_used++;
}
+/*
+ * On evergreen, scan the shader inputs to work out how many GPRs have to be
+ * reserved for interpolation; the caller uses the result as the input file offset.
+ *
+ * Records in ctx whether any input other than position/face is
+ * centroid, linear or perspective interpolated, and
+ * stores the resulting count in ctx->num_interp_gpr, which is also returned.
+*/
+static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
+{
+ int i;
+ int num_baryc;
+
+ ctx->input_linear = FALSE;
+ ctx->input_perspective = FALSE;
+ ctx->input_centroid = FALSE;
+ ctx->num_interp_gpr = 1; /* NOTE(review): one GPR is always counted; the reason is not visible in this function */
+
+ /* classify the interpolation modes used by the inputs */
+ for (i = 0; i < ctx->info.num_inputs; i++) {
+ /* skip position/face */
+ if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
+ ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
+ continue;
+ if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
+ ctx->input_linear = TRUE;
+ if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
+ ctx->input_perspective = TRUE;
+ if (ctx->info.input_centroid[i])
+ ctx->input_centroid = TRUE;
+ }
+
+ num_baryc = 0;
+ /* ignoring sample for now */
+ if (ctx->input_perspective)
+ num_baryc++;
+ if (ctx->input_linear)
+ num_baryc++;
+ if (ctx->input_centroid)
+ num_baryc *= 2; /* any centroid input doubles the count, for both modes */
+
+ ctx->num_interp_gpr += (num_baryc + 1) >> 1; /* two entries fit in one GPR, rounded up */
+
+ /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
+ return ctx->num_interp_gpr;
+}
+
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
@@ -367,7 +598,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = r600_bc_init(ctx.bc, shader->family);
if (r)
return r;
- ctx.bc->use_mem_constant = shader->use_mem_constant;
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
tgsi_parse_init(&ctx.parse, tokens);
@@ -399,14 +629,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
ctx.file_offset[TGSI_FILE_INPUT] = 1;
}
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) {
+ ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
+ }
ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
ctx.info.file_count[TGSI_FILE_INPUT];
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_count[TGSI_FILE_OUTPUT];
- if (ctx.shader->use_mem_constant)
- ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
- else
- ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
+
+ ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
@@ -497,7 +728,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
output[i].array_base = 61;
output[i].swizzle_x = 2;
- output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
+ output[i].swizzle_y = 7;
+ output[i].swizzle_z = output[i].swizzle_w = 7;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
+ output[i].array_base = 61;
+ output[i].swizzle_x = 7;
+ output[i].swizzle_y = 1;
+ output[i].swizzle_z = output[i].swizzle_w = 7;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
@@ -603,6 +841,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
if (tgsi_src->Register.Indirect)
r600_src->rel = V_SQ_REL_RELATIVE;
r600_src->neg = tgsi_src->Register.Negate;
+ r600_src->abs = tgsi_src->Register.Absolute;
r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
return 0;
}
@@ -658,13 +897,14 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
}
}
for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
+ if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
int treg = r600_get_temp(ctx);
for (k = 0; k < 4; k++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[j].sel;
+ alu.src[0].sel = r600_src[i].sel;
alu.src[0].chan = k;
+ alu.src[0].rel = r600_src[i].rel;
alu.dst.sel = treg;
alu.dst.chan = k;
alu.dst.write = 1;
@@ -674,7 +914,8 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
if (r)
return r;
}
- r600_src[j].sel = treg;
+ r600_src[i].sel = treg;
+ r600_src[i].rel =0;
j--;
}
}
@@ -693,13 +934,13 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
nliteral++;
}
}
- for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
+ for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
int treg = r600_get_temp(ctx);
for (k = 0; k < 4; k++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[j].sel;
+ alu.src[0].sel = r600_src[i].sel;
alu.src[0].chan = k;
alu.dst.sel = treg;
alu.dst.chan = k;
@@ -710,11 +951,11 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
+ r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
if (r)
return r;
- r600_src[j].sel = treg;
- j++;
+ r600_src[i].sel = treg;
+ j--;
}
}
return 0;
@@ -737,6 +978,9 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -807,6 +1051,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
r = tgsi_split_literal_constant(ctx, r600_src);
if (r)
@@ -1230,34 +1477,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
return tgsi_helper_tempx_replicate(ctx);
}
-static int tgsi_trans(struct r600_shader_ctx *ctx)
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, r;
-
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- if (inst->Dst[0].Register.WriteMask & (1 << i)) {
- alu.inst = ctx->inst_info->r600_opcode;
- for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
- if (r)
- return r;
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
- }
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- }
- return 0;
-}
-
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -1374,6 +1593,9 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
/* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
@@ -1470,6 +1692,9 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
/* do it in 2 step as op3 doesn't support writemask */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1502,6 +1727,9 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
@@ -1575,7 +1803,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
+
for (i = 0; i < 3; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -1728,7 +1956,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = src_gpr;
- alu.src[0].chan = i;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1748,14 +1976,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
- tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
- tex.sampler_id = tex.resource_id;
+ tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
+ tex.resource_id = tex.sampler_id;
+ if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX)
+ tex.resource_id += PIPE_MAX_ATTRIBS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
- tex.dst_sel_x = 0;
- tex.dst_sel_y = 1;
- tex.dst_sel_z = 2;
- tex.dst_sel_w = 3;
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
tex.src_sel_x = 0;
tex.src_sel_y = 1;
tex.src_sel_z = 2;
@@ -1798,6 +2028,9 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
/* 1 - src0 */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1877,6 +2110,9 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
if (inst->Dst[0].Register.WriteMask != 0xf)
use_temp = 1;
@@ -1928,7 +2164,10 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
-
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
+
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -2371,7 +2610,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
}
/* r6/7 only for now */
-static int tgsi_arl(struct r600_shader_ctx *ctx)
+/* Evergreen ARL: FLT_TO_INT_FLOOR the first channel of Src[0] into temp_reg.x, then MOVA_INT that temp.
+ * Returns 0, or the first non-zero result from tgsi_src()/r600_bc_add_alu_type(). */
+static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.last = 1;
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ if (r)
+ return r;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+ /* operand is the integer temp: calling tgsi_src() again would leak Src[0]'s rel/neg/abs onto it */
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ if (r)
+ return r;
+ return 0;
+}
+static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
/* TODO from r600c, ar values don't persist between clauses */
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -2716,7 +2988,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
}
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
+ {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
@@ -2797,7 +3069,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
- {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
/* gap */
@@ -2880,7 +3152,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
};
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
{TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
{TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
@@ -2955,7 +3227,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
- {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
/* gap */
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 06dd65038d..f8bc595139 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -31,6 +31,8 @@ struct r600_shader_io {
unsigned done;
int sid;
unsigned interpolate;
+ boolean centroid;
+ unsigned lds_pos; /* for evergreen */
};
struct r600_shader {
@@ -39,11 +41,11 @@ struct r600_shader {
boolean flat_shade;
unsigned ninput;
unsigned noutput;
+ unsigned nlds;
struct r600_shader_io input[32];
struct r600_shader_io output[32];
enum radeon_family family;
boolean uses_kill;
- boolean use_mem_constant;
};
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 86f9825b52..81d25b5420 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -19,202 +19,767 @@
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
+ */
+
+/* TODO:
+ * - fix mask for depth control & cull for query
*/
#include <stdio.h>
#include <errno.h>
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "r600_screen.h"
-#include "r600_context.h"
+#include <pipe/p_defines.h>
+#include <pipe/p_state.h>
+#include <pipe/p_context.h>
+#include <tgsi/tgsi_scan.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_util.h>
+#include <util/u_double_list.h>
+#include <util/u_pack_color.h>
+#include <util/u_memory.h>
+#include <util/u_inlines.h>
+#include <util/u_upload_mgr.h>
+#include <util/u_index_modify.h>
+#include <util/u_framebuffer.h>
+#include <pipebuffer/pb_buffer.h>
+#include "r600.h"
+#include "r600d.h"
#include "r600_resource.h"
+#include "r600_shader.h"
+#include "r600_pipe.h"
+#include "r600_state_inlines.h"
+
+static void r600_draw_common(struct r600_drawl *draw)
+{ /* Emits the per-element vertex resources and the late VGT / polygon-offset state, then calls r600_context_draw() */
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
+ struct r600_pipe_state *rstate;
+ struct r600_resource *rbuffer;
+ unsigned i, j, offset, prim;
+ u32 vgt_dma_index_type, vgt_draw_initiator, mask;
+ struct pipe_vertex_buffer *vertex_buffer;
+ struct r600_draw rdraw;
+ struct r600_pipe_state vgt;
+
+ switch (draw->index_size) { /* index size in bytes; 0 is what r600_draw_vbo() passes for a non-indexed draw */
+ case 2:
+ vgt_draw_initiator = 0;
+ vgt_dma_index_type = 0;
+ break;
+ case 4:
+ vgt_draw_initiator = 0;
+ vgt_dma_index_type = 1;
+ break;
+ case 0:
+ vgt_draw_initiator = 2;
+ vgt_dma_index_type = 0;
+ break;
+ default:
+ R600_ERR("unsupported index size %d\n", draw->index_size);
+ return;
+ }
+ if (r600_conv_pipe_prim(draw->mode, &prim))
+ return; /* non-zero result: primitive type not translated, draw is dropped */
+
+
+ /* rebuild vertex shader if input format changed */
+ if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader))
+ return;
+ if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
+ return;
+
+ for (i = 0 ; i < rctx->vertex_elements->count; i++) { /* one fetch resource per vertex element */
+ uint32_t word2, format;
+
+ rstate = &rctx->vs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0; /* restart the register list for this resource slot */
+
+ j = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vertex_buffer[j];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = rctx->vertex_elements->elements[i].src_offset +
+ vertex_buffer->buffer_offset +
+ r600_bo_offset(rbuffer->bo);
+
+ format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format);
+
+ word2 = format | S_038008_STRIDE(vertex_buffer->stride);
+
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i);
+ }
-static void clean_flush(struct r600_context *rctx, struct radeon_state *flush);
-static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush);
-static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush);
+ mask = 0;
+ for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { /* this i shadows the unsigned i declared above */
+ mask |= (0xF << (i * 4)); /* four mask bits per bound colour buffer */
+ }
+
+ vgt.id = R600_PIPE_STATE_VGT;
+ vgt.nregs = 0;
+ r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); /* blend-state mask limited to the bound colour buffers */
+ r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
+ /* build late state */
+ if (rctx->rasterizer && rctx->framebuffer.zsbuf) { /* polygon offset depends on both the rasterizer and the depth format */
+ float offset_units = rctx->rasterizer->offset_units;
+ unsigned offset_db_fmt_cntl = 0, depth;
+
+ switch (rctx->framebuffer.zsbuf->texture->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ depth = -24; /* negative value held in an unsigned; consumed by S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS() below */
+ offset_units *= 2.0f;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ depth = -23;
+ offset_units *= 1.0f;
+ offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ depth = -16;
+ offset_units *= 4.0f;
+ break;
+ default:
+ return; /* any other depth format: the draw is dropped without an error message */
+ }
+ offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
+ r600_pipe_state_add_reg(&vgt,
+ R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt,
+ R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt,
+ R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt,
+ R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt,
+ R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &vgt);
+
+ rdraw.vgt_num_indices = draw->count;
+ rdraw.vgt_num_instances = 1;
+ rdraw.vgt_index_type = vgt_dma_index_type;
+ rdraw.vgt_draw_initiator = vgt_draw_initiator;
+ rdraw.indices = NULL; /* NOTE(review): rdraw.indices_bo_offset stays unset on the non-indexed path */
+ if (draw->index_buffer) {
+ rbuffer = (struct r600_resource*)draw->index_buffer;
+ rdraw.indices = rbuffer->bo;
+ rdraw.indices_bo_offset = draw->index_buffer_offset;
+ }
+ r600_context_draw(&rctx->ctx, &rdraw);
+}
+
+void r600_translate_index_buffer(struct r600_pipe_context *r600,
+ struct pipe_resource **index_buffer,
+ unsigned *index_size,
+ unsigned *start, unsigned count)
+{ /* Adjusts *index_buffer, *index_size and *start in place for index sizes 1 and 2; other sizes are left untouched */
+ switch (*index_size) {
+ case 1:
+ util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); /* 1-byte indices are converted; the result is treated as 2-byte below */
+ *index_size = 2;
+ *start = 0; /* the converted range is addressed from its first element */
+ break;
+
+ case 2:
+ if (*start % 2 != 0) { /* only an odd start index triggers a rebuild */
+ util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count);
+ *start = 0;
+ }
+ break;
-static struct r600_context_state *r600_new_context_state(unsigned type)
+ case 4:
+ break; /* 4-byte indices are used as they are */
+ }
+}
+
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) /* fills a struct r600_drawl from info and passes it to r600_draw_common() */
{
- struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_drawl draw;
+
+ if (rctx->any_user_vbs) {
+ r600_upload_user_buffers(rctx);
+ rctx->any_user_vbs = FALSE; /* flag is cleared once the upload helper has run */
+ }
+
+ memset(&draw, 0, sizeof(struct r600_drawl));
+ draw.ctx = ctx;
+ draw.mode = info->mode;
+ draw.start = info->start;
+ draw.count = info->count;
+ if (info->indexed && rctx->index_buffer.buffer) {
+ draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; /* byte offset of the binding expressed in indices */
+ draw.min_index = info->min_index;
+ draw.max_index = info->max_index;
+ draw.index_bias = info->index_bias;
+
+ r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
+ &rctx->index_buffer.index_size,
+ &draw.start,
+ info->count); /* may rewrite the buffer pointer and index_size stored in rctx, and draw.start */
+
+ draw.index_size = rctx->index_buffer.index_size;
+ pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
+ draw.index_buffer_offset = draw.start * draw.index_size; /* start index converted to a byte offset */
+ draw.start = 0;
+ r600_upload_index_buffer(rctx, &draw);
+ } else {
+ draw.index_size = 0; /* 0 selects the non-indexed case in r600_draw_common() */
+ draw.index_buffer = NULL;
+ draw.min_index = info->min_index;
+ draw.max_index = info->max_index;
+ draw.index_bias = info->start; /* non-indexed: the first vertex is passed through index_bias */
+ }
+ r600_draw_common(&draw);
+
+ pipe_resource_reference(&draw.index_buffer, NULL); /* releases the reference taken on the indexed path */
+}
+
+static void r600_set_blend_color(struct pipe_context *ctx,
+ const struct pipe_blend_color *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ /* Builds a new CB_BLEND_{RED,GREEN,BLUE,ALPHA} state from *state, swaps it into rctx->states[] and sets it on the context */
if (rstate == NULL)
- return NULL;
- rstate->type = type;
- rstate->refcount = 1;
- return rstate;
+ return;
+
+ rstate->id = R600_PIPE_STATE_BLEND_COLOR;
+ r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
+ FREE(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); /* allocated with CALLOC_STRUCT, so release through the matching u_memory.h macro */
+ rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
static void *r600_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
+ struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend);
+ struct r600_pipe_state *rstate;
+ u32 color_control, target_mask;
- rstate = r600_new_context_state(pipe_blend_type);
- rstate->state.blend = *state;
- rctx->vtbl->blend(rctx, &rstate->rstate[0], &rstate->state.blend);
-
+ if (blend == NULL) {
+ return NULL;
+ }
+ rstate = &blend->rstate; /* returned below; r600_bind_blend_state() casts it back to r600_pipe_blend, which assumes rstate is the first member -- TODO confirm */
+ /* Translate *state into CB_COLOR_CONTROL, a per-target colour write mask and per-target blend control registers */
+ rstate->id = R600_PIPE_STATE_BLEND;
+
+ target_mask = 0;
+ color_control = S_028808_PER_MRT_BLEND(1);
+ if (state->logicop_enable) {
+ color_control |= (state->logicop_func << 16) | (state->logicop_func << 20);
+ } else {
+ color_control |= (0xcc << 16);
+ }
+ /* we pretend 8 buffers are used, CB_SHADER_MASK will disable the unused ones */
+ if (state->independent_blend_enable) {
+ for (int i = 0; i < 8; i++) {
+ if (state->rt[i].blend_enable) {
+ color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ }
+ target_mask |= (state->rt[i].colormask << (4 * i));
+ }
+ } else {
+ for (int i = 0; i < 8; i++) {
+ if (state->rt[0].blend_enable) {
+ color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ }
+ target_mask |= (state->rt[0].colormask << (4 * i));
+ }
+ }
+ blend->cb_target_mask = target_mask; /* copied into rctx->cb_target_mask when the state is bound */
+ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
+ color_control, 0xFFFFFFFF, NULL);
+
+ for (int i = 0; i < 8; i++) {
+ /* without independent blending every target takes its settings from rt[0], as in the loops above */
+ const int j = state->independent_blend_enable ? i : 0;
+ unsigned eqRGB = state->rt[j].rgb_func;
+ unsigned srcRGB = state->rt[j].rgb_src_factor;
+ unsigned dstRGB = state->rt[j].rgb_dst_factor;
+ unsigned eqA = state->rt[j].alpha_func;
+ unsigned srcA = state->rt[j].alpha_src_factor;
+ unsigned dstA = state->rt[j].alpha_dst_factor;
+ uint32_t bc = 0;
+
+ if (!state->rt[j].blend_enable)
+ continue;
+
+ bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB));
+ bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB));
+ bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB));
+
+ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
+ bc |= S_028804_SEPARATE_ALPHA_BLEND(1);
+ bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA));
+ bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA));
+ bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
+ }
+
+ r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
+ if (i == 0) {
+ r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL);
+ }
+ }
return rstate;
}
+static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
+{ /* Records the blend object in rctx->states[], copies its colour write mask into rctx and passes its register state to r600_context_pipe_state_set() */
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; /* state is the pointer r600_create_blend_state() returned (&blend->rstate) -- assumes rstate is the first member; TODO confirm */
+ struct r600_pipe_state *rstate;
+
+ if (state == NULL)
+ return; /* binding NULL leaves the previously bound blend state and mask untouched */
+ rstate = &blend->rstate;
+ rctx->states[rstate->id] = rstate;
+ rctx->cb_target_mask = blend->cb_target_mask; /* later ANDed with the bound-buffer mask in r600_draw_common() */
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
+}
+
static void *r600_create_dsa_state(struct pipe_context *ctx,
const struct pipe_depth_stencil_alpha_state *state)
{
- struct r600_context_state *rstate;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
+ unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control;
+
+ if (rstate == NULL) {
+ return NULL;
+ }
+
+ rstate->id = R600_PIPE_STATE_DSA;
+ /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
+ /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be
+ * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will
+ * be set if shader use texkill instruction
+ */
+ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+ stencil_ref_mask = 0;
+ stencil_ref_mask_bf = 0;
+ db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
+ S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
+ S_028800_ZFUNC(state->depth.func);
+
+ /* stencil */
+ if (state->stencil[0].enabled) {
+ db_depth_control |= S_028800_STENCIL_ENABLE(1);
+ db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func));
+ db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op));
+ db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op));
+ db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op));
+
+
+ stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) |
+ S_028430_STENCILWRITEMASK(state->stencil[0].writemask);
+ if (state->stencil[1].enabled) {
+ db_depth_control |= S_028800_BACKFACE_ENABLE(1);
+ db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func));
+ db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op));
+ db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op));
+ db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op));
+ stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) |
+ S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask);
+ }
+ }
+
+ /* alpha */
+ alpha_test_control = 0;
+ alpha_ref = 0;
+ if (state->alpha.enabled) {
+ alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
+ alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
+ alpha_ref = fui(state->alpha.ref_value);
+ }
+
+ /* misc */
+ db_render_control = 0;
+ db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
+ S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
+ S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
+ /* TODO db_render_override depends on query */
+ r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028430_DB_STENCILREFMASK, stencil_ref_mask,
+ 0xFFFFFFFF & C_028430_STENCILREF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
+ 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
+ r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
- rstate = r600_new_context_state(pipe_dsa_type);
- rstate->state.dsa = *state;
return rstate;
}
static void *r600_create_rs_state(struct pipe_context *ctx,
const struct pipe_rasterizer_state *state)
{
- struct r600_context_state *rstate;
+ struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
+ struct r600_pipe_state *rstate;
+ unsigned tmp;
+ unsigned prov_vtx = 1, polygon_dual_mode;
+ unsigned clip_rule;
+
+ if (rs == NULL) {
+ return NULL;
+ }
+
+ rstate = &rs->rstate;
+ rs->flatshade = state->flatshade;
+ rs->sprite_coord_enable = state->sprite_coord_enable;
+
+ clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
+ /* offset */
+ rs->offset_units = state->offset_units;
+ rs->offset_scale = state->offset_scale * 12.0f;
+
+ rstate->id = R600_PIPE_STATE_RASTERIZER;
+ if (state->flatshade_first)
+ prov_vtx = 0;
+ tmp = 0x00000001;
+ if (state->sprite_coord_enable) {
+ tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(2) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(3) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(1);
+ if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
+ tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
+ }
+ }
+ r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
+
+ polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
+ state->fill_back != PIPE_POLYGON_MODE_FILL);
+ r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL,
+ S_028814_PROVOKING_VTX_LAST(prov_vtx) |
+ S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
+ S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
+ S_028814_FACE(!state->front_ccw) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
+ S_028814_POLY_MODE(polygon_dual_mode) |
+ S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
+ S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL,
+ S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ /* point size 12.4 fixed point */
+ tmp = (unsigned)(state->point_size * 8.0);
+ r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
+
+ tmp = (unsigned)(state->line_width * 8.0);
+ r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
- rstate = r600_new_context_state(pipe_rasterizer_type);
- rstate->state.rasterizer = *state;
return rstate;
}
+static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ if (state == NULL)
+ return;
+
+ rctx->flatshade = rs->flatshade;
+ rctx->sprite_coord_enable = rs->sprite_coord_enable;
+ rctx->rasterizer = rs;
+
+ rctx->states[rs->rstate.id] = &rs->rstate;
+ r600_context_pipe_state_set(&rctx->ctx, &rs->rstate);
+}
+
+static void r600_delete_rs_state(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
+
+ if (rctx->rasterizer == rs) {
+ rctx->rasterizer = NULL;
+ }
+ if (rctx->states[rs->rstate.id] == &rs->rstate) {
+ rctx->states[rs->rstate.id] = NULL;
+ }
+ free(rs);
+}
+
static void *r600_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ union util_color uc;
+
+ if (rstate == NULL) {
+ return NULL;
+ }
- rstate = r600_new_context_state(pipe_sampler_type);
- rstate->state.sampler = *state;
- rctx->vtbl->sampler(rctx, &rstate->rstate[0], &rstate->state.sampler, 0);
- rctx->vtbl->sampler_border(rctx, &rstate->rstate[1], &rstate->state.sampler, 0);
+ rstate->id = R600_PIPE_STATE_SAMPLER;
+ util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
+ S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
+ S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
+ S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
+ S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) |
+ S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) |
+ S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
+ S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
+ S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
+ /* FIXME LOD it depends on texture base level ... */
+ r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
+ S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
+ S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
+ S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
+ if (uc.ui) {
+ r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
+ }
return rstate;
}
-static void r600_remove_sampler_view(struct r600_shader_sampler_states *sampler,
- struct r600_context_state *rstate)
+static void *r600_create_vertex_elements(struct pipe_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_element *elements)
{
- int i, j;
-
- for (i = 0; i < sampler->nview; i++) {
- for (j = 0; j < rstate->nrstate; j++) {
- if (sampler->view[i] == &rstate->rstate[j])
- sampler->view[i] = NULL;
- }
- }
+ struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
+
+ assert(count < 32);
+ v->count = count;
+ v->refcount = 1;
+ memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
+ return v;
}
+
static void r600_sampler_view_destroy(struct pipe_context *ctx,
struct pipe_sampler_view *state)
{
- struct r600_context_state *rstate = (struct r600_context_state *)state;
- struct r600_context *rctx = r600_context(ctx);
+ struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
- /* need to search list of vs/ps sampler views and remove it from any - uggh */
- r600_remove_sampler_view(&rctx->ps_sampler, rstate);
- r600_remove_sampler_view(&rctx->vs_sampler, rstate);
- r600_context_state_decref(rstate);
+ pipe_resource_reference(&state->texture, NULL);
+ FREE(resource);
}
static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx,
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
{
- struct r600_context_state *rstate;
- struct r600_context *rctx = r600_context(ctx);
+ struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
+ struct r600_pipe_state *rstate;
+ const struct util_format_description *desc;
+ struct r600_resource_texture *tmp;
+ struct r600_resource *rbuffer;
+ unsigned format;
+ uint32_t word4 = 0, yuv_format = 0, pitch = 0;
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ struct r600_bo *bo[2];
- rstate = r600_new_context_state(pipe_sampler_view_type);
- rstate->state.sampler_view = *state;
- rstate->state.sampler_view.texture = NULL;
+ if (resource == NULL)
+ return NULL;
+ rstate = &resource->state;
+
+ /* initialize base object */
+ resource->base = *state;
+ resource->base.texture = NULL;
pipe_reference(NULL, &texture->reference);
- rstate->state.sampler_view.texture = texture;
- rstate->state.sampler_view.reference.count = 1;
- rstate->state.sampler_view.context = ctx;
- rctx->vtbl->resource(ctx, &rstate->rstate[0], &rstate->state.sampler_view, 0);
- return &rstate->state.sampler_view;
+ resource->base.texture = texture;
+ resource->base.reference.count = 1;
+ resource->base.context = ctx;
+
+ swizzle[0] = state->swizzle_r;
+ swizzle[1] = state->swizzle_g;
+ swizzle[2] = state->swizzle_b;
+ swizzle[3] = state->swizzle_a;
+ format = r600_translate_texformat(state->format,
+ swizzle,
+ &word4, &yuv_format);
+ if (format == ~0) {
+ format = 0;
+ }
+ desc = util_format_description(state->format);
+ if (desc == NULL) {
+ R600_ERR("unknow format %d\n", state->format);
+ }
+ tmp = (struct r600_resource_texture*)texture;
+ rbuffer = &tmp->resource;
+ bo[0] = rbuffer->bo;
+ bo[1] = rbuffer->bo;
+ /* FIXME depth texture decompression */
+ if (tmp->depth) {
+ r600_texture_depth_flush(ctx, texture);
+ tmp = (struct r600_resource_texture*)texture;
+ rbuffer = &tmp->flushed_depth_texture->resource;
+ bo[0] = rbuffer->bo;
+ bo[1] = rbuffer->bo;
+ }
+ pitch = align(tmp->pitch_in_pixels[0], 8);
+ if (tmp->tiled) {
+ array_mode = tmp->array_mode;
+ tile_type = tmp->tile_type;
+ }
+
+ /* FIXME properly handle first level != 0 */
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+ S_038000_DIM(r600_tex_dim(texture->target)) |
+ S_038000_TILE_MODE(array_mode) |
+ S_038000_TILE_TYPE(tile_type) |
+ S_038000_PITCH((pitch / 8) - 1) |
+ S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+ S_038004_TEX_HEIGHT(texture->height0 - 1) |
+ S_038004_TEX_DEPTH(texture->depth0 - 1) |
+ S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+ (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+ (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+ word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
+ S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
+ S_038010_REQUEST_SIZE(1) |
+ S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+ S_038014_LAST_LEVEL(state->last_level) |
+ S_038014_BASE_ARRAY(0) |
+ S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+ S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
+
+ return &resource->base;
}
-static void r600_set_sampler_view(struct pipe_context *ctx,
- unsigned count,
- struct pipe_sampler_view **views,
- struct r600_shader_sampler_states *sampler,
- unsigned shader_id)
+static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
+ struct pipe_sampler_view **views)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
- unsigned i;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
- for (i = 0; i < sampler->nview; i++) {
- radeon_draw_unbind(&rctx->draw, sampler->view[i]);
+ for (int i = 0; i < count; i++) {
+ if (resource[i]) {
+ r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS);
+ }
}
+}
+
+static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
+ struct pipe_sampler_view **views)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
+ int i;
for (i = 0; i < count; i++) {
- rstate = (struct r600_context_state *)views[i];
- if (rstate) {
- rstate->nrstate = 0;
+ if (&rctx->ps_samplers.views[i]->base != views[i]) {
+ if (resource[i])
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ else
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
+ views[i]);
+
}
}
- for (i = 0; i < count; i++) {
- rstate = (struct r600_context_state *)views[i];
- if (rstate) {
- if (rstate->nrstate >= R600_MAX_RSTATE)
- continue;
- if (rstate->nrstate) {
- memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
- }
- radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, shader_id);
- sampler->view[i] = &rstate->rstate[rstate->nrstate];
- rstate->nrstate++;
+ for (i = count; i < NUM_TEX_UNITS; i++) {
+ if (rctx->ps_samplers.views[i]) {
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
- sampler->nview = count;
+ rctx->ps_samplers.n_views = count;
}
-static void r600_set_ps_sampler_view(struct pipe_context *ctx,
- unsigned count,
- struct pipe_sampler_view **views)
+static void r600_bind_state(struct pipe_context *ctx, void *state)
{
- struct r600_context *rctx = r600_context(ctx);
- r600_set_sampler_view(ctx, count, views, &rctx->ps_sampler, R600_SHADER_PS);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
+
+ if (state == NULL)
+ return;
+ rctx->states[rstate->id] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void r600_set_vs_sampler_view(struct pipe_context *ctx,
- unsigned count,
- struct pipe_sampler_view **views)
+static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states)
{
- struct r600_context *rctx = r600_context(ctx);
- r600_set_sampler_view(ctx, count, views, &rctx->vs_sampler, R600_SHADER_VS);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
+
+ memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
+ rctx->ps_samplers.n_samplers = count;
+
+ for (int i = 0; i < count; i++) {
+ r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i);
+ }
}
-static void *r600_create_shader_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
+static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
- int r;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
- rstate = r600_new_context_state(pipe_shader_type);
- rstate->state.shader = *state;
- r = r600_pipe_shader_create(&rctx->context, rstate, rstate->state.shader.tokens);
- if (r) {
- r600_context_state_decref(rstate);
- return NULL;
+ for (int i = 0; i < count; i++) {
+ r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i);
}
- return rstate;
}
-static void *r600_create_vertex_elements(struct pipe_context *ctx,
- unsigned count,
- const struct pipe_vertex_element *elements)
+static void r600_delete_state(struct pipe_context *ctx, void *state)
{
- struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
- assert(count < 32);
- v->count = count;
- memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
- v->refcount = 1;
- return v;
+ if (rctx->states[rstate->id] == rstate) {
+ rctx->states[rstate->id] = NULL;
+ }
+ for (int i = 0; i < rstate->nregs; i++) {
+ r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
+ }
+ free(rstate);
}
static void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
@@ -228,279 +793,475 @@ static void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
free(v);
}
+static void r600_set_clip_state(struct pipe_context *ctx,
+ const struct pipe_clip_state *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+
+ if (rstate == NULL)
+ return;
+
+ rctx->clip = *state;
+ rstate->id = R600_PIPE_STATE_CLIP;
+ for (int i = 0; i < state->nr; i++) {
+ r600_pipe_state_add_reg(rstate,
+ R_028E20_PA_CL_UCP0_X + i * 4,
+ fui(state->ucp[i][0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028E24_PA_CL_UCP0_Y + i * 4,
+ fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028E28_PA_CL_UCP0_Z + i * 4,
+ fui(state->ucp[i][2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028E2C_PA_CL_UCP0_W + i * 4,
+ fui(state->ucp[i][3]), 0xFFFFFFFF, NULL);
+ }
+ r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
+ S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) |
+ S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) |
+ S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL);
+
+ free(rctx->states[R600_PIPE_STATE_CLIP]);
+ rctx->states[R600_PIPE_STATE_CLIP] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
+}
+
static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
- struct r600_context *rctx = r600_context(ctx);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = (struct r600_vertex_element*)state;
r600_delete_vertex_element(ctx, rctx->vertex_elements);
rctx->vertex_elements = v;
if (v) {
v->refcount++;
+// rctx->vs_rebuild = TRUE;
}
}
-static void r600_bind_rasterizer_state(struct pipe_context *ctx, void *state)
+static void r600_set_polygon_stipple(struct pipe_context *ctx,
+ const struct pipe_poly_stipple *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate = (struct r600_context_state *)state;
-
- if (state == NULL)
- return;
- rctx->rasterizer = r600_context_state_decref(rctx->rasterizer);
- rctx->rasterizer = r600_context_state_incref(rstate);
}
-static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
+static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate = (struct r600_context_state *)state;
-
- if (state == NULL)
- return;
- rctx->blend = r600_context_state_decref(rctx->blend);
- rctx->blend = r600_context_state_incref(rstate);
-
}
-static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
+static void r600_set_scissor_state(struct pipe_context *ctx,
+ const struct pipe_scissor_state *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate = (struct r600_context_state *)state;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ u32 tl, br;
- if (state == NULL)
+ if (rstate == NULL)
return;
- rctx->dsa = r600_context_state_decref(rctx->dsa);
- rctx->dsa = r600_context_state_incref(rstate);
-}
-
-static void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate = (struct r600_context_state *)state;
-
- rctx->ps_shader = r600_context_state_decref(rctx->ps_shader);
- rctx->ps_shader = r600_context_state_incref(rstate);
-}
-
-static void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate = (struct r600_context_state *)state;
- rctx->vs_shader = r600_context_state_decref(rctx->vs_shader);
- rctx->vs_shader = r600_context_state_incref(rstate);
+ rstate->id = R600_PIPE_STATE_SCISSOR;
+ tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
+ br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
+ r600_pipe_state_add_reg(rstate,
+ R_028210_PA_SC_CLIPRECT_0_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028214_PA_SC_CLIPRECT_0_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028218_PA_SC_CLIPRECT_1_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_02821C_PA_SC_CLIPRECT_1_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028220_PA_SC_CLIPRECT_2_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028224_PA_SC_CLIPRECT_2_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028228_PA_SC_CLIPRECT_3_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_02822C_PA_SC_CLIPRECT_3_BR, br,
+ 0xFFFFFFFF, NULL);
+
+ free(rctx->states[R600_PIPE_STATE_SCISSOR]);
+ rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void r600_bind_sampler_shader(struct pipe_context *ctx,
- unsigned count, void **states,
- struct r600_shader_sampler_states *sampler, unsigned shader_id)
+static void r600_set_stencil_ref(struct pipe_context *ctx,
+ const struct pipe_stencil_ref *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
- unsigned i;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ u32 tmp;
- for (i = 0; i < sampler->nsampler; i++) {
- radeon_draw_unbind(&rctx->draw, sampler->sampler[i]);
- }
- for (i = 0; i < sampler->nborder; i++) {
- radeon_draw_unbind(&rctx->draw, sampler->border[i]);
- }
- for (i = 0; i < count; i++) {
- rstate = (struct r600_context_state *)states[i];
- if (rstate) {
- rstate->nrstate = 0;
- }
- }
- for (i = 0; i < count; i++) {
- rstate = (struct r600_context_state *)states[i];
- if (rstate) {
- if (rstate->nrstate >= R600_MAX_RSTATE)
- continue;
- if (rstate->nrstate) {
- memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
- memcpy(&rstate->rstate[rstate->nrstate+1], &rstate->rstate[1], sizeof(struct radeon_state));
- }
- radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, shader_id);
- radeon_state_convert(&rstate->rstate[rstate->nrstate + 1], R600_STATE_SAMPLER_BORDER, i, shader_id);
- sampler->sampler[i] = &rstate->rstate[rstate->nrstate];
- sampler->border[i] = &rstate->rstate[rstate->nrstate + 1];
- rstate->nrstate += 2;
- }
- }
- sampler->nsampler = count;
- sampler->nborder = count;
-}
+ if (rstate == NULL)
+ return;
-static void r600_bind_ps_sampler(struct pipe_context *ctx,
- unsigned count, void **states)
-{
- struct r600_context *rctx = r600_context(ctx);
- r600_bind_sampler_shader(ctx, count, states, &rctx->ps_sampler, R600_SHADER_PS);
+ rctx->stencil_ref = *state;
+ rstate->id = R600_PIPE_STATE_STENCIL_REF;
+ tmp = S_028430_STENCILREF(state->ref_value[0]);
+ r600_pipe_state_add_reg(rstate,
+ R_028430_DB_STENCILREFMASK, tmp,
+ ~C_028430_STENCILREF, NULL);
+ tmp = S_028434_STENCILREF_BF(state->ref_value[1]);
+ r600_pipe_state_add_reg(rstate,
+ R_028434_DB_STENCILREFMASK_BF, tmp,
+ ~C_028434_STENCILREF_BF, NULL);
+
+ free(rctx->states[R600_PIPE_STATE_STENCIL_REF]);
+ rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void r600_bind_vs_sampler(struct pipe_context *ctx,
- unsigned count, void **states)
+static void r600_set_viewport_state(struct pipe_context *ctx,
+ const struct pipe_viewport_state *state)
{
- struct r600_context *rctx = r600_context(ctx);
- r600_bind_sampler_shader(ctx, count, states, &rctx->vs_sampler, R600_SHADER_VS);
-}
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
-static void r600_delete_state(struct pipe_context *ctx, void *state)
-{
- struct r600_context_state *rstate = (struct r600_context_state *)state;
+ if (rstate == NULL)
+ return;
- r600_context_state_decref(rstate);
+ rctx->viewport = *state;
+ rstate->id = R600_PIPE_STATE_VIEWPORT;
+ r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
+
+ free(rctx->states[R600_PIPE_STATE_VIEWPORT]);
+ rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void r600_set_blend_color(struct pipe_context *ctx,
- const struct pipe_blend_color *color)
+static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
+ const struct pipe_framebuffer_state *state, int cb)
{
- struct r600_context *rctx = r600_context(ctx);
-
- rctx->blend_color = *color;
+ struct r600_resource_texture *rtex;
+ struct r600_resource *rbuffer;
+ unsigned level = state->cbufs[cb]->level;
+ unsigned pitch, slice;
+ unsigned color_info;
+ unsigned format, swap, ntype;
+ const struct util_format_description *desc;
+ struct r600_bo *bo[3];
+
+ rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+ rbuffer = &rtex->resource;
+ bo[0] = rbuffer->bo;
+ bo[1] = rbuffer->bo;
+ bo[2] = rbuffer->bo;
+
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1;
+ ntype = 0;
+ desc = util_format_description(rtex->resource.base.b.format);
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ ntype = V_0280A0_NUMBER_SRGB;
+
+ format = r600_translate_colorformat(rtex->resource.base.b.format);
+ swap = r600_translate_colorswap(rtex->resource.base.b.format);
+ color_info = S_0280A0_FORMAT(format) |
+ S_0280A0_COMP_SWAP(swap) |
+ S_0280A0_ARRAY_MODE(rtex->array_mode) |
+ S_0280A0_BLEND_CLAMP(1) |
+ S_0280A0_NUMBER_TYPE(ntype);
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ color_info |= S_0280A0_SOURCE_FORMAT(1);
+
+ r600_pipe_state_add_reg(rstate,
+ R_028040_CB_COLOR0_BASE + cb * 4,
+ (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ r600_pipe_state_add_reg(rstate,
+ R_0280A0_CB_COLOR0_INFO + cb * 4,
+ color_info, 0xFFFFFFFF, bo[0]);
+ r600_pipe_state_add_reg(rstate,
+ R_028060_CB_COLOR0_SIZE + cb * 4,
+ S_028060_PITCH_TILE_MAX(pitch) |
+ S_028060_SLICE_TILE_MAX(slice),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028080_CB_COLOR0_VIEW + cb * 4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0280E0_CB_COLOR0_FRAG + cb * 4,
+ r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]);
+ r600_pipe_state_add_reg(rstate,
+ R_0280C0_CB_COLOR0_TILE + cb * 4,
+ r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]);
+ r600_pipe_state_add_reg(rstate,
+ R_028100_CB_COLOR0_MASK + cb * 4,
+ 0x00000000, 0xFFFFFFFF, NULL);
}
-static void r600_set_clip_state(struct pipe_context *ctx,
- const struct pipe_clip_state *state)
+static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
+ const struct pipe_framebuffer_state *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
+ struct r600_resource_texture *rtex;
+ struct r600_resource *rbuffer;
+ unsigned level;
+ unsigned pitch, slice, format;
- r600_context_state_decref(rctx->clip);
+ if (state->zsbuf == NULL)
+ return;
- rstate = r600_new_context_state(pipe_clip_type);
- rstate->state.clip = *state;
- rctx->vtbl->ucp(rctx, &rstate->rstate[0], &rstate->state.clip);
- rctx->clip = rstate;
+ rtex = (struct r600_resource_texture*)state->zsbuf->texture;
+ rtex->tiled = 1;
+ rtex->array_mode = 2;
+ rtex->tile_type = 1;
+ rtex->depth = 1;
+ rbuffer = &rtex->resource;
+
+ level = state->zsbuf->level;
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1;
+ format = r600_translate_dbformat(state->zsbuf->texture->format);
+
+ r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
+ (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE,
+ S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO,
+ S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format),
+ 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
+ (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL);
}
+/*
+ * Bind a new framebuffer.
+ *
+ * Copies *state into rctx->framebuffer, builds a fresh r600_pipe_state
+ * holding the colour buffer, depth buffer, scissor and CB/AA registers,
+ * stores it in rctx->states[R600_PIPE_STATE_FRAMEBUFFER] (releasing the
+ * previous one) and submits it with r600_context_pipe_state_set().
+ * If the state object cannot be allocated the call is a no-op.
+ */
static void r600_set_framebuffer_state(struct pipe_context *ctx,
const struct pipe_framebuffer_state *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
- int i;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ u32 shader_mask, tl, br, shader_control, target_mask;
- if (rctx->framebuffer) {
- for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++)
- radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[i+1]);
- radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[0]);
- }
- clean_flush(rctx, &rctx->hw_states.cb_flush);
- clean_flush(rctx, &rctx->hw_states.db_flush);
- rctx->pframebuffer = NULL;
- r600_context_state_decref(rctx->framebuffer);
+ if (rstate == NULL)
+ return;
- rstate = r600_new_context_state(pipe_framebuffer_type);
- rstate->state.framebuffer = *state;
- for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) {
- pipe_reference(NULL, &state->cbufs[i]->reference);
- }
- pipe_reference(NULL, &state->zsbuf->reference);
- rctx->framebuffer = rstate;
- rctx->pframebuffer = &rstate->state.framebuffer;
- for (i = 0; i < state->nr_cbufs; i++) {
- rctx->vtbl->cb(rctx, &rstate->rstate[i+1], state, i);
+ rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
+
+ /* unreference old buffer and reference new one */
+ util_copy_framebuffer_state(&rctx->framebuffer, state);
+
+ rctx->pframebuffer = &rctx->framebuffer;
+
+ /* build states */
+ for (int i = 0; i < state->nr_cbufs; i++) {
+ r600_cb(rctx, rstate, state, i);
}
if (state->zsbuf) {
- rctx->vtbl->db(rctx, &rstate->rstate[0], state);
+ r600_db(rctx, rstate, state);
}
- /* setup flush states */
- setup_cb_flush(rctx, &rctx->hw_states.cb_flush);
- setup_db_flush(rctx, &rctx->hw_states.db_flush);
-
- return;
-}
-
-static void r600_set_polygon_stipple(struct pipe_context *ctx,
- const struct pipe_poly_stipple *state)
-{
-}
-
-static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
-{
-}
-static void r600_set_scissor_state(struct pipe_context *ctx,
- const struct pipe_scissor_state *state)
-{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
-
- r600_context_state_decref(rctx->scissor);
+ /*
+ * One nibble per colour buffer. target_mask starts with every bit set
+ * and has the nibble of each bound buffer cleared; it is used below as
+ * the write mask (not the value) of CB_TARGET_MASK. The shifted
+ * constant is unsigned so that the nibble of buffer 7 (bits 28..31)
+ * does not overflow a signed int.
+ */
+ target_mask = 0xFFFFFFFF;
+ shader_mask = 0;
+ shader_control = 0;
+ for (int i = 0; i < state->nr_cbufs; i++) {
+ target_mask ^= 0xfu << (i * 4);
+ shader_mask |= 0xfu << (i * 4);
+ shader_control |= 1 << i;
+ }
+ /* every scissor rectangle below covers the whole framebuffer */
+ tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1);
+ br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height);
+
+ r600_pipe_state_add_reg(rstate,
+ R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028244_PA_SC_GENERIC_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
+ 0xFFFFFFFF, NULL);
+ if (rctx->family >= CHIP_RV770) {
+ r600_pipe_state_add_reg(rstate,
+ R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
+ 0xFFFFFFFF, NULL);
+ }
- rstate = r600_new_context_state(pipe_scissor_type);
- rstate->state.scissor = *state;
- rctx->scissor = rstate;
+ r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL,
+ shader_control, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK,
+ 0x00000000, target_mask, NULL);
+ r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
+ shader_mask, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL,
+ 0x01000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST,
+ 0x000000FF, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK,
+ 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK,
+ 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+
+ /* the previous state came from CALLOC_STRUCT, so release it with FREE */
+ FREE(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
+ rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void r600_set_stencil_ref(struct pipe_context *ctx,
- const struct pipe_stencil_ref *state)
+/*
+ * Record the index buffer binding in the context.
+ *
+ * With a non-NULL ib the context references ib->buffer and stores a copy
+ * of *ib; with a NULL ib the currently held buffer reference is dropped
+ * and the stored binding is zeroed.
+ */
+static void r600_set_index_buffer(struct pipe_context *ctx,
+ const struct pipe_index_buffer *ib)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- r600_context_state_decref(rctx->stencil_ref);
+ if (ib) {
+ /* take the reference first; the memcpy then stores the same pointer again */
+ pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
+ memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer));
+ } else {
+ pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
+ memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer));
+ }
- rstate = r600_new_context_state(pipe_stencil_ref_type);
- rstate->state.stencil_ref = *state;
- rctx->stencil_ref = rstate;
+ /* TODO make this more like a state */
}
-static void r600_set_vertex_buffers(struct pipe_context *ctx,
- unsigned count,
+/*
+ * Replace the context's vertex buffer bindings with the first `count`
+ * entries of `buffers`.
+ *
+ * References on the previously bound buffers are dropped, the new
+ * descriptors are copied in and each new buffer is referenced.
+ * rctx->any_user_vbs is set to TRUE when any incoming buffer is a user
+ * buffer; this function never resets it to FALSE.
+ */
+static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
const struct pipe_vertex_buffer *buffers)
{
- struct r600_context *rctx = r600_context(ctx);
- unsigned i;
- boolean any_user_buffers = FALSE;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- for (i = 0; i < rctx->nvertex_buffer; i++) {
+ for (int i = 0; i < rctx->nvertex_buffer; i++) {
pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
}
memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
- for (i = 0; i < count; i++) {
+ for (int i = 0; i < count; i++) {
+ /* clear the pointer copied by memcpy before taking a reference through it */
rctx->vertex_buffer[i].buffer = NULL;
if (r600_buffer_is_user_buffer(buffers[i].buffer))
- any_user_buffers = TRUE;
+ rctx->any_user_vbs = TRUE;
pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
}
- rctx->any_user_vbs = any_user_buffers;
rctx->nvertex_buffer = count;
}
-static void r600_set_index_buffer(struct pipe_context *ctx,
- const struct pipe_index_buffer *ib)
+/*
+ * Bind a constant buffer for the vertex or fragment shader stage.
+ *
+ * Rebuilds the per-stage constant buffer state (size and cache base
+ * registers) from `buffer` and submits it. `index` is not used here;
+ * only slot 0 registers are programmed. A NULL buffer is ignored, and any
+ * other shader stage is reported with R600_ERR.
+ */
+static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+ struct pipe_resource *buffer)
{
- struct r600_context *rctx = r600_context(ctx);
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_resource *rbuffer = (struct r600_resource*)buffer;
+
+ /*
+ * Callers may pass NULL to unbind a constant buffer; there is nothing
+ * to program in that case and buffer must not be dereferenced.
+ */
+ if (buffer == NULL)
+ return;
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ /* restart the register list so the state holds only this binding */
+ rctx->vs_const_buffer.nregs = 0;
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+ R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+ ALIGN_DIVUP(buffer->width0 >> 4, 16),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+ R_028980_ALU_CONST_CACHE_VS_0,
+ r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ rctx->ps_const_buffer.nregs = 0;
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+ R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+ ALIGN_DIVUP(buffer->width0 >> 4, 16),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+ R_028940_ALU_CONST_CACHE_PS_0,
+ r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
+ break;
+ default:
+ R600_ERR("unsupported %d\n", shader);
+ return;
+ }
+}
- if (ib) {
- pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
- memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer));
- } else {
- pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
- memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer));
+/*
+ * Create a shader object from the TGSI tokens in `state`.
+ *
+ * Returns the new r600_pipe_shader, or NULL when the allocation or
+ * r600_pipe_shader_create() fails.
+ */
+static void *r600_create_shader_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
+ int r;
+
+ if (shader == NULL)
+ return NULL;
+
+ r = r600_pipe_shader_create(ctx, shader, state->tokens);
+ if (r) {
+ /* do not leak the shader object when creation fails */
+ FREE(shader);
+ return NULL;
}
+ return shader;
+}
- /* TODO make this more like a state */
+/*
+ * Make `state` the current pixel shader of the context. Only the pointer
+ * is stored; the previously bound shader is left untouched.
+ */
+static void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ /* TODO delete old shader */
+ rctx->ps_shader = (struct r600_pipe_shader *)state;
}
-static void r600_set_viewport_state(struct pipe_context *ctx,
- const struct pipe_viewport_state *state)
+/*
+ * Make `state` the current vertex shader of the context. Only the pointer
+ * is stored; the previously bound shader is left untouched.
+ */
+static void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
{
- struct r600_context *rctx = r600_context(ctx);
- struct r600_context_state *rstate;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ /* TODO delete old shader */
+ rctx->vs_shader = (struct r600_pipe_shader *)state;
+}
- r600_context_state_decref(rctx->viewport);
+/*
+ * Destroy a pixel shader object. If it is the shader currently bound in
+ * the context, the binding is cleared first so no dangling pointer is left.
+ */
+static void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
- rstate = r600_new_context_state(pipe_viewport_type);
- rstate->state.viewport = *state;
- rctx->vtbl->viewport(rctx, &rstate->rstate[0], &rstate->state.viewport);
- rctx->viewport = rstate;
+ if (rctx->ps_shader == shader) {
+ rctx->ps_shader = NULL;
+ }
+ /* TODO proper delete */
+ /* shader objects come from CALLOC_STRUCT, so release them with FREE */
+ FREE(shader);
}
-void r600_init_state_functions(struct r600_context *rctx)
+/*
+ * Destroy a vertex shader object. If it is the shader currently bound in
+ * the context, the binding is cleared first so no dangling pointer is left.
+ */
+static void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
+
+ if (rctx->vs_shader == shader) {
+ rctx->vs_shader = NULL;
+ }
+ /* TODO proper delete */
+ /* shader objects come from CALLOC_STRUCT, so release them with FREE */
+ FREE(shader);
+}
+
+void r600_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = r600_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
@@ -511,30 +1272,23 @@ void r600_init_state_functions(struct r600_context *rctx)
rctx->context.create_vertex_elements_state = r600_create_vertex_elements;
rctx->context.create_vs_state = r600_create_shader_state;
rctx->context.bind_blend_state = r600_bind_blend_state;
- rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
+ rctx->context.bind_depth_stencil_alpha_state = r600_bind_state;
rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler;
rctx->context.bind_fs_state = r600_bind_ps_shader;
- rctx->context.bind_rasterizer_state = r600_bind_rasterizer_state;
+ rctx->context.bind_rasterizer_state = r600_bind_rs_state;
rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements;
rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler;
rctx->context.bind_vs_state = r600_bind_vs_shader;
rctx->context.delete_blend_state = r600_delete_state;
rctx->context.delete_depth_stencil_alpha_state = r600_delete_state;
- rctx->context.delete_fs_state = r600_delete_state;
- rctx->context.delete_rasterizer_state = r600_delete_state;
+ rctx->context.delete_fs_state = r600_delete_ps_shader;
+ rctx->context.delete_rasterizer_state = r600_delete_rs_state;
rctx->context.delete_sampler_state = r600_delete_state;
rctx->context.delete_vertex_elements_state = r600_delete_vertex_element;
- rctx->context.delete_vs_state = r600_delete_state;
+ rctx->context.delete_vs_state = r600_delete_vs_shader;
rctx->context.set_blend_color = r600_set_blend_color;
rctx->context.set_clip_state = r600_set_clip_state;
-
- if (radeon_get_family_class(rctx->rw) == EVERGREEN)
- rctx->context.set_constant_buffer = eg_set_constant_buffer;
- else if (rctx->screen->use_mem_constant)
- rctx->context.set_constant_buffer = r600_set_constant_buffer_mem;
- else
- rctx->context.set_constant_buffer = r600_set_constant_buffer_file;
-
+ rctx->context.set_constant_buffer = r600_set_constant_buffer;
rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view;
rctx->context.set_framebuffer_state = r600_set_framebuffer_state;
rctx->context.set_polygon_stipple = r600_set_polygon_stipple;
@@ -548,174 +1302,291 @@ void r600_init_state_functions(struct r600_context *rctx)
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
}
-struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate)
-{
- if (rstate == NULL)
- return NULL;
- rstate->refcount++;
- return rstate;
-}
-
-struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate)
+/*
+ * Build and submit the context's static configuration state (rctx->config).
+ *
+ * Per-family GPR, thread and stack-entry budgets are selected first and
+ * packed into the SQ_CONFIG / SQ_*_RESOURCE_MGMT registers; the remaining
+ * registers are programmed with fixed defaults, some of which differ
+ * between families below and from CHIP_RV770 upwards. The finished state
+ * is handed to r600_context_pipe_state_set().
+ */
+void r600_init_config(struct r600_pipe_context *rctx)
{
- unsigned i;
-
- if (rstate == NULL)
- return NULL;
- if (--rstate->refcount)
- return NULL;
- switch (rstate->type) {
- case pipe_sampler_view_type:
- pipe_resource_reference(&rstate->state.sampler_view.texture, NULL);
- break;
- case pipe_framebuffer_type:
- for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) {
- pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL);
- radeon_state_fini(&rstate->rstate[i+1]);
- }
- pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL);
+ int ps_prio;
+ int vs_prio;
+ int gs_prio;
+ int es_prio;
+ int num_ps_gprs;
+ int num_vs_gprs;
+ int num_gs_gprs;
+ int num_es_gprs;
+ int num_temp_gprs;
+ int num_ps_threads;
+ int num_vs_threads;
+ int num_gs_threads;
+ int num_es_threads;
+ int num_ps_stack_entries;
+ int num_vs_stack_entries;
+ int num_gs_stack_entries;
+ int num_es_stack_entries;
+ enum radeon_family family;
+ struct r600_pipe_state *rstate = &rctx->config;
+ u32 tmp;
+
+ family = r600_get_family(rctx->radeon);
+ ps_prio = 0;
+ vs_prio = 1;
+ gs_prio = 2;
+ es_prio = 3;
+ /* per-family resource budgets; unknown families use the RV610 values */
+ switch (family) {
+ case CHIP_R600:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
break;
- case pipe_viewport_type:
- case pipe_depth_type:
- case pipe_rasterizer_type:
- case pipe_poly_stipple_type:
- case pipe_scissor_type:
- case pipe_clip_type:
- case pipe_stencil_type:
- case pipe_alpha_type:
- case pipe_dsa_type:
- case pipe_blend_type:
- case pipe_stencil_ref_type:
- case pipe_shader_type:
- case pipe_sampler_type:
+ case CHIP_RV630:
+ case CHIP_RV635:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 40;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
break;
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
default:
- R600_ERR("invalid type %d\n", rstate->type);
- return NULL;
- }
- radeon_state_fini(&rstate->rstate[0]);
- FREE(rstate);
- return NULL;
-}
-
-static void r600_bind_shader_sampler(struct r600_context *rctx, struct r600_shader_sampler_states *sampler)
-{
- int i;
-
- for (i = 0; i < sampler->nsampler; i++) {
- if (sampler->sampler[i])
- radeon_draw_bind(&rctx->draw, sampler->sampler[i]);
- }
-
- for (i = 0; i < sampler->nborder; i++) {
- if (sampler->border[i])
- radeon_draw_bind(&rctx->draw, sampler->border[i]);
- }
-
- for (i = 0; i < sampler->nview; i++) {
- if (sampler->view[i])
- radeon_draw_bind(&rctx->draw, sampler->view[i]);
- }
-}
-
-static void clean_flush(struct r600_context *rctx, struct radeon_state *flush)
-{
- struct r600_screen *rscreen = rctx->screen;
- int i;
-
- for (i = 0 ; i < flush->nbo; i++) {
- radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], NULL);
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV670:
+ num_ps_gprs = 144;
+ num_vs_gprs = 40;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV770:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 256;
+ num_vs_stack_entries = 256;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV730:
+ case CHIP_RV740:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV710:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 48;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
}
- flush->nbo = 0;
- radeon_state_fini(flush);
-}
-static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- struct pipe_surface *surf;
- int i;
-
- radeon_state_init(flush, rscreen->rw, R600_STATE_CB_FLUSH, 0, 0);
+ rstate->id = R600_PIPE_STATE_CONFIG;
- for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) {
- surf = rctx->framebuffer->state.framebuffer.cbufs[i];
-
- rtex = (struct r600_resource_texture*)surf->texture;
- rbuffer = &rtex->resource;
- /* just need to the bo to the flush list */
- radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], rbuffer->bo);
- flush->placement[i] = RADEON_GEM_DOMAIN_VRAM;
+ /* SQ_CONFIG */
+ /* VC_ENABLE is left clear on RV610/RV620/RS780/RS880/RV710 */
+ tmp = 0;
+ switch (family) {
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ case CHIP_RV710:
+ break;
+ default:
+ tmp |= S_008C00_VC_ENABLE(1);
+ break;
}
- flush->nbo = rctx->framebuffer->state.framebuffer.nr_cbufs;
- return radeon_state_pm4(flush);
-}
-
-static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- struct pipe_surface *surf;
-
- surf = rctx->framebuffer->state.framebuffer.zsbuf;
-
- radeon_state_init(flush, rscreen->rw, R600_STATE_DB_FLUSH, 0, 0);
-
- if (surf) {
- rtex = (struct r600_resource_texture*)surf->texture;
- rbuffer = &rtex->resource;
- /* just need to the bo to the flush list */
- radeon_ws_bo_reference(rscreen->rw, &flush->bo[0], rbuffer->bo);
- flush->placement[0] = RADEON_GEM_DOMAIN_VRAM;
-
- flush->nbo = 1;
+ tmp |= S_008C00_DX9_CONSTS(0);
+ tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1);
+ tmp |= S_008C00_PS_PRIO(ps_prio);
+ tmp |= S_008C00_VS_PRIO(vs_prio);
+ tmp |= S_008C00_GS_PRIO(gs_prio);
+ tmp |= S_008C00_ES_PRIO(es_prio);
+ r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+
+ /* SQ_GPR_RESOURCE_MGMT_1 */
+ tmp = 0;
+ tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
+ tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+ tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
+ r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+
+ /* SQ_GPR_RESOURCE_MGMT_2 */
+ /* the ES count goes into its own field, not on top of the GS field */
+ tmp = 0;
+ tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
+ tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
+ r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+
+ /* SQ_THREAD_RESOURCE_MGMT */
+ tmp = 0;
+ tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
+ tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
+ tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
+ tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads);
+ r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL);
+
+ /* SQ_STACK_RESOURCE_MGMT_1 */
+ tmp = 0;
+ tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
+ tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
+ r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+
+ /* SQ_STACK_RESOURCE_MGMT_2 */
+ tmp = 0;
+ tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
+ tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
+ r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL);
+
+ /* these defaults differ from CHIP_RV770 upwards */
+ if (family >= CHIP_RV770) {
+ r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL);
+ } else {
+ r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL);
}
- return radeon_state_pm4(flush);
+ r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-int r600_context_hw_states(struct pipe_context *ctx)
+/*
+ * Create the depth/stencil/alpha state object used for DB flushes.
+ *
+ * A zeroed pipe_depth_stencil_alpha_state is passed to the context's own
+ * create_depth_stencil_alpha_state hook; on RV610/RV620/RV630/RV635 depth
+ * and stencil testing are enabled in it first. DB_SHADER_CONTROL and
+ * DB_RENDER_CONTROL entries are then appended to the returned state.
+ *
+ * Returns the new state, or NULL if it could not be created.
+ */
+void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
{
- struct r600_context *rctx = r600_context(ctx);
- unsigned i;
-
- /* build new states */
- rctx->vtbl->rasterizer(rctx, &rctx->hw_states.rasterizer);
- rctx->vtbl->scissor(rctx, &rctx->hw_states.scissor);
- rctx->vtbl->dsa(rctx, &rctx->hw_states.dsa);
- rctx->vtbl->cb_cntl(rctx, &rctx->hw_states.cb_cntl);
-
- /* bind states */
- radeon_draw_bind(&rctx->draw, &rctx->config);
-
- radeon_draw_bind(&rctx->draw, &rctx->hw_states.rasterizer);
- radeon_draw_bind(&rctx->draw, &rctx->hw_states.scissor);
- radeon_draw_bind(&rctx->draw, &rctx->hw_states.dsa);
- radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_cntl);
-
- radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush);
- radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush);
-
- if (rctx->viewport) {
- radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]);
- }
- if (rctx->blend) {
- radeon_draw_bind(&rctx->draw, &rctx->blend->rstate[0]);
- }
- if (rctx->clip) {
- radeon_draw_bind(&rctx->draw, &rctx->clip->rstate[0]);
- }
- for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) {
- radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[i+1]);
- }
- if (rctx->framebuffer->state.framebuffer.zsbuf) {
- radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[0]);
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct r600_pipe_state *rstate;
+ boolean quirk = false;
+
+ if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
+ rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
+ quirk = true;
+
+ memset(&dsa, 0, sizeof(dsa));
+
+ if (quirk) {
+ dsa.depth.enabled = 1;
+ dsa.depth.func = PIPE_FUNC_LEQUAL;
+ dsa.stencil[0].enabled = 1;
+ dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
+ dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP;
+ dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR;
+ dsa.stencil[0].writemask = 0xff;
}
- r600_bind_shader_sampler(rctx, &rctx->vs_sampler);
- r600_bind_shader_sampler(rctx, &rctx->ps_sampler);
-
- return 0;
+ rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
+ /* creation can fail; do not append registers to a NULL state */
+ if (rstate == NULL)
+ return NULL;
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ 0x0,
+ S_02880C_DUAL_EXPORT_ENABLE(1), NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028D0C_DB_RENDER_CONTROL,
+ S_028D0C_DEPTH_COPY_ENABLE(1) |
+ S_028D0C_STENCIL_COPY_ENABLE(1) |
+ S_028D0C_COPY_CENTROID(1),
+ S_028D0C_DEPTH_COPY_ENABLE(1) |
+ S_028D0C_STENCIL_COPY_ENABLE(1) |
+ S_028D0C_COPY_CENTROID(1), NULL);
+ return rstate;
}
diff --git a/src/gallium/drivers/r600/r600_state2.c b/src/gallium/drivers/r600/r600_state2.c
deleted file mode 100644
index 153780594e..0000000000
--- a/src/gallium/drivers/r600/r600_state2.c
+++ /dev/null
@@ -1,2490 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/* TODO:
- * - fix mask for depth control & cull for query
- */
-#include <stdio.h>
-#include <errno.h>
-#include <pipe/p_defines.h>
-#include <pipe/p_state.h>
-#include <pipe/p_context.h>
-#include <tgsi/tgsi_scan.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_util.h>
-#include <util/u_blitter.h>
-#include <util/u_double_list.h>
-#include <util/u_transfer.h>
-#include <util/u_surface.h>
-#include <util/u_pack_color.h>
-#include <util/u_memory.h>
-#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
-#include <util/u_index_modify.h>
-#include <pipebuffer/pb_buffer.h>
-#include "r600.h"
-#include "r600d.h"
-#include "r700_sq.h"
-struct radeon_state {
- unsigned dummy;
-};
-#include "r600_resource.h"
-#include "r600_shader.h"
-#include "r600_pipe.h"
-#include "r600_state_inlines.h"
-
-/* r600_shader.c */
-static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned spi_vs_out_id[10];
- unsigned i, tmp;
-
- /* clear previous register */
- rstate->nregs = 0;
-
- /* so far never got proper semantic id from tgsi */
- for (i = 0; i < 10; i++) {
- spi_vs_out_id[i] = 0;
- }
- for (i = 0; i < 32; i++) {
- tmp = i << ((i & 3) * 8);
- spi_vs_out_id[i / 4] |= tmp;
- }
- for (i = 0; i < 10; i++) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028614_SPI_VS_OUT_ID_0 + i * 4,
- spi_vs_out_id[i], 0xFFFFFFFF, NULL);
- }
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028868_SQ_PGM_RESOURCES_VS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0288D0_SQ_PGM_CF_OFFSET_VS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028858_SQ_PGM_START_VS,
- 0x00000000, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028894_SQ_PGM_START_FS,
- 0x00000000, 0xFFFFFFFF, shader->bo);
-}
-
-int r600_find_vs_semantic_index2(struct r600_shader *vs,
- struct r600_shader *ps, int id)
-{
- struct r600_shader_io *input = &ps->input[id];
-
- for (int i = 0; i < vs->noutput; i++) {
- if (input->name == vs->output[i].name &&
- input->sid == vs->output[i].sid) {
- return i - 1;
- }
- }
- return 0;
-}
-
-static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z;
- boolean have_pos = FALSE, have_face = FALSE;
-
- /* clear previous register */
- rstate->nregs = 0;
-
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index2(&rctx->vs_shader->shader, rshader, i));
- tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- have_pos = TRUE;
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- have_face = TRUE;
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- for (i = 0; i < rshader->noutput; i++) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_Z_EXPORT_ENABLE(1),
- S_02880C_Z_EXPORT_ENABLE(1), NULL);
- }
-
- exports_ps = 0;
- num_cout = 0;
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- exports_ps |= 1;
- else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
- num_cout++;
- }
- }
- exports_ps |= S_028854_EXPORT_COLORS(num_cout);
- if (!exports_ps) {
- /* always at least export 1 component per pixel */
- exports_ps = 2;
- }
-
- spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
- spi_input_z = 0;
- if (have_pos) {
- spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) |
- S_0286CC_BARYC_SAMPLE_CNTL(1);
- spi_input_z |= 1;
- }
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028840_SQ_PGM_START_PS,
- 0x00000000, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028850_SQ_PGM_RESOURCES_PS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028854_SQ_PGM_EXPORTS_PS,
- exports_ps, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0288CC_SQ_PGM_CF_OFFSET_PS,
- 0x00000000, 0xFFFFFFFF, NULL);
-
- if (rshader->uses_kill) {
- /* only set some bits here, the other bits are set in the dsa state */
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_KILL_ENABLE(1),
- S_02880C_KILL_ENABLE(1), NULL);
- }
-}
-
-static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_shader *rshader = &shader->shader;
- void *ptr;
-
- /* copy new shader */
- if (shader->bo == NULL) {
- shader->bo = radeon_ws_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0);
- if (shader->bo == NULL) {
- return -ENOMEM;
- }
- ptr = radeon_ws_bo_map(rctx->radeon, shader->bo, 0, NULL);
- memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
- radeon_ws_bo_unmap(rctx->radeon, shader->bo);
- }
- /* build state */
- rshader->flat_shade = rctx->flatshade;
- switch (rshader->processor_type) {
- case TGSI_PROCESSOR_VERTEX:
- if (rshader->family >= CHIP_CEDAR) {
- evergreen_pipe_shader_vs(ctx, shader);
- } else {
- r600_pipe_shader_vs(ctx, shader);
- }
- break;
- case TGSI_PROCESSOR_FRAGMENT:
- if (rshader->family >= CHIP_CEDAR) {
- evergreen_pipe_shader_ps(ctx, shader);
- } else {
- r600_pipe_shader_ps(ctx, shader);
- }
- break;
- default:
- return -EINVAL;
- }
- r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
- return 0;
-}
-
-static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_shader *shader = &rshader->shader;
- const struct util_format_description *desc;
- enum pipe_format resource_format[160];
- unsigned i, nresources = 0;
- struct r600_bc *bc = &shader->bc;
- struct r600_bc_cf *cf;
- struct r600_bc_vtx *vtx;
-
- if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
- return 0;
- for (i = 0; i < rctx->vertex_elements->count; i++) {
- resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
- }
- radeon_ws_bo_reference(rctx->radeon, &rshader->bo, NULL);
- LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
- switch (cf->inst) {
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
- LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
- desc = util_format_description(resource_format[vtx->buffer_id]);
- if (desc == NULL) {
- R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
- return -EINVAL;
- }
- vtx->dst_sel_x = desc->swizzle[0];
- vtx->dst_sel_y = desc->swizzle[1];
- vtx->dst_sel_z = desc->swizzle[2];
- vtx->dst_sel_w = desc->swizzle[3];
- }
- break;
- default:
- break;
- }
- }
- return r600_bc_build(&shader->bc);
-}
-
-int r600_pipe_shader_update2(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- int r;
-
- if (shader == NULL)
- return -EINVAL;
- /* there should be enough input */
- if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, shader->shader.bc.nresource);
- return -EINVAL;
- }
- r = r600_shader_update(ctx, shader);
- if (r)
- return r;
- return r600_pipe_shader(ctx, shader);
-}
-
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
-int r600_pipe_shader_create2(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- int r;
-
-//fprintf(stderr, "--------------------------------------------------------------\n");
-//tgsi_dump(tokens, 0);
- shader->shader.family = r600_get_family(rctx->radeon);
- r = r600_shader_from_tgsi(tokens, &shader->shader);
- if (r) {
- R600_ERR("translation from TGSI failed !\n");
- return r;
- }
- r = r600_bc_build(&shader->shader.bc);
- if (r) {
- R600_ERR("building bytecode failed !\n");
- return r;
- }
-//fprintf(stderr, "______________________________________________________________\n");
- return 0;
-}
-/* r600_shader.c END */
-
-static const char* r600_get_vendor(struct pipe_screen* pscreen)
-{
- return "X.Org";
-}
-
-static const char* r600_get_name(struct pipe_screen* pscreen)
-{
- struct r600_screen *rscreen = (struct r600_screen *)pscreen;
- enum radeon_family family = r600_get_family(rscreen->radeon);
-
- if (family >= CHIP_R600 && family < CHIP_RV770)
- return "R600 (HD2XXX,HD3XXX)";
- else
- return "R700 (HD4XXX)";
-}
-
-static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
-{
- switch (param) {
- /* Supported features (boolean caps). */
- case PIPE_CAP_NPOT_TEXTURES:
- case PIPE_CAP_TWO_SIDED_STENCIL:
- case PIPE_CAP_GLSL:
- case PIPE_CAP_DUAL_SOURCE_BLEND:
- case PIPE_CAP_ANISOTROPIC_FILTER:
- case PIPE_CAP_POINT_SPRITE:
- case PIPE_CAP_OCCLUSION_QUERY:
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
- case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_SM3:
- case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_INDEP_BLEND_ENABLE:
- case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
- case PIPE_CAP_DEPTH_CLAMP:
- return 1;
-
- /* Unsupported features (boolean caps). */
- case PIPE_CAP_TIMER_QUERY:
- case PIPE_CAP_STREAM_OUTPUT:
- case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
- return 0;
-
- /* Texturing. */
- case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return 14;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- /* FIXME allow this once infrastructure is there */
- return 0;
- case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 16;
-
- /* Render targets. */
- case PIPE_CAP_MAX_RENDER_TARGETS:
- /* FIXME some r6xx are buggy and can only do 4 */
- return 8;
-
- /* Fragment coordinate conventions. */
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
- return 1;
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- return 0;
-
- default:
- R600_ERR("r600: unknown param %d\n", param);
- return 0;
- }
-}
-
-static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
-{
- switch (param) {
- case PIPE_CAP_MAX_LINE_WIDTH:
- case PIPE_CAP_MAX_LINE_WIDTH_AA:
- case PIPE_CAP_MAX_POINT_WIDTH:
- case PIPE_CAP_MAX_POINT_WIDTH_AA:
- return 8192.0f;
- case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
- return 16.0f;
- case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
- return 16.0f;
- default:
- R600_ERR("r600: unsupported paramf %d\n", param);
- return 0.0f;
- }
-}
-
-static boolean r600_is_format_supported(struct pipe_screen* screen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned usage,
- unsigned geom_flags)
-{
- unsigned retval = 0;
- if (target >= PIPE_MAX_TEXTURE_TYPES) {
- R600_ERR("r600: unsupported texture type %d\n", target);
- return FALSE;
- }
-
- /* Multisample */
- if (sample_count > 1)
- return FALSE;
-
- if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- r600_is_sampler_format_supported(format)) {
- retval |= PIPE_BIND_SAMPLER_VIEW;
- }
-
- if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) &&
- r600_is_colorbuffer_format_supported(format)) {
- retval |= usage &
- (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED);
- }
-
- if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
- r600_is_zs_format_supported(format)) {
- retval |= PIPE_BIND_DEPTH_STENCIL;
- }
-
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- r600_is_vertex_format_supported(format))
- retval |= PIPE_BIND_VERTEX_BUFFER;
-
- if (usage & PIPE_BIND_TRANSFER_READ)
- retval |= PIPE_BIND_TRANSFER_READ;
- if (usage & PIPE_BIND_TRANSFER_WRITE)
- retval |= PIPE_BIND_TRANSFER_WRITE;
-
- return retval == usage;
-}
-
-static void r600_destroy_screen(struct pipe_screen* pscreen)
-{
- struct r600_screen *rscreen = (struct r600_screen *)pscreen;
-
- if (rscreen == NULL)
- return;
- FREE(rscreen);
-}
-
-int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-static void r600_draw_common(struct r600_drawl *draw)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
- struct r600_pipe_state *rstate;
- struct r600_resource *rbuffer;
- unsigned i, j, offset, format, prim;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct pipe_vertex_buffer *vertex_buffer;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
-
- switch (draw->index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw->index_size);
- return;
- }
- if (r600_conv_pipe_prim(draw->mode, &prim))
- return;
-
-
- /* rebuild vertex shader if input format changed */
- if (r600_pipe_shader_update2(&rctx->context, rctx->vs_shader))
- return;
- if (r600_pipe_shader_update2(&rctx->context, rctx->ps_shader))
- return;
-
- for (i = 0 ; i < rctx->vertex_elements->count; i++) {
- unsigned num_format = 0, format_comp = 0;
-
- rstate = &rctx->vs_resource[i];
- j = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[j];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset;
- format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format);
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- r600_translate_vertex_num_format(rctx->vertex_elements->elements[i].src_format, &num_format, &format_comp);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE,
- R_038008_RESOURCE0_WORD2,
- S_038008_STRIDE(vertex_buffer->stride) |
- S_038008_DATA_FORMAT(format) |
- S_038008_NUM_FORMAT_ALL(num_format) |
- S_038008_FORMAT_COMP_ALL(format_comp),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i);
- }
-
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
-
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONFIG, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- /* build late state */
- if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
- float offset_units = rctx->rasterizer->offset_units;
- unsigned offset_db_fmt_cntl = 0, depth;
-
- switch (rctx->framebuffer.zsbuf->texture->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- depth = -24;
- offset_units *= 2.0f;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- depth = -23;
- offset_units *= 1.0f;
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- depth = -16;
- offset_units *= 4.0f;
- break;
- default:
- return;
- }
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT,
- R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT,
- R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT,
- R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT,
- R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT,
- R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
-
- rdraw.vgt_num_indices = draw->count;
- rdraw.vgt_num_instances = 1;
- rdraw.vgt_index_type = vgt_dma_index_type;
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
- rdraw.indices = NULL;
- if (draw->index_buffer) {
- rbuffer = (struct r600_resource*)draw->index_buffer;
- rdraw.indices = rbuffer->bo;
- rdraw.indices_bo_offset = draw->index_buffer_offset;
- }
- r600_context_draw(&rctx->ctx, &rdraw);
-}
-
-void r600_translate_index_buffer2(struct r600_pipe_context *r600,
- struct pipe_resource **index_buffer,
- unsigned *index_size,
- unsigned *start, unsigned count)
-{
- switch (*index_size) {
- case 1:
- util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count);
- *index_size = 2;
- *start = 0;
- break;
-
- case 2:
- if (*start % 2 != 0) {
- util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count);
- *start = 0;
- }
- break;
-
- case 4:
- break;
- }
-}
-
-static void r600_draw_vbo2(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_drawl draw;
-
- assert(info->index_bias == 0);
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers2(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- memset(&draw, 0, sizeof(struct r600_drawl));
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer2(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- draw.index_buffer = rctx->index_buffer.buffer;
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer2(rctx, &draw);
- } else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->start;
- }
- r600_draw_common(&draw);
-}
-
-static void r600_flush2(struct pipe_context *ctx, unsigned flags,
- struct pipe_fence_handle **fence)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-#if 0
- static int dc = 0;
- char dname[256];
-#endif
-
- if (!rctx->ctx.pm4_cdwords)
- return;
-
- u_upload_flush(rctx->upload_vb);
- u_upload_flush(rctx->upload_ib);
-
-#if 0
- sprintf(dname, "gallium-%08d.bof", dc);
- if (dc < 20) {
- r600_context_dump_bof(&rctx->ctx, dname);
- R600_ERR("dumped %s\n", dname);
- }
- dc++;
-#endif
- r600_context_flush(&rctx->ctx);
-}
-
-static void r600_destroy_context(struct pipe_context *context)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)context;
-
- r600_context_fini(&rctx->ctx);
- for (int i = 0; i < R600_PIPE_NSTATES; i++) {
- free(rctx->states[i]);
- }
-
- u_upload_destroy(rctx->upload_vb);
- u_upload_destroy(rctx->upload_ib);
-
- FREE(rctx);
-}
-
-static void r600_blitter_save_states(struct pipe_context *ctx)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
- util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]);
- if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) {
- util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref);
- }
- util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]);
- util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
- util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
- util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements);
- if (rctx->states[R600_PIPE_STATE_VIEWPORT]) {
- util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
- }
- if (rctx->states[R600_PIPE_STATE_CLIP]) {
- util_blitter_save_clip(rctx->blitter, &rctx->clip);
- }
- util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer);
-
- rctx->vertex_elements = NULL;
-
- /* TODO queries */
-}
-
-int r600_blit_uncompress_depth2(struct pipe_context *ctx, struct r600_resource_texture *texture)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_framebuffer_state fb = *rctx->pframebuffer;
- struct pipe_surface *zsurf, *cbsurf;
- int level = 0;
- float depth = 1.0f;
-
- for (int i = 0; i < fb.nr_cbufs; i++) {
- fb.cbufs[i] = NULL;
- pipe_surface_reference(&fb.cbufs[i], rctx->pframebuffer->cbufs[i]);
- }
- fb.zsbuf = NULL;
- pipe_surface_reference(&fb.zsbuf, rctx->pframebuffer->zsbuf);
-
- zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0,
- PIPE_BIND_DEPTH_STENCIL);
-
- cbsurf = ctx->screen->get_tex_surface(ctx->screen, texture->flushed_depth_texture, 0, level, 0,
- PIPE_BIND_RENDER_TARGET);
-
- r600_blitter_save_states(ctx);
- util_blitter_save_framebuffer(rctx->blitter, &fb);
-
- if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
- rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
- depth = 0.0f;
-
- util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth);
-
- pipe_surface_reference(&zsurf, NULL);
- pipe_surface_reference(&cbsurf, NULL);
- for (int i = 0; i < fb.nr_cbufs; i++) {
- pipe_surface_reference(&fb.cbufs[i], NULL);
- }
- pipe_surface_reference(&fb.zsbuf, NULL);
-
- return 0;
-}
-
-static void r600_clear(struct pipe_context *ctx, unsigned buffers,
- const float *rgba, double depth, unsigned stencil)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_framebuffer_state *fb = &rctx->framebuffer;
-
- r600_blitter_save_states(ctx);
- util_blitter_clear(rctx->blitter, fb->width, fb->height,
- fb->nr_cbufs, buffers, rgba, depth,
- stencil);
-}
-
-static void r600_clear_render_target(struct pipe_context *ctx,
- struct pipe_surface *dst,
- const float *rgba,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_framebuffer_state *fb = &rctx->framebuffer;
-
- util_blitter_save_framebuffer(rctx->blitter, fb);
- util_blitter_clear_render_target(rctx->blitter, dst, rgba,
- dstx, dsty, width, height);
-}
-
-static void r600_clear_depth_stencil(struct pipe_context *ctx,
- struct pipe_surface *dst,
- unsigned clear_flags,
- double depth,
- unsigned stencil,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_framebuffer_state *fb = &rctx->framebuffer;
-
- util_blitter_save_framebuffer(rctx->blitter, fb);
- util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil,
- dstx, dsty, width, height);
-}
-
-
-static void r600_resource_copy_region(struct pipe_context *ctx,
- struct pipe_resource *dst,
- struct pipe_subresource subdst,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
-{
- util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
-}
-
-static void r600_init_blit_functions2(struct r600_pipe_context *rctx)
-{
- rctx->context.clear = r600_clear;
- rctx->context.clear_render_target = r600_clear_render_target;
- rctx->context.clear_depth_stencil = r600_clear_depth_stencil;
- rctx->context.resource_copy_region = r600_resource_copy_region;
-}
-
-static void r600_init_context_resource_functions2(struct r600_pipe_context *r600)
-{
- r600->context.get_transfer = u_get_transfer_vtbl;
- r600->context.transfer_map = u_transfer_map_vtbl;
- r600->context.transfer_flush_region = u_transfer_flush_region_vtbl;
- r600->context.transfer_unmap = u_transfer_unmap_vtbl;
- r600->context.transfer_destroy = u_transfer_destroy_vtbl;
- r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
- r600->context.is_resource_referenced = u_is_resource_referenced_vtbl;
-}
-
-static void r600_set_blend_color(struct pipe_context *ctx,
- const struct pipe_blend_color *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
-
- if (rstate == NULL)
- return;
-
- rstate->id = R600_PIPE_STATE_BLEND_COLOR;
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
- free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]);
- rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void *r600_create_blend_state(struct pipe_context *ctx,
- const struct pipe_blend_state *state)
-{
- struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend);
- struct r600_pipe_state *rstate;
- u32 color_control, target_mask;
-
- if (blend == NULL) {
- return NULL;
- }
- rstate = &blend->rstate;
-
- rstate->id = R600_PIPE_STATE_BLEND;
-
- target_mask = 0;
- color_control = S_028808_PER_MRT_BLEND(1);
- if (state->logicop_enable) {
- color_control |= (state->logicop_func << 16) | (state->logicop_func << 20);
- } else {
- color_control |= (0xcc << 16);
- }
- /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
- if (state->independent_blend_enable) {
- for (int i = 0; i < 8; i++) {
- if (state->rt[i].blend_enable) {
- color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
- }
- target_mask |= (state->rt[i].colormask << (4 * i));
- }
- } else {
- for (int i = 0; i < 8; i++) {
- if (state->rt[0].blend_enable) {
- color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
- }
- target_mask |= (state->rt[0].colormask << (4 * i));
- }
- }
- blend->cb_target_mask = target_mask;
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028808_CB_COLOR_CONTROL,
- color_control, 0xFFFFFFFF, NULL);
-
- for (int i = 0; i < 8; i++) {
- unsigned eqRGB = state->rt[i].rgb_func;
- unsigned srcRGB = state->rt[i].rgb_src_factor;
- unsigned dstRGB = state->rt[i].rgb_dst_factor;
-
- unsigned eqA = state->rt[i].alpha_func;
- unsigned srcA = state->rt[i].alpha_src_factor;
- unsigned dstA = state->rt[i].alpha_dst_factor;
- uint32_t bc = 0;
-
- if (!state->rt[i].blend_enable)
- continue;
-
- bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB));
- bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB));
- bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB));
-
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
- bc |= S_028804_SEPARATE_ALPHA_BLEND(1);
- bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA));
- bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA));
- bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
- }
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
- if (i == 0) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL);
- }
- }
- return rstate;
-}
-
-static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state;
- struct r600_pipe_state *rstate;
-
- if (state == NULL)
- return;
- rstate = &blend->rstate;
- rctx->states[rstate->id] = rstate;
- rctx->cb_target_mask = blend->cb_target_mask;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void *r600_create_dsa_state(struct pipe_context *ctx,
- const struct pipe_depth_stencil_alpha_state *state)
-{
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
- unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
- unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control;
-
- if (rstate == NULL) {
- return NULL;
- }
-
- rstate->id = R600_PIPE_STATE_DSA;
- /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
- /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be
- * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will
- * be set if shader use texkill instruction
- */
- db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
- stencil_ref_mask = 0;
- stencil_ref_mask_bf = 0;
- db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
- S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
- S_028800_ZFUNC(state->depth.func);
-
- /* stencil */
- if (state->stencil[0].enabled) {
- db_depth_control |= S_028800_STENCIL_ENABLE(1);
- db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func));
- db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op));
- db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op));
- db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op));
-
-
- stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) |
- S_028430_STENCILWRITEMASK(state->stencil[0].writemask);
- if (state->stencil[1].enabled) {
- db_depth_control |= S_028800_BACKFACE_ENABLE(1);
- db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func));
- db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op));
- db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op));
- db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op));
- stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) |
- S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask);
- }
- }
-
- /* alpha */
- alpha_test_control = 0;
- alpha_ref = 0;
- if (state->alpha.enabled) {
- alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
- alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
- alpha_ref = fui(state->alpha.ref_value);
- }
-
- /* misc */
- db_render_control = 0;
- db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
- S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
- S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
- /* TODO db_render_override depends on query */
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028430_DB_STENCILREFMASK, stencil_ref_mask,
- 0xFFFFFFFF & C_028430_STENCILREF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
- 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
-
- return rstate;
-}
-
-static void *r600_create_rs_state(struct pipe_context *ctx,
- const struct pipe_rasterizer_state *state)
-{
- struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
- struct r600_pipe_state *rstate;
- unsigned tmp;
- unsigned prov_vtx = 1, polygon_dual_mode;
-
- if (rs == NULL) {
- return NULL;
- }
-
- rstate = &rs->rstate;
- rs->flatshade = state->flatshade;
- rs->sprite_coord_enable = state->sprite_coord_enable;
-
- /* offset */
- rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
-
- rstate->id = R600_PIPE_STATE_RASTERIZER;
- if (state->flatshade_first)
- prov_vtx = 0;
- tmp = 0x00000001;
- if (state->sprite_coord_enable) {
- tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(2) |
- S_0286D4_PNT_SPRITE_OVRD_Y(3) |
- S_0286D4_PNT_SPRITE_OVRD_Z(0) |
- S_0286D4_PNT_SPRITE_OVRD_W(1);
- if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
- tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
- }
- }
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
-
- polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
- state->fill_back != PIPE_POLYGON_MODE_FILL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028814_PA_SU_SC_MODE_CNTL,
- S_028814_PROVOKING_VTX_LAST(prov_vtx) |
- S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
- S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
- S_028814_FACE(!state->front_ccw) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
- S_028814_POLY_MODE(polygon_dual_mode) |
- S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
- S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02881C_PA_CL_VS_OUT_CNTL,
- S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
- S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- /* point size 12.4 fixed point */
- tmp = (unsigned)(state->point_size * 8.0);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL);
- return rstate;
-}
-
-static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- if (state == NULL)
- return;
-
- rctx->flatshade = rs->flatshade;
- rctx->sprite_coord_enable = rs->sprite_coord_enable;
- rctx->rasterizer = rs;
-
- rctx->states[rs->rstate.id] = &rs->rstate;
- r600_context_pipe_state_set(&rctx->ctx, &rs->rstate);
-}
-
-static void r600_delete_rs_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
-
- if (rctx->rasterizer == rs) {
- rctx->rasterizer = NULL;
- }
- if (rctx->states[rs->rstate.id] == &rs->rstate) {
- rctx->states[rs->rstate.id] = NULL;
- }
- free(rs);
-}
-
-static void *r600_create_sampler_state(struct pipe_context *ctx,
- const struct pipe_sampler_state *state)
-{
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
- union util_color uc;
-
- if (rstate == NULL) {
- return NULL;
- }
-
- rstate->id = R600_PIPE_STATE_SAMPLER;
- util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
- S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
- S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
- S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) |
- S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
- S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
- /* FIXME LOD it depends on texture base level ... */
- r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
- S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
- S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
- S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
- if (uc.ui) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
- }
- return rstate;
-}
-
-static void *r600_create_vertex_elements(struct pipe_context *ctx,
- unsigned count,
- const struct pipe_vertex_element *elements)
-{
- struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
-
- assert(count < 32);
- v->count = count;
- v->refcount = 1;
- memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
- return v;
-}
-
-static void r600_sampler_view_destroy(struct pipe_context *ctx,
- struct pipe_sampler_view *state)
-{
- struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
-
- pipe_resource_reference(&state->texture, NULL);
- FREE(resource);
-}
-
-static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *state)
-{
- struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
- struct r600_pipe_state *rstate;
- const struct util_format_description *desc;
- struct r600_resource_texture *tmp;
- struct r600_resource *rbuffer;
- unsigned format;
- uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4], array_mode = 0, tile_type = 0;
- struct radeon_ws_bo *bo[2];
-
- if (resource == NULL)
- return NULL;
- rstate = &resource->state;
-
- /* initialize base object */
- resource->base = *state;
- resource->base.texture = NULL;
- pipe_reference(NULL, &texture->reference);
- resource->base.texture = texture;
- resource->base.reference.count = 1;
- resource->base.context = ctx;
-
- swizzle[0] = state->swizzle_r;
- swizzle[1] = state->swizzle_g;
- swizzle[2] = state->swizzle_b;
- swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(texture->format,
- swizzle,
- &word4, &yuv_format);
- if (format == ~0) {
- format = 0;
- }
- desc = util_format_description(texture->format);
- if (desc == NULL) {
- R600_ERR("unknow format %d\n", texture->format);
- }
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- /* FIXME depth texture decompression */
- if (tmp->depth) {
- r600_texture_depth_flush(ctx, texture);
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->flushed_depth_texture->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- }
- pitch = align(tmp->pitch[0] / tmp->bpt, 8);
-
- /* FIXME properly handle first level != 0 */
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0,
- S_038000_DIM(r600_tex_dim(texture->target)) |
- S_038000_TILE_MODE(array_mode) |
- S_038000_TILE_TYPE(tile_type) |
- S_038000_PITCH((pitch / 8) - 1) |
- S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1,
- S_038004_TEX_HEIGHT(texture->height0 - 1) |
- S_038004_TEX_DEPTH(texture->depth0 - 1) |
- S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038008_RESOURCE0_WORD2,
- tmp->offset[0] >> 8, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3,
- tmp->offset[1] >> 8, 0xFFFFFFFF, bo[1]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4,
- word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
- S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
- S_038010_REQUEST_SIZE(1) |
- S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5,
- S_038014_LAST_LEVEL(state->last_level) |
- S_038014_BASE_ARRAY(0) |
- S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6,
- S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
-
- return &resource->base;
-}
-
-static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
- struct pipe_sampler_view **views)
-{
- /* TODO */
- assert(1);
-}
-
-static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
- struct pipe_sampler_view **views)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
-
- for (int i = 0; i < count; i++) {
- if (resource[i]) {
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
- }
- }
-}
-
-static void r600_bind_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
-
- if (state == NULL)
- return;
- rctx->states[rstate->id] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
-
- for (int i = 0; i < count; i++) {
- r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i);
- }
-}
-
-static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
-
- /* TODO implement */
- for (int i = 0; i < count; i++) {
- r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i);
- }
-}
-
-static void r600_delete_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
-
- if (rctx->states[rstate->id] == rstate) {
- rctx->states[rstate->id] = NULL;
- }
- for (int i = 0; i < rstate->nregs; i++) {
- radeon_ws_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
- }
- free(rstate);
-}
-
-static void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
-{
- struct r600_vertex_element *v = (struct r600_vertex_element*)state;
-
- if (v == NULL)
- return;
- if (--v->refcount)
- return;
- free(v);
-}
-
-static void r600_set_clip_state(struct pipe_context *ctx,
- const struct pipe_clip_state *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
-
- if (rstate == NULL)
- return;
-
- rctx->clip = *state;
- rstate->id = R600_PIPE_STATE_CLIP;
- for (int i = 0; i < state->nr; i++) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028E20_PA_CL_UCP0_X + i * 4,
- fui(state->ucp[i][0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028E24_PA_CL_UCP0_Y + i * 4,
- fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028E28_PA_CL_UCP0_Z + i * 4,
- fui(state->ucp[i][2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028E2C_PA_CL_UCP0_W + i * 4,
- fui(state->ucp[i][3]), 0xFFFFFFFF, NULL);
- }
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL,
- S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) |
- S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) |
- S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL);
-
- free(rctx->states[R600_PIPE_STATE_CLIP]);
- rctx->states[R600_PIPE_STATE_CLIP] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_vertex_element *v = (struct r600_vertex_element*)state;
-
- r600_delete_vertex_element(ctx, rctx->vertex_elements);
- rctx->vertex_elements = v;
- if (v) {
- v->refcount++;
-// rctx->vs_rebuild = TRUE;
- }
-}
-
-static void r600_set_polygon_stipple(struct pipe_context *ctx,
- const struct pipe_poly_stipple *state)
-{
-}
-
-static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
-{
-}
-
-static void r600_set_scissor_state(struct pipe_context *ctx,
- const struct pipe_scissor_state *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
- u32 tl, br;
-
- if (rstate == NULL)
- return;
-
- rstate->id = R600_PIPE_STATE_SCISSOR;
- tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
- br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028210_PA_SC_CLIPRECT_0_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028214_PA_SC_CLIPRECT_0_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028218_PA_SC_CLIPRECT_1_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_02821C_PA_SC_CLIPRECT_1_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028220_PA_SC_CLIPRECT_2_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028224_PA_SC_CLIPRECT_2_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028228_PA_SC_CLIPRECT_3_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_02822C_PA_SC_CLIPRECT_3_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF,
- 0xFFFFFFFF, NULL);
- if (rctx->family >= CHIP_RV770) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
- 0xFFFFFFFF, NULL);
- }
-
- free(rctx->states[R600_PIPE_STATE_SCISSOR]);
- rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void r600_set_stencil_ref(struct pipe_context *ctx,
- const struct pipe_stencil_ref *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
- u32 tmp;
-
- if (rstate == NULL)
- return;
-
- rctx->stencil_ref = *state;
- rstate->id = R600_PIPE_STATE_STENCIL_REF;
- tmp = S_028430_STENCILREF(state->ref_value[0]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028430_DB_STENCILREFMASK, tmp,
- ~C_028430_STENCILREF, NULL);
- tmp = S_028434_STENCILREF_BF(state->ref_value[1]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028434_DB_STENCILREFMASK_BF, tmp,
- ~C_028434_STENCILREF_BF, NULL);
-
- free(rctx->states[R600_PIPE_STATE_STENCIL_REF]);
- rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void r600_set_viewport_state(struct pipe_context *ctx,
- const struct pipe_viewport_state *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
-
- if (rstate == NULL)
- return;
-
- rctx->viewport = *state;
- rstate->id = R600_PIPE_STATE_VIEWPORT;
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
-
- free(rctx->states[R600_PIPE_STATE_VIEWPORT]);
- rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
- const struct pipe_framebuffer_state *state, int cb)
-{
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- unsigned level = state->cbufs[cb]->level;
- unsigned pitch, slice;
- unsigned color_info;
- unsigned format, swap, ntype;
- const struct util_format_description *desc;
- struct radeon_ws_bo *bo[3];
-
- rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
- rbuffer = &rtex->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- bo[2] = rbuffer->bo;
-
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
- ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- ntype = V_0280A0_NUMBER_SRGB;
-
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
- color_info = S_0280A0_FORMAT(format) |
- S_0280A0_COMP_SWAP(swap) |
- S_0280A0_BLEND_CLAMP(1) |
- S_0280A0_NUMBER_TYPE(ntype);
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_0280A0_SOURCE_FORMAT(1);
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028040_CB_COLOR0_BASE + cb * 4,
- state->cbufs[cb]->offset >> 8, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0280A0_CB_COLOR0_INFO + cb * 4,
- color_info, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028060_CB_COLOR0_SIZE + cb * 4,
- S_028060_PITCH_TILE_MAX(pitch) |
- S_028060_SLICE_TILE_MAX(slice),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028080_CB_COLOR0_VIEW + cb * 4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0280E0_CB_COLOR0_FRAG + cb * 4,
- 0x00000000, 0xFFFFFFFF, bo[1]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_0280C0_CB_COLOR0_TILE + cb * 4,
- 0x00000000, 0xFFFFFFFF, bo[2]);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028100_CB_COLOR0_MASK + cb * 4,
- 0x00000000, 0xFFFFFFFF, NULL);
-}
-
-static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
- const struct pipe_framebuffer_state *state)
-{
- struct r600_resource_texture *rtex;
- struct r600_resource *rbuffer;
- unsigned level;
- unsigned pitch, slice, format;
-
- if (state->zsbuf == NULL)
- return;
-
- rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
- rbuffer = &rtex->resource;
-
- level = state->zsbuf->level;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
- format = r600_translate_dbformat(state->zsbuf->texture->format);
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02800C_DB_DEPTH_BASE,
- state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028000_DB_DEPTH_SIZE,
- S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028010_DB_DEPTH_INFO,
- S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format),
- 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D34_DB_PREFETCH_LIMIT,
- (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL);
-}
-
-static void r600_set_framebuffer_state(struct pipe_context *ctx,
- const struct pipe_framebuffer_state *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
- u32 shader_mask, tl, br, shader_control, target_mask;
-
- if (rstate == NULL)
- return;
-
- /* unreference old buffer and reference new one */
- rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL);
- }
- for (int i = 0; i < state->nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]);
- }
- pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf);
- rctx->framebuffer = *state;
- rctx->pframebuffer = &rctx->framebuffer;
-
- /* build states */
- for (int i = 0; i < state->nr_cbufs; i++) {
- r600_cb(rctx, rstate, state, i);
- }
- if (state->zsbuf) {
- r600_db(rctx, rstate, state);
- }
-
- target_mask = 0x00000000;
- target_mask = 0xFFFFFFFF;
- shader_mask = 0;
- shader_control = 0;
- for (int i = 0; i < state->nr_cbufs; i++) {
- target_mask ^= 0xf << (i * 4);
- shader_mask |= 0xf << (i * 4);
- shader_control |= 1 << i;
- }
- tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1);
- br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height);
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028244_PA_SC_GENERIC_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
- 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0287A0_CB_SHADER_CONTROL,
- shader_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK,
- 0x00000000, target_mask, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02823C_CB_SHADER_MASK,
- shader_mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C04_PA_SC_AA_CONFIG,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C30_CB_CLRCMP_CONTROL,
- 0x01000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C34_CB_CLRCMP_SRC,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C38_CB_CLRCMP_DST,
- 0x000000FF, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C3C_CB_CLRCMP_MSK,
- 0xFFFFFFFF, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C48_PA_SC_AA_MASK,
- 0xFFFFFFFF, 0xFFFFFFFF, NULL);
-
- free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
- rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static void r600_set_index_buffer(struct pipe_context *ctx,
- const struct pipe_index_buffer *ib)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- if (ib) {
- pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
- memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer));
- } else {
- pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
- memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer));
- }
-
- /* TODO make this more like a state */
-}
-
-static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
- const struct pipe_vertex_buffer *buffers)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
- }
- memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
- for (int i = 0; i < count; i++) {
- rctx->vertex_buffer[i].buffer = NULL;
- if (r600_buffer_is_user_buffer(buffers[i].buffer))
- rctx->any_user_vbs = TRUE;
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
- }
- rctx->nvertex_buffer = count;
-}
-
-static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate;
- struct pipe_transfer *transfer;
- unsigned *nconst = NULL;
- u32 *ptr, offset;
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- rstate = rctx->vs_const;
- nconst = &rctx->vs_nconst;
- offset = R_030000_SQ_ALU_CONSTANT0_0 + 0x1000;
- break;
- case PIPE_SHADER_FRAGMENT:
- rstate = rctx->ps_const;
- nconst = &rctx->ps_nconst;
- offset = R_030000_SQ_ALU_CONSTANT0_0;
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
- if (buffer && buffer->width0 > 0) {
- *nconst = buffer->width0 / 16;
- ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer);
- if (ptr == NULL)
- return;
- for (int i = 0; i < *nconst; i++, offset += 0x10) {
- rstate[i].nregs = 0;
- r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x0, ptr[i * 4 + 0], 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x4, ptr[i * 4 + 1], 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x8, ptr[i * 4 + 2], 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0xC, ptr[i * 4 + 3], 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &rstate[i]);
- }
- pipe_buffer_unmap(ctx, buffer, transfer);
- }
-}
-
-static void *r600_create_shader_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
- int r;
-
- r = r600_pipe_shader_create2(ctx, shader, state->tokens);
- if (r) {
- return NULL;
- }
- return shader;
-}
-
-static void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- /* TODO delete old shader */
- rctx->ps_shader = (struct r600_pipe_shader *)state;
-}
-
-static void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- /* TODO delete old shader */
- rctx->vs_shader = (struct r600_pipe_shader *)state;
-}
-
-static void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
-
- if (rctx->ps_shader == shader) {
- rctx->ps_shader = NULL;
- }
- /* TODO proper delete */
- free(shader);
-}
-
-static void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
-
- if (rctx->vs_shader == shader) {
- rctx->vs_shader = NULL;
- }
- /* TODO proper delete */
- free(shader);
-}
-
-static void r600_init_state_functions2(struct r600_pipe_context *rctx)
-{
- rctx->context.create_blend_state = r600_create_blend_state;
- rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
- rctx->context.create_fs_state = r600_create_shader_state;
- rctx->context.create_rasterizer_state = r600_create_rs_state;
- rctx->context.create_sampler_state = r600_create_sampler_state;
- rctx->context.create_sampler_view = r600_create_sampler_view;
- rctx->context.create_vertex_elements_state = r600_create_vertex_elements;
- rctx->context.create_vs_state = r600_create_shader_state;
- rctx->context.bind_blend_state = r600_bind_blend_state;
- rctx->context.bind_depth_stencil_alpha_state = r600_bind_state;
- rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler;
- rctx->context.bind_fs_state = r600_bind_ps_shader;
- rctx->context.bind_rasterizer_state = r600_bind_rs_state;
- rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements;
- rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler;
- rctx->context.bind_vs_state = r600_bind_vs_shader;
- rctx->context.delete_blend_state = r600_delete_state;
- rctx->context.delete_depth_stencil_alpha_state = r600_delete_state;
- rctx->context.delete_fs_state = r600_delete_ps_shader;
- rctx->context.delete_rasterizer_state = r600_delete_rs_state;
- rctx->context.delete_sampler_state = r600_delete_state;
- rctx->context.delete_vertex_elements_state = r600_delete_vertex_element;
- rctx->context.delete_vs_state = r600_delete_vs_shader;
- rctx->context.set_blend_color = r600_set_blend_color;
- rctx->context.set_clip_state = r600_set_clip_state;
- rctx->context.set_constant_buffer = r600_set_constant_buffer;
- rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view;
- rctx->context.set_framebuffer_state = r600_set_framebuffer_state;
- rctx->context.set_polygon_stipple = r600_set_polygon_stipple;
- rctx->context.set_sample_mask = r600_set_sample_mask;
- rctx->context.set_scissor_state = r600_set_scissor_state;
- rctx->context.set_stencil_ref = r600_set_stencil_ref;
- rctx->context.set_vertex_buffers = r600_set_vertex_buffers;
- rctx->context.set_index_buffer = r600_set_index_buffer;
- rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view;
- rctx->context.set_viewport_state = r600_set_viewport_state;
- rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
-}
-
-static void r600_init_config2(struct r600_pipe_context *rctx)
-{
- int ps_prio;
- int vs_prio;
- int gs_prio;
- int es_prio;
- int num_ps_gprs;
- int num_vs_gprs;
- int num_gs_gprs;
- int num_es_gprs;
- int num_temp_gprs;
- int num_ps_threads;
- int num_vs_threads;
- int num_gs_threads;
- int num_es_threads;
- int num_ps_stack_entries;
- int num_vs_stack_entries;
- int num_gs_stack_entries;
- int num_es_stack_entries;
- enum radeon_family family;
- struct r600_pipe_state *rstate = &rctx->config;
- u32 tmp;
-
- family = r600_get_family(rctx->radeon);
- ps_prio = 0;
- vs_prio = 1;
- gs_prio = 2;
- es_prio = 3;
- switch (family) {
- case CHIP_R600:
- num_ps_gprs = 192;
- num_vs_gprs = 56;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 136;
- num_vs_threads = 48;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 128;
- num_vs_stack_entries = 128;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- case CHIP_RV630:
- case CHIP_RV635:
- num_ps_gprs = 84;
- num_vs_gprs = 36;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 144;
- num_vs_threads = 40;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 40;
- num_vs_stack_entries = 40;
- num_gs_stack_entries = 32;
- num_es_stack_entries = 16;
- break;
- case CHIP_RV610:
- case CHIP_RV620:
- case CHIP_RS780:
- case CHIP_RS880:
- default:
- num_ps_gprs = 84;
- num_vs_gprs = 36;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 136;
- num_vs_threads = 48;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 40;
- num_vs_stack_entries = 40;
- num_gs_stack_entries = 32;
- num_es_stack_entries = 16;
- break;
- case CHIP_RV670:
- num_ps_gprs = 144;
- num_vs_gprs = 40;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 136;
- num_vs_threads = 48;
- num_gs_threads = 4;
- num_es_threads = 4;
- num_ps_stack_entries = 40;
- num_vs_stack_entries = 40;
- num_gs_stack_entries = 32;
- num_es_stack_entries = 16;
- break;
- case CHIP_RV770:
- num_ps_gprs = 192;
- num_vs_gprs = 56;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 188;
- num_vs_threads = 60;
- num_gs_threads = 0;
- num_es_threads = 0;
- num_ps_stack_entries = 256;
- num_vs_stack_entries = 256;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- case CHIP_RV730:
- case CHIP_RV740:
- num_ps_gprs = 84;
- num_vs_gprs = 36;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 188;
- num_vs_threads = 60;
- num_gs_threads = 0;
- num_es_threads = 0;
- num_ps_stack_entries = 128;
- num_vs_stack_entries = 128;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- case CHIP_RV710:
- num_ps_gprs = 192;
- num_vs_gprs = 56;
- num_temp_gprs = 4;
- num_gs_gprs = 0;
- num_es_gprs = 0;
- num_ps_threads = 144;
- num_vs_threads = 48;
- num_gs_threads = 0;
- num_es_threads = 0;
- num_ps_stack_entries = 128;
- num_vs_stack_entries = 128;
- num_gs_stack_entries = 0;
- num_es_stack_entries = 0;
- break;
- }
-
- rstate->id = R600_PIPE_STATE_CONFIG;
-
- /* SQ_CONFIG */
- tmp = 0;
- switch (family) {
- case CHIP_RV610:
- case CHIP_RV620:
- case CHIP_RS780:
- case CHIP_RS880:
- case CHIP_RV710:
- break;
- default:
- tmp |= S_008C00_VC_ENABLE(1);
- break;
- }
- tmp |= S_008C00_DX9_CONSTS(1);
- tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1);
- tmp |= S_008C00_PS_PRIO(ps_prio);
- tmp |= S_008C00_VS_PRIO(vs_prio);
- tmp |= S_008C00_GS_PRIO(gs_prio);
- tmp |= S_008C00_ES_PRIO(es_prio);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
-
- /* SQ_GPR_RESOURCE_MGMT_1 */
- tmp = 0;
- tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
- tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
-
- /* SQ_GPR_RESOURCE_MGMT_2 */
- tmp = 0;
- tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
- tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
-
- /* SQ_THREAD_RESOURCE_MGMT */
- tmp = 0;
- tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
- tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
- tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
- tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL);
-
- /* SQ_STACK_RESOURCE_MGMT_1 */
- tmp = 0;
- tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
- tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
-
- /* SQ_STACK_RESOURCE_MGMT_2 */
- tmp = 0;
- tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
- tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL);
-
- if (family >= CHIP_RV770) {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00514000, 0xFFFFFFFF, NULL);
- } else {
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00004010, 0xFFFFFFFF, NULL);
- }
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL);
-
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
-static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- return (struct pipe_query*)r600_context_query_create(&rctx->ctx, query_type);
-}
-
-static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- r600_context_query_destroy(&rctx->ctx, (struct r600_query *)query);
-}
-
-static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_query *rquery = (struct r600_query *)query;
-
- rquery->result = 0;
- rquery->num_results = 0;
- r600_query_begin(&rctx->ctx, (struct r600_query *)query);
-}
-
-static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- r600_query_end(&rctx->ctx, (struct r600_query *)query);
-}
-
-static boolean r600_get_query_result(struct pipe_context *ctx,
- struct pipe_query *query,
- boolean wait, void *vresult)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_query *rquery = (struct r600_query *)query;
-
- if (rquery->num_results) {
- ctx->flush(ctx, 0, NULL);
- }
- return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult);
-}
-
-static void r600_init_query_functions2(struct r600_pipe_context *rctx)
-{
- rctx->context.create_query = r600_create_query;
- rctx->context.destroy_query = r600_destroy_query;
- rctx->context.begin_query = r600_begin_query;
- rctx->context.end_query = r600_end_query;
- rctx->context.get_query_result = r600_get_query_result;
-}
-
-static void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
-{
- struct pipe_depth_stencil_alpha_state dsa;
- struct r600_pipe_state *rstate;
- boolean quirk = false;
-
- if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
- rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
- quirk = true;
-
- memset(&dsa, 0, sizeof(dsa));
-
- if (quirk) {
- dsa.depth.enabled = 1;
- dsa.depth.func = PIPE_FUNC_LEQUAL;
- dsa.stencil[0].enabled = 1;
- dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
- dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP;
- dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR;
- dsa.stencil[0].writemask = 0xff;
- }
-
- rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_02880C_DB_SHADER_CONTROL,
- 0x0,
- S_02880C_DUAL_EXPORT_ENABLE(1), NULL);
- r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT,
- R_028D0C_DB_RENDER_CONTROL,
- S_028D0C_DEPTH_COPY_ENABLE(1) |
- S_028D0C_STENCIL_COPY_ENABLE(1) |
- S_028D0C_COPY_CENTROID(1),
- S_028D0C_DEPTH_COPY_ENABLE(1) |
- S_028D0C_STENCIL_COPY_ENABLE(1) |
- S_028D0C_COPY_CENTROID(1), NULL);
- return rstate;
-}
-
-static struct pipe_context *r600_create_context2(struct pipe_screen *screen, void *priv)
-{
- struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context);
- struct r600_screen* rscreen = (struct r600_screen *)screen;
-
- if (rctx == NULL)
- return NULL;
- rctx->context.winsys = rscreen->screen.winsys;
- rctx->context.screen = screen;
- rctx->context.priv = priv;
- rctx->context.destroy = r600_destroy_context;
- rctx->context.flush = r600_flush2;
-
- /* Easy accessing of screen/winsys. */
- rctx->screen = rscreen;
- rctx->radeon = rscreen->radeon;
- rctx->family = r600_get_family(rctx->radeon);
-
- r600_init_blit_functions2(rctx);
- r600_init_query_functions2(rctx);
- r600_init_context_resource_functions2(rctx);
-
- switch (r600_get_family(rctx->radeon)) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- rctx->context.draw_vbo = r600_draw_vbo2;
- r600_init_state_functions2(rctx);
- if (r600_context_init(&rctx->ctx, rctx->radeon)) {
- r600_destroy_context(&rctx->context);
- return NULL;
- }
- r600_init_config2(rctx);
- break;
- case CHIP_CEDAR:
- case CHIP_REDWOOD:
- case CHIP_JUNIPER:
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- rctx->context.draw_vbo = evergreen_draw;
- evergreen_init_state_functions2(rctx);
- if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
- r600_destroy_context(&rctx->context);
- return NULL;
- }
- evergreen_init_config2(rctx);
- break;
- default:
- R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon));
- r600_destroy_context(&rctx->context);
- return NULL;
- }
-
- rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
- if (rctx->upload_ib == NULL) {
- r600_destroy_context(&rctx->context);
- return NULL;
- }
-
- rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (rctx->upload_vb == NULL) {
- r600_destroy_context(&rctx->context);
- return NULL;
- }
-
- rctx->blitter = util_blitter_create(&rctx->context);
- if (rctx->blitter == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- LIST_INITHEAD(&rctx->query_list);
- rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx);
-
- r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth2;
-
- return &rctx->context;
-}
-
-static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
-{
- switch(shader)
- {
- case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_VERTEX:
- break;
- case PIPE_SHADER_GEOMETRY:
- /* TODO: support and enable geometry programs */
- return 0;
- default:
- /* TODO: support tessellation on Evergreen */
- return 0;
- }
-
- /* TODO: all these should be fixed, since r600 surely supports much more! */
- switch (param) {
- case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
- return 16384;
- case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
- return 8; /* FIXME */
- case PIPE_SHADER_CAP_MAX_INPUTS:
- if(shader == PIPE_SHADER_FRAGMENT)
- return 10;
- else
- return 16;
- case PIPE_SHADER_CAP_MAX_TEMPS:
- return 256; //max native temporaries
- case PIPE_SHADER_CAP_MAX_ADDRS:
- return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */
- case PIPE_SHADER_CAP_MAX_CONSTS:
- return 256; //max native parameters
- case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return 1;
- case PIPE_SHADER_CAP_MAX_PREDS:
- return 0; /* FIXME */
- case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
- return 1;
- default:
- return 0;
- }
-}
-
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ);
-struct pipe_resource *r600_user_buffer_create2(struct pipe_screen *screen,
- void *ptr, unsigned bytes,
- unsigned bind)
-{
- struct pipe_resource *resource;
- struct r600_resource *rresource;
- struct pipe_resource desc;
- struct radeon *radeon = (struct radeon *)screen->winsys;
- void *rptr;
-
- desc.screen = screen;
- desc.target = PIPE_BUFFER;
- desc.format = PIPE_FORMAT_R8_UNORM;
- desc.usage = PIPE_USAGE_IMMUTABLE;
- desc.bind = bind;
- desc.width0 = bytes;
- desc.height0 = 1;
- desc.depth0 = 1;
- desc.flags = 0;
- resource = r600_buffer_create(screen, &desc);
- if (resource == NULL) {
- return NULL;
- }
-
- rresource = (struct r600_resource *)resource;
- rptr = radeon_ws_bo_map(radeon, rresource->bo, 0, NULL);
- memcpy(rptr, ptr, bytes);
- radeon_ws_bo_unmap(radeon, rresource->bo);
-
- return resource;
-}
-
-void r600_init_screen_texture_functions(struct pipe_screen *screen);
-struct pipe_screen *r600_screen_create2(struct radeon *radeon)
-{
- struct r600_screen *rscreen;
-
- rscreen = CALLOC_STRUCT(r600_screen);
- if (rscreen == NULL) {
- return NULL;
- }
-
- rscreen->radeon = radeon;
- rscreen->screen.winsys = (struct pipe_winsys*)radeon;
- rscreen->screen.destroy = r600_destroy_screen;
- rscreen->screen.get_name = r600_get_name;
- rscreen->screen.get_vendor = r600_get_vendor;
- rscreen->screen.get_param = r600_get_param;
- rscreen->screen.get_shader_param = r600_get_shader_param;
- rscreen->screen.get_paramf = r600_get_paramf;
- rscreen->screen.is_format_supported = r600_is_format_supported;
- rscreen->screen.context_create = r600_create_context2;
- r600_init_screen_texture_functions(&rscreen->screen);
- r600_init_screen_resource_functions(&rscreen->screen);
-// rscreen->screen.user_buffer_create = r600_user_buffer_create2;
-
- return &rscreen->screen;
-}
-
-int r600_upload_index_buffer2(struct r600_pipe_context *rctx, struct r600_drawl *draw)
-{
- struct pipe_resource *upload_buffer = NULL;
- unsigned index_offset = draw->index_buffer_offset;
- int ret = 0;
-
- if (r600_buffer_is_user_buffer(draw->index_buffer)) {
- ret = u_upload_buffer(rctx->upload_ib,
- index_offset,
- draw->count * draw->index_size,
- draw->index_buffer,
- &index_offset,
- &upload_buffer);
- if (ret) {
- goto done;
- }
- draw->index_buffer_offset = index_offset;
- draw->index_buffer = upload_buffer;
- }
-
-done:
- return ret;
-}
-
-int r600_upload_user_buffers2(struct r600_pipe_context *rctx)
-{
- enum pipe_error ret = PIPE_OK;
- int i, nr;
-
- nr = rctx->vertex_elements->count;
-
- for (i = 0; i < nr; i++) {
- struct pipe_vertex_buffer *vb =
- &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
-
- if (r600_buffer_is_user_buffer(vb->buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*vb->buffer_offset * 4;*/
- unsigned size = vb->buffer->width0;
- unsigned upload_offset;
- ret = u_upload_buffer(rctx->upload_vb,
- offset, size,
- vb->buffer,
- &upload_offset, &upload_buffer);
- if (ret)
- return ret;
-
- pipe_resource_reference(&vb->buffer, NULL);
- vb->buffer = upload_buffer;
- vb->buffer_offset = upload_offset;
- }
- }
- return ret;
-}
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 81ce1bb190..1c1978f8ab 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -25,6 +25,7 @@
#include "util/u_format.h"
#include "r600d.h"
+#include "r600_formats.h"
static INLINE uint32_t r600_translate_blend_function(int blend_func)
{
@@ -302,6 +303,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_Z16_UNORM:
return V_0280A0_SWAP_STD;
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ return V_0280A0_SWAP_STD;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return V_0280A0_SWAP_STD;
+
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
@@ -339,16 +347,19 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
return V_0280A0_SWAP_STD_REV;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return V_0280A0_SWAP_STD;
+
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return V_0280A0_COLOR_16_16_16_16;
+ // return FMT_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return V_0280A0_COLOR_16_16_16_16_FLOAT;
+ // return FMT_16_16_16_16_FLOAT;
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return V_0280A0_COLOR_32_32_32_32_FLOAT;
+ // return FMT_32_32_32_32_FLOAT;
return 0;
default:
R600_ERR("unsupported colorswap format %d\n", format);
@@ -383,6 +394,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_Z16_UNORM:
return V_0280A0_COLOR_16;
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ return V_0280A0_COLOR_8_8;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return V_0280A0_COLOR_16;
+
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
@@ -420,6 +438,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_16_16_FLOAT;
case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16_UNORM:
return V_0280A0_COLOR_16_16;
@@ -458,29 +477,6 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE void r600_translate_vertex_num_format(enum pipe_format format, uint32_t *num_format_p,
- uint32_t *format_comp_p)
-{
- uint32_t num_format = 0, format_comp = 0;
- switch (format) {
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- case PIPE_FORMAT_R16G16B16_SSCALED:
- case PIPE_FORMAT_R16G16_SSCALED:
- case PIPE_FORMAT_R32G32_SSCALED:
- format_comp = 1;
- case PIPE_FORMAT_R16G16B16A16_USCALED:
- case PIPE_FORMAT_R16G16B16_USCALED:
- case PIPE_FORMAT_R16G16_USCALED:
- case PIPE_FORMAT_R32G32_USCALED:
- num_format = V_038008_SQ_NUM_FORMAT_SCALED;
- break;
- default:
- break;
- }
- *num_format_p = num_format;
- *format_comp_p = format_comp;
-}
-
static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
{
return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
@@ -502,4 +498,139 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
return r600_translate_colorformat(format) != ~0;
}
+static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
+{
+ uint32_t result = 0;
+ const struct util_format_description *desc;
+ unsigned i;
+
+ desc = util_format_description(format);
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ goto out_unknown;
+ }
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ switch (desc->channel[i].type) {
+ /* Half-floats, floats, doubles */
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[i].size) {
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_16_FLOAT;
+ break;
+ case 2:
+ result = FMT_16_16_FLOAT;
+ break;
+ case 3:
+ result = FMT_16_16_16_FLOAT;
+ break;
+ case 4:
+ result = FMT_16_16_16_16_FLOAT;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_32_FLOAT;
+ break;
+ case 2:
+ result = FMT_32_32_FLOAT;
+ break;
+ case 3:
+ result = FMT_32_32_32_FLOAT;
+ break;
+ case 4:
+ result = FMT_32_32_32_32_FLOAT;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ /* Unsigned ints */
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* Signed ints */
+ case UTIL_FORMAT_TYPE_SIGNED:
+ switch (desc->channel[i].size) {
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_8;
+ break;
+ case 2:
+ result = FMT_8_8;
+ break;
+ case 3:
+ // result = FMT_8_8_8; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ result = FMT_8_8_8_8;
+ break;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_16;
+ break;
+ case 2:
+ result = FMT_16_16;
+ break;
+ case 3:
+ // result = FMT_16_16_16; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ result = FMT_16_16_16_16;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_32;
+ break;
+ case 2:
+ result = FMT_32_32;
+ break;
+ case 3:
+ result = FMT_32_32_32;
+ break;
+ case 4:
+ result = FMT_32_32_32_32;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+
+ result = S_038008_DATA_FORMAT(result);
+
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ result |= S_038008_FORMAT_COMP_ALL(1);
+ }
+ if (desc->channel[i].normalized) {
+ result |= S_038008_NUM_FORMAT_ALL(0);
+ } else {
+ result |= S_038008_NUM_FORMAT_ALL(2);
+ }
+ return result;
+out_unknown:
+ R600_ERR("unsupported vertex format %s\n", util_format_name(format));
+ return ~0;
+}
+
#endif
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index c24aaeefa7..95906a74eb 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -31,11 +31,11 @@
#include <util/u_inlines.h>
#include <util/u_memory.h>
#include "state_tracker/drm_driver.h"
-#include "r600_screen.h"
-#include "r600_context.h"
+#include "r600_pipe.h"
#include "r600_resource.h"
#include "r600_state_inlines.h"
#include "r600d.h"
+#include "r600_formats.h"
extern struct u_resource_vtbl r600_texture_vtbl;
@@ -54,11 +54,30 @@ static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_t
transfer->box.width, transfer->box.height);
}
-static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex,
+
+/* Copy from a detiled texture to a tiled one.
+ *
+ * Copies the contents of rtransfer->linear_texture (face 0, level 0,
+ * starting at 0,0,0) into transfer->resource at subresource transfer->sr
+ * and position transfer->box.x/y/z, covering box.width x box.height, by
+ * calling ctx->resource_copy_region, and then calls ctx->flush.
+ */
+static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+ struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; /* NOTE(review): assumes struct r600_transfer starts with its pipe_transfer member - confirm */
+ struct pipe_resource *texture = transfer->resource;
+ struct pipe_subresource subsrc;
+
+ subsrc.face = 0; /* the linear texture is always read from face 0 ... */
+ subsrc.level = 0; /* ... and mip level 0 */
+ ctx->resource_copy_region(ctx, texture, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ rtransfer->linear_texture, subsrc,
+ 0, 0, 0,
+ transfer->box.width, transfer->box.height);
+
+ ctx->flush(ctx, 0, NULL); /* NOTE(review): presumably submits the copy before the caller releases linear_texture - confirm */
+}
+
+static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
unsigned level, unsigned zslice,
unsigned face)
{
- unsigned long offset = rtex->offset[level];
+ unsigned offset = rtex->offset[level];
switch (rtex->resource.base.b.target) {
case PIPE_TEXTURE_3D:
@@ -73,22 +92,63 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex,
}
}
-static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_class chipc)
+static unsigned r600_texture_get_stride(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level)
+{ /* Return the stride, in bytes, used for mip level `level` of rtex.
+   * A non-zero rtex->pitch_override is returned unchanged for every
+   * level; otherwise the stride is derived from the minified width,
+   * padded to 64 pixels and then to a chip-class dependent byte
+   * alignment. screen->winsys is only used to query the chip class. */
+ struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+ enum chip_class chipc = r600_get_family_class(radeon);
+ unsigned width, stride;
+
+ if (rtex->pitch_override) /* externally imposed pitch wins, regardless of level */
+ return rtex->pitch_override;
+
+ width = u_minify(ptex->width0, level);
+
+ stride = util_format_get_stride(ptex->format, align(width, 64)); /* width is padded to a multiple of 64 before conversion */
+ if (chipc == EVERGREEN) /* 512-byte alignment on EVERGREEN, 256-byte on every other class */
+ stride = align(stride, 512);
+ else
+ stride = align(stride, 256);
+ return stride;
+}
+
+static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level)
+{ /* Return the number of block rows used for mip level `level` of rtex:
+   * the minified height is passed through util_next_power_of_two and the
+   * result is converted with util_format_get_nblocksy for the texture's
+   * format. The `screen` parameter is not read in this function. */
+ struct pipe_resource *ptex = &rtex->resource.base.b;
+ unsigned height;
+
+ height = u_minify(ptex->height0, level);
+ height = util_next_power_of_two(height); /* same power-of-two padding the removed miptree code applied to h */
+ return util_format_get_nblocksy(ptex->format, height);
+}
+
+/* Get a width in pixels from a stride in bytes.
+ *
+ * Computes (pitch_in_bytes / block size of `format`) * block width of
+ * `format`. The division is an integer division, so a pitch that is not a
+ * multiple of the block size is truncated to whole blocks.
+ */
+static unsigned pitch_to_width(enum pipe_format format,
+ unsigned pitch_in_bytes)
+{
+ return (pitch_in_bytes / util_format_get_blocksize(format)) *
+ util_format_get_blockwidth(format);
+}
+
+static void r600_setup_miptree(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex)
{
struct pipe_resource *ptex = &rtex->resource.base.b;
- unsigned long w, h, pitch, size, layer_size, i, offset;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+ enum chip_class chipc = r600_get_family_class(radeon);
+ unsigned pitch, size, layer_size, i, offset;
+ unsigned nblocksy;
- rtex->bpt = util_format_get_blocksize(ptex->format);
for (i = 0, offset = 0; i <= ptex->last_level; i++) {
- w = u_minify(ptex->width0, i);
- h = u_minify(ptex->height0, i);
- h = util_next_power_of_two(h);
- pitch = util_format_get_stride(ptex->format, align(w, 64));
- if (chipc == EVERGREEN)
- pitch = align(pitch, 512);
- else
- pitch = align(pitch, 256);
- layer_size = pitch * h;
+ pitch = r600_texture_get_stride(screen, rtex, i);
+ nblocksy = r600_texture_get_nblocksy(screen, rtex, i);
+
+ layer_size = pitch * nblocksy;
+
if (ptex->target == PIPE_TEXTURE_CUBE) {
if (chipc >= R700)
size = layer_size * 8;
@@ -99,41 +159,63 @@ static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_cla
size = layer_size * u_minify(ptex->depth0, i);
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
- rtex->pitch[i] = pitch;
- rtex->width[i] = w;
- rtex->height[i] = h;
+ rtex->pitch_in_bytes[i] = pitch;
+ rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch);
offset += size;
}
rtex->size = offset;
}
-struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
- const struct pipe_resource *templ)
+static struct r600_resource_texture *
+r600_texture_create_object(struct pipe_screen *screen,
+ const struct pipe_resource *base,
+ unsigned array_mode,
+ unsigned pitch_in_bytes_override,
+ unsigned max_buffer_size,
+ struct r600_bo *bo)
{
struct r600_resource_texture *rtex;
struct r600_resource *resource;
struct radeon *radeon = (struct radeon *)screen->winsys;
rtex = CALLOC_STRUCT(r600_resource_texture);
- if (!rtex) {
+ if (rtex == NULL)
return NULL;
- }
+
resource = &rtex->resource;
- resource->base.b = *templ;
+ resource->base.b = *base;
resource->base.vtbl = &r600_texture_vtbl;
pipe_reference_init(&resource->base.b.reference, 1);
resource->base.b.screen = screen;
- r600_setup_miptree(rtex, radeon_get_family_class(radeon));
-
- /* FIXME alignment 4096 enought ? too much ? */
+ resource->bo = bo;
resource->domain = r600_domain_from_usage(resource->base.b.bind);
+ rtex->pitch_override = pitch_in_bytes_override;
+ rtex->array_mode = array_mode;
+
+ if (array_mode)
+ rtex->tiled = 1;
+ r600_setup_miptree(screen, rtex);
+
resource->size = rtex->size;
- resource->bo = radeon_ws_bo(radeon, rtex->size, 4096, 0);
- if (resource->bo == NULL) {
- FREE(rtex);
- return NULL;
+
+ if (!resource->bo) {
+ resource->bo = r600_bo(radeon, rtex->size, 4096, 0);
+ if (!resource->bo) {
+ FREE(rtex);
+ return NULL;
+ }
}
- return &resource->base.b;
+ return rtex;
+}
+
+struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{ /* Create a texture described by templ by forwarding to
+   * r600_texture_create_object with array_mode 0, no pitch override and
+   * no pre-existing buffer object. The result of that call is returned
+   * cast to struct pipe_resource *, so a NULL result is passed through. */
+ unsigned array_mode = 0;
+
+ return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
+ 0, 0, NULL); /* pitch_in_bytes_override = 0, max_buffer_size = 0, bo = NULL */
+
+}
static void r600_texture_destroy(struct pipe_screen *screen,
@@ -147,7 +229,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
if (resource->bo) {
- radeon_ws_bo_reference(radeon, &resource->bo, NULL);
+ r600_bo_reference(radeon, &resource->bo, NULL);
}
FREE(rtex);
}
@@ -159,7 +241,7 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen,
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
- unsigned long offset;
+ unsigned offset;
if (surface == NULL)
return NULL;
@@ -184,46 +266,29 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface)
FREE(surface);
}
+
struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
struct winsys_handle *whandle)
{
struct radeon *rw = (struct radeon*)screen->winsys;
- struct r600_resource_texture *rtex;
- struct r600_resource *resource;
- struct radeon_ws_bo *bo = NULL;
+ struct r600_bo *bo = NULL;
+ unsigned array_mode = 0;
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
templ->depth0 != 1 || templ->last_level != 0)
return NULL;
- rtex = CALLOC_STRUCT(r600_resource_texture);
- if (rtex == NULL)
- return NULL;
-
- bo = radeon_ws_bo_handle(rw, whandle->handle);
+ bo = r600_bo_handle(rw, whandle->handle, &array_mode);
if (bo == NULL) {
- FREE(rtex);
return NULL;
}
- resource = &rtex->resource;
- resource->base.b = *templ;
- resource->base.vtbl = &r600_texture_vtbl;
- pipe_reference_init(&resource->base.b.reference, 1);
- resource->base.b.screen = screen;
- resource->bo = bo;
- rtex->depth = 0;
- rtex->pitch_override = whandle->stride;
- rtex->bpt = util_format_get_blocksize(templ->format);
- rtex->pitch[0] = whandle->stride;
- rtex->width[0] = templ->width0;
- rtex->height[0] = templ->height0;
- rtex->offset[0] = 0;
- rtex->size = align(rtex->pitch[0] * templ->height0, 64);
-
- return &resource->base.b;
+ return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
+ whandle->stride,
+ 0,
+ bo);
}
static unsigned int r600_texture_is_referenced(struct pipe_context *context,
@@ -249,14 +314,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
resource.format = texture->format;
resource.width0 = texture->width0;
resource.height0 = texture->height0;
- resource.depth0 = 0;
+ resource.depth0 = 1;
resource.last_level = 0;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_DYNAMIC;
resource.bind = 0;
resource.flags = 0;
- resource.bind |= PIPE_BIND_RENDER_TARGET;
+ resource.bind |= PIPE_BIND_DEPTH_STENCIL;
rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource);
if (rtex->flushed_depth_texture == NULL) {
@@ -287,8 +352,6 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
trans->transfer.sr = sr;
trans->transfer.usage = usage;
trans->transfer.box = *box;
- trans->transfer.stride = rtex->pitch[sr.level];
- trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
if (rtex->depth) {
r = r600_texture_depth_flush(ctx, texture);
if (r < 0) {
@@ -302,7 +365,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
resource.format = texture->format;
resource.width0 = box->width;
resource.height0 = box->height;
- resource.depth0 = 0;
+ resource.depth0 = 1;
resource.last_level = 0;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_DYNAMIC;
@@ -326,6 +389,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
FREE(trans);
return NULL;
}
+
+ trans->transfer.stride =
+ ((struct r600_resource_texture *)trans->linear_texture)->pitch_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
/* We cannot map a tiled texture directly because the data is
* in a different order, therefore we do detiling using a blit. */
@@ -333,7 +399,10 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
/* Always referenced in the blit. */
ctx->flush(ctx, 0, NULL);
}
+ return &trans->transfer;
}
+ trans->transfer.stride = rtex->pitch_in_bytes[sr.level];
+ trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
return &trans->transfer;
}
@@ -344,12 +413,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource;
if (rtransfer->linear_texture) {
+ if (transfer->usage & PIPE_TRANSFER_WRITE) {
+ r600_copy_into_tiled_texture(ctx, rtransfer);
+ }
pipe_resource_reference(&rtransfer->linear_texture, NULL);
}
if (rtex->flushed_depth_texture) {
- if (transfer->usage & PIPE_TRANSFER_WRITE) {
- // TODO
- }
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
}
pipe_resource_reference(&transfer->resource, NULL);
@@ -360,10 +429,10 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct radeon_ws_bo *bo;
+ struct r600_bo *bo;
enum pipe_format format = transfer->resource->format;
struct radeon *radeon = (struct radeon *)ctx->screen->winsys;
- unsigned long offset = 0;
+ unsigned offset = 0;
char *map;
if (rtransfer->linear_texture) {
@@ -380,7 +449,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}
- map = radeon_ws_bo_map(radeon, bo, 0, ctx);
+ map = r600_bo_map(radeon, bo, 0, ctx);
if (!map) {
return NULL;
}
@@ -393,7 +462,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct radeon *radeon = (struct radeon *)ctx->screen->winsys;
- struct radeon_ws_bo *bo;
+ struct r600_bo *bo;
if (rtransfer->linear_texture) {
bo = ((struct r600_resource *)rtransfer->linear_texture)->bo;
@@ -406,7 +475,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
bo = ((struct r600_resource *)transfer->resource)->bo;
}
}
- radeon_ws_bo_unmap(radeon, bo);
+ r600_bo_unmap(radeon, bo);
}
struct u_resource_vtbl r600_texture_vtbl =
@@ -501,15 +570,23 @@ uint32_t r600_translate_texformat(enum pipe_format format,
case UTIL_FORMAT_COLORSPACE_ZS:
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
- result = V_0280A0_COLOR_16;
+ result = FMT_16;
goto out_word4;
+ case PIPE_FORMAT_X24S8_USCALED:
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- result = V_0280A0_COLOR_8_24;
+ result = FMT_8_24;
goto out_word4;
+ case PIPE_FORMAT_S8X24_USCALED:
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- result = V_0280A0_COLOR_24_8;
+ result = FMT_24_8;
+ goto out_word4;
+ case PIPE_FORMAT_S8_USCALED:
+ result = V_0280A0_COLOR_8;
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
goto out_word4;
default:
goto out_unknown;
@@ -563,7 +640,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
if (desc->channel[0].size == 5 &&
desc->channel[1].size == 6 &&
desc->channel[2].size == 5) {
- result = V_0280A0_COLOR_5_6_5;
+ result = FMT_5_6_5;
goto out_word4;
}
goto out_unknown;
@@ -572,14 +649,14 @@ uint32_t r600_translate_texformat(enum pipe_format format,
desc->channel[1].size == 5 &&
desc->channel[2].size == 5 &&
desc->channel[3].size == 1) {
- result = V_0280A0_COLOR_1_5_5_5;
+ result = FMT_1_5_5_5;
goto out_word4;
}
if (desc->channel[0].size == 10 &&
desc->channel[1].size == 10 &&
desc->channel[2].size == 10 &&
desc->channel[3].size == 2) {
- result = V_0280A0_COLOR_10_10_10_2;
+ result = FMT_10_10_10_2;
goto out_word4;
}
goto out_unknown;
@@ -587,79 +664,89 @@ uint32_t r600_translate_texformat(enum pipe_format format,
goto out_unknown;
}
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ if (i == 4)
+ goto out_unknown;
+
/* uniform formats */
- switch (desc->channel[0].type) {
+ switch (desc->channel[i].type) {
case UTIL_FORMAT_TYPE_UNSIGNED:
case UTIL_FORMAT_TYPE_SIGNED:
- if (!desc->channel[0].normalized &&
+ if (!desc->channel[i].normalized &&
desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
goto out_unknown;
}
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 4:
switch (desc->nr_channels) {
case 2:
- result = V_0280A0_COLOR_4_4;
+ result = FMT_4_4;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_4_4_4_4;
+ result = FMT_4_4_4_4;
goto out_word4;
}
goto out_unknown;
case 8:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_8;
+ result = FMT_8;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_8_8;
+ result = FMT_8_8;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_8_8_8_8;
+ result = FMT_8_8_8_8;
goto out_word4;
}
goto out_unknown;
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_16;
+ result = FMT_16;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_16_16;
+ result = FMT_16_16;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_16_16_16_16;
+ result = FMT_16_16_16_16;
goto out_word4;
}
}
goto out_unknown;
case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_16_FLOAT;
+ result = FMT_16_FLOAT;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_16_16_FLOAT;
+ result = FMT_16_16_FLOAT;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_16_16_16_16_FLOAT;
+ result = FMT_16_16_16_16_FLOAT;
goto out_word4;
}
goto out_unknown;
case 32:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_32_FLOAT;
+ result = FMT_32_FLOAT;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_32_32_FLOAT;
+ result = FMT_32_32_FLOAT;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_32_32_32_32_FLOAT;
+ result = FMT_32_32_32_32_FLOAT;
goto out_word4;
}
}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 47ab1eb965..a3cb5b8600 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -667,6 +667,9 @@
#define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0)
#define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1)
#define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE
+#define S_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) & 0x1) << 1)
+#define G_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) >> 1) & 0x1)
+#define C_02880C_STENCIL_REF_EXPORT_ENABLE 0xFFFFFFFD
#define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4)
#define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3)
#define C_02880C_Z_ORDER 0xFFFFFCFF
@@ -901,6 +904,10 @@
#define S_038000_TILE_MODE(x) (((x) & 0xF) << 3)
#define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF)
#define C_038000_TILE_MODE 0xFFFFFF87
+#define V_038000_ARRAY_LINEAR_GENERAL 0x00000000
+#define V_038000_ARRAY_LINEAR_ALIGNED 0x00000001
+#define V_038000_ARRAY_1D_TILED_THIN1 0x00000002
+#define V_038000_ARRAY_2D_TILED_THIN1 0x00000004
#define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7)
#define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1)
#define C_038000_TILE_TYPE 0xFFFFFF7F
@@ -1025,40 +1032,7 @@
#define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20)
#define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F)
#define C_038008_DATA_FORMAT 0xFC0FFFFF
-#define V_038008_COLOR_INVALID 0x00000000
-#define V_038008_COLOR_8 0x00000001
-#define V_038008_COLOR_4_4 0x00000002
-#define V_038008_COLOR_3_3_2 0x00000003
-#define V_038008_COLOR_16 0x00000005
-#define V_038008_COLOR_16_FLOAT 0x00000006
-#define V_038008_COLOR_8_8 0x00000007
-#define V_038008_COLOR_5_6_5 0x00000008
-#define V_038008_COLOR_6_5_5 0x00000009
-#define V_038008_COLOR_1_5_5_5 0x0000000A
-#define V_038008_COLOR_4_4_4_4 0x0000000B
-#define V_038008_COLOR_5_5_5_1 0x0000000C
-#define V_038008_COLOR_32 0x0000000D
-#define V_038008_COLOR_32_FLOAT 0x0000000E
-#define V_038008_COLOR_16_16 0x0000000F
-#define V_038008_COLOR_16_16_FLOAT 0x00000010
-#define V_038008_COLOR_8_24 0x00000011
-#define V_038008_COLOR_8_24_FLOAT 0x00000012
-#define V_038008_COLOR_24_8 0x00000013
-#define V_038008_COLOR_24_8_FLOAT 0x00000014
-#define V_038008_COLOR_10_11_11 0x00000015
-#define V_038008_COLOR_10_11_11_FLOAT 0x00000016
-#define V_038008_COLOR_11_11_10 0x00000017
-#define V_038008_COLOR_11_11_10_FLOAT 0x00000018
-#define V_038008_COLOR_2_10_10_10 0x00000019
-#define V_038008_COLOR_8_8_8_8 0x0000001A
-#define V_038008_COLOR_10_10_10_2 0x0000001B
-#define V_038008_COLOR_X24_8_32_FLOAT 0x0000001C
-#define V_038008_COLOR_32_32 0x0000001D
-#define V_038008_COLOR_32_32_FLOAT 0x0000001E
-#define V_038008_COLOR_16_16_16_16 0x0000001F
-#define V_038008_COLOR_16_16_16_16_FLOAT 0x00000020
-#define V_038008_COLOR_32_32_32_32 0x00000022
-#define V_038008_COLOR_32_32_32_32_FLOAT 0x00000023
+
#define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26)
#define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3)
#define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF
@@ -3484,6 +3458,16 @@
#define R_038014_RESOURCE0_WORD5 0x038014
#define R_038018_RESOURCE0_WORD6 0x038018
+#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140
+#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180
+#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940
+#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980
+
+#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0
+#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4
+
+#define R_03E200_SQ_LOOP_CONST_0 0x3E200
+
#define SQ_TEX_INST_LD 0x03
#define SQ_TEX_INST_GET_GRADIENTS_H 0x7
#define SQ_TEX_INST_GET_GRADIENTS_V 0x8
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 9c731f2dbb..892dee86ba 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -20,12 +20,11 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "radeon.h"
-#include "r600_context.h"
-#include "r600_asm.h"
+#include <stdio.h>
#include "util/u_memory.h"
+#include "r600_pipe.h"
+#include "r600_asm.h"
#include "r700_sq.h"
-#include <stdio.h>
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h
deleted file mode 100644
index a7e7982c19..0000000000
--- a/src/gallium/drivers/r600/radeon.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-#ifndef RADEON_H
-#define RADEON_H
-
-#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4)
-
-#include <stdint.h>
-
-#include <pipe/p_compiler.h>
-
-typedef uint64_t u64;
-typedef uint32_t u32;
-typedef uint16_t u16;
-typedef uint8_t u8;
-
-struct radeon;
-
-enum radeon_family {
- CHIP_UNKNOWN,
- CHIP_R100,
- CHIP_RV100,
- CHIP_RS100,
- CHIP_RV200,
- CHIP_RS200,
- CHIP_R200,
- CHIP_RV250,
- CHIP_RS300,
- CHIP_RV280,
- CHIP_R300,
- CHIP_R350,
- CHIP_RV350,
- CHIP_RV380,
- CHIP_R420,
- CHIP_R423,
- CHIP_RV410,
- CHIP_RS400,
- CHIP_RS480,
- CHIP_RS600,
- CHIP_RS690,
- CHIP_RS740,
- CHIP_RV515,
- CHIP_R520,
- CHIP_RV530,
- CHIP_RV560,
- CHIP_RV570,
- CHIP_R580,
- CHIP_R600,
- CHIP_RV610,
- CHIP_RV630,
- CHIP_RV670,
- CHIP_RV620,
- CHIP_RV635,
- CHIP_RS780,
- CHIP_RS880,
- CHIP_RV770,
- CHIP_RV730,
- CHIP_RV710,
- CHIP_RV740,
- CHIP_CEDAR,
- CHIP_REDWOOD,
- CHIP_JUNIPER,
- CHIP_CYPRESS,
- CHIP_HEMLOCK,
- CHIP_LAST,
-};
-
-enum chip_class {
- R600,
- R700,
- EVERGREEN,
-};
-
-enum {
- R600_SHADER_PS = 1,
- R600_SHADER_VS,
- R600_SHADER_GS,
- R600_SHADER_FS,
- R600_SHADER_MAX = R600_SHADER_FS,
-};
-
-enum radeon_family radeon_get_family(struct radeon *rw);
-enum chip_class radeon_get_family_class(struct radeon *radeon);
-void radeon_set_mem_constant(struct radeon *radeon, boolean state);
-
-/* lowlevel WS bo */
-struct radeon_ws_bo;
-struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon,
- unsigned size, unsigned alignment, unsigned usage);
-struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon,
- unsigned handle);
-void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx);
-void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo);
-void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst,
- struct radeon_ws_bo *src);
-
-struct radeon_stype_info;
-
-/* currently limited to max buffers in a cb flush */
-#define RADEON_STATE_MAX_BO 8
-/*
- * states functions
- */
-struct radeon_state {
- struct radeon *radeon;
- unsigned refcount;
- struct radeon_stype_info *stype;
- unsigned state_id;
- unsigned id;
- unsigned shader_index;
- unsigned nstates;
- u32 states[64];
- unsigned npm4;
- unsigned cpm4;
- u32 pm4_crc;
- u32 pm4[128];
- unsigned nbo;
- struct radeon_ws_bo *bo[RADEON_STATE_MAX_BO];
- unsigned nreloc;
- unsigned reloc_pm4_id[8];
- unsigned reloc_bo_id[8];
- u32 placement[8];
- unsigned bo_dirty[4];
-};
-
-int radeon_state_init(struct radeon_state *rstate, struct radeon *radeon, u32 type, u32 id, u32 shader_class);
-void radeon_state_fini(struct radeon_state *state);
-int radeon_state_pm4(struct radeon_state *state);
-int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type);
-
-/*
- * draw functions
- */
-struct radeon_draw {
- struct radeon *radeon;
- struct radeon_state **state;
-};
-
-int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon);
-void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state);
-void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state);
-
-/*
- * radeon context functions
- */
-#pragma pack(1)
-struct radeon_cs_reloc {
- uint32_t handle;
- uint32_t read_domain;
- uint32_t write_domain;
- uint32_t flags;
-};
-#pragma pack()
-
-struct radeon_ctx;
-
-struct radeon_ctx *radeon_ctx_init(struct radeon *radeon);
-void radeon_ctx_fini(struct radeon_ctx *ctx);
-void radeon_ctx_clear(struct radeon_ctx *ctx);
-int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw);
-int radeon_ctx_submit(struct radeon_ctx *ctx);
-void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
-int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state);
-
-/*
- * R600/R700
- */
-
-enum r600_stype {
- R600_STATE_CONFIG,
- R600_STATE_CB_CNTL,
- R600_STATE_RASTERIZER,
- R600_STATE_VIEWPORT,
- R600_STATE_SCISSOR,
- R600_STATE_BLEND,
- R600_STATE_DSA,
- R600_STATE_SHADER, /* has PS,VS,GS,FS variants */
- R600_STATE_CONSTANT, /* has PS,VS,GS,FS variants */
- R600_STATE_CBUF, /* has PS,VS,GS,FS variants */
- R600_STATE_RESOURCE, /* has PS,VS,GS,FS variants */
- R600_STATE_SAMPLER, /* has PS,VS,GS,FS variants */
- R600_STATE_SAMPLER_BORDER, /* has PS,VS,GS,FS variants */
- R600_STATE_CB0,
- R600_STATE_CB1,
- R600_STATE_CB2,
- R600_STATE_CB3,
- R600_STATE_CB4,
- R600_STATE_CB5,
- R600_STATE_CB6,
- R600_STATE_CB7,
- R600_STATE_DB,
- R600_STATE_QUERY_BEGIN,
- R600_STATE_QUERY_END,
- R600_STATE_UCP,
- R600_STATE_VGT,
- R600_STATE_DRAW,
- R600_STATE_CB_FLUSH,
- R600_STATE_DB_FLUSH,
- R600_STATE_MAX,
-};
-
-#include "r600_states_inc.h"
-#include "eg_states_inc.h"
-
-/* R600 QUERY BEGIN/END */
-#define R600_QUERY__OFFSET 0
-#define R600_QUERY_SIZE 1
-#define R600_QUERY_PM4 128
-
-#endif
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 67e2c8f8bc..346e1b402b 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -158,9 +158,17 @@ exec_run( const struct sp_fragment_shader *base,
case TGSI_SEMANTIC_POSITION:
{
uint j;
- for (j = 0; j < 4; j++) {
+
+ for (j = 0; j < 4; j++)
quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
- }
+ }
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ {
+ uint j;
+
+ for (j = 0; j < 4; j++)
+ quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j];
}
break;
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index daa158df7c..5b18cd035e 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -169,9 +169,15 @@ fs_sse_run( const struct sp_fragment_shader *base,
case TGSI_SEMANTIC_POSITION:
{
uint j;
- for (j = 0; j < 4; j++) {
- quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j];
- }
+ for (j = 0; j < 4; j++)
+ quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
+ }
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ {
+ uint j;
+ for (j = 0; j < 4; j++)
+ quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j];
}
break;
}
diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h
index a3236bd116..e745aa8061 100644
--- a/src/gallium/drivers/softpipe/sp_quad.h
+++ b/src/gallium/drivers/softpipe/sp_quad.h
@@ -85,6 +85,7 @@ struct quad_header_output
/** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
float depth[QUAD_SIZE];
+ uint8_t stencil[QUAD_SIZE];
};
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index e9b9262617..c8f5f89568 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -47,6 +47,8 @@ struct depth_data {
unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */
unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */
ubyte stencilVals[QUAD_SIZE];
+ boolean use_shader_stencil_refs;
+ ubyte shader_stencil_refs[QUAD_SIZE];
struct softpipe_cached_tile *tile;
};
@@ -186,6 +188,33 @@ convert_quad_depth( struct depth_data *data,
}
+/**
+ * Fill depth_data::shader_stencil_refs[] from the quad's per-fragment
+ * stencil outputs (quad->output.stencil[], stored as uint8_t in
+ * quad_header_output) and set data->use_shader_stencil_refs to TRUE.
+ *
+ * Only the four 24-bit-depth formats listed in the switch are handled; any
+ * other data->format reaches assert(0). The flag is set before the switch,
+ * so when assert() is compiled out an unhandled format leaves
+ * shader_stencil_refs[] unwritten while the flag is already TRUE.
+ */
+static void
+convert_quad_stencil( struct depth_data *data,
+ const struct quad_header *quad )
+{
+ unsigned j;
+
+ data->use_shader_stencil_refs = TRUE;
+ /* Copy the quad's stencil values
+ */
+ switch (data->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ {
+ for (j = 0; j < QUAD_SIZE; j++) {
+ data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); /* source is uint8_t and destination is ubyte; the cast does not change the value */
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
/**
* Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
@@ -272,8 +301,14 @@ do_stencil_test(struct depth_data *data,
{
unsigned passMask = 0x0;
unsigned j;
+ ubyte refs[QUAD_SIZE];
- ref &= valMask;
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (data->use_shader_stencil_refs)
+ refs[j] = data->shader_stencil_refs[j] & valMask;
+ else
+ refs[j] = ref & valMask;
+ }
switch (func) {
case PIPE_FUNC_NEVER:
@@ -281,42 +316,42 @@ do_stencil_test(struct depth_data *data,
break;
case PIPE_FUNC_LESS:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref < (data->stencilVals[j] & valMask)) {
+ if (refs[j] < (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_EQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref == (data->stencilVals[j] & valMask)) {
+ if (refs[j] == (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_LEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref <= (data->stencilVals[j] & valMask)) {
+ if (refs[j] <= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GREATER:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref > (data->stencilVals[j] & valMask)) {
+ if (refs[j] > (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_NOTEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref != (data->stencilVals[j] & valMask)) {
+ if (refs[j] != (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref >= (data->stencilVals[j] & valMask)) {
+ if (refs[j] >= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
@@ -348,9 +383,14 @@ apply_stencil_op(struct depth_data *data,
{
unsigned j;
ubyte newstencil[QUAD_SIZE];
+ ubyte refs[QUAD_SIZE];
for (j = 0; j < QUAD_SIZE; j++) {
newstencil[j] = data->stencilVals[j];
+ if (data->use_shader_stencil_refs)
+ refs[j] = data->shader_stencil_refs[j];
+ else
+ refs[j] = ref;
}
switch (op) {
@@ -367,7 +407,7 @@ apply_stencil_op(struct depth_data *data,
case PIPE_STENCIL_OP_REPLACE:
for (j = 0; j < QUAD_SIZE; j++) {
if (mask & (1 << j)) {
- newstencil[j] = ref;
+ newstencil[j] = refs[j];
}
}
break;
@@ -688,8 +728,10 @@ depth_test_quads_fallback(struct quad_stage *qs,
unsigned i, pass = 0;
const struct sp_fragment_shader *fs = qs->softpipe->fs;
boolean interp_depth = !fs->info.writes_z;
+ boolean shader_stencil_ref = fs->info.writes_stencil;
struct depth_data data;
+ data.use_shader_stencil_refs = FALSE;
if (qs->softpipe->depth_stencil->alpha.enabled) {
nr = alpha_test_quads(qs, quads, nr);
@@ -716,6 +758,9 @@ depth_test_quads_fallback(struct quad_stage *qs,
}
if (qs->softpipe->depth_stencil->stencil[0].enabled) {
+ if (shader_stencil_ref)
+ convert_quad_stencil(&data, quads[i]);
+
depth_stencil_test_quad(qs, &data, quads[i]);
write_depth_stencil_values(&data, quads[i]);
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index 43b8e88e33..2cfd02a22c 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
@@ -47,7 +47,8 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
sp->framebuffer.zsbuf &&
!sp->depth_stencil->alpha.enabled &&
!sp->fs->info.uses_kill &&
- !sp->fs->info.writes_z;
+ !sp->fs->info.writes_z &&
+ !sp->fs->info.writes_stencil;
sp->quad.first = sp->quad.blend;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 2053d02f62..37557d1194 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -114,6 +114,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 1;
default:
return 0;
}
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index aedb5bb19b..b59fbc33ed 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -33,7 +33,6 @@
#include "util/u_inlines.h"
#include "draw/draw_context.h"
-#include "draw/draw_context.h"
#include "sp_context.h"
#include "sp_state.h"
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 271cd4aff5..04f30f82c3 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -92,15 +92,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
trace_dump_call_begin("pipe_context", "draw_vbo");
trace_dump_arg(ptr, pipe);
- trace_dump_arg(bool, info->indexed);
- trace_dump_arg(uint, info->mode);
- trace_dump_arg(uint, info->start);
- trace_dump_arg(uint, info->count);
- trace_dump_arg(uint, info->start_instance);
- trace_dump_arg(uint, info->instance_count);
- trace_dump_arg(int, info->index_bias);
- trace_dump_arg(uint, info->min_index);
- trace_dump_arg(uint, info->max_index);
+ trace_dump_arg(draw_info, info);
pipe->draw_vbo(pipe, info);
@@ -987,24 +979,24 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
static INLINE void
trace_context_set_index_buffer(struct pipe_context *_pipe,
- const struct pipe_index_buffer *_ib)
+ const struct pipe_index_buffer *ib)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
- struct pipe_index_buffer unwrapped_ib, *ib = NULL;
-
- if (_ib) {
- unwrapped_ib = *_ib;
- unwrapped_ib.buffer = trace_resource_unwrap(tr_ctx, _ib->buffer);
- ib = &unwrapped_ib;
- }
trace_dump_call_begin("pipe_context", "set_index_buffer");
trace_dump_arg(ptr, pipe);
- trace_dump_arg(index_buffer, _ib);
+ trace_dump_arg(index_buffer, ib);
- pipe->set_index_buffer(pipe, ib);
+ if (ib) {
+ struct pipe_index_buffer _ib;
+ _ib = *ib;
+ _ib.buffer = trace_resource_unwrap(tr_ctx, ib->buffer);
+ pipe->set_index_buffer(pipe, &_ib);
+ } else {
+ pipe->set_index_buffer(pipe, NULL);
+ }
trace_dump_call_end();
}
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index bd9a9bfaf1..8f81606032 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -573,3 +573,32 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state)
trace_dump_struct_end();
}
+
+
+void trace_dump_draw_info(const struct pipe_draw_info *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_draw_info");
+
+ trace_dump_member(bool, state, indexed);
+
+ trace_dump_member(uint, state, mode);
+ trace_dump_member(uint, state, start);
+ trace_dump_member(uint, state, count);
+
+ trace_dump_member(uint, state, start_instance);
+ trace_dump_member(uint, state, instance_count);
+
+ trace_dump_member(int, state, index_bias);
+ trace_dump_member(uint, state, min_index);
+ trace_dump_member(uint, state, max_index);
+
+ trace_dump_struct_end();
+}
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 2e70f4e1c7..078d208610 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -79,5 +79,7 @@ void trace_dump_index_buffer(const struct pipe_index_buffer *state);
void trace_dump_vertex_element(const struct pipe_vertex_element *state);
+void trace_dump_draw_info(const struct pipe_draw_info *state);
+
#endif /* TR_STATE_H */
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 5020599591..3d6b5b5c81 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -122,6 +122,27 @@ typedef unsigned char boolean;
# endif
#endif
+/*
+ * Define the C99 restrict keyword.
+ *
+ * See also:
+ * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
+ */
+#ifndef restrict
+# if (__STDC_VERSION__ >= 199901L)
+ /* C99 */
+# elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+ /* C99 */
+# elif defined(__GNUC__)
+# define restrict __restrict__
+# elif defined(_MSC_VER)
+# define restrict __restrict
+# else
+# define restrict /* */
+# endif
+#endif
+
+
/* Function visibility */
#ifndef PUBLIC
# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 8b4663742f..b6894c09e8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -464,7 +464,8 @@ enum pipe_cap {
PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT,
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER,
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER,
- PIPE_CAP_DEPTH_CLAMP
+ PIPE_CAP_DEPTH_CLAMP,
+ PIPE_CAP_SHADER_STENCIL_EXPORT,
};
/* Shader caps not specific to any single stage */
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 06412f4894..22cc7aa18a 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -186,6 +186,10 @@ enum pipe_format {
PIPE_FORMAT_R8G8B8X8_UNORM = 134,
PIPE_FORMAT_B4G4R4X4_UNORM = 135,
+ /* some stencil sampler formats */
+ PIPE_FORMAT_X24S8_USCALED = 136,
+ PIPE_FORMAT_S8X24_USCALED = 137,
+ PIPE_FORMAT_X32_S8X24_USCALED = 138,
PIPE_FORMAT_COUNT
};
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 74488de17e..ba433b2bd2 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -143,7 +143,8 @@ struct tgsi_declaration_dimension
#define TGSI_SEMANTIC_EDGEFLAG 8
#define TGSI_SEMANTIC_PRIMID 9
#define TGSI_SEMANTIC_INSTANCEID 10
-#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */
+#define TGSI_SEMANTIC_STENCIL 11
+#define TGSI_SEMANTIC_COUNT 12 /**< number of semantic values */
struct tgsi_declaration_semantic
{
diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c
index 22e1b6dd70..770b37037f 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.c
+++ b/src/gallium/state_trackers/dri/common/dri_context.c
@@ -49,7 +49,7 @@ dri_init_extensions(struct dri_context *ctx)
}
GLboolean
-dri_create_context(gl_api api, const __GLcontextModes * visual,
+dri_create_context(gl_api api, const struct gl_config * visual,
__DRIcontext * cPriv, void *sharedContextPrivate)
{
__DRIscreen *sPriv = cPriv->driScreenPriv;
diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h
index beb59c6f68..35105e861f 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.h
+++ b/src/gallium/state_trackers/dri/common/dri_context.h
@@ -88,7 +88,7 @@ dri_get_current(__DRIscreen * driScreenPriv);
boolean
dri_create_context(gl_api api,
- const __GLcontextModes * visual,
+ const struct gl_config * visual,
__DRIcontext * driContextPriv,
void *sharedContextPrivate);
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c
index 1bdfdccf43..5fd6e7863c 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.c
@@ -112,7 +112,7 @@ dri_st_framebuffer_flush_front(struct st_framebuffer_iface *stfbi,
boolean
dri_create_buffer(__DRIscreen * sPriv,
__DRIdrawable * dPriv,
- const __GLcontextModes * visual, boolean isPixmap)
+ const struct gl_config * visual, boolean isPixmap)
{
struct dri_screen *screen = sPriv->private;
struct dri_drawable *drawable = NULL;
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h
index 74e662d36c..837d398374 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.h
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.h
@@ -79,7 +79,7 @@ dri_drawable(__DRIdrawable * driDrawPriv)
boolean
dri_create_buffer(__DRIscreen * sPriv,
__DRIdrawable * dPriv,
- const __GLcontextModes * visual, boolean isPixmap);
+ const struct gl_config * visual, boolean isPixmap);
void dri_destroy_buffer(__DRIdrawable * dPriv);
diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c
index b3b09b605f..252ad1768d 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -227,7 +227,7 @@ dri_fill_in_modes(struct dri_screen *screen,
*/
void
dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen,
- const __GLcontextModes *mode)
+ const struct gl_config *mode)
{
memset(stvis, 0, sizeof(*stvis));
diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h
index d4eb8f454f..0da9b5510f 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.h
+++ b/src/gallium/state_trackers/dri/common/dri_screen.h
@@ -114,7 +114,7 @@ dri_with_format(__DRIscreen * sPriv)
void
dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen,
- const __GLcontextModes *mode);
+ const struct gl_config *mode);
const __DRIconfig **
dri_init_screen_helper(struct dri_screen *screen,
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index 116afccb19..3c5b075617 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -502,7 +502,7 @@ static const __DRIextension *dri_screen_extensions[] = {
/**
* This is the driver specific part of the createNewScreen entry point.
*
- * Returns the __GLcontextModes supported by this driver.
+ * Returns the struct gl_config supported by this driver.
*/
static const __DRIconfig **
dri2_init_screen(__DRIscreen * sPriv)
@@ -548,7 +548,7 @@ fail:
}
static boolean
-dri2_create_context(gl_api api, const __GLcontextModes * visual,
+dri2_create_context(gl_api api, const struct gl_config * visual,
__DRIcontext * cPriv, void *sharedContextPrivate)
{
struct dri_context *ctx = NULL;
@@ -564,7 +564,7 @@ dri2_create_context(gl_api api, const __GLcontextModes * visual,
static boolean
dri2_create_buffer(__DRIscreen * sPriv,
__DRIdrawable * dPriv,
- const __GLcontextModes * visual, boolean isPixmap)
+ const struct gl_config * visual, boolean isPixmap)
{
struct dri_drawable *drawable = NULL;
diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c
index 04bba631ae..c48cc44036 100644
--- a/src/gallium/state_trackers/dri/sw/drisw.c
+++ b/src/gallium/state_trackers/dri/sw/drisw.c
@@ -298,7 +298,7 @@ fail:
static boolean
drisw_create_buffer(__DRIscreen * sPriv,
__DRIdrawable * dPriv,
- const __GLcontextModes * visual, boolean isPixmap)
+ const struct gl_config * visual, boolean isPixmap)
{
struct dri_drawable *drawable = NULL;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index ce2b1f7bb9..aaa2ff6bb2 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -194,53 +194,48 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf,
if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT))
surface_type |= EGL_PBUFFER_BIT;
- SET_CONFIG_ATTRIB(conf, EGL_CONFORMANT, api_mask);
- SET_CONFIG_ATTRIB(conf, EGL_RENDERABLE_TYPE, api_mask);
+ conf->Conformant = api_mask;
+ conf->RenderableType = api_mask;
- SET_CONFIG_ATTRIB(conf, EGL_RED_SIZE, rgba[0]);
- SET_CONFIG_ATTRIB(conf, EGL_GREEN_SIZE, rgba[1]);
- SET_CONFIG_ATTRIB(conf, EGL_BLUE_SIZE, rgba[2]);
- SET_CONFIG_ATTRIB(conf, EGL_ALPHA_SIZE, rgba[3]);
- SET_CONFIG_ATTRIB(conf, EGL_BUFFER_SIZE, buffer_size);
+ conf->RedSize = rgba[0];
+ conf->GreenSize = rgba[1];
+ conf->BlueSize = rgba[2];
+ conf->AlphaSize = rgba[3];
+ conf->BufferSize = buffer_size;
- SET_CONFIG_ATTRIB(conf, EGL_DEPTH_SIZE, depth_stencil[0]);
- SET_CONFIG_ATTRIB(conf, EGL_STENCIL_SIZE, depth_stencil[1]);
+ conf->DepthSize = depth_stencil[0];
+ conf->StencilSize = depth_stencil[1];
- SET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE, surface_type);
+ conf->SurfaceType = surface_type;
- SET_CONFIG_ATTRIB(conf, EGL_NATIVE_RENDERABLE, EGL_TRUE);
+ conf->NativeRenderable = EGL_TRUE;
if (surface_type & EGL_WINDOW_BIT) {
- SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_ID, nconf->native_visual_id);
- SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE,
- nconf->native_visual_type);
+ conf->NativeVisualID = nconf->native_visual_id;
+ conf->NativeVisualType = nconf->native_visual_type;
}
if (surface_type & EGL_PBUFFER_BIT) {
- SET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGB, EGL_TRUE);
+ conf->BindToTextureRGB = EGL_TRUE;
if (rgba[3])
- SET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGBA, EGL_TRUE);
+ conf->BindToTextureRGBA = EGL_TRUE;
- SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_WIDTH, 4096);
- SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_HEIGHT, 4096);
- SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_PIXELS, 4096 * 4096);
+ conf->MaxPbufferWidth = 4096;
+ conf->MaxPbufferHeight = 4096;
+ conf->MaxPbufferPixels = 4096 * 4096;
}
- SET_CONFIG_ATTRIB(conf, EGL_LEVEL, nconf->level);
- SET_CONFIG_ATTRIB(conf, EGL_SAMPLES, nconf->samples);
- SET_CONFIG_ATTRIB(conf, EGL_SAMPLE_BUFFERS, 1);
+ conf->Level = nconf->level;
+ conf->Samples = nconf->samples;
+ conf->SampleBuffers = 0;
if (nconf->slow_config)
- SET_CONFIG_ATTRIB(conf, EGL_CONFIG_CAVEAT, EGL_SLOW_CONFIG);
+ conf->ConfigCaveat = EGL_SLOW_CONFIG;
if (nconf->transparent_rgb) {
- rgba[0] = nconf->transparent_rgb_values[0];
- rgba[1] = nconf->transparent_rgb_values[1];
- rgba[2] = nconf->transparent_rgb_values[2];
-
- SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_TYPE, EGL_TRANSPARENT_RGB);
- SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_RED_VALUE, rgba[0]);
- SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_GREEN_VALUE, rgba[1]);
- SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_BLUE_VALUE, rgba[2]);
+ conf->TransparentType = EGL_TRANSPARENT_RGB;
+ conf->TransparentRedValue = nconf->transparent_rgb_values[0];
+ conf->TransparentGreenValue = nconf->transparent_rgb_values[1];
+ conf->TransparentBlueValue = nconf->transparent_rgb_values[2];
}
return _eglValidateConfig(conf, EGL_FALSE);
@@ -258,6 +253,10 @@ egl_g3d_init_config(_EGLDriver *drv, _EGLDisplay *dpy,
EGLint buffer_mask, api_mask;
EGLBoolean valid;
+ /* skip single-buffered configs */
+ if (!(nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)))
+ return EGL_FALSE;
+
buffer_mask = 0x0;
if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_FRONT_LEFT))
buffer_mask |= ST_ATTACHMENT_FRONT_LEFT_MASK;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
index c0164daf9c..3bde39737b 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
@@ -609,8 +609,10 @@ egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx)
gctx->stctxi->flush(gctx->stctxi,
PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence);
- screen->fence_finish(screen, fence, 0);
- screen->fence_reference(screen, &fence, NULL);
+ if (fence) {
+ screen->fence_finish(screen, fence, 0);
+ screen->fence_reference(screen, &fence, NULL);
+ }
return EGL_TRUE;
}
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c
index 05cdb0d421..0affe632cf 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c
@@ -51,7 +51,6 @@ egl_g3d_st_manager(struct st_manager *smapi)
static boolean
egl_g3d_st_manager_get_egl_image(struct st_manager *smapi,
- struct st_context_iface *stctx,
void *egl_image,
struct st_egl_image *out)
{
diff --git a/src/gallium/state_trackers/egl/x11/glcore.h b/src/gallium/state_trackers/egl/x11/glcore.h
new file mode 100644
index 0000000000..547b111370
--- /dev/null
+++ b/src/gallium/state_trackers/egl/x11/glcore.h
@@ -0,0 +1,181 @@
+#ifndef __gl_core_h_
+#define __gl_core_h_
+
+/*
+ * SGI FREE SOFTWARE LICENSE B (Version 2.0, Sept. 18, 2008)
+ * Copyright (C) 1991-2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice including the dates of first publication and
+ * either this permission notice or a reference to
+ * http://oss.sgi.com/projects/FreeB/
+ * shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * SILICON GRAPHICS, INC. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Silicon Graphics, Inc.
+ * shall not be used in advertising or otherwise to promote the sale, use or
+ * other dealings in this Software without prior written authorization from
+ * Silicon Graphics, Inc.
+ */
+
+#if !defined(_WIN32_WCE)
+#include <sys/types.h>
+#endif
+
+#define GL_CORE_SGI 1
+#define GL_CORE_MESA 2
+#define GL_CORE_APPLE 4
+#define GL_CORE_WINDOWS 8
+
+typedef struct __GLcontextRec __GLcontext;
+
+/*
+** This file defines the interface between the GL core and the surrounding
+** "operating system" that supports it (currently the GLX or WGL extensions).
+**
+** Members (data and function pointers) are documented as imported or
+** exported according to how they are used by the core rendering functions.
+** Imported members are initialized by the "operating system" and used by
+** the core functions. Exported members are initialized by the core functions
+** and used by the "operating system".
+*/
+
+/**
+ * Mode and limit information for a context. This information is
+ * kept around in the context so that values can be used during
+ * command execution, and for returning information about the
+ * context to the application.
+ *
+ * Instances of this structure are shared by the driver and the loader. To
+ * maintain binary compatibility, new fields \b must be added only to the
+ * end of the structure.
+ *
+ * \sa _gl_context_modes_create
+ */
+typedef struct __GLcontextModesRec {
+ struct __GLcontextModesRec * next;
+
+ GLboolean rgbMode;
+ GLboolean floatMode;
+ GLboolean colorIndexMode;
+ GLuint doubleBufferMode;
+ GLuint stereoMode;
+
+ GLboolean haveAccumBuffer;
+ GLboolean haveDepthBuffer;
+ GLboolean haveStencilBuffer;
+
+ GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */
+ GLuint redMask, greenMask, blueMask, alphaMask;
+ GLint rgbBits; /* total bits for rgb */
+ GLint indexBits; /* total bits for colorindex */
+
+ GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits;
+ GLint depthBits;
+ GLint stencilBits;
+
+ GLint numAuxBuffers;
+
+ GLint level;
+
+ GLint pixmapMode;
+
+ /* GLX */
+ GLint visualID;
+ GLint visualType; /**< One of the GLX X visual types. (i.e.,
+ * \c GLX_TRUE_COLOR, etc.)
+ */
+
+ /* EXT_visual_rating / GLX 1.2 */
+ GLint visualRating;
+
+ /* EXT_visual_info / GLX 1.2 */
+ GLint transparentPixel;
+ /* colors are floats scaled to ints */
+ GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha;
+ GLint transparentIndex;
+
+ /* ARB_multisample / SGIS_multisample */
+ GLint sampleBuffers;
+ GLint samples;
+
+ /* SGIX_fbconfig / GLX 1.3 */
+ GLint drawableType;
+ GLint renderType;
+ GLint xRenderable;
+ GLint fbconfigID;
+
+ /* SGIX_pbuffer / GLX 1.3 */
+ GLint maxPbufferWidth;
+ GLint maxPbufferHeight;
+ GLint maxPbufferPixels;
+ GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */
+ GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */
+
+ /* SGIX_visual_select_group */
+ GLint visualSelectGroup;
+
+ /* OML_swap_method */
+ GLint swapMethod;
+
+ GLint screen;
+
+ /* EXT_texture_from_pixmap */
+ GLint bindToTextureRgb;
+ GLint bindToTextureRgba;
+ GLint bindToMipmapTexture;
+ GLint bindToTextureTargets;
+ GLint yInverted;
+} __GLcontextModes;
+
+/* Several fields of __GLcontextModes can take these as values. Since
+ * GLX header files may not be available everywhere they need to be used,
+ * redefine them here.
+ */
+#define GLX_NONE 0x8000
+#define GLX_SLOW_CONFIG 0x8001
+#define GLX_TRUE_COLOR 0x8002
+#define GLX_DIRECT_COLOR 0x8003
+#define GLX_PSEUDO_COLOR 0x8004
+#define GLX_STATIC_COLOR 0x8005
+#define GLX_GRAY_SCALE 0x8006
+#define GLX_STATIC_GRAY 0x8007
+#define GLX_TRANSPARENT_RGB 0x8008
+#define GLX_TRANSPARENT_INDEX 0x8009
+#define GLX_NON_CONFORMANT_CONFIG 0x800D
+#define GLX_SWAP_EXCHANGE_OML 0x8061
+#define GLX_SWAP_COPY_OML 0x8062
+#define GLX_SWAP_UNDEFINED_OML 0x8063
+
+#define GLX_DONT_CARE 0xFFFFFFFF
+
+#define GLX_RGBA_BIT 0x00000001
+#define GLX_COLOR_INDEX_BIT 0x00000002
+#define GLX_WINDOW_BIT 0x00000001
+#define GLX_PIXMAP_BIT 0x00000002
+#define GLX_PBUFFER_BIT 0x00000004
+
+#define GLX_BIND_TO_TEXTURE_RGB_EXT 0x20D0
+#define GLX_BIND_TO_TEXTURE_RGBA_EXT 0x20D1
+#define GLX_BIND_TO_MIPMAP_TEXTURE_EXT 0x20D2
+#define GLX_BIND_TO_TEXTURE_TARGETS_EXT 0x20D3
+#define GLX_Y_INVERTED_EXT 0x20D4
+
+#define GLX_TEXTURE_1D_BIT_EXT 0x00000001
+#define GLX_TEXTURE_2D_BIT_EXT 0x00000002
+#define GLX_TEXTURE_RECTANGLE_BIT_EXT 0x00000004
+
+#endif /* __gl_core_h_ */
diff --git a/src/gallium/state_trackers/egl/x11/glxinit.c b/src/gallium/state_trackers/egl/x11/glxinit.c
index 57c6aaff86..df8370f8d7 100644
--- a/src/gallium/state_trackers/egl/x11/glxinit.c
+++ b/src/gallium/state_trackers/egl/x11/glxinit.c
@@ -18,7 +18,7 @@
#include "GL/glxproto.h"
#include "GL/glxtokens.h"
#include "GL/gl.h" /* for GL types needed by __GLcontextModes */
-#include "GL/internal/glcore.h" /* for __GLcontextModes */
+#include "glcore.h" /* for __GLcontextModes */
#include "glxinit.h"
diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c
index 1be1e42468..1169e273c3 100644
--- a/src/gallium/state_trackers/egl/x11/native_dri2.c
+++ b/src/gallium/state_trackers/egl/x11/native_dri2.c
@@ -518,10 +518,6 @@ dri2_display_convert_config(struct native_display *ndpy,
if (!(mode->renderType & GLX_RGBA_BIT) || !mode->rgbMode)
return FALSE;
- /* skip single-buffered configs */
- if (!mode->doubleBufferMode)
- return FALSE;
-
/* only interested in native renderable configs */
if (!mode->xRenderable || !mode->drawableType)
return FALSE;
diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.h b/src/gallium/state_trackers/egl/x11/x11_screen.h
index bc0ef69ec6..2e313e0148 100644
--- a/src/gallium/state_trackers/egl/x11/x11_screen.h
+++ b/src/gallium/state_trackers/egl/x11/x11_screen.h
@@ -30,7 +30,7 @@
#include <X11/Xutil.h>
#include <X11/extensions/dri2tokens.h>
#include "GL/gl.h" /* for GL types needed by __GLcontextModes */
-#include "GL/internal/glcore.h" /* for __GLcontextModes */
+#include "glcore.h" /* for __GLcontextModes */
#include "pipe/p_compiler.h"
#include "common/native.h"
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index f950c8858b..8332633f01 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -423,7 +423,7 @@ static XMesaBuffer XMesaBufferList = NULL;
/**
* Allocate a new XMesaBuffer object which corresponds to the given drawable.
- * Note that XMesaBuffer is derived from GLframebuffer.
+ * Note that XMesaBuffer is derived from struct gl_framebuffer.
* The new XMesaBuffer will not have any size (Width=Height=0).
*
* \param d the corresponding X drawable (window or pixmap)
@@ -569,7 +569,7 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
/* RGB WINDOW:
* We support RGB rendering into almost any kind of visual.
*/
- const int xclass = v->mesa_visual.visualType;
+ const int xclass = v->visualType;
if (xclass != GLX_TRUE_COLOR && xclass == !GLX_DIRECT_COLOR) {
_mesa_warning(NULL,
"XMesa: RGB mode rendering not supported in given visual.\n");
@@ -716,13 +716,13 @@ XMesaVisual XMesaCreateVisual( Display *display,
v->mesa_visual.redMask = visinfo->red_mask;
v->mesa_visual.greenMask = visinfo->green_mask;
v->mesa_visual.blueMask = visinfo->blue_mask;
- v->mesa_visual.visualID = visinfo->visualid;
- v->mesa_visual.screen = visinfo->screen;
+ v->visualID = visinfo->visualid;
+ v->screen = visinfo->screen;
#if !(defined(__cplusplus) || defined(c_plusplus))
- v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->class);
+ v->visualType = xmesa_convert_from_x_visual_type(visinfo->class);
#else
- v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->c_class);
+ v->visualType = xmesa_convert_from_x_visual_type(visinfo->c_class);
#endif
v->mesa_visual.visualRating = visualCaveat;
@@ -733,7 +733,7 @@ XMesaVisual XMesaCreateVisual( Display *display,
(void) initialize_visual_and_buffer( v, NULL, rgb_flag, 0, 0 );
{
- const int xclass = v->mesa_visual.visualType;
+ const int xclass = v->visualType;
if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) {
red_bits = _mesa_bitcount(GET_REDMASK(v));
green_bits = _mesa_bitcount(GET_GREENMASK(v));
@@ -756,7 +756,7 @@ XMesaVisual XMesaCreateVisual( Display *display,
/* initialize visual */
{
- __GLcontextModes *vis = &v->mesa_visual;
+ struct gl_config *vis = &v->mesa_visual;
vis->rgbMode = GL_TRUE;
vis->doubleBufferMode = db_flag;
@@ -783,7 +783,6 @@ XMesaVisual XMesaCreateVisual( Display *display,
vis->numAuxBuffers = 0;
vis->level = 0;
- vis->pixmapMode = 0;
vis->sampleBuffers = 0;
vis->samples = 0;
}
@@ -855,7 +854,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
if (!xmdpy)
return NULL;
- /* Note: the XMesaContext contains a Mesa GLcontext struct (inheritance) */
+ /* Note: the XMesaContext contains a Mesa struct gl_context (inheritance) */
c = (XMesaContext) CALLOC_STRUCT(xmesa_context);
if (!c)
return NULL;
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h
index fedf2b2d5a..b8ac979edc 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.h
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.h
@@ -280,7 +280,8 @@ XMesaCopyContext(XMesaContext src, XMesaContext dst, unsigned long mask);
* Basically corresponds to an XVisualInfo.
*/
struct xmesa_visual {
- GLvisual mesa_visual; /* Device independent visual parameters */
+ struct gl_config mesa_visual;/* Device independent visual parameters */
+ int screen, visualID, visualType;
Display *display; /* The X11 display */
XVisualInfo * visinfo; /* X's visual info (pointer to private copy) */
XVisualInfo *vishandle; /* Only used in fakeglx.c */
diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c
index 4d0f5e6625..e7466bdbee 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_st.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_st.c
@@ -196,7 +196,13 @@ xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi,
/**
+ * Check that a framebuffer's attachments match the window's size.
+ *
* Called via st_framebuffer_iface::validate()
+ *
+ * \param statts array of framebuffer attachments
+ * \param count number of framebuffer attachments in statts[]
+ * \param out returns resources for each of the attachments
*/
static boolean
xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi,
@@ -209,9 +215,11 @@ xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi,
boolean resized;
boolean ret;
+ /* build mask of ST_ATTACHMENT bits */
statt_mask = 0x0;
for (i = 0; i < count; i++)
statt_mask |= 1 << statts[i];
+
/* record newly allocated textures */
new_mask = statt_mask & ~xstfb->texture_mask;
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index dce24bc17d..29813456b5 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -31,7 +31,6 @@
#include "pipe/p_shader_tokens.h"
#include "util/u_inlines.h"
#include "cso_cache/cso_context.h"
-#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_sampler.h"
diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c
index 547508f815..c36b3d2f3c 100644
--- a/src/gallium/state_trackers/vega/api_images.c
+++ b/src/gallium/state_trackers/vega/api_images.c
@@ -31,7 +31,6 @@
#include "vg_context.h"
#include "vg_translate.h"
#include "api_consts.h"
-#include "image.h"
#include "api.h"
#include "pipe/p_context.h"
diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c
index e7996741d1..232deefa16 100644
--- a/src/gallium/state_trackers/vega/vg_manager.c
+++ b/src/gallium/state_trackers/vega/vg_manager.c
@@ -352,7 +352,7 @@ vg_api_create_context(struct st_api *stapi, struct st_manager *smapi,
return NULL;
/* only 1.0 is supported */
- if (attribs->major != 1 || attribs->minor > 0)
+ if (attribs->major > 1 || (attribs->major == 1 && attribs->minor > 0))
return NULL;
pipe = smapi->screen->context_create(smapi->screen, NULL);
diff --git a/src/gallium/state_trackers/xorg/Makefile b/src/gallium/state_trackers/xorg/Makefile
index cb2c3aea41..7a44d28017 100644
--- a/src/gallium/state_trackers/xorg/Makefile
+++ b/src/gallium/state_trackers/xorg/Makefile
@@ -10,7 +10,7 @@ LIBRARY_INCLUDES = \
$(shell pkg-config libkms --atleast-version=1.0 \
&& echo "-DHAVE_LIBKMS") \
$(shell pkg-config libkms --silence-errors --cflags-only-I) \
- $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \
+ $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto dri2proto) \
-I$(TOP)/src/gallium/include \
-I$(TOP)/src/gallium/auxiliary \
-I$(TOP)/include \
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index 26a907f205..80af82d97b 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -234,6 +234,10 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image)
64, 64, (void*)image, 64 * 4, 0, 0);
ms->ctx->transfer_unmap(ms->ctx, transfer);
ms->ctx->transfer_destroy(ms->ctx, transfer);
+
+ if (crtc->cursor_shown)
+ drmModeSetCursor(ms->fd, crtcp->drm_crtc->crtc_id,
+ crtcp->cursor_handle, 64, 64);
}
#if HAVE_LIBKMS
@@ -271,6 +275,10 @@ crtc_load_cursor_argb_kms(xf86CrtcPtr crtc, CARD32 * image)
memcpy(ptr, image, 64*64*4);
kms_bo_unmap(crtcp->cursor_bo);
+ if (crtc->cursor_shown)
+ drmModeSetCursor(ms->fd, crtcp->drm_crtc->crtc_id,
+ crtcp->cursor_handle, 64, 64);
+
return;
err_bo_destroy:
@@ -353,7 +361,7 @@ crtc_destroy(xf86CrtcPtr crtc)
drmModeFreeCrtc(crtcp->drm_crtc);
- xfree(crtcp);
+ free(crtcp);
crtc->driver_private = NULL;
}
@@ -401,7 +409,7 @@ xorg_crtc_init(ScrnInfoPtr pScrn)
if (crtc == NULL)
goto out;
- crtcp = xcalloc(1, sizeof(struct crtc_private));
+ crtcp = calloc(1, sizeof(struct crtc_private));
if (!crtcp) {
xf86CrtcDestroy(crtc);
goto out;
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index 704aed6a82..b723a8e9cb 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -201,11 +201,11 @@ dri2_create_buffer(DrawablePtr pDraw, unsigned int attachment, unsigned int form
DRI2Buffer2Ptr buffer;
BufferPrivatePtr private;
- buffer = xcalloc(1, sizeof *buffer);
+ buffer = calloc(1, sizeof *buffer);
if (!buffer)
return NULL;
- private = xcalloc(1, sizeof *private);
+ private = calloc(1, sizeof *private);
if (!private) {
goto fail;
}
@@ -217,9 +217,9 @@ dri2_create_buffer(DrawablePtr pDraw, unsigned int attachment, unsigned int form
if (dri2_do_create_buffer(pDraw, (DRI2BufferPtr)buffer, format))
return buffer;
- xfree(private);
+ free(private);
fail:
- xfree(buffer);
+ free(buffer);
return NULL;
}
@@ -229,8 +229,8 @@ dri2_destroy_buffer(DrawablePtr pDraw, DRI2Buffer2Ptr buffer)
/* So far it is safe to downcast a DRI2Buffer2Ptr to DRI2BufferPtr */
dri2_do_destroy_buffer(pDraw, (DRI2BufferPtr)buffer);
- xfree(buffer->driverPrivate);
- xfree(buffer);
+ free(buffer->driverPrivate);
+ free(buffer);
}
#endif /* DRI2INFOREC_VERSION >= 2 */
@@ -244,11 +244,11 @@ dri2_create_buffers(DrawablePtr pDraw, unsigned int *attachments, int count)
DRI2BufferPtr buffers;
int i;
- buffers = xcalloc(count, sizeof *buffers);
+ buffers = calloc(count, sizeof *buffers);
if (!buffers)
goto fail_buffers;
- privates = xcalloc(count, sizeof *privates);
+ privates = calloc(count, sizeof *privates);
if (!privates)
goto fail_privates;
@@ -263,9 +263,9 @@ dri2_create_buffers(DrawablePtr pDraw, unsigned int *attachments, int count)
return buffers;
fail:
- xfree(privates);
+ free(privates);
fail_privates:
- xfree(buffers);
+ free(buffers);
fail_buffers:
return NULL;
}
@@ -280,8 +280,8 @@ dri2_destroy_buffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count)
}
if (buffers) {
- xfree(buffers[0].driverPrivate);
- xfree(buffers);
+ free(buffers[0].driverPrivate);
+ free(buffers);
}
}
diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c
index e10ff2f950..1ec772df17 100644
--- a/src/gallium/state_trackers/xorg/xorg_driver.c
+++ b/src/gallium/state_trackers/xorg/xorg_driver.c
@@ -45,6 +45,7 @@
#include "miscstruct.h"
#include "dixstruct.h"
#include "xf86xv.h"
+#include "xorgVersion.h"
#ifndef XSERVER_LIBPCIACCESS
#error "libpciaccess needed"
#endif
@@ -122,7 +123,7 @@ xorg_tracker_set_functions(ScrnInfoPtr scrn)
Bool
xorg_tracker_have_modesetting(ScrnInfoPtr pScrn, struct pci_device *device)
{
- char *BusID = xalloc(64);
+ char *BusID = malloc(64);
sprintf(BusID, "pci:%04x:%02x:%02x.%d",
device->domain, device->bus,
device->dev, device->func);
@@ -130,14 +131,14 @@ xorg_tracker_have_modesetting(ScrnInfoPtr pScrn, struct pci_device *device)
if (drmCheckModesettingSupported(BusID)) {
xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0,
"Drm modesetting not supported %s\n", BusID);
- xfree(BusID);
+ free(BusID);
return FALSE;
}
xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0,
"Drm modesetting supported on %s\n", BusID);
- xfree(BusID);
+ free(BusID);
return TRUE;
}
@@ -174,7 +175,7 @@ drv_free_rec(ScrnInfoPtr pScrn)
if (!pScrn->driverPrivate)
return;
- xfree(pScrn->driverPrivate);
+ free(pScrn->driverPrivate);
pScrn->driverPrivate = NULL;
}
@@ -274,7 +275,7 @@ drv_init_drm(ScrnInfoPtr pScrn)
if (ms->fd < 0) {
char *BusID;
- BusID = xalloc(64);
+ BusID = malloc(64);
sprintf(BusID, "PCI:%d:%d:%d",
((ms->PciInfo->domain << 8) | ms->PciInfo->bus),
ms->PciInfo->dev, ms->PciInfo->func
@@ -283,7 +284,7 @@ drv_init_drm(ScrnInfoPtr pScrn)
ms->fd = drmOpen(driver_descriptor.driver_name, BusID);
ms->isMaster = TRUE;
- xfree(BusID);
+ free(BusID);
if (ms->fd >= 0)
return TRUE;
@@ -369,6 +370,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags)
ms = modesettingPTR(pScrn);
ms->pEnt = pEnt;
ms->cust = cust;
+ ms->fb_id = -1;
pScrn->displayWidth = 640; /* default it */
@@ -402,19 +404,6 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags)
if (!drv_init_drm(pScrn))
return FALSE;
- use3D = cust ? !cust->no_3d : TRUE;
- ms->from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL,
- &use3D) ?
- X_CONFIG : X_PROBED;
-
- ms->no3D = !use3D;
-
- if (!drv_init_resource_management(pScrn)) {
- xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not init "
- "Gallium3D or libKMS.");
- return FALSE;
- }
-
pScrn->monitor = pScrn->confScreen->monitor;
pScrn->progClock = TRUE;
pScrn->rgbBits = 8;
@@ -444,11 +433,24 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags)
/* Process the options */
xf86CollectOptions(pScrn, NULL);
- if (!(ms->Options = xalloc(sizeof(drv_options))))
+ if (!(ms->Options = malloc(sizeof(drv_options))))
return FALSE;
memcpy(ms->Options, drv_options, sizeof(drv_options));
xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, ms->Options);
+ use3D = cust ? !cust->no_3d : TRUE;
+ ms->from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL,
+ &use3D) ?
+ X_CONFIG : X_PROBED;
+
+ ms->no3D = !use3D;
+
+ if (!drv_init_resource_management(pScrn)) {
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not init "
+ "Gallium3D or libKMS.");
+ return FALSE;
+ }
+
/* Allocate an xf86CrtcConfig */
xf86CrtcConfigInit(pScrn, &crtc_config_funcs);
xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
@@ -791,7 +793,9 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
if (!ms->SWCursor)
xf86_cursors_init(pScreen, 64, 64,
HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_64 |
- HARDWARE_CURSOR_ARGB);
+ HARDWARE_CURSOR_ARGB |
+ ((cust && cust->unhidden_hw_cursor_update) ?
+ HARDWARE_CURSOR_UPDATE_UNHIDDEN : 0));
/* Must force it before EnterVT, so we are in control of VT and
* later memory should be bound when allocating, e.g rotate_mem */
@@ -862,8 +866,10 @@ drv_leave_vt(int scrnIndex, int flags)
}
}
- drmModeRmFB(ms->fd, ms->fb_id);
- ms->fb_id = -1;
+ if (ms->fb_id != -1) {
+ drmModeRmFB(ms->fd, ms->fb_id);
+ ms->fb_id = -1;
+ }
/* idle hardware */
if (!ms->kms)
@@ -944,7 +950,6 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen)
}
#endif
- drmModeRmFB(ms->fd, ms->fb_id);
ms->destroy_front_buffer(pScrn);
if (ms->exa)
@@ -1178,6 +1183,8 @@ drv_bind_front_buffer_kms(ScrnInfoPtr pScrn)
stride,
ptr);
+#if (XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1, 9, 99, 1, 0))
+
/* This a hack to work around EnableDisableFBAccess setting the pointer
* the real fix would be to replace pScrn->EnableDisableFBAccess hook
* and set the rootPixmap->devPrivate.ptr to something valid before that.
@@ -1187,6 +1194,8 @@ drv_bind_front_buffer_kms(ScrnInfoPtr pScrn)
*/
pScrn->pixmapPrivate.ptr = ptr;
+#endif
+
return TRUE;
err_destroy:
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index 6b2c80fbca..4b1c02bad4 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -720,7 +720,7 @@ ExaCreatePixmap(ScreenPtr pScreen, int size, int align)
{
struct exa_pixmap_priv *priv;
- priv = xcalloc(1, sizeof(struct exa_pixmap_priv));
+ priv = calloc(1, sizeof(struct exa_pixmap_priv));
if (!priv)
return NULL;
@@ -737,7 +737,7 @@ ExaDestroyPixmap(ScreenPtr pScreen, void *dPriv)
pipe_resource_reference(&priv->tex, NULL);
- xfree(priv);
+ free(priv);
}
static Bool
@@ -975,7 +975,7 @@ xorg_exa_close(ScrnInfoPtr pScrn)
ms->ctx = NULL;
exaDriverFini(pScrn->pScreen);
- xfree(exa);
+ free(exa);
ms->exa = NULL;
}
@@ -987,7 +987,7 @@ xorg_exa_init(ScrnInfoPtr pScrn, Bool accel)
ExaDriverPtr pExa;
CustomizerPtr cust = ms->cust;
- exa = xcalloc(1, sizeof(struct exa_context));
+ exa = calloc(1, sizeof(struct exa_context));
if (!exa)
return NULL;
@@ -1057,6 +1057,7 @@ xorg_exa_init(ScrnInfoPtr pScrn, Bool accel)
out_err:
xorg_exa_close(pScrn);
+ free(exa);
return NULL;
}
diff --git a/src/gallium/state_trackers/xorg/xorg_output.c b/src/gallium/state_trackers/xorg/xorg_output.c
index 61206ed751..5555b51131 100644
--- a/src/gallium/state_trackers/xorg/xorg_output.c
+++ b/src/gallium/state_trackers/xorg/xorg_output.c
@@ -128,7 +128,7 @@ output_get_modes(xf86OutputPtr output)
for (i = 0; i < drm_connector->count_modes; i++) {
drm_mode = &drm_connector->modes[i];
if (drm_mode) {
- mode = xcalloc(1, sizeof(DisplayModeRec));
+ mode = calloc(1, sizeof(DisplayModeRec));
if (!mode)
continue;
mode->Clock = drm_mode->clock;
@@ -195,7 +195,7 @@ output_destroy(xf86OutputPtr output)
{
struct output_private *priv = output->driver_private;
drmModeFreeConnector(priv->drm_connector);
- xfree(priv);
+ free(priv);
output->driver_private = NULL;
}
@@ -262,14 +262,14 @@ xorg_output_init(ScrnInfoPtr pScrn)
drm_connector->connector_type_id);
- priv = xcalloc(sizeof(*priv), 1);
+ priv = calloc(sizeof(*priv), 1);
if (!priv) {
continue;
}
output = xf86OutputCreate(pScrn, &output_funcs, name);
if (!output) {
- xfree(priv);
+ free(priv);
continue;
}
diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h
index be1a9fda48..a3fb5e5dad 100644
--- a/src/gallium/state_trackers/xorg/xorg_tracker.h
+++ b/src/gallium/state_trackers/xorg/xorg_tracker.h
@@ -76,6 +76,7 @@ typedef struct _CustomizerRec
Bool dirty_throttling;
Bool swap_throttling;
Bool no_3d;
+ Bool unhidden_hw_cursor_update;
Bool (*winsys_pre_init) (struct _CustomizerRec *cust, int fd);
Bool (*winsys_screen_init)(struct _CustomizerRec *cust);
Bool (*winsys_screen_close)(struct _CustomizerRec *cust);
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index f98bd93901..f64959f00e 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -536,8 +536,10 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst);
hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y));
+#ifdef COMPOSITE
REGION_TRANSLATE(pScrn->pScreen, dstRegion, -pPixmap->screen_x,
-pPixmap->screen_y);
+#endif
dxo = dstRegion->extents.x1;
dyo = dstRegion->extents.y1;
@@ -562,11 +564,16 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
int box_y2 = pbox->y2;
float diff_x = (float)src_w / (float)dst_w;
float diff_y = (float)src_h / (float)dst_h;
- float offset_x = box_x1 - dstX + pPixmap->screen_x;
- float offset_y = box_y1 - dstY + pPixmap->screen_y;
+ float offset_x = box_x1 - dstX;
+ float offset_y = box_y1 - dstY;
float offset_w;
float offset_h;
+#ifdef COMPOSITE
+ offset_x += pPixmap->screen_x;
+ offset_y += pPixmap->screen_y;
+#endif
+
x = box_x1;
y = box_y1;
w = box_x2 - box_x1;
diff --git a/src/gallium/targets/Makefile.xorg b/src/gallium/targets/Makefile.xorg
index 762c905985..87eedd7136 100644
--- a/src/gallium/targets/Makefile.xorg
+++ b/src/gallium/targets/Makefile.xorg
@@ -29,7 +29,7 @@ INCLUDES = \
LIBNAME_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET)
ifeq ($(MESA_LLVM),1)
-LD = g++
+LD = $(CXX)
LDFLAGS += $(LLVM_LDFLAGS)
USE_CXX=1
DRIVER_PIPES += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
@@ -42,7 +42,7 @@ endif
default: depend $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING)
$(LIBNAME): $(OBJECTS) Makefile ../Makefile.xorg $(LIBS) $(DRIVER_PIPES)
- $(MKLIB) -noprefix -o $@ $(LDFLAGS) $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS)
+ $(MKLIB) -linker $(CC) -noprefix -o $@ $(LDFLAGS) $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS)
depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURCES)
rm -f depend
diff --git a/src/gallium/targets/dri-i915/target.c b/src/gallium/targets/dri-i915/target.c
index 5ae6ca367d..a27b7bd6d8 100644
--- a/src/gallium/targets/dri-i915/target.c
+++ b/src/gallium/targets/dri-i915/target.c
@@ -19,8 +19,7 @@ create_screen(int fd)
if (!screen)
return NULL;
- if (debug_get_bool_option("I915_SOFTWARE", FALSE))
- screen = sw_screen_wrap(screen);
+ screen = sw_screen_wrap(screen);
screen = debug_screen_wrap(screen);
diff --git a/src/gallium/targets/dri-i965/target.c b/src/gallium/targets/dri-i965/target.c
index ce97f82027..0632b97bea 100644
--- a/src/gallium/targets/dri-i965/target.c
+++ b/src/gallium/targets/dri-i965/target.c
@@ -19,8 +19,7 @@ create_screen(int fd)
if (!screen)
return NULL;
- if (debug_get_bool_option("BRW_SOFTPIPE", FALSE))
- screen = sw_screen_wrap(screen);
+ screen = sw_screen_wrap(screen);
screen = debug_screen_wrap(screen);
diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c
index 2c1b2f5be4..8753e2bab1 100644
--- a/src/gallium/targets/dri-r600/target.c
+++ b/src/gallium/targets/dri-r600/target.c
@@ -1,33 +1,9 @@
-
#include "state_tracker/drm_driver.h"
#include "target-helpers/inline_debug_helper.h"
#include "r600/drm/r600_drm_public.h"
#include "r600/r600_public.h"
-#if 0
-static struct pipe_screen *
-create_screen(int fd)
-{
- struct radeon *rw;
- struct pipe_screen *screen;
-
- rw = r600_drm_winsys_create(fd);
- if (!rw)
- return NULL;
-
- screen = r600_screen_create(rw);
- if (!screen)
- return NULL;
-
- screen = debug_screen_wrap(screen);
-
- return screen;
-}
-#else
-struct radeon *r600_new(int fd, unsigned device);
-struct pipe_screen *r600_screen_create2(struct radeon *radeon);
-static struct pipe_screen *
-create_screen(int fd)
+static struct pipe_screen *create_screen(int fd)
{
struct radeon *radeon;
struct pipe_screen *screen;
@@ -36,7 +12,7 @@ create_screen(int fd)
if (!radeon)
return NULL;
- screen = r600_screen_create2(radeon);
+ screen = r600_screen_create(radeon);
if (!screen)
return NULL;
@@ -44,6 +20,5 @@ create_screen(int fd)
return screen;
}
-#endif
DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen)
diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile
index 47c24cefe5..38e60dbafb 100644
--- a/src/gallium/targets/egl/Makefile
+++ b/src/gallium/targets/egl/Makefile
@@ -24,7 +24,9 @@ common_CPPFLAGS := \
-I$(TOP)/src/gallium/auxiliary \
-I$(TOP)/src/gallium/drivers \
-I$(TOP)/src/gallium/include \
- -I$(TOP)/src/gallium/winsys
+ -I$(TOP)/src/gallium/winsys \
+ $(LIBDRM_CFLAGS)
+
common_SYS :=
common_LIBS := \
$(TOP)/src/gallium/drivers/identity/libidentity.a \
@@ -41,11 +43,11 @@ egl_SYS := -lm $(DLOPEN_LIBS) -L$(TOP)/$(LIB_DIR) -lEGL
egl_LIBS := $(TOP)/src/gallium/state_trackers/egl/libegl.a
ifneq ($(findstring x11, $(EGL_PLATFORMS)),)
-egl_SYS += -lX11 -lXext -lXfixes
+egl_SYS += -lX11 -lXext -lXfixes $(LIBDRM_LIB)
egl_LIBS += $(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a
endif
-ifneq ($(findstring kms, $(EGL_PLATFORMS)),)
-egl_SYS += -ldrm
+ifneq ($(findstring drm, $(EGL_PLATFORMS)),)
+egl_SYS += $(LIBDRM_LIB)
endif
ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),)
egl_LIBS += $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a
diff --git a/src/gallium/targets/egl/pipe_i915.c b/src/gallium/targets/egl/pipe_i915.c
index 758a921b48..cd74044d8c 100644
--- a/src/gallium/targets/egl/pipe_i915.c
+++ b/src/gallium/targets/egl/pipe_i915.c
@@ -1,5 +1,4 @@
-#include "target-helpers/inline_wrapper_sw_helper.h"
#include "target-helpers/inline_debug_helper.h"
#include "state_tracker/drm_driver.h"
#include "i915/drm/i915_drm_public.h"
diff --git a/src/gallium/targets/egl/pipe_i965.c b/src/gallium/targets/egl/pipe_i965.c
index 43bf646e82..f810ecffb0 100644
--- a/src/gallium/targets/egl/pipe_i965.c
+++ b/src/gallium/targets/egl/pipe_i965.c
@@ -1,6 +1,6 @@
-#include "target-helpers/inline_wrapper_sw_helper.h"
#include "target-helpers/inline_debug_helper.h"
+#include "target-helpers/inline_wrapper_sw_helper.h"
#include "state_tracker/drm_driver.h"
#include "i965/drm/i965_drm_public.h"
#include "i965/brw_public.h"
@@ -19,8 +19,7 @@ create_screen(int fd)
if (!screen)
return NULL;
- if (debug_get_bool_option("BRW_SOFTPIPE", FALSE))
- screen = sw_screen_wrap(screen);
+ screen = sw_screen_wrap(screen);
screen = debug_screen_wrap(screen);
diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile
index 79e516a2a7..076a040a5a 100644
--- a/src/gallium/targets/libgl-xlib/Makefile
+++ b/src/gallium/targets/libgl-xlib/Makefile
@@ -10,7 +10,7 @@ include $(TOP)/configs/current
GL_MAJOR = 1
GL_MINOR = 5
-GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY)
+GL_TINY = 0$(MESA_MAJOR)$(MESA_MINOR)0$(MESA_TINY)
INCLUDE_DIRS = \
diff --git a/src/gallium/targets/xorg-i965/intel_target.c b/src/gallium/targets/xorg-i965/intel_target.c
index ce97f82027..0632b97bea 100644
--- a/src/gallium/targets/xorg-i965/intel_target.c
+++ b/src/gallium/targets/xorg-i965/intel_target.c
@@ -19,8 +19,7 @@ create_screen(int fd)
if (!screen)
return NULL;
- if (debug_get_bool_option("BRW_SOFTPIPE", FALSE))
- screen = sw_screen_wrap(screen);
+ screen = sw_screen_wrap(screen);
screen = debug_screen_wrap(screen);
diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c
index 237b308ae3..9b422e661b 100644
--- a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c
+++ b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c
@@ -32,6 +32,7 @@
* allows X clients to communicate with the driver.
*/
+#include <xorg-server.h>
#include "dixstruct.h"
#include "extnsionst.h"
#include <X11/X.h>
@@ -211,7 +212,7 @@ VMwareCtrlDoSetTopology(ScrnInfoPtr pScrn,
struct vmw_customizer *vmw = vmw_customizer(xorg_customizer(pScrn));
int i;
- rects = xcalloc(number, sizeof(*rects));
+ rects = calloc(number, sizeof(*rects));
if (!rects)
return FALSE;
@@ -224,7 +225,7 @@ VMwareCtrlDoSetTopology(ScrnInfoPtr pScrn,
vmw_ioctl_update_layout(vmw, number, rects);
- xfree(rects);
+ free(rects);
return TRUE;
}
diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c b/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c
index 7c799b5827..7625d2fb8f 100644
--- a/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c
+++ b/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c
@@ -165,7 +165,7 @@ vmw_ioctl_buffer_create(struct vmw_customizer *vmw, uint32_t size, unsigned *han
struct drm_vmw_dmabuf_rep *rep = &arg.rep;
int ret;
- buf = xcalloc(1, sizeof(*buf));
+ buf = calloc(1, sizeof(*buf));
if (!buf)
goto err;
@@ -192,7 +192,7 @@ vmw_ioctl_buffer_create(struct vmw_customizer *vmw, uint32_t size, unsigned *han
return buf;
err_free:
- xfree(buf);
+ free(buf);
err:
return NULL;
}
@@ -211,7 +211,7 @@ vmw_ioctl_buffer_destroy(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf)
arg.handle = buf->handle;
drmCommandWrite(vmw->fd, DRM_VMW_UNREF_DMABUF, &arg, sizeof(arg));
- xfree(buf);
+ free(buf);
}
void *
diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c
index 8173908f55..7662203165 100644
--- a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c
+++ b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c
@@ -245,6 +245,7 @@ vmw_screen_pre_init(ScrnInfoPtr pScrn, int flags)
cust->winsys_enter_vt = vmw_screen_enter_vt;
cust->winsys_leave_vt = vmw_screen_leave_vt;
cust->no_3d = TRUE;
+ cust->unhidden_hw_cursor_update = TRUE;
vmw->pScrn = pScrn;
pScrn->driverPrivate = cust;
diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_video.c b/src/gallium/targets/xorg-vmwgfx/vmw_video.c
index eced60d0ec..94465e5204 100644
--- a/src/gallium/targets/xorg-vmwgfx/vmw_video.c
+++ b/src/gallium/targets/xorg-vmwgfx/vmw_video.c
@@ -300,7 +300,7 @@ vmw_video_init(struct vmw_customizer *vmw)
numAdaptors = 1;
overlayAdaptors = &newAdaptor;
} else {
- newAdaptors = xalloc((numAdaptors + 1) *
+ newAdaptors = malloc((numAdaptors + 1) *
sizeof(XF86VideoAdaptorPtr*));
if (!newAdaptors) {
xf86XVFreeVideoAdaptorRec(newAdaptor);
@@ -320,7 +320,7 @@ vmw_video_init(struct vmw_customizer *vmw)
}
if (newAdaptors) {
- xfree(newAdaptors);
+ free(newAdaptors);
}
debug_printf("Initialized VMware Xv extension successfully\n");
@@ -438,7 +438,7 @@ vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_customizer *vmw)
return NULL;
}
- video = xcalloc(1, sizeof(*video));
+ video = calloc(1, sizeof(*video));
if (!video) {
debug_printf("Not enough memory.\n");
xf86XVFreeVideoAdaptorRec(adaptor);
@@ -742,7 +742,7 @@ vmw_video_buffer_alloc(struct vmw_customizer *vmw, int size,
}
out->size = size;
- out->extra_data = xcalloc(1, size);
+ out->extra_data = calloc(1, size);
debug_printf("\t\t%s: allocated buffer %p of size %i\n", __func__, out, size);
@@ -773,7 +773,7 @@ vmw_video_buffer_free(struct vmw_customizer *vmw,
if (out->size == 0)
return Success;
- xfree(out->extra_data);
+ free(out->extra_data);
vmw_ioctl_buffer_unmap(vmw, out->buf);
vmw_ioctl_buffer_destroy(vmw, out->buf);
diff --git a/src/gallium/tests/python/retrace/interpreter.py b/src/gallium/tests/python/retrace/interpreter.py
index 37d7fd6415..954a701a53 100755
--- a/src/gallium/tests/python/retrace/interpreter.py
+++ b/src/gallium/tests/python/retrace/interpreter.py
@@ -111,6 +111,7 @@ struct_factories = {
#"pipe_texture": gallium.Texture,
'pipe_subresource': gallium.pipe_subresource,
'pipe_box': gallium.pipe_box,
+ 'pipe_draw_info': gallium.pipe_draw_info,
}
@@ -533,30 +534,22 @@ class Context(Object):
return minindex + ibias, maxindex + ibias
- def draw_arrays(self, mode, start, count):
- self.dump_vertices(start, count)
-
- self.real.draw_arrays(mode, start, count)
- self._set_dirty()
-
- def draw_elements(self, indexBuffer, indexSize, indexBias, mode, start, count):
- if self.interpreter.verbosity(2):
- minindex, maxindex = self.dump_indices(indexBuffer, indexSize, indexBias, start, count)
- self.dump_vertices(minindex, maxindex - minindex)
+ def set_index_buffer(self, ib):
+ if ib:
+ self.real.set_index_buffer(ib.index_size, ib.offset, ib.buffer)
+ else:
+ self.real.set_index_buffer(0, 0, None)
- self.real.draw_elements(indexBuffer, indexSize, indexBias, mode, start, count)
- self._set_dirty()
-
- def draw_range_elements(self, indexBuffer, indexSize, indexBias, minIndex, maxIndex, mode, start, count):
+ def draw_vbo(self, info):
if self.interpreter.verbosity(2):
- minindex, maxindex = self.dump_indices(indexBuffer, indexSize, indexBias, start, count)
- minindex = min(minindex, minIndex)
- maxindex = min(maxindex, maxIndex)
- self.dump_vertices(minindex, maxindex - minindex)
+ if 0:
+ minindex, maxindex = self.dump_indices(indexBuffer, indexSize, indexBias, start, count)
+
+ self.dump_vertices(info.minindex, info.maxindex + 1 - info.minindex)
- self.real.draw_range_elements(indexBuffer, indexSize, indexBias, minIndex, maxIndex, mode, start, count)
+ self.real.draw_vbo(info)
self._set_dirty()
-
+
def resource_copy_region(self, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height):
if dst is not None and src is not None:
if self.interpreter.options.all:
@@ -617,6 +610,15 @@ class Context(Object):
_rgba[i] = rgba[i]
self.real.clear(buffers, _rgba, depth, stencil)
+ def clear_render_target(self, dst, rgba, dstx, dsty, width, height):
+ _rgba = gallium.FloatArray(4)
+ for i in range(4):
+ _rgba[i] = rgba[i]
+ self.real.clear_render_target(dst, _rgba, dstx, dsty, width, height)
+
+ def clear_depth_stencil(self, dst, clear_flags, depth, stencil, dstx, dsty, width, height):
+ self.real.clear_depth_stencil(dst, clear_flags, depth, stencil, dstx, dsty, width, height)
+
def _present(self):
self.real.flush()
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-face.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-face.sh
new file mode 100644
index 0000000000..5745b6a5ab
--- /dev/null
+++ b/src/gallium/tests/python/tests/regress/fragment-shader/frag-face.sh
@@ -0,0 +1,14 @@
+FRAG
+
+DCL IN[0], COLOR, LINEAR
+DCL IN[1], FACE, CONSTANT
+DCL OUT[0], COLOR
+DCL TEMP[0]
+IMM FLT32 { 0.5, 1.0, 0.0, 0.0 }
+
+MUL TEMP[0], IN[1].xxxx, IMM[0].xxxx
+ADD TEMP[0], TEMP[0], IMM[0].yyyy
+
+MOV OUT[0], TEMP[0]
+
+END
diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile
index 8a84ceec69..a396205f89 100644
--- a/src/gallium/winsys/r600/drm/Makefile
+++ b/src/gallium/winsys/r600/drm/Makefile
@@ -6,19 +6,14 @@ LIBNAME = r600winsys
C_SOURCES = \
bof.c \
- r600_state.c \
- r600_state2.c \
- evergreen_state.c \
- r600.c \
- radeon_ctx.c \
- radeon_draw.c \
- radeon_state.c \
+ evergreen_hw_context.c \
radeon_bo.c \
+ radeon_bo_pb.c \
radeon_pciid.c \
- radeon.c \
+ r600.c \
+ r600_bo.c \
r600_drm.c \
- radeon_ws_bo.c \
- radeon_bo_pb.c
+ r600_hw_context.c
LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \
$(shell pkg-config libdrm --cflags-only-I)
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
index 2f20d9f895..cc053c06dd 100644
--- a/src/gallium/winsys/r600/drm/SConscript
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -4,14 +4,14 @@ env = env.Clone()
r600_sources = [
'bof.c',
- 'r600_state.c',
- 'radeon_ctx.c',
- 'radeon_draw.c',
- 'radeon_state.c',
+ 'evergreen_hw_context.c',
'radeon_bo.c',
+ 'radeon_bo_pb.c',
'radeon_pciid.c',
- 'radeon.c',
- 'r600_drm.c'
+ 'r600.c',
+ 'r600_bo.c',
+ 'r600_drm.c',
+ 'r600_hw_context.c',
]
env.ParseConfig('pkg-config --cflags libdrm_radeon')
diff --git a/src/gallium/winsys/r600/drm/eg_states.h b/src/gallium/winsys/r600/drm/eg_states.h
deleted file mode 100644
index ced7f147c0..0000000000
--- a/src/gallium/winsys/r600/drm/eg_states.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-#ifndef EG_STATES_H
-#define EG_STATES_H
-
-static const struct radeon_register EG_names_CONFIG[] = {
- {0x00008C00, 0, 0, "SQ_CONFIG"},
- {0x00009100, 0, 0, "SPI_CONFIG_CNTL"},
- {0x0000913C, 0, 0, "SPI_CONFIG_CNTL_1"},
- {0x00008C04, 0, 0, "SQ_GPR_RESOURCE_MGMT_1"},
- {0x00008C08, 0, 0, "SQ_GPR_RESOURCE_MGMT_2"},
- {0x00008C0C, 0, 0, "SQ_GPR_RESOURCE_MGMT_3"},
- {0x00008C18, 0, 0, "SQ_THREAD_RESOURCE_MGMT_1"},
- {0x00008C1C, 0, 0, "SQ_THREAD_RESOURCE_MGMT_2"},
- {0x00008C20, 0, 0, "SQ_STACK_RESOURCE_MGMT_1"},
- {0x00008C24, 0, 0, "SQ_STACK_RESOURCE_MGMT_2"},
- {0x00008C28, 0, 0, "SQ_STACK_RESOURCE_MGMT_3"},
- {0x00008D8C, 0, 0, "SQ_DYN_GPR_CNTL_PS_FLUSH_REQ"},
- {0x00008A14, 0, 0, "PA_CL_ENHANCE"},
- {0x00028838, 0, 0, "SQ_DYN_GPR_RESOURCE_LIMIT_1"},
- {0x000288EC, 0, 0, "SQ_LDS_ALLOC_PS"},
- {0x00028350, 0, 0, "SX_MISC"},
- {0x00028900, 0, 0, "SQ_ESGS_RING_ITEMSIZE"},
- {0x00028904, 0, 0, "SQ_GSVS_RING_ITEMSIZE"},
- {0x00028908, 0, 0, "SQ_ESTMP_RING_ITEMSIZE"},
- {0x0002890C, 0, 0, "SQ_GSTMP_RING_ITEMSIZE"},
- {0x00028910, 0, 0, "SQ_VSTMP_RING_ITEMSIZE"},
- {0x00028914, 0, 0, "SQ_PSTMP_RING_ITEMSIZE"},
- {0x0002891C, 0, 0, "SQ_GS_VERT_ITEMSIZE"},
- {0x00028920, 0, 0, "SQ_GS_VERT_ITEMSIZE_1"},
- {0x00028924, 0, 0, "SQ_GS_VERT_ITEMSIZE_2"},
- {0x00028928, 0, 0, "SQ_GS_VERT_ITEMSIZE_3"},
- {0x00028A10, 0, 0, "VGT_OUTPUT_PATH_CNTL"},
- {0x00028A14, 0, 0, "VGT_HOS_CNTL"},
- {0x00028A18, 0, 0, "VGT_HOS_MAX_TESS_LEVEL"},
- {0x00028A1C, 0, 0, "VGT_HOS_MIN_TESS_LEVEL"},
- {0x00028A20, 0, 0, "VGT_HOS_REUSE_DEPTH"},
- {0x00028A24, 0, 0, "VGT_GROUP_PRIM_TYPE"},
- {0x00028A28, 0, 0, "VGT_GROUP_FIRST_DECR"},
- {0x00028A2C, 0, 0, "VGT_GROUP_DECR"},
- {0x00028A30, 0, 0, "VGT_GROUP_VECT_0_CNTL"},
- {0x00028A34, 0, 0, "VGT_GROUP_VECT_1_CNTL"},
- {0x00028A38, 0, 0, "VGT_GROUP_VECT_0_FMT_CNTL"},
- {0x00028A3C, 0, 0, "VGT_GROUP_VECT_1_FMT_CNTL"},
- {0x00028A40, 0, 0, "VGT_GS_MODE"},
- {0x00028A48, 0, 0, "PA_SC_MODE_CNTL_0"},
- {0x00028A4C, 0, 0, "PA_SC_MODE_CNTL_1"},
- {0x00028AB4, 0, 0, "VGT_REUSE_OFF"},
- {0x00028AB8, 0, 0, "VGT_VTX_CNT_EN"},
- {0x00028B54, 0, 0, "VGT_SHADER_STAGES_EN"},
- {0x00028B94, 0, 0, "VGT_STRMOUT_CONFIG"},
- {0x00028B98, 0, 0, "VGT_STRMOUT_BUFFER_CONFIG"},
-};
-
-static const struct radeon_register EG_names_CB_CNTL[] = {
- {0x00028238, 0, 0, "CB_TARGET_MASK"},
- {0x0002823C, 0, 0, "CB_SHADER_MASK"},
- {0x00028808, 0, 0, "CB_COLOR_CONTROL"},
- {0x00028C04, 0, 0, "PA_SC_AA_CONFIG"},
- {0x00028C1C, 0, 0, "PA_SC_AA_SAMPLE_LOCS_MCTX"},
- {0x00028C3C, 0, 0, "PA_SC_AA_MASK"},
-};
-
-static const struct radeon_register EG_names_RASTERIZER[] = {
- {0x000286D4, 0, 0, "SPI_INTERP_CONTROL_0"},
- {0x00028810, 0, 0, "PA_CL_CLIP_CNTL"},
- {0x00028814, 0, 0, "PA_SU_SC_MODE_CNTL"},
- {0x0002881C, 0, 0, "PA_CL_VS_OUT_CNTL"},
- {0x00028820, 0, 0, "PA_CL_NANINF_CNTL"},
- {0x00028A00, 0, 0, "PA_SU_POINT_SIZE"},
- {0x00028A04, 0, 0, "PA_SU_POINT_MINMAX"},
- {0x00028A08, 0, 0, "PA_SU_LINE_CNTL"},
- {0x00028A48, 0, 0, "PA_SC_MPASS_PS_CNTL"},
- {0x00028C00, 0, 0, "PA_SC_LINE_CNTL"},
- {0x00028C08, 0, 0, "PA_SU_VTX_CNTL"},
- {0x00028C0C, 0, 0, "PA_CL_GB_VERT_CLIP_ADJ"},
- {0x00028C10, 0, 0, "PA_CL_GB_VERT_DISC_ADJ"},
- {0x00028C14, 0, 0, "PA_CL_GB_HORZ_CLIP_ADJ"},
- {0x00028C18, 0, 0, "PA_CL_GB_HORZ_DISC_ADJ"},
- {0x00028B78, 0, 0, "PA_SU_POLY_OFFSET_DB_FMT_CNTL"},
- {0x00028B7C, 0, 0, "PA_SU_POLY_OFFSET_CLAMP"},
- {0x00028B80, 0, 0, "PA_SU_POLY_OFFSET_FRONT_SCALE"},
- {0x00028B84, 0, 0, "PA_SU_POLY_OFFSET_FRONT_OFFSET"},
- {0x00028B88, 0, 0, "PA_SU_POLY_OFFSET_BACK_SCALE"},
- {0x00028B8C, 0, 0, "PA_SU_POLY_OFFSET_BACK_OFFSET"},
-};
-
-/* Viewport states are same as r600 */
-static const struct radeon_register EG_names_VIEWPORT[] = {
- {0x000282D0, 0, 0, "PA_SC_VPORT_ZMIN_0"},
- {0x000282D4, 0, 0, "PA_SC_VPORT_ZMAX_0"},
- {0x0002843C, 0, 0, "PA_CL_VPORT_XSCALE_0"},
- {0x00028444, 0, 0, "PA_CL_VPORT_YSCALE_0"},
- {0x0002844C, 0, 0, "PA_CL_VPORT_ZSCALE_0"},
- {0x00028440, 0, 0, "PA_CL_VPORT_XOFFSET_0"},
- {0x00028448, 0, 0, "PA_CL_VPORT_YOFFSET_0"},
- {0x00028450, 0, 0, "PA_CL_VPORT_ZOFFSET_0"},
- {0x00028818, 0, 0, "PA_CL_VTE_CNTL"},
-};
-
-/* scissor is same as R600 */
-static const struct radeon_register EG_names_SCISSOR[] = {
- {0x00028030, 0, 0, "PA_SC_SCREEN_SCISSOR_TL"},
- {0x00028034, 0, 0, "PA_SC_SCREEN_SCISSOR_BR"},
- {0x00028200, 0, 0, "PA_SC_WINDOW_OFFSET"},
- {0x00028204, 0, 0, "PA_SC_WINDOW_SCISSOR_TL"},
- {0x00028208, 0, 0, "PA_SC_WINDOW_SCISSOR_BR"},
- {0x0002820C, 0, 0, "PA_SC_CLIPRECT_RULE"},
- {0x00028210, 0, 0, "PA_SC_CLIPRECT_0_TL"},
- {0x00028214, 0, 0, "PA_SC_CLIPRECT_0_BR"},
- {0x00028218, 0, 0, "PA_SC_CLIPRECT_1_TL"},
- {0x0002821C, 0, 0, "PA_SC_CLIPRECT_1_BR"},
- {0x00028220, 0, 0, "PA_SC_CLIPRECT_2_TL"},
- {0x00028224, 0, 0, "PA_SC_CLIPRECT_2_BR"},
- {0x00028228, 0, 0, "PA_SC_CLIPRECT_3_TL"},
- {0x0002822C, 0, 0, "PA_SC_CLIPRECT_3_BR"},
- {0x00028230, 0, 0, "PA_SC_EDGERULE"},
- {0x00028240, 0, 0, "PA_SC_GENERIC_SCISSOR_TL"},
- {0x00028244, 0, 0, "PA_SC_GENERIC_SCISSOR_BR"},
- {0x00028250, 0, 0, "PA_SC_VPORT_SCISSOR_0_TL"},
- {0x00028254, 0, 0, "PA_SC_VPORT_SCISSOR_0_BR"},
- {0x00028234, 0, 0, "PA_SU_HARDWARE_SCREEN_OFFSET"},
-};
-
-/* same as r700 i.e. no blend control */
-static const struct radeon_register EG_names_BLEND[] = {
- {0x00028414, 0, 0, "CB_BLEND_RED"},
- {0x00028418, 0, 0, "CB_BLEND_GREEN"},
- {0x0002841C, 0, 0, "CB_BLEND_BLUE"},
- {0x00028420, 0, 0, "CB_BLEND_ALPHA"},
- {0x00028780, 0, 0, "CB_BLEND0_CONTROL"},
- {0x00028784, 0, 0, "CB_BLEND1_CONTROL"},
- {0x00028788, 0, 0, "CB_BLEND2_CONTROL"},
- {0x0002878C, 0, 0, "CB_BLEND3_CONTROL"},
- {0x00028790, 0, 0, "CB_BLEND4_CONTROL"},
- {0x00028794, 0, 0, "CB_BLEND5_CONTROL"},
- {0x00028798, 0, 0, "CB_BLEND6_CONTROL"},
- {0x0002879C, 0, 0, "CB_BLEND7_CONTROL"},
-};
-
-/* different */
-static const struct radeon_register EG_names_DSA[] = {
- {0x00028028, 0, 0, "DB_STENCIL_CLEAR"},
- {0x0002802C, 0, 0, "DB_DEPTH_CLEAR"},
- {0x00028410, 0, 0, "SX_ALPHA_TEST_CONTROL"},
- {0x00028430, 0, 0, "DB_STENCILREFMASK"},
- {0x00028434, 0, 0, "DB_STENCILREFMASK_BF"},
- {0x00028438, 0, 0, "SX_ALPHA_REF"},
- {0x000286DC, 0, 0, "SPI_FOG_CNTL"},
- {0x00028800, 0, 0, "DB_DEPTH_CONTROL"},
- {0x0002880C, 0, 0, "DB_SHADER_CONTROL"},
- {0x00028000, 0, 0, "DB_RENDER_CONTROL"},
- {0x00028004, 0, 0, "DB_COUNT_CONTROL"},
- {0x0002800C, 0, 0, "DB_RENDER_OVERRIDE"},
- {0x00028010, 0, 0, "DB_RENDER_OVERRIDE2"},
- {0x00028AC0, 0, 0, "DB_SRESULTS_COMPARE_STATE0"},
- {0x00028AC4, 0, 0, "DB_SRESULTS_COMPARE_STATE1"},
- {0x00028AC8, 0, 0, "DB_PRELOAD_CONTROL"},
- {0x00028B70, 0, 0, "DB_ALPHA_TO_MASK"},
-};
-
-/* different */
-static const struct radeon_register EG_names_VS_SHADER[] = {
- {0x00028380, 0, 0, "SQ_VTX_SEMANTIC_0"},
- {0x00028384, 0, 0, "SQ_VTX_SEMANTIC_1"},
- {0x00028388, 0, 0, "SQ_VTX_SEMANTIC_2"},
- {0x0002838C, 0, 0, "SQ_VTX_SEMANTIC_3"},
- {0x00028390, 0, 0, "SQ_VTX_SEMANTIC_4"},
- {0x00028394, 0, 0, "SQ_VTX_SEMANTIC_5"},
- {0x00028398, 0, 0, "SQ_VTX_SEMANTIC_6"},
- {0x0002839C, 0, 0, "SQ_VTX_SEMANTIC_7"},
- {0x000283A0, 0, 0, "SQ_VTX_SEMANTIC_8"},
- {0x000283A4, 0, 0, "SQ_VTX_SEMANTIC_9"},
- {0x000283A8, 0, 0, "SQ_VTX_SEMANTIC_10"},
- {0x000283AC, 0, 0, "SQ_VTX_SEMANTIC_11"},
- {0x000283B0, 0, 0, "SQ_VTX_SEMANTIC_12"},
- {0x000283B4, 0, 0, "SQ_VTX_SEMANTIC_13"},
- {0x000283B8, 0, 0, "SQ_VTX_SEMANTIC_14"},
- {0x000283BC, 0, 0, "SQ_VTX_SEMANTIC_15"},
- {0x000283C0, 0, 0, "SQ_VTX_SEMANTIC_16"},
- {0x000283C4, 0, 0, "SQ_VTX_SEMANTIC_17"},
- {0x000283C8, 0, 0, "SQ_VTX_SEMANTIC_18"},
- {0x000283CC, 0, 0, "SQ_VTX_SEMANTIC_19"},
- {0x000283D0, 0, 0, "SQ_VTX_SEMANTIC_20"},
- {0x000283D4, 0, 0, "SQ_VTX_SEMANTIC_21"},
- {0x000283D8, 0, 0, "SQ_VTX_SEMANTIC_22"},
- {0x000283DC, 0, 0, "SQ_VTX_SEMANTIC_23"},
- {0x000283E0, 0, 0, "SQ_VTX_SEMANTIC_24"},
- {0x000283E4, 0, 0, "SQ_VTX_SEMANTIC_25"},
- {0x000283E8, 0, 0, "SQ_VTX_SEMANTIC_26"},
- {0x000283EC, 0, 0, "SQ_VTX_SEMANTIC_27"},
- {0x000283F0, 0, 0, "SQ_VTX_SEMANTIC_28"},
- {0x000283F4, 0, 0, "SQ_VTX_SEMANTIC_29"},
- {0x000283F8, 0, 0, "SQ_VTX_SEMANTIC_30"},
- {0x000283FC, 0, 0, "SQ_VTX_SEMANTIC_31"},
- {0x0002861C, 0, 0, "SPI_VS_OUT_ID_0"}, // all diff belwo
- {0x00028620, 0, 0, "SPI_VS_OUT_ID_1"},
- {0x00028624, 0, 0, "SPI_VS_OUT_ID_2"},
- {0x00028628, 0, 0, "SPI_VS_OUT_ID_3"},
- {0x0002862C, 0, 0, "SPI_VS_OUT_ID_4"},
- {0x00028630, 0, 0, "SPI_VS_OUT_ID_5"},
- {0x00028634, 0, 0, "SPI_VS_OUT_ID_6"},
- {0x00028638, 0, 0, "SPI_VS_OUT_ID_7"},
- {0x0002863C, 0, 0, "SPI_VS_OUT_ID_8"},
- {0x00028640, 0, 0, "SPI_VS_OUT_ID_9"},
- {0x000286C4, 0, 0, "SPI_VS_OUT_CONFIG"},
- {0x0002885C, 1, 0, "SQ_PGM_START_VS"},
- {0x00028860, 0, 0, "SQ_PGM_RESOURCES_VS"},
- {0x00028864, 0, 0, "SQ_PGM_RESOURCES_2_VS"},
- {0x000288A4, 1, 1, "SQ_PGM_START_FS"},
- {0x000288A8, 0, 0, "SQ_PGM_RESOURCES_FS"},
-};
-
-static const struct radeon_register EG_names_PS_SHADER[] = {
- {0x00028644, 0, 0, "SPI_PS_INPUT_CNTL_0"},
- {0x00028648, 0, 0, "SPI_PS_INPUT_CNTL_1"},
- {0x0002864C, 0, 0, "SPI_PS_INPUT_CNTL_2"},
- {0x00028650, 0, 0, "SPI_PS_INPUT_CNTL_3"},
- {0x00028654, 0, 0, "SPI_PS_INPUT_CNTL_4"},
- {0x00028658, 0, 0, "SPI_PS_INPUT_CNTL_5"},
- {0x0002865C, 0, 0, "SPI_PS_INPUT_CNTL_6"},
- {0x00028660, 0, 0, "SPI_PS_INPUT_CNTL_7"},
- {0x00028664, 0, 0, "SPI_PS_INPUT_CNTL_8"},
- {0x00028668, 0, 0, "SPI_PS_INPUT_CNTL_9"},
- {0x0002866C, 0, 0, "SPI_PS_INPUT_CNTL_10"},
- {0x00028670, 0, 0, "SPI_PS_INPUT_CNTL_11"},
- {0x00028674, 0, 0, "SPI_PS_INPUT_CNTL_12"},
- {0x00028678, 0, 0, "SPI_PS_INPUT_CNTL_13"},
- {0x0002867C, 0, 0, "SPI_PS_INPUT_CNTL_14"},
- {0x00028680, 0, 0, "SPI_PS_INPUT_CNTL_15"},
- {0x00028684, 0, 0, "SPI_PS_INPUT_CNTL_16"},
- {0x00028688, 0, 0, "SPI_PS_INPUT_CNTL_17"},
- {0x0002868C, 0, 0, "SPI_PS_INPUT_CNTL_18"},
- {0x00028690, 0, 0, "SPI_PS_INPUT_CNTL_19"},
- {0x00028694, 0, 0, "SPI_PS_INPUT_CNTL_20"},
- {0x00028698, 0, 0, "SPI_PS_INPUT_CNTL_21"},
- {0x0002869C, 0, 0, "SPI_PS_INPUT_CNTL_22"},
- {0x000286A0, 0, 0, "SPI_PS_INPUT_CNTL_23"},
- {0x000286A4, 0, 0, "SPI_PS_INPUT_CNTL_24"},
- {0x000286A8, 0, 0, "SPI_PS_INPUT_CNTL_25"},
- {0x000286AC, 0, 0, "SPI_PS_INPUT_CNTL_26"},
- {0x000286B0, 0, 0, "SPI_PS_INPUT_CNTL_27"},
- {0x000286B4, 0, 0, "SPI_PS_INPUT_CNTL_28"},
- {0x000286B8, 0, 0, "SPI_PS_INPUT_CNTL_29"},
- {0x000286BC, 0, 0, "SPI_PS_INPUT_CNTL_30"},
- {0x000286C0, 0, 0, "SPI_PS_INPUT_CNTL_31"},
- {0x000286C8, 0, 0, "SPI_THREAD_GROUPING"},
- {0x000286CC, 0, 0, "SPI_PS_IN_CONTROL_0"},
- {0x000286D0, 0, 0, "SPI_PS_IN_CONTROL_1"},
- {0x000286D8, 0, 0, "SPI_INPUT_Z"},
- {0x000286E0, 0, 0, "SPI_BARYC_CNTL"},
- {0x000286E4, 0, 0, "SPI_PS_IN_CONTROL_2"},
- {0x000286E8, 0, 0, "SPI_COMPUTE_INPUT_CNTL"},
- {0x00028840, 1, 0, "SQ_PGM_START_PS"}, // diff
- {0x00028844, 0, 0, "SQ_PGM_RESOURCES_PS"}, // diff
- {0x00028848, 0, 0, "SQ_PGM_RESOURCES_2_PS"}, // diff
- {0x0002884C, 0, 0, "SQ_PGM_EXPORTS_PS"}, // diff
-};
-
-/* different */
-static const struct radeon_register EG_names_UCP[] = {
- {0x000285BC, 0, 0, "PA_CL_UCP0_X"},
- {0x000285C0, 0, 0, "PA_CL_UCP0_Y"},
- {0x000285C4, 0, 0, "PA_CL_UCP0_Z"},
- {0x000285C8, 0, 0, "PA_CL_UCP0_W"},
- {0x000285CC, 0, 0, "PA_CL_UCP1_X"},
- {0x000285D0, 0, 0, "PA_CL_UCP1_Y"},
- {0x000285D4, 0, 0, "PA_CL_UCP1_Z"},
- {0x000285D8, 0, 0, "PA_CL_UCP1_W"},
- {0x000285DC, 0, 0, "PA_CL_UCP2_X"},
- {0x000285E0, 0, 0, "PA_CL_UCP2_Y"},
- {0x000285E4, 0, 0, "PA_CL_UCP2_Z"},
- {0x000285E8, 0, 0, "PA_CL_UCP2_W"},
- {0x000285EC, 0, 0, "PA_CL_UCP3_X"},
- {0x000285F0, 0, 0, "PA_CL_UCP3_Y"},
- {0x000285F4, 0, 0, "PA_CL_UCP3_Z"},
- {0x000285F8, 0, 0, "PA_CL_UCP3_W"},
- {0x000285FC, 0, 0, "PA_CL_UCP4_X"},
- {0x00028600, 0, 0, "PA_CL_UCP4_Y"},
- {0x00028604, 0, 0, "PA_CL_UCP4_Z"},
- {0x00028608, 0, 0, "PA_CL_UCP4_W"},
- {0x0002860C, 0, 0, "PA_CL_UCP5_X"},
- {0x00028610, 0, 0, "PA_CL_UCP5_Y"},
- {0x00028614, 0, 0, "PA_CL_UCP5_Z"},
- {0x00028618, 0, 0, "PA_CL_UCP5_W"},
-};
-
-static const struct radeon_register EG_names_VS_CBUF[] = {
- {0x00028180, 0, 0, "ALU_CONST_BUFFER_SIZE_VS_0"},
- {0x00028980, 1, 0, "ALU_CONST_CACHE_VS_0"},
-};
-
-static const struct radeon_register EG_names_PS_CBUF[] = {
- {0x00028140, 0, 0, "ALU_CONST_BUFFER_SIZE_PS_0"},
- {0x00028940, 1, 0, "ALU_CONST_CACHE_PS_0"},
-};
-
-static const struct radeon_register EG_names_PS_RESOURCE[] = {
- {0x00030000, 0, 0, "RESOURCE0_WORD0"},
- {0x00030004, 0, 0, "RESOURCE0_WORD1"},
- {0x00030008, 0, 0, "RESOURCE0_WORD2"},
- {0x0003000C, 0, 0, "RESOURCE0_WORD3"},
- {0x00030010, 0, 0, "RESOURCE0_WORD4"},
- {0x00030014, 0, 0, "RESOURCE0_WORD5"},
- {0x00030018, 0, 0, "RESOURCE0_WORD6"},
- {0x0003001c, 0, 0, "RESOURCE0_WORD7"},
-};
-
-static const struct radeon_register EG_names_VS_RESOURCE[] = {
- {0x00031600, 0, 0, "RESOURCE160_WORD0"},
- {0x00031604, 0, 0, "RESOURCE160_WORD1"},
- {0x00031608, 0, 0, "RESOURCE160_WORD2"},
- {0x0003160C, 0, 0, "RESOURCE160_WORD3"},
- {0x00031610, 0, 0, "RESOURCE160_WORD4"},
- {0x00031614, 0, 0, "RESOURCE160_WORD5"},
- {0x00031618, 0, 0, "RESOURCE160_WORD6"},
- {0x0003161c, 0, 0, "RESOURCE160_WORD7"},
-};
-
-static const struct radeon_register EG_names_FS_RESOURCE[] = {
- {0x0003A300, 0, 0, "RESOURCE320_WORD0"},
- {0x0003A304, 0, 0, "RESOURCE320_WORD1"},
- {0x0003A308, 0, 0, "RESOURCE320_WORD2"},
- {0x0003A30C, 0, 0, "RESOURCE320_WORD3"},
- {0x0003A310, 0, 0, "RESOURCE320_WORD4"},
- {0x0003A314, 0, 0, "RESOURCE320_WORD5"},
- {0x0003A318, 0, 0, "RESOURCE320_WORD6"},
- {0x0003A31C, 0, 0, "RESOURCE320_WORD7"},
-};
-
-static const struct radeon_register EG_names_GS_RESOURCE[] = {
- {0x0003A4C0, 0, 0, "RESOURCE336_WORD0"},
- {0x0003A4C4, 0, 0, "RESOURCE336_WORD1"},
- {0x0003A4C8, 0, 0, "RESOURCE336_WORD2"},
- {0x0003A4CC, 0, 0, "RESOURCE336_WORD3"},
- {0x0003A4D0, 0, 0, "RESOURCE336_WORD4"},
- {0x0003A4D4, 0, 0, "RESOURCE336_WORD5"},
- {0x0003A4D8, 0, 0, "RESOURCE336_WORD6"},
- {0x0003A4DC, 0, 0, "RESOURCE336_WORD7"},
-};
-
-static const struct radeon_register EG_names_PS_SAMPLER[] = {
- {0x0003C000, 0, 0, "SQ_TEX_SAMPLER_WORD0_0"},
- {0x0003C004, 0, 0, "SQ_TEX_SAMPLER_WORD1_0"},
- {0x0003C008, 0, 0, "SQ_TEX_SAMPLER_WORD2_0"},
-};
-
-static const struct radeon_register EG_names_VS_SAMPLER[] = {
- {0x0003C0D8, 0, 0, "SQ_TEX_SAMPLER_WORD0_18"},
- {0x0003C0DC, 0, 0, "SQ_TEX_SAMPLER_WORD1_18"},
- {0x0003C0E0, 0, 0, "SQ_TEX_SAMPLER_WORD2_18"},
-};
-
-static const struct radeon_register EG_names_GS_SAMPLER[] = {
- {0x0003C1B0, 0, 0, "SQ_TEX_SAMPLER_WORD0_36"},
- {0x0003C1B4, 0, 0, "SQ_TEX_SAMPLER_WORD1_36"},
- {0x0003C1B8, 0, 0, "SQ_TEX_SAMPLER_WORD2_36"},
-};
-
-static const struct radeon_register EG_names_PS_SAMPLER_BORDER[] = {
- {0x0000A400, 0, 0, "TD_PS_SAMPLER0_BORDER_INDEX"},
- {0x0000A404, 0, 0, "TD_PS_SAMPLER0_BORDER_RED"},
- {0x0000A408, 0, 0, "TD_PS_SAMPLER0_BORDER_GREEN"},
- {0x0000A40C, 0, 0, "TD_PS_SAMPLER0_BORDER_BLUE"},
- {0x0000A410, 0, 0, "TD_PS_SAMPLER0_BORDER_ALPHA"},
-};
-
-static const struct radeon_register EG_names_VS_SAMPLER_BORDER[] = {
- {0x0000A414, 0, 0, "TD_VS_SAMPLER0_BORDER_INDEX"},
- {0x0000A418, 0, 0, "TD_VS_SAMPLER0_BORDER_RED"},
- {0x0000A41C, 0, 0, "TD_VS_SAMPLER0_BORDER_GREEN"},
- {0x0000A420, 0, 0, "TD_VS_SAMPLER0_BORDER_BLUE"},
- {0x0000A424, 0, 0, "TD_VS_SAMPLER0_BORDER_ALPHA"},
-};
-
-static const struct radeon_register EG_names_GS_SAMPLER_BORDER[] = {
- {0x0000A428, 0, 0, "TD_GS_SAMPLER0_BORDER_INDEX"},
- {0x0000A42C, 0, 0, "TD_GS_SAMPLER0_BORDER_RED"},
- {0x0000A430, 0, 0, "TD_GS_SAMPLER0_BORDER_GREEN"},
- {0x0000A434, 0, 0, "TD_GS_SAMPLER0_BORDER_BLUE"},
- {0x0000A438, 0, 0, "TD_GS_SAMPLER0_BORDER_ALPHA"},
-};
-
-static const struct radeon_register EG_names_CB[] = {
- {0x00028C60, 1, 0, "CB_COLOR0_BASE"},
- {0x00028C64, 0, 0, "CB_COLOR0_PITCH"},
- {0x00028C68, 0, 0, "CB_COLOR0_SLICE"},
- {0x00028C6C, 0, 0, "CB_COLOR0_VIEW"},
- {0x00028C70, 1, 0, "CB_COLOR0_INFO"},
- {0x00028C74, 0, 0, "CB_COLOR0_ATTRIB"},
- {0x00028C78, 0, 0, "CB_COLOR0_DIM"},
-};
-
-/* different - TODO */
-static const struct radeon_register EG_names_DB[] = {
- {0x00028014, 1, 0, "DB_HTILE_DATA_BASE"},
- {0x00028040, 1, 0, "DB_Z_INFO"},
- {0x00028044, 0, 0, "DB_STENCIL_INFO"},
- {0x00028058, 0, 0, "DB_DEPTH_SIZE"},
- {0x0002805C, 0, 0, "DB_DEPTH_SLICE"},
- {0x00028008, 0, 0, "DB_DEPTH_VIEW"},
- {0x00028ABC, 0, 0, "DB_HTILE_SURFACE"},
- {0x00028048, 1, 0, "DB_Z_READ_BASE"},
- {0x0002804C, 1, 0, "DB_STENCIL_READ_BASE"},
- {0x00028050, 1, 0, "DB_Z_WRITE_BASE"},
- {0x00028054, 1, 0, "DB_STENCIL_WRITE_BASE"},
-};
-
-static const struct radeon_register EG_names_VGT[] = {
- {0x00008958, 0, 0, "VGT_PRIMITIVE_TYPE"}, //s
- {0x00028400, 0, 0, "VGT_MAX_VTX_INDX"}, //s
- {0x00028404, 0, 0, "VGT_MIN_VTX_INDX"}, //s
- {0x00028408, 0, 0, "VGT_INDX_OFFSET"}, //s
- {0x00028A7C, 0, 0, "VGT_DMA_INDEX_TYPE"}, //s
- {0x00028A84, 0, 0, "VGT_PRIMITIVEID_EN"}, //s
- {0x00028A88, 0, 0, "VGT_DMA_NUM_INSTANCES"}, //s
- {0x00028A94, 0, 0, "VGT_MULTI_PRIM_IB_RESET_EN"}, //s
- {0x00028AA0, 0, 0, "VGT_INSTANCE_STEP_RATE_0"}, //s
- {0x00028AA4, 0, 0, "VGT_INSTANCE_STEP_RATE_1"}, //s
-};
-
-static const struct radeon_register EG_names_DRAW[] = {
- {0x00008970, 0, 0, "VGT_NUM_INDICES"},
- {0x000287E4, 0, 0, "VGT_DMA_BASE_HI"}, //same
- {0x000287E8, 1, 0, "VGT_DMA_BASE"}, //same
- {0x000287F0, 0, 0, "VGT_DRAW_INITIATOR"}, //same
-};
-
-static const struct radeon_register EG_names_VGT_EVENT[] = {
- {0x00028A90, 1, 0, "VGT_EVENT_INITIATOR"}, //done
-};
-
-static const struct radeon_register EG_names_CB_FLUSH[] = {
-};
-
-static const struct radeon_register EG_names_DB_FLUSH[] = {
-};
-
-#endif
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
new file mode 100644
index 0000000000..7f21b53ace
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -0,0 +1,919 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ */
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "xf86drm.h"
+#include "r600.h"
+#include "evergreend.h"
+#include "radeon_drm.h"
+#include "bof.h"
+#include "pipe/p_compiler.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include <pipebuffer/pb_bufmgr.h>
+#include "r600_priv.h"
+
+#define GROUP_FORCE_NEW_BLOCK 0 /* Placed in the register-address slot of evergreen_context_reg_list entries that sit between runs of real registers. NOTE(review): presumably tells the list consumer to start a new block there - confirm against the code that walks these lists. */
+
+static const struct r600_reg evergreen_config_reg_list[] = { /* Evergreen register table: every entry pairs PKT3_SET_CONFIG_REG and EVERGREEN_CONFIG_REG_OFFSET with one register address (R_*). */
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, /* The three trailing fields are 0 for every entry in this list. NOTE(review): their meaning comes from struct r600_reg, declared outside this file - confirm there. */
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008A14_PA_CL_ENHANCE, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C00_SQ_CONFIG, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008C28_SQ_STACK_RESOURCE_MGMT_3, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_009100_SPI_CONFIG_CNTL, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET, R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0},
+};
+
+static const struct r600_reg evergreen_ctl_const_list[] = { /* Two-entry Evergreen table: each entry pairs PKT3_SET_CTL_CONST and EVERGREEN_CTL_CONST_OFFSET with one register address (R_*). */
+ {PKT3_SET_CTL_CONST, EVERGREEN_CTL_CONST_OFFSET, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0}, /* Trailing three fields are 0 for both entries. NOTE(review): field meanings come from struct r600_reg, declared outside this file - confirm there. */
+ {PKT3_SET_CTL_CONST, EVERGREEN_CTL_CONST_OFFSET, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0},
+};
+
+static const struct r600_reg evergreen_context_reg_list[] = {
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028000_DB_RENDER_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028004_DB_COUNT_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028008_DB_DEPTH_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02800C_DB_RENDER_OVERRIDE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028010_DB_RENDER_OVERRIDE2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028014_DB_HTILE_DATA_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028028_DB_STENCIL_CLEAR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02802C_DB_DEPTH_CLEAR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028040_DB_Z_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028044_DB_STENCIL_INFO, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028048_DB_Z_READ_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02804C_DB_STENCIL_READ_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028050_DB_Z_WRITE_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028054_DB_STENCIL_WRITE_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028058_DB_DEPTH_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02805C_DB_DEPTH_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028230_PA_SC_EDGERULE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028238_CB_TARGET_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02823C_CB_SHADER_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028350_SX_MISC, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028400_VGT_MAX_VTX_INDX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028404_VGT_MIN_VTX_INDX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028408_VGT_INDX_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028414_CB_BLEND_RED, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028418_CB_BLEND_GREEN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02841C_CB_BLEND_BLUE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028420_CB_BLEND_ALPHA, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028430_DB_STENCILREFMASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028434_DB_STENCILREFMASK_BF, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028438_SX_ALPHA_REF, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285BC_PA_CL_UCP0_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285C0_PA_CL_UCP0_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285C4_PA_CL_UCP0_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285C8_PA_CL_UCP0_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285CC_PA_CL_UCP1_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285D0_PA_CL_UCP1_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285D4_PA_CL_UCP1_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285D8_PA_CL_UCP1_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285DC_PA_CL_UCP2_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285E0_PA_CL_UCP2_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285E4_PA_CL_UCP2_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285E8_PA_CL_UCP2_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285EC_PA_CL_UCP3_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285F0_PA_CL_UCP3_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285F4_PA_CL_UCP3_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285F8_PA_CL_UCP3_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0285FC_PA_CL_UCP4_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028600_PA_CL_UCP4_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028604_PA_CL_UCP4_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028608_PA_CL_UCP4_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02860C_PA_CL_UCP5_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028610_PA_CL_UCP5_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028614_PA_CL_UCP5_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028618_PA_CL_UCP5_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02861C_SPI_VS_OUT_ID_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028620_SPI_VS_OUT_ID_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028624_SPI_VS_OUT_ID_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028628_SPI_VS_OUT_ID_3, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02862C_SPI_VS_OUT_ID_4, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028630_SPI_VS_OUT_ID_5, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028634_SPI_VS_OUT_ID_6, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028638_SPI_VS_OUT_ID_7, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02863C_SPI_VS_OUT_ID_8, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028640_SPI_VS_OUT_ID_9, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286D8_SPI_INPUT_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286DC_SPI_FOG_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286E0_SPI_BARYC_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028780_CB_BLEND0_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028784_CB_BLEND1_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028788_CB_BLEND2_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02878C_CB_BLEND3_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028790_CB_BLEND4_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028794_CB_BLEND5_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028798_CB_BLEND6_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02879C_CB_BLEND7_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028800_DB_DEPTH_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02880C_DB_SHADER_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028808_CB_COLOR_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028810_PA_CL_CLIP_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028818_PA_CL_VTE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028820_PA_CL_NANINF_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028840_SQ_PGM_START_PS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02885C_SQ_PGM_START_VS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0288A4_SQ_PGM_START_FS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028900_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028904_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_02891C_SQ_GS_VERT_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028940_ALU_CONST_CACHE_PS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028980_ALU_CONST_CACHE_VS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A00_PA_SU_POINT_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A08_PA_SU_LINE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A14_VGT_HOS_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A2C_VGT_GROUP_DECR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A40_VGT_GS_MODE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A48_PA_SC_MODE_CNTL_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028A4C_PA_SC_MODE_CNTL_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AB4_VGT_REUSE_OFF, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028ABC_DB_HTILE_SURFACE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028AC8_DB_PRELOAD_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B54_VGT_SHADER_STAGES_EN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B70_DB_ALPHA_TO_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B94_VGT_STRMOUT_CONFIG, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C00_PA_SC_LINE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C04_PA_SC_AA_CONFIG, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C08_PA_SU_VTX_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C3C_PA_SC_AA_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C60_CB_COLOR0_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C64_CB_COLOR0_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C68_CB_COLOR0_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C6C_CB_COLOR0_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C70_CB_COLOR0_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C74_CB_COLOR0_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C78_CB_COLOR0_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028C9C_CB_COLOR1_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CA0_CB_COLOR1_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CA4_CB_COLOR1_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CA8_CB_COLOR1_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CAC_CB_COLOR1_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CB0_CB_COLOR1_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CB4_CB_COLOR1_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CD8_CB_COLOR2_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CDC_CB_COLOR2_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CE0_CB_COLOR2_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CE4_CB_COLOR2_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CE8_CB_COLOR2_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CEC_CB_COLOR2_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028CF0_CB_COLOR2_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D14_CB_COLOR3_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D18_CB_COLOR3_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D1C_CB_COLOR3_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D20_CB_COLOR3_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D24_CB_COLOR3_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D28_CB_COLOR3_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D2C_CB_COLOR3_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D50_CB_COLOR4_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D54_CB_COLOR4_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D58_CB_COLOR4_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D5C_CB_COLOR4_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D60_CB_COLOR4_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D64_CB_COLOR4_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D68_CB_COLOR4_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D8C_CB_COLOR5_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D90_CB_COLOR5_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D94_CB_COLOR5_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D98_CB_COLOR5_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028D9C_CB_COLOR5_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DA0_CB_COLOR5_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DA4_CB_COLOR5_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DC8_CB_COLOR6_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DCC_CB_COLOR6_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DD0_CB_COLOR6_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DD4_CB_COLOR6_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DD8_CB_COLOR6_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DDC_CB_COLOR6_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028DE0_CB_COLOR6_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E04_CB_COLOR7_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E08_CB_COLOR7_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E0C_CB_COLOR7_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E10_CB_COLOR7_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E14_CB_COLOR7_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E18_CB_COLOR7_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E1C_CB_COLOR7_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E40_CB_COLOR8_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E44_CB_COLOR8_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E48_CB_COLOR8_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E4C_CB_COLOR8_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E50_CB_COLOR8_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E54_CB_COLOR8_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E58_CB_COLOR8_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E5C_CB_COLOR9_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E60_CB_COLOR9_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E64_CB_COLOR9_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E68_CB_COLOR9_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E6C_CB_COLOR9_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E70_CB_COLOR9_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E74_CB_COLOR9_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E78_CB_COLOR10_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E7C_CB_COLOR10_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E80_CB_COLOR10_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E84_CB_COLOR10_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E88_CB_COLOR10_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E8C_CB_COLOR10_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E90_CB_COLOR10_DIM, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E94_CB_COLOR11_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E98_CB_COLOR11_PITCH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028E9C_CB_COLOR11_SLICE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EA0_CB_COLOR11_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EA4_CB_COLOR11_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EA8_CB_COLOR11_ATTRIB, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET, R_028EAC_CB_COLOR11_DIM, 0, 0, 0},
+};
+
+/*
+ * SHADER RESOURCE (Evergreen)
+ *
+ * Registers one block of eight resource words with the context. The table
+ * below is the template for resource slot 0; `offset` is added to every
+ * register offset so the block addresses another slot.
+ *
+ * Returns the result of r600_context_add_block().
+ */
+static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset)
+{
+	struct r600_reg resource_regs[] = {
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030000_RESOURCE0_WORD0, 0, 0, 0},
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030004_RESOURCE0_WORD1, 0, 0, 0},
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030008_RESOURCE0_WORD2, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_03000C_RESOURCE0_WORD3, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030010_RESOURCE0_WORD4, 0, 0, 0},
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030014_RESOURCE0_WORD5, 0, 0, 0},
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_030018_RESOURCE0_WORD6, 0, 0, 0},
+		{PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET, R_03001C_RESOURCE0_WORD7, 0, 0, 0},
+	};
+	const unsigned count = Elements(resource_regs);
+	unsigned idx;
+
+	/* shift the slot-0 template onto the requested slot */
+	for (idx = 0; idx < count; idx++)
+		resource_regs[idx].offset += offset;
+
+	return r600_context_add_block(ctx, resource_regs, count);
+}
+
+/*
+ * SHADER SAMPLER (Evergreen)
+ *
+ * Registers the three SQ_TEX_SAMPLER words of one sampler as a block. The
+ * table is the template for sampler 0; `offset` is added to each register
+ * offset to select another sampler.
+ *
+ * Returns the result of r600_context_add_block().
+ */
+static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
+{
+	struct r600_reg sampler_regs[] = {
+		{PKT3_SET_SAMPLER, EVERGREEN_SAMPLER_OFFSET, R_03C000_SQ_TEX_SAMPLER_WORD0_0, 0, 0, 0},
+		{PKT3_SET_SAMPLER, EVERGREEN_SAMPLER_OFFSET, R_03C004_SQ_TEX_SAMPLER_WORD1_0, 0, 0, 0},
+		{PKT3_SET_SAMPLER, EVERGREEN_SAMPLER_OFFSET, R_03C008_SQ_TEX_SAMPLER_WORD2_0, 0, 0, 0},
+	};
+	const unsigned count = Elements(sampler_regs);
+	unsigned idx;
+
+	/* shift the sampler-0 template onto the requested sampler */
+	for (idx = 0; idx < count; idx++)
+		sampler_regs[idx].offset += offset;
+
+	return r600_context_add_block(ctx, sampler_regs, count);
+}
+
+/*
+ * SHADER SAMPLER BORDER (Evergreen)
+ *
+ * Registers the five border-colour registers (index, red, green, blue,
+ * alpha) of sampler `id` as one block. `offset` is the border index register
+ * of the shader stage; the callers in this file pass the PS or VS
+ * *_SAMPLER0_BORDER_INDEX register.
+ *
+ * The block is filed under a synthetic ("fake") offset computed from
+ * `offset` and `id`; evergreen_context_pipe_state_set_sampler_border()
+ * recomputes the same value to find the block again. Once the block exists
+ * its pm4[1] is overwritten with the dword index of the real register.
+ *
+ * Returns 0 on success or the error from r600_context_add_block().
+ */
+static int evergreen_state_sampler_border_init(struct r600_context *ctx, u32 offset, unsigned id)
+{
+	struct r600_reg border_regs[] = {
+		{PKT3_SET_CONFIG_REG, 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0, 0, 0},
+		{PKT3_SET_CONFIG_REG, 0, R_00A404_TD_PS_SAMPLER0_BORDER_RED, 0, 0, 0},
+		{PKT3_SET_CONFIG_REG, 0, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0, 0},
+		{PKT3_SET_CONFIG_REG, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0, 0},
+		{PKT3_SET_CONFIG_REG, 0, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0, 0},
+	};
+	const unsigned count = Elements(border_regs);
+	const unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x100 + 0x40000 + id * 0x1C;
+	struct r600_block *block;
+	unsigned idx;
+	int r;
+
+	/* rebase every register from the PS sampler 0 address onto the fake offset */
+	for (idx = 0; idx < count; idx++)
+		border_regs[idx].offset = border_regs[idx].offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX + fake_offset;
+
+	r = r600_context_add_block(ctx, border_regs, count);
+	if (r == 0) {
+		/* set proper offset: the packet must name the real register, not the fake one */
+		block = ctx->range[CTX_RANGE_ID(ctx, border_regs[0].offset)].blocks[CTX_BLOCK_ID(ctx, border_regs[0].offset)];
+		block->pm4[1] = (offset - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
+	}
+	return r;
+}
+
+/*
+ * Registers a block of 32 consecutive loop-constant registers.
+ * `offset` is the index of the first constant; the callers in this file
+ * pass 0 for the PS set and 32 for the VS set.
+ *
+ * Returns the result of r600_context_add_block().
+ */
+static int evergreen_loop_const_init(struct r600_context *ctx, u32 offset)
+{
+	struct r600_reg r600_loop_consts[32];
+	const unsigned nreg = Elements(r600_loop_consts);
+	unsigned i;
+
+	/*
+	 * The static register tables in this file initialise six members per
+	 * entry but only five are assigned below; clear the array first so no
+	 * member reaches r600_context_add_block() uninitialised.
+	 */
+	memset(r600_loop_consts, 0, sizeof(r600_loop_consts));
+	for (i = 0; i < nreg; i++) {
+		r600_loop_consts[i].opcode = PKT3_SET_LOOP_CONST;
+		r600_loop_consts[i].offset_base = EVERGREEN_LOOP_CONST_OFFSET;
+		r600_loop_consts[i].offset = EVERGREEN_LOOP_CONST_OFFSET + ((offset + i) * 4);
+		r600_loop_consts[i].need_bo = 0;
+		r600_loop_consts[i].flush_flags = 0;
+	}
+	return r600_context_add_block(ctx, r600_loop_consts, nreg);
+}
+
+/*
+ * Initialises `ctx` for an Evergreen GPU: resets the structure, allocates
+ * the register range hash, registers every state block (config / context /
+ * ctl-const tables, samplers, sampler borders, resources, loop constants),
+ * builds the flat block table and allocates the command-stream buffers.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, or the non-zero
+ * value returned by a failing helper. Failures that happen after the range
+ * hash is fully allocated are cleaned up through r600_context_fini().
+ */
+int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
+{
+	int r;
+
+	memset(ctx, 0, sizeof(struct r600_context));
+	ctx->radeon = radeon;
+	LIST_INITHEAD(&ctx->query_list);
+
+	/* initialize hash */
+	ctx->hash_size = 19;
+	ctx->hash_shift = 11;
+	for (int i = 0; i < 256; i++) {
+		ctx->range[i].start_offset = i << ctx->hash_shift;
+		ctx->range[i].end_offset = ((i + 1) << ctx->hash_shift) - 1;
+		ctx->range[i].blocks = calloc(1 << ctx->hash_shift, sizeof(void*));
+		if (ctx->range[i].blocks == NULL) {
+			/*
+			 * No block has been registered yet, so the tables
+			 * allocated so far are empty; release them here
+			 * instead of leaking them.
+			 */
+			while (i-- > 0) {
+				free(ctx->range[i].blocks);
+				ctx->range[i].blocks = NULL;
+			}
+			return -ENOMEM;
+		}
+	}
+
+	/* add blocks */
+	r = r600_context_add_block(ctx, evergreen_config_reg_list,
+					Elements(evergreen_config_reg_list));
+	if (r)
+		goto out_err;
+	r = r600_context_add_block(ctx, evergreen_context_reg_list,
+					Elements(evergreen_context_reg_list));
+	if (r)
+		goto out_err;
+	r = r600_context_add_block(ctx, evergreen_ctl_const_list,
+					Elements(evergreen_ctl_const_list));
+	if (r)
+		goto out_err;
+
+
+	/* PS SAMPLER */
+	for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) {
+		r = r600_state_sampler_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+	/* VS SAMPLER */
+	for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) {
+		r = r600_state_sampler_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+	/* PS SAMPLER BORDER */
+	for (int j = 0; j < 18; j++) {
+		r = evergreen_state_sampler_border_init(ctx, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, j);
+		if (r)
+			goto out_err;
+	}
+	/* VS SAMPLER BORDER */
+	for (int j = 0; j < 18; j++) {
+		r = evergreen_state_sampler_border_init(ctx, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, j);
+		if (r)
+			goto out_err;
+	}
+	/* PS RESOURCE */
+	for (int j = 0, offset = 0; j < 176; j++, offset += 0x20) {
+		r = evergreen_state_resource_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+	/* VS RESOURCE */
+	for (int j = 0, offset = 0x1600; j < 160; j++, offset += 0x20) {
+		r = evergreen_state_resource_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+
+	/* PS loop const */
+	r = evergreen_loop_const_init(ctx, 0);
+	if (r)
+		goto out_err;
+	/* VS loop const */
+	r = evergreen_loop_const_init(ctx, 32);
+	if (r)
+		goto out_err;
+
+	/* setup block table */
+	ctx->blocks = calloc(ctx->nblocks, sizeof(void*));
+	if (ctx->blocks == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	for (int i = 0, c = 0; i < 256; i++) {
+		for (int j = 0; j < (1 << ctx->hash_shift); j++) {
+			if (ctx->range[i].blocks[j]) {
+				assert(c < ctx->nblocks);
+				ctx->blocks[c++] = ctx->range[i].blocks[j];
+				/* jump over the remaining slots covered by this block */
+				j += (ctx->range[i].blocks[j]->nreg << 2) - 1;
+			}
+		}
+	}
+
+	/* allocate cs variables */
+	ctx->nreloc = RADEON_CTX_MAX_PM4;
+	ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
+	if (ctx->reloc == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	ctx->bo = calloc(ctx->nreloc, sizeof(void *));
+	if (ctx->bo == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
+	ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
+	if (ctx->pm4 == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	/* save 16 dwords of space for the fence mechanism */
+	ctx->pm4_ndwords -= 16;
+
+	r = r600_context_init_fence(ctx);
+	if (r) {
+		goto out_err;
+	}
+
+	/* init dirty list */
+	LIST_INITHEAD(&ctx->dirty);
+	return 0;
+out_err:
+	r600_context_fini(ctx);
+	return r;
+}
+
+/*
+ * Binds or unbinds the resource block located at register `offset`.
+ *
+ * state == NULL: the block loses its ENABLED and DIRTY status, both buffer
+ * references (reloc[1], reloc[2]) are dropped and the block is unlinked
+ * from its list.
+ * Otherwise: the eight register values are copied in, the buffer references
+ * are replaced and, unless it is already dirty, the block is queued on
+ * ctx->dirty and its dwords are added to ctx->pm4_dirty_cdwords.
+ */
+static inline void evergreen_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
+{
+	struct r600_block *block = ctx->range[CTX_RANGE_ID(ctx, offset)].blocks[CTX_BLOCK_ID(ctx, offset)];
+	unsigned w;
+
+	if (state == NULL) {
+		/* unbind */
+		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
+		r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
+		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
+		LIST_DELINIT(&block->list);
+		return;
+	}
+
+	for (w = 0; w < 8; w++)
+		block->reg[w] = state->regs[w].value;
+
+	/* release the previous buffers before taking the new ones */
+	r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
+	r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
+	if (state->regs[0].bo) {
+		/* VERTEX RESOURCE: pretend there are two bo to relocate so
+		 * vertex and texture resources share a single code path
+		 */
+		r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
+		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
+	} else {
+		/* TEXTURE RESOURCE */
+		r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
+		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
+	}
+
+	/* an already dirty block is on the list and accounted for */
+	if (block->status & R600_BLOCK_STATUS_DIRTY)
+		return;
+	block->status |= R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY;
+	ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+	LIST_ADDTAIL(&block->list,&ctx->dirty);
+}
+
+/* Binds `state` (or unbinds when NULL) to PS resource slot `rid`; slots are 0x20 bytes apart. */
+void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
+{
+	evergreen_context_pipe_state_set_resource(ctx, state,
+						  R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x20 * rid);
+}
+
+/* Binds `state` (or unbinds when NULL) to VS resource slot `rid`; the VS slots start 0x1600 bytes after the PS ones. */
+void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
+{
+	evergreen_context_pipe_state_set_resource(ctx, state,
+						  R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x1600 + 0x20 * rid);
+}
+
+/*
+ * Updates the sampler block located at register `offset`.
+ *
+ * state == NULL clears the block's ENABLED and DIRTY status and unlinks it
+ * from its list. Otherwise the first three register values of `state` are
+ * copied in and the block is queued on ctx->dirty unless already dirty.
+ */
+static inline void evergreen_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
+{
+	struct r600_block *block = ctx->range[CTX_RANGE_ID(ctx, offset)].blocks[CTX_BLOCK_ID(ctx, offset)];
+	unsigned w;
+
+	if (state == NULL) {
+		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
+		LIST_DELINIT(&block->list);
+		return;
+	}
+
+	for (w = 0; w < 3; w++)
+		block->reg[w] = state->regs[w].value;
+
+	/* an already dirty block is on the list and accounted for */
+	if (block->status & R600_BLOCK_STATUS_DIRTY)
+		return;
+	block->status |= R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY;
+	ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+	LIST_ADDTAIL(&block->list,&ctx->dirty);
+}
+
+/*
+ * Updates the border-colour block of sampler `id`. `offset` is the stage's
+ * border index register; the block is looked up under the same synthetic
+ * offset that evergreen_state_sampler_border_init() registered it with.
+ *
+ * state == NULL clears the block's ENABLED and DIRTY status and unlinks it.
+ * A state with three registers or fewer carries no border colour and is
+ * ignored. Otherwise reg[0] receives `id` and reg[1..4] receive
+ * state->regs[3..6], and the block is queued on ctx->dirty unless already
+ * dirty.
+ */
+static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
+{
+	const unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x100 + 0x40000 + id * 0x1C;
+	struct r600_block *block = ctx->range[CTX_RANGE_ID(ctx, fake_offset)].blocks[CTX_BLOCK_ID(ctx, fake_offset)];
+	unsigned c;
+
+	if (state == NULL) {
+		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
+		LIST_DELINIT(&block->list);
+		return;
+	}
+	if (state->nregs <= 3)
+		return;
+
+	block->reg[0] = id;
+	for (c = 0; c < 4; c++)
+		block->reg[1 + c] = state->regs[3 + c].value;
+
+	/* an already dirty block is on the list and accounted for */
+	if (block->status & R600_BLOCK_STATUS_DIRTY)
+		return;
+	block->status |= R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY;
+	ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+	LIST_ADDTAIL(&block->list,&ctx->dirty);
+}
+
+/* Applies `state` to PS sampler `id`: first its sampler words (0xc bytes per sampler), then its border colour. */
+void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
+{
+	const unsigned sampler_offset = 0x0003C000 + id * 0xc;
+
+	evergreen_context_pipe_state_set_sampler(ctx, state, sampler_offset);
+	evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, id);
+}
+
+/* Applies `state` to VS sampler `id`: first its sampler words (0xc bytes per sampler), then its border colour. */
+void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
+{
+	const unsigned sampler_offset = 0x0003C0D8 + id * 0xc;
+
+	evergreen_context_pipe_state_set_sampler(ctx, state, sampler_offset);
+	evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, id);
+}
+
+
+/*
+ * Schedules one draw in the context's command buffer.
+ *
+ * Steps, in order: work out how many dwords the draw and the trailing
+ * buffer flushes need, force the query-related register bits when queries
+ * are running, flush the context if the pending dirty state plus the draw
+ * would not fit, emit every dirty block, write the draw packets, then flush
+ * each bound colour buffer and the depth buffer.
+ *
+ * If the dirty state plus the draw still cannot fit, an error is logged and
+ * nothing is emitted.
+ */
+void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
+{
+	/* CB_COLORn_BASE register of each of the 12 colour buffers */
+	const unsigned cb_base_reg[12] = {
+		R_028C60_CB_COLOR0_BASE,
+		R_028C9C_CB_COLOR1_BASE,
+		R_028CD8_CB_COLOR2_BASE,
+		R_028D14_CB_COLOR3_BASE,
+		R_028D50_CB_COLOR4_BASE,
+		R_028D8C_CB_COLOR5_BASE,
+		R_028DC8_CB_COLOR6_BASE,
+		R_028E04_CB_COLOR7_BASE,
+		R_028E40_CB_COLOR8_BASE,
+		R_028E5C_CB_COLOR9_BASE,
+		R_028E78_CB_COLOR10_BASE,
+		R_028E94_CB_COLOR11_BASE,
+	};
+	struct r600_bo *cb[12];
+	struct r600_bo *db;
+	struct r600_block *dirty_block = NULL;
+	struct r600_block *next_block;
+	unsigned ndwords, flush;
+	unsigned i;
+
+	/* dwords written by the draw packets below */
+	if (draw->indices) {
+		ndwords = 13;
+		/* make sure there is enough relocation space before scheduling draw */
+		if (ctx->creloc >= (ctx->nreloc - 1)) {
+			r600_context_flush(ctx);
+		}
+	} else {
+		ndwords = 9;
+	}
+
+	/* find the bound depth/colour buffers; each one costs 7 more dwords */
+	db = r600_context_reg_bo(ctx, R_028048_DB_Z_READ_BASE);
+	for (i = 0; i < 12; i++) {
+		cb[i] = r600_context_reg_bo(ctx, cb_base_reg[i]);
+		if (cb[i])
+			ndwords += 7;
+	}
+	if (db)
+		ndwords += 7;
+
+	/* queries need some special values */
+	if (ctx->num_query_running) {
+		r600_context_reg(ctx,
+				R_028004_DB_COUNT_CONTROL,
+				S_028004_PERFECT_ZPASS_COUNTS(1),
+				S_028004_PERFECT_ZPASS_COUNTS(1));
+		r600_context_reg(ctx,
+				R_02800C_DB_RENDER_OVERRIDE,
+				S_02800C_NOOP_CULL_DISABLE(1),
+				S_02800C_NOOP_CULL_DISABLE(1));
+	}
+
+	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
+		/* need to flush */
+		r600_context_flush(ctx);
+	}
+	/* at that point everything is flushed and ctx->pm4_cdwords = 0 */
+	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
+		R600_ERR("context is too big to be scheduled\n");
+		return;
+	}
+
+	/* enough room to copy packet */
+	LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty,list) {
+		r600_context_block_emit_dirty(ctx, dirty_block);
+	}
+
+	/* draw packet */
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
+	if (!draw->indices) {
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
+		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
+		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
+	} else {
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
+		ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
+		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
+		/* NOP packet whose payload dword is handed to the relocation call */
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices);
+	}
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
+
+	/* flush color buffer */
+	for (i = 0; i < 12; i++) {
+		if (!cb[i])
+			continue;
+		/* buffers 0-7 and 8-11 use separate DEST_BASE_ENA bit groups */
+		if (i < 8)
+			flush = (S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
+				S_0085F0_CB_ACTION_ENA(1);
+		else
+			flush = (S_0085F0_CB8_DEST_BASE_ENA(1) << (i - 8)) |
+				S_0085F0_CB_ACTION_ENA(1);
+		r600_context_bo_flush(ctx, flush, 0, cb[i]);
+	}
+	if (db) {
+		r600_context_bo_flush(ctx,
+					S_0085F0_DB_ACTION_ENA(1) |
+					S_0085F0_DB_DEST_BASE_ENA(1),
+					0, db);
+	}
+
+	/* all dirty state have been scheduled in current cs */
+	ctx->pm4_dirty_cdwords = 0;
+}
+
+/*
+ * Binds `state` to the resource block at register `offset`.
+ *
+ * Thin wrapper over evergreen_context_pipe_state_set_resource(), which
+ * performs the register copy, the buffer re-referencing and the dirty-list
+ * queuing. A NULL `state` takes that helper's unbind path rather than being
+ * dereferenced.
+ */
+static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
+{
+	evergreen_context_pipe_state_set_resource(ctx, state, offset);
+}
+
+/* Binds `state` to PS resource slot `rid`; slots are 0x20 bytes apart. */
+void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
+{
+	evergreen_resource_set(ctx, state, R_030000_RESOURCE0_WORD0 + 0x20 * rid);
+}
+
+/* Binds `state` to VS resource slot `rid`; the VS slots start 0x1600 bytes after the PS ones. */
+void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
+{
+	evergreen_resource_set(ctx, state, R_030000_RESOURCE0_WORD0 + 0x1600 + 0x20 * rid);
+}
diff --git a/src/gallium/winsys/r600/drm/evergreen_state.c b/src/gallium/winsys/r600/drm/evergreen_state.c
deleted file mode 100644
index 1cc22a98fd..0000000000
--- a/src/gallium/winsys/r600/drm/evergreen_state.c
+++ /dev/null
@@ -1,885 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "xf86drm.h"
-#include "r600.h"
-#include "evergreend.h"
-#include "r600_priv.h"
-#include "radeon_drm.h"
-#include "bof.h"
-#include "pipe/p_compiler.h"
-#include "util/u_inlines.h"
-#include <pipebuffer/pb_bufmgr.h>
-
-struct radeon_bo {
- struct pipe_reference reference;
- unsigned handle;
- unsigned size;
- unsigned alignment;
- unsigned map_count;
- void *data;
-};
-struct radeon_ws_bo {
- struct pipe_reference reference;
- struct pb_buffer *pb;
-};
-struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
-
-struct radeon_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned group_id, unsigned offset);
-void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group);
-void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct radeon_bo *bo);
-int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode);
-int r600_group_init(struct r600_group *group, unsigned start_offset, unsigned end_offset);
-
-#define GROUP_FORCE_NEW_BLOCK 0
-static const struct r600_reg evergreen_config_reg_list[] = {
- {0, 0, R_008958_VGT_PRIMITIVE_TYPE},
- {0, 0, R_008A14_PA_CL_ENHANCE},
- {0, 0, R_008C00_SQ_CONFIG},
- {0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1},
- {0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2},
- {0, 0, R_008C0C_SQ_THREAD_RESOURCE_MGMT},
- {0, 0, R_008C18_SQ_THREAD_RESOURCE_MGMT_1},
- {0, 0, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2},
- {0, 0, R_008C20_SQ_STACK_RESOURCE_MGMT_1},
- {0, 0, R_008C24_SQ_STACK_RESOURCE_MGMT_2},
- {0, 0, R_008C28_SQ_STACK_RESOURCE_MGMT_3},
- {0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ},
- {0, 0, R_009100_SPI_CONFIG_CNTL},
- {0, 0, R_00913C_SPI_CONFIG_CNTL_1},
-};
-
-static const struct r600_reg evergreen_context_reg_list[] = {
- {0, 0, R_028000_DB_RENDER_CONTROL},
- {0, 0, R_028004_DB_COUNT_CONTROL},
- {0, 0, R_028008_DB_DEPTH_VIEW},
- {0, 0, R_02800C_DB_RENDER_OVERRIDE},
- {0, 0, R_028010_DB_RENDER_OVERRIDE2},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028014_DB_HTILE_DATA_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {0, 0, R_028028_DB_STENCIL_CLEAR},
- {0, 0, R_02802C_DB_DEPTH_CLEAR},
- {0, 0, R_028030_PA_SC_SCREEN_SCISSOR_TL},
- {0, 0, R_028034_PA_SC_SCREEN_SCISSOR_BR},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028040_DB_Z_INFO},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {0, 0, R_028044_DB_STENCIL_INFO},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028048_DB_Z_READ_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_02804C_DB_STENCIL_READ_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028050_DB_Z_WRITE_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028054_DB_STENCIL_WRITE_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {0, 0, R_028058_DB_DEPTH_SIZE},
- {0, 0, R_02805C_DB_DEPTH_SLICE},
- {0, 0, R_028140_ALU_CONST_BUFFER_SIZE_PS_0},
- {0, 0, R_028180_ALU_CONST_BUFFER_SIZE_VS_0},
- {0, 0, R_028200_PA_SC_WINDOW_OFFSET},
- {0, 0, R_028204_PA_SC_WINDOW_SCISSOR_TL},
- {0, 0, R_028208_PA_SC_WINDOW_SCISSOR_BR},
- {0, 0, R_02820C_PA_SC_CLIPRECT_RULE},
- {0, 0, R_028210_PA_SC_CLIPRECT_0_TL},
- {0, 0, R_028214_PA_SC_CLIPRECT_0_BR},
- {0, 0, R_028218_PA_SC_CLIPRECT_1_TL},
- {0, 0, R_02821C_PA_SC_CLIPRECT_1_BR},
- {0, 0, R_028220_PA_SC_CLIPRECT_2_TL},
- {0, 0, R_028224_PA_SC_CLIPRECT_2_BR},
- {0, 0, R_028228_PA_SC_CLIPRECT_3_TL},
- {0, 0, R_02822C_PA_SC_CLIPRECT_3_BR},
- {0, 0, R_028230_PA_SC_EDGERULE},
- {0, 0, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET},
- {0, 0, R_028238_CB_TARGET_MASK},
- {0, 0, R_02823C_CB_SHADER_MASK},
- {0, 0, R_028240_PA_SC_GENERIC_SCISSOR_TL},
- {0, 0, R_028244_PA_SC_GENERIC_SCISSOR_BR},
- {0, 0, R_028250_PA_SC_VPORT_SCISSOR_0_TL},
- {0, 0, R_028254_PA_SC_VPORT_SCISSOR_0_BR},
- {0, 0, R_028350_SX_MISC},
- {0, 0, R_028380_SQ_VTX_SEMANTIC_0},
- {0, 0, R_028384_SQ_VTX_SEMANTIC_1},
- {0, 0, R_028388_SQ_VTX_SEMANTIC_2},
- {0, 0, R_02838C_SQ_VTX_SEMANTIC_3},
- {0, 0, R_028390_SQ_VTX_SEMANTIC_4},
- {0, 0, R_028394_SQ_VTX_SEMANTIC_5},
- {0, 0, R_028398_SQ_VTX_SEMANTIC_6},
- {0, 0, R_02839C_SQ_VTX_SEMANTIC_7},
- {0, 0, R_0283A0_SQ_VTX_SEMANTIC_8},
- {0, 0, R_0283A4_SQ_VTX_SEMANTIC_9},
- {0, 0, R_0283A8_SQ_VTX_SEMANTIC_10},
- {0, 0, R_0283AC_SQ_VTX_SEMANTIC_11},
- {0, 0, R_0283B0_SQ_VTX_SEMANTIC_12},
- {0, 0, R_0283B4_SQ_VTX_SEMANTIC_13},
- {0, 0, R_0283B8_SQ_VTX_SEMANTIC_14},
- {0, 0, R_0283BC_SQ_VTX_SEMANTIC_15},
- {0, 0, R_0283C0_SQ_VTX_SEMANTIC_16},
- {0, 0, R_0283C4_SQ_VTX_SEMANTIC_17},
- {0, 0, R_0283C8_SQ_VTX_SEMANTIC_18},
- {0, 0, R_0283CC_SQ_VTX_SEMANTIC_19},
- {0, 0, R_0283D0_SQ_VTX_SEMANTIC_20},
- {0, 0, R_0283D4_SQ_VTX_SEMANTIC_21},
- {0, 0, R_0283D8_SQ_VTX_SEMANTIC_22},
- {0, 0, R_0283DC_SQ_VTX_SEMANTIC_23},
- {0, 0, R_0283E0_SQ_VTX_SEMANTIC_24},
- {0, 0, R_0283E4_SQ_VTX_SEMANTIC_25},
- {0, 0, R_0283E8_SQ_VTX_SEMANTIC_26},
- {0, 0, R_0283EC_SQ_VTX_SEMANTIC_27},
- {0, 0, R_0283F0_SQ_VTX_SEMANTIC_28},
- {0, 0, R_0283F4_SQ_VTX_SEMANTIC_29},
- {0, 0, R_0283F8_SQ_VTX_SEMANTIC_30},
- {0, 0, R_0283FC_SQ_VTX_SEMANTIC_31},
- {0, 0, R_0282D0_PA_SC_VPORT_ZMIN_0},
- {0, 0, R_0282D4_PA_SC_VPORT_ZMAX_0},
- {0, 0, R_028400_VGT_MAX_VTX_INDX},
- {0, 0, R_028404_VGT_MIN_VTX_INDX},
- {0, 0, R_028408_VGT_INDX_OFFSET},
- {0, 0, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX},
- {0, 0, R_028410_SX_ALPHA_TEST_CONTROL},
- {0, 0, R_028414_CB_BLEND_RED},
- {0, 0, R_028418_CB_BLEND_GREEN},
- {0, 0, R_02841C_CB_BLEND_BLUE},
- {0, 0, R_028420_CB_BLEND_ALPHA},
- {0, 0, R_028430_DB_STENCILREFMASK},
- {0, 0, R_028434_DB_STENCILREFMASK_BF},
- {0, 0, R_028438_SX_ALPHA_REF},
- {0, 0, R_02843C_PA_CL_VPORT_XSCALE_0},
- {0, 0, R_028440_PA_CL_VPORT_XOFFSET_0},
- {0, 0, R_028444_PA_CL_VPORT_YSCALE_0},
- {0, 0, R_028448_PA_CL_VPORT_YOFFSET_0},
- {0, 0, R_02844C_PA_CL_VPORT_ZSCALE_0},
- {0, 0, R_028450_PA_CL_VPORT_ZOFFSET_0},
- {0, 0, R_0285BC_PA_CL_UCP0_X},
- {0, 0, R_0285C0_PA_CL_UCP0_Y},
- {0, 0, R_0285C4_PA_CL_UCP0_Z},
- {0, 0, R_0285C8_PA_CL_UCP0_W},
- {0, 0, R_0285CC_PA_CL_UCP1_X},
- {0, 0, R_0285D0_PA_CL_UCP1_Y},
- {0, 0, R_0285D4_PA_CL_UCP1_Z},
- {0, 0, R_0285D8_PA_CL_UCP1_W},
- {0, 0, R_0285DC_PA_CL_UCP2_X},
- {0, 0, R_0285E0_PA_CL_UCP2_Y},
- {0, 0, R_0285E4_PA_CL_UCP2_Z},
- {0, 0, R_0285E8_PA_CL_UCP2_W},
- {0, 0, R_0285EC_PA_CL_UCP3_X},
- {0, 0, R_0285F0_PA_CL_UCP3_Y},
- {0, 0, R_0285F4_PA_CL_UCP3_Z},
- {0, 0, R_0285F8_PA_CL_UCP3_W},
- {0, 0, R_0285FC_PA_CL_UCP4_X},
- {0, 0, R_028600_PA_CL_UCP4_Y},
- {0, 0, R_028604_PA_CL_UCP4_Z},
- {0, 0, R_028608_PA_CL_UCP4_W},
- {0, 0, R_02860C_PA_CL_UCP5_X},
- {0, 0, R_028610_PA_CL_UCP5_Y},
- {0, 0, R_028614_PA_CL_UCP5_Z},
- {0, 0, R_028618_PA_CL_UCP5_W},
- {0, 0, R_02861C_SPI_VS_OUT_ID_0},
- {0, 0, R_028620_SPI_VS_OUT_ID_1},
- {0, 0, R_028624_SPI_VS_OUT_ID_2},
- {0, 0, R_028628_SPI_VS_OUT_ID_3},
- {0, 0, R_02862C_SPI_VS_OUT_ID_4},
- {0, 0, R_028630_SPI_VS_OUT_ID_5},
- {0, 0, R_028634_SPI_VS_OUT_ID_6},
- {0, 0, R_028638_SPI_VS_OUT_ID_7},
- {0, 0, R_02863C_SPI_VS_OUT_ID_8},
- {0, 0, R_028640_SPI_VS_OUT_ID_9},
- {0, 0, R_028644_SPI_PS_INPUT_CNTL_0},
- {0, 0, R_028648_SPI_PS_INPUT_CNTL_1},
- {0, 0, R_02864C_SPI_PS_INPUT_CNTL_2},
- {0, 0, R_028650_SPI_PS_INPUT_CNTL_3},
- {0, 0, R_028654_SPI_PS_INPUT_CNTL_4},
- {0, 0, R_028658_SPI_PS_INPUT_CNTL_5},
- {0, 0, R_02865C_SPI_PS_INPUT_CNTL_6},
- {0, 0, R_028660_SPI_PS_INPUT_CNTL_7},
- {0, 0, R_028664_SPI_PS_INPUT_CNTL_8},
- {0, 0, R_028668_SPI_PS_INPUT_CNTL_9},
- {0, 0, R_02866C_SPI_PS_INPUT_CNTL_10},
- {0, 0, R_028670_SPI_PS_INPUT_CNTL_11},
- {0, 0, R_028674_SPI_PS_INPUT_CNTL_12},
- {0, 0, R_028678_SPI_PS_INPUT_CNTL_13},
- {0, 0, R_02867C_SPI_PS_INPUT_CNTL_14},
- {0, 0, R_028680_SPI_PS_INPUT_CNTL_15},
- {0, 0, R_028684_SPI_PS_INPUT_CNTL_16},
- {0, 0, R_028688_SPI_PS_INPUT_CNTL_17},
- {0, 0, R_02868C_SPI_PS_INPUT_CNTL_18},
- {0, 0, R_028690_SPI_PS_INPUT_CNTL_19},
- {0, 0, R_028694_SPI_PS_INPUT_CNTL_20},
- {0, 0, R_028698_SPI_PS_INPUT_CNTL_21},
- {0, 0, R_02869C_SPI_PS_INPUT_CNTL_22},
- {0, 0, R_0286A0_SPI_PS_INPUT_CNTL_23},
- {0, 0, R_0286A4_SPI_PS_INPUT_CNTL_24},
- {0, 0, R_0286A8_SPI_PS_INPUT_CNTL_25},
- {0, 0, R_0286AC_SPI_PS_INPUT_CNTL_26},
- {0, 0, R_0286B0_SPI_PS_INPUT_CNTL_27},
- {0, 0, R_0286B4_SPI_PS_INPUT_CNTL_28},
- {0, 0, R_0286B8_SPI_PS_INPUT_CNTL_29},
- {0, 0, R_0286BC_SPI_PS_INPUT_CNTL_30},
- {0, 0, R_0286C0_SPI_PS_INPUT_CNTL_31},
- {0, 0, R_0286C4_SPI_VS_OUT_CONFIG},
- {0, 0, R_0286C8_SPI_THREAD_GROUPING},
- {0, 0, R_0286CC_SPI_PS_IN_CONTROL_0},
- {0, 0, R_0286D0_SPI_PS_IN_CONTROL_1},
- {0, 0, R_0286D4_SPI_INTERP_CONTROL_0},
- {0, 0, R_0286D8_SPI_INPUT_Z},
- {0, 0, R_0286DC_SPI_FOG_CNTL},
- {0, 0, R_0286E0_SPI_BARYC_CNTL},
- {0, 0, R_0286E4_SPI_PS_IN_CONTROL_2},
- {0, 0, R_0286E8_SPI_COMPUTE_INPUT_CNTL},
- {0, 0, R_028780_CB_BLEND0_CONTROL},
- {0, 0, R_028784_CB_BLEND1_CONTROL},
- {0, 0, R_028788_CB_BLEND2_CONTROL},
- {0, 0, R_02878C_CB_BLEND3_CONTROL},
- {0, 0, R_028790_CB_BLEND4_CONTROL},
- {0, 0, R_028794_CB_BLEND5_CONTROL},
- {0, 0, R_028798_CB_BLEND6_CONTROL},
- {0, 0, R_02879C_CB_BLEND7_CONTROL},
- {0, 0, R_028800_DB_DEPTH_CONTROL},
- {0, 0, R_02880C_DB_SHADER_CONTROL},
- {0, 0, R_028808_CB_COLOR_CONTROL},
- {0, 0, R_028810_PA_CL_CLIP_CNTL},
- {0, 0, R_028814_PA_SU_SC_MODE_CNTL},
- {0, 0, R_028818_PA_CL_VTE_CNTL},
- {0, 0, R_02881C_PA_CL_VS_OUT_CNTL},
- {0, 0, R_028820_PA_CL_NANINF_CNTL},
- {0, 0, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1},
- {1, S_0085F0_SH_ACTION_ENA(1), R_028840_SQ_PGM_START_PS},
- {0, 0, R_028844_SQ_PGM_RESOURCES_PS},
- {0, 0, R_028848_SQ_PGM_RESOURCES_2_PS},
- {0, 0, R_02884C_SQ_PGM_EXPORTS_PS},
- {1, S_0085F0_SH_ACTION_ENA(1), R_02885C_SQ_PGM_START_VS},
- {0, 0, R_028860_SQ_PGM_RESOURCES_VS},
- {0, 0, R_028864_SQ_PGM_RESOURCES_2_VS},
- {1, S_0085F0_SH_ACTION_ENA(1), R_0288A4_SQ_PGM_START_FS},
- {0, 0, R_0288A8_SQ_PGM_RESOURCES_FS},
- {0, 0, R_0288EC_SQ_LDS_ALLOC_PS},
- {0, 0, R_028900_SQ_ESGS_RING_ITEMSIZE},
- {0, 0, R_028904_SQ_GSVS_RING_ITEMSIZE},
- {0, 0, R_028908_SQ_ESTMP_RING_ITEMSIZE},
- {0, 0, R_02890C_SQ_GSTMP_RING_ITEMSIZE},
- {0, 0, R_028910_SQ_VSTMP_RING_ITEMSIZE},
- {0, 0, R_028914_SQ_PSTMP_RING_ITEMSIZE},
- {0, 0, R_02891C_SQ_GS_VERT_ITEMSIZE},
- {0, 0, R_028920_SQ_GS_VERT_ITEMSIZE_1},
- {0, 0, R_028924_SQ_GS_VERT_ITEMSIZE_2},
- {0, 0, R_028928_SQ_GS_VERT_ITEMSIZE_3},
- {1, 0, R_028940_ALU_CONST_CACHE_PS_0},
- {1, 0, R_028980_ALU_CONST_CACHE_VS_0},
- {0, 0, R_028A00_PA_SU_POINT_SIZE},
- {0, 0, R_028A04_PA_SU_POINT_MINMAX},
- {0, 0, R_028A08_PA_SU_LINE_CNTL},
- {0, 0, R_028A10_VGT_OUTPUT_PATH_CNTL},
- {0, 0, R_028A14_VGT_HOS_CNTL},
- {0, 0, R_028A18_VGT_HOS_MAX_TESS_LEVEL},
- {0, 0, R_028A1C_VGT_HOS_MIN_TESS_LEVEL},
- {0, 0, R_028A20_VGT_HOS_REUSE_DEPTH},
- {0, 0, R_028A24_VGT_GROUP_PRIM_TYPE},
- {0, 0, R_028A28_VGT_GROUP_FIRST_DECR},
- {0, 0, R_028A2C_VGT_GROUP_DECR},
- {0, 0, R_028A30_VGT_GROUP_VECT_0_CNTL},
- {0, 0, R_028A34_VGT_GROUP_VECT_1_CNTL},
- {0, 0, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL},
- {0, 0, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL},
- {0, 0, R_028A40_VGT_GS_MODE},
- {0, 0, R_028A48_PA_SC_MODE_CNTL_0},
- {0, 0, R_028A4C_PA_SC_MODE_CNTL_1},
- {0, 0, R_028AB4_VGT_REUSE_OFF},
- {0, 0, R_028AB8_VGT_VTX_CNT_EN},
- {0, 0, R_028ABC_DB_HTILE_SURFACE},
- {0, 0, R_028AC0_DB_SRESULTS_COMPARE_STATE0},
- {0, 0, R_028AC4_DB_SRESULTS_COMPARE_STATE1},
- {0, 0, R_028AC8_DB_PRELOAD_CONTROL},
- {0, 0, R_028B54_VGT_SHADER_STAGES_EN},
- {0, 0, R_028B70_DB_ALPHA_TO_MASK},
- {0, 0, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL},
- {0, 0, R_028B7C_PA_SU_POLY_OFFSET_CLAMP},
- {0, 0, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE},
- {0, 0, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET},
- {0, 0, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE},
- {0, 0, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET},
- {0, 0, R_028B94_VGT_STRMOUT_CONFIG},
- {0, 0, R_028B98_VGT_STRMOUT_BUFFER_CONFIG},
- {0, 0, R_028C00_PA_SC_LINE_CNTL},
- {0, 0, R_028C04_PA_SC_AA_CONFIG},
- {0, 0, R_028C08_PA_SU_VTX_CNTL},
- {0, 0, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ},
- {0, 0, R_028C10_PA_CL_GB_VERT_DISC_ADJ},
- {0, 0, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ},
- {0, 0, R_028C18_PA_CL_GB_HORZ_DISC_ADJ},
- {0, 0, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX},
- {0, 0, R_028C3C_PA_SC_AA_MASK},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028C60_CB_COLOR0_BASE},
- {0, 0, R_028C64_CB_COLOR0_PITCH},
- {0, 0, R_028C68_CB_COLOR0_SLICE},
- {0, 0, R_028C6C_CB_COLOR0_VIEW},
- {1, 0, R_028C70_CB_COLOR0_INFO},
- {0, 0, R_028C74_CB_COLOR0_ATTRIB},
- {0, 0, R_028C78_CB_COLOR0_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028C9C_CB_COLOR1_BASE},
- {0, 0, R_028CA0_CB_COLOR1_PITCH},
- {0, 0, R_028CA4_CB_COLOR1_SLICE},
- {0, 0, R_028CA8_CB_COLOR1_VIEW},
- {1, 0, R_028CAC_CB_COLOR1_INFO},
- {0, 0, R_028CB0_CB_COLOR1_ATTRIB},
- {0, 0, R_028CB8_CB_COLOR1_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028CD8_CB_COLOR2_BASE},
- {0, 0, R_028CDC_CB_COLOR2_PITCH},
- {0, 0, R_028CE0_CB_COLOR2_SLICE},
- {0, 0, R_028CE4_CB_COLOR2_VIEW},
- {1, 0, R_028CE8_CB_COLOR2_INFO},
- {0, 0, R_028CEC_CB_COLOR2_ATTRIB},
- {0, 0, R_028CF0_CB_COLOR2_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028D14_CB_COLOR3_BASE},
- {0, 0, R_028D18_CB_COLOR3_PITCH},
- {0, 0, R_028D1C_CB_COLOR3_SLICE},
- {0, 0, R_028D20_CB_COLOR3_VIEW},
- {1, 0, R_028D24_CB_COLOR3_INFO},
- {0, 0, R_028D28_CB_COLOR3_ATTRIB},
- {0, 0, R_028D2C_CB_COLOR3_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028D50_CB_COLOR4_BASE},
- {0, 0, R_028D54_CB_COLOR4_PITCH},
- {0, 0, R_028D58_CB_COLOR4_SLICE},
- {0, 0, R_028D5C_CB_COLOR4_VIEW},
- {1, 0, R_028D60_CB_COLOR4_INFO},
- {0, 0, R_028D64_CB_COLOR4_ATTRIB},
- {0, 0, R_028D68_CB_COLOR4_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028D8C_CB_COLOR5_BASE},
- {0, 0, R_028D90_CB_COLOR5_PITCH},
- {0, 0, R_028D94_CB_COLOR5_SLICE},
- {0, 0, R_028D98_CB_COLOR5_VIEW},
- {1, 0, R_028D9C_CB_COLOR5_INFO},
- {0, 0, R_028DA0_CB_COLOR5_ATTRIB},
- {0, 0, R_028DA4_CB_COLOR5_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028DC8_CB_COLOR6_BASE},
- {0, 0, R_028DCC_CB_COLOR6_PITCH},
- {0, 0, R_028DD0_CB_COLOR6_SLICE},
- {0, 0, R_028DD4_CB_COLOR6_VIEW},
- {1, 0, R_028DD8_CB_COLOR6_INFO},
- {0, 0, R_028DDC_CB_COLOR6_ATTRIB},
- {0, 0, R_028DE0_CB_COLOR6_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028E04_CB_COLOR7_BASE},
- {0, 0, R_028E08_CB_COLOR7_PITCH},
- {0, 0, R_028E0C_CB_COLOR7_SLICE},
- {0, 0, R_028E10_CB_COLOR7_VIEW},
- {1, 0, R_028E14_CB_COLOR7_INFO},
- {0, 0, R_028E18_CB_COLOR7_ATTRIB},
- {0, 0, R_028E1C_CB_COLOR7_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028E40_CB_COLOR8_BASE},
- {0, 0, R_028E44_CB_COLOR8_PITCH},
- {0, 0, R_028E48_CB_COLOR8_SLICE},
- {0, 0, R_028E4C_CB_COLOR8_VIEW},
- {1, 0, R_028E50_CB_COLOR8_INFO},
- {0, 0, R_028E54_CB_COLOR8_ATTRIB},
- {0, 0, R_028E58_CB_COLOR8_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028E5C_CB_COLOR9_BASE},
- {0, 0, R_028E60_CB_COLOR9_PITCH},
- {0, 0, R_028E64_CB_COLOR9_SLICE},
- {0, 0, R_028E68_CB_COLOR9_VIEW},
- {1, 0, R_028E6C_CB_COLOR9_INFO},
- {0, 0, R_028E70_CB_COLOR9_ATTRIB},
- {0, 0, R_028E74_CB_COLOR9_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028E78_CB_COLOR10_BASE},
- {0, 0, R_028E7C_CB_COLOR10_PITCH},
- {0, 0, R_028E80_CB_COLOR10_SLICE},
- {0, 0, R_028E84_CB_COLOR10_VIEW},
- {1, 0, R_028E88_CB_COLOR10_INFO},
- {0, 0, R_028E8C_CB_COLOR10_ATTRIB},
- {0, 0, R_028E90_CB_COLOR10_DIM},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028E94_CB_COLOR11_BASE},
- {0, 0, R_028E98_CB_COLOR11_PITCH},
- {0, 0, R_028E9C_CB_COLOR11_SLICE},
- {0, 0, R_028EA0_CB_COLOR11_VIEW},
- {1, 0, R_028EA4_CB_COLOR11_INFO},
- {0, 0, R_028EA8_CB_COLOR11_ATTRIB},
- {0, 0, R_028EAC_CB_COLOR11_DIM},
-};
-
-/* SHADER RESOURCE R600/R700 */
-static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset)
-{
- struct r600_reg r600_shader_resource[] = {
- {0, 0, R_030000_RESOURCE0_WORD0},
- {0, 0, R_030004_RESOURCE0_WORD1},
- {1, 0, R_030008_RESOURCE0_WORD2},
- {1, 0, R_03000C_RESOURCE0_WORD3},
- {0, 0, R_030010_RESOURCE0_WORD4},
- {0, 0, R_030014_RESOURCE0_WORD5},
- {0, 0, R_030018_RESOURCE0_WORD6},
- {0, 0, R_03001C_RESOURCE0_WORD7},
- };
- unsigned nreg = sizeof(r600_shader_resource)/sizeof(struct r600_reg);
-
- for (int i = 0; i < nreg; i++) {
- r600_shader_resource[i].offset += offset;
- }
- return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE);
-}
-
-/* SHADER SAMPLER R600/R700 */
-static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
-{
- struct r600_reg r600_shader_sampler[] = {
- {0, 0, R_03C000_SQ_TEX_SAMPLER_WORD0_0},
- {0, 0, R_03C004_SQ_TEX_SAMPLER_WORD1_0},
- {0, 0, R_03C008_SQ_TEX_SAMPLER_WORD2_0},
- };
- unsigned nreg = sizeof(r600_shader_sampler)/sizeof(struct r600_reg);
-
- for (int i = 0; i < nreg; i++) {
- r600_shader_sampler[i].offset += offset;
- }
- return r600_context_add_block(ctx, r600_shader_sampler, nreg, PKT3_SET_SAMPLER);
-}
-
-/* SHADER SAMPLER BORDER R600/R700 */
-static int evergreen_state_sampler_border_init(struct r600_context *ctx, u32 offset, unsigned id)
-{
- struct r600_reg r600_shader_sampler_border[] = {
- {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX},
- {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_RED},
- {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN},
- {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE},
- {0, 0, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA},
- };
- unsigned nreg = sizeof(r600_shader_sampler_border)/sizeof(struct r600_reg);
- unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x10 + 0x40000 + id * 0x1C;
- struct r600_group_block *block;
- struct r600_group *group;
- int r;
-
- for (int i = 0; i < nreg; i++) {
- r600_shader_sampler_border[i].offset -= R_00A400_TD_PS_SAMPLER0_BORDER_INDEX;
- r600_shader_sampler_border[i].offset += fake_offset;
- }
- r = r600_context_add_block(ctx, r600_shader_sampler_border, nreg, PKT3_SET_CONFIG_REG);
- if (r) {
- return r;
- }
- /* set proper offset */
- group = &ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER];
- id = group->offset_block_id[((fake_offset - group->start_offset) >> 2)];
- block = &group->blocks[id];
- block->pm4[1] = (offset - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
- return 0;
-}
-
-int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
-{
- int r;
-
- memset(ctx, 0, sizeof(struct r600_context));
- radeon->use_mem_constant = TRUE;
- ctx->radeon = radeon;
- LIST_INITHEAD(&ctx->query_list);
- /* initialize groups */
- r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_CONFIG], EVERGREEN_CONFIG_REG_OFFSET, EVERGREEN_CONFIG_REG_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_LOOP_CONST], EVERGREEN_LOOP_CONST_OFFSET, EVERGREEN_LOOP_CONST_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_BOOL_CONST], EVERGREEN_BOOL_CONST_OFFSET, EVERGREEN_BOOL_CONST_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_SAMPLER], EVERGREEN_SAMPLER_OFFSET, EVERGREEN_SAMPLER_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_RESOURCE], EVERGREEN_RESOURCE_OFFSET, EVERGREEN_RESOURCE_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_CONTEXT], EVERGREEN_CONTEXT_REG_OFFSET, EVERGREEN_CONTEXT_REG_END);
- if (r) {
- goto out_err;
- }
- /* we use unassigned range of GPU reg to fake border color register */
- r = r600_group_init(&ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER], 0x40000, 0x41000);
- if (r) {
- goto out_err;
- }
- ctx->ngroups = EVERGREEN_NGROUPS;
-
- /* add blocks */
- r = r600_context_add_block(ctx, evergreen_config_reg_list,
- sizeof(evergreen_config_reg_list)/sizeof(struct r600_reg),
- PKT3_SET_CONFIG_REG);
- if (r)
- goto out_err;
- r = r600_context_add_block(ctx, evergreen_context_reg_list,
- sizeof(evergreen_context_reg_list)/sizeof(struct r600_reg),
- PKT3_SET_CONTEXT_REG);
- if (r)
- goto out_err;
-
- /* PS SAMPLER */
- for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) {
- r = r600_state_sampler_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* VS SAMPLER */
- for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) {
- r = r600_state_sampler_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* PS SAMPLER BORDER */
- for (int j = 0; j < 18; j++) {
- r = evergreen_state_sampler_border_init(ctx, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, j);
- if (r)
- goto out_err;
- }
- /* VS SAMPLER BORDER */
- for (int j = 0; j < 18; j++) {
- r = evergreen_state_sampler_border_init(ctx, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, j);
- if (r)
- goto out_err;
- }
- /* PS RESOURCE */
- for (int j = 0, offset = 0; j < 176; j++, offset += 0x20) {
- r = evergreen_state_resource_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* VS RESOURCE */
- for (int j = 0, offset = 0x1600; j < 160; j++, offset += 0x20) {
- r = evergreen_state_resource_init(ctx, offset);
- if (r)
- goto out_err;
- }
-
- /* allocate cs variables */
- ctx->nreloc = RADEON_CTX_MAX_PM4;
- ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
- if (ctx->reloc == NULL) {
- r = -ENOMEM;
- goto out_err;
- }
- ctx->bo = calloc(ctx->nreloc, sizeof(void *));
- if (ctx->bo == NULL) {
- r = -ENOMEM;
- goto out_err;
- }
- ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
- ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
- if (ctx->pm4 == NULL) {
- r = -ENOMEM;
- goto out_err;
- }
- return 0;
-out_err:
- r600_context_fini(ctx);
- return r;
-}
-
-static inline void evergreen_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
-{
- struct r600_group_block *block;
- unsigned id;
-
- offset -= ctx->groups[EVERGREEN_GROUP_RESOURCE].start_offset;
- id = ctx->groups[EVERGREEN_GROUP_RESOURCE].offset_block_id[offset >> 2];
- block = &ctx->groups[EVERGREEN_GROUP_RESOURCE].blocks[id];
- if (state == NULL) {
- block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
- radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
- return;
- }
- block->reg[0] = state->regs[0].value;
- block->reg[1] = state->regs[1].value;
- block->reg[2] = state->regs[2].value;
- block->reg[3] = state->regs[3].value;
- block->reg[4] = state->regs[4].value;
- block->reg[5] = state->regs[5].value;
- block->reg[6] = state->regs[6].value;
- block->reg[7] = state->regs[7].value;
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
- radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
- if (state->regs[0].bo) {
- /* VERTEX RESOURCE, we preted there is 2 bo to relocate so
- * we have single case btw VERTEX & TEXTURE resource
- */
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
- } else {
- /* TEXTURE RESOURCE */
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
- }
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-}
-
-void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x20 * rid;
-
- evergreen_context_pipe_state_set_resource(ctx, state, offset);
-}
-
-void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x1600 + 0x20 * rid;
-
- evergreen_context_pipe_state_set_resource(ctx, state, offset);
-}
-
-static inline void evergreen_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
-{
- struct r600_group_block *block;
- unsigned id;
-
- offset -= ctx->groups[EVERGREEN_GROUP_SAMPLER].start_offset;
- id = ctx->groups[EVERGREEN_GROUP_SAMPLER].offset_block_id[offset >> 2];
- block = &ctx->groups[EVERGREEN_GROUP_SAMPLER].blocks[id];
- if (state == NULL) {
- block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
- return;
- }
- block->reg[0] = state->regs[0].value;
- block->reg[1] = state->regs[1].value;
- block->reg[2] = state->regs[2].value;
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-}
-
-static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
-{
- struct r600_group_block *block;
- unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x10 + 0x40000 + id * 0x1C;
-
- fake_offset -= ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER].start_offset;
- id = ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER].offset_block_id[fake_offset >> 2];
- block = &ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER].blocks[id];
- if (state == NULL) {
- block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
- return;
- }
- if (state->nregs <= 3) {
- return;
- }
- block->reg[0] = id;
- block->reg[1] = state->regs[3].value;
- block->reg[2] = state->regs[4].value;
- block->reg[3] = state->regs[5].value;
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-}
-
-void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
-{
- unsigned offset;
-
- offset = 0x0003C000 + id * 0xc;
- evergreen_context_pipe_state_set_sampler(ctx, state, offset);
- evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, id);
-}
-
-void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
-{
- unsigned offset;
-
- offset = 0x0003C0D8 + id * 0xc;
- evergreen_context_pipe_state_set_sampler(ctx, state, offset);
- evergreen_context_pipe_state_set_sampler_border(ctx, state, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, id);
-}
-
-
-void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
-{
- struct radeon_bo *cb[12];
- unsigned ndwords = 9;
-
- if (draw->indices) {
- ndwords = 13;
- /* make sure there is enough relocation space before scheduling draw */
- if (ctx->creloc >= (ctx->nreloc - 1)) {
- r600_context_flush(ctx);
- }
- }
-
- /* find number of color buffer */
- cb[0] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028C60_CB_COLOR0_BASE);
- cb[1] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028C9C_CB_COLOR1_BASE);
- cb[2] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028CD8_CB_COLOR2_BASE);
- cb[3] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028D14_CB_COLOR3_BASE);
- cb[4] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028D50_CB_COLOR4_BASE);
- cb[5] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028D8C_CB_COLOR5_BASE);
- cb[6] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028DC8_CB_COLOR6_BASE);
- cb[7] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028E04_CB_COLOR7_BASE);
- cb[8] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028E40_CB_COLOR8_BASE);
- cb[9] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028E5C_CB_COLOR9_BASE);
- cb[10] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028E78_CB_COLOR10_BASE);
- cb[11] = r600_context_reg_bo(ctx, EVERGREEN_GROUP_CONTEXT, R_028E94_CB_COLOR11_BASE);
- for (int i = 0; i < 12; i++) {
- if (cb[i]) {
- ndwords += 7;
- }
- }
-
- /* queries need some special values */
- if (ctx->num_query_running) {
- r600_context_reg(ctx, EVERGREEN_GROUP_CONTEXT,
- R_028004_DB_COUNT_CONTROL,
- S_028004_PERFECT_ZPASS_COUNTS(1),
- S_028004_PERFECT_ZPASS_COUNTS(1));
- r600_context_reg(ctx, EVERGREEN_GROUP_CONTEXT,
- R_02800C_DB_RENDER_OVERRIDE,
- S_02800C_NOOP_CULL_DISABLE(1),
- S_02800C_NOOP_CULL_DISABLE(1));
- }
-
- if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
- /* need to flush */
- r600_context_flush(ctx);
- }
- /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
- if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
- R600_ERR("context is too big to be scheduled\n");
- return;
- }
-
- /* enough room to copy packet */
- r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_CONFIG]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_CONTEXT]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_RESOURCE]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_SAMPLER]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[EVERGREEN_GROUP_SAMPLER_BORDER]);
-
- /* draw packet */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
- if (draw->indices) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
- ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset;
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(draw->indices->pb));
- } else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- }
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
- ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
-
- /* flush color buffer */
- for (int i = 0; i < 8; i++) {
- if (cb[i]) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
- ctx->pm4[ctx->pm4_cdwords++] = (S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
- S_0085F0_CB_ACTION_ENA(1);
- ctx->pm4[ctx->pm4_cdwords++] = (cb[i]->size + 255) >> 8;
- ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
- ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], cb[i]);
- }
- }
-
- /* all dirty state have been scheduled in current cs */
- ctx->pm4_dirty_cdwords = 0;
-}
-
-static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
-{
- struct r600_group_block *block;
- unsigned id;
-
- offset -= ctx->groups[EVERGREEN_GROUP_RESOURCE].start_offset;
- id = ctx->groups[EVERGREEN_GROUP_RESOURCE].offset_block_id[offset >> 2];
- block = &ctx->groups[EVERGREEN_GROUP_RESOURCE].blocks[id];
- block->reg[0] = state->regs[0].value;
- block->reg[1] = state->regs[1].value;
- block->reg[2] = state->regs[2].value;
- block->reg[3] = state->regs[3].value;
- block->reg[4] = state->regs[4].value;
- block->reg[5] = state->regs[5].value;
- block->reg[6] = state->regs[6].value;
- block->reg[7] = state->regs[7].value;
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
- radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
- if (state->regs[0].bo) {
- /* VERTEX RESOURCE, we preted there is 2 bo to relocate so
- * we have single case btw VERTEX & TEXTURE resource
- */
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
- } else {
- /* TEXTURE RESOURCE */
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
- }
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-}
-
-void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_030000_RESOURCE0_WORD0 + 0x20 * rid;
-
- evergreen_resource_set(ctx, state, offset);
-}
-
-void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_030000_RESOURCE0_WORD0 + 0x1600 + 0x20 * rid;
-
- evergreen_resource_set(ctx, state, offset);
-}
diff --git a/src/gallium/winsys/r600/drm/gen_eg_states.py b/src/gallium/winsys/r600/drm/gen_eg_states.py
deleted file mode 100644
index b2e5b2203a..0000000000
--- a/src/gallium/winsys/r600/drm/gen_eg_states.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import os
-import re
-
-def main():
- fileIN = open('eg_states.h', 'r')
- line = fileIN.readline()
- next_is_reg = False
- count = 0
-
- print "/* This file is autogenerated from eg_states.h - do not edit directly */"
- print "/* autogenerating script is gen_eg_states.py */"
- print ""
- while line:
- if line[0:2] == "};":
- if next_is_reg == True:
- print "#define " + name + "_SIZE\t\t", count
- print "#define " + name + "_PM4 128\t\t"
- next_is_reg = False
- count = 0
- print ""
-
- if line[0:6] == "static":
- name = line.rstrip("\n")
- cline = name.split()
- name = cline[4].split('[')
- name = name[0].replace("_names", "")
- print "/* " + name + " */"
- next_is_reg = True
- elif next_is_reg == True:
- reg = line.split();
- reg = reg[3].replace("},", "")
- reg = reg.replace("\"", "")
- print "#define " + name + "__" + reg + "\t\t", count
- count = count + 1
-
- line = fileIN.readline()
-
-if __name__ == "__main__":
- main()
diff --git a/src/gallium/winsys/r600/drm/gen_r600_states.py b/src/gallium/winsys/r600/drm/gen_r600_states.py
deleted file mode 100644
index 9bd5ab2082..0000000000
--- a/src/gallium/winsys/r600/drm/gen_r600_states.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import os
-import re
-
-def main():
- fileIN = open('r600_states.h', 'r')
- line = fileIN.readline()
- next_is_reg = False
- count = 0
-
- print "/* This file is autogenerated from r600_states.h - do not edit directly */"
- print "/* autogenerating script is gen_r600_states.py */"
- print ""
- while line:
- if line[0:2] == "};":
- if next_is_reg == True:
- print "#define " + name + "_SIZE\t\t", count
- print "#define " + name + "_PM4 128\t\t"
- next_is_reg = False
- count = 0
- print ""
-
- if line[0:6] == "static":
- name = line.rstrip("\n")
- cline = name.split()
- name = cline[4].split('[')
- name = name[0].replace("_names", "")
- print "/* " + name + " */"
- next_is_reg = True
- elif next_is_reg == True:
- reg = line.split();
- reg = reg[3].replace("},", "")
- reg = reg.replace("\"", "")
- print "#define " + name + "__" + reg + "\t\t", count
- count = count + 1
-
- line = fileIN.readline()
-
-if __name__ == "__main__":
- main()
diff --git a/src/gallium/winsys/r600/drm/r600.c b/src/gallium/winsys/r600/drm/r600.c
index fdcadffc53..0a4d2e791d 100644
--- a/src/gallium/winsys/r600/drm/r600.c
+++ b/src/gallium/winsys/r600/drm/r600.c
@@ -25,6 +25,9 @@
*/
#include "xf86drm.h"
#include "radeon_drm.h"
+#include "pipe/p_compiler.h"
+#include "util/u_inlines.h"
+#include <pipebuffer/pb_bufmgr.h>
#include "r600_priv.h"
enum radeon_family r600_get_family(struct radeon *r600)
@@ -37,6 +40,11 @@ enum chip_class r600_get_family_class(struct radeon *radeon)
return radeon->chip_class;
}
+struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon)
+{
+ return &radeon->tiling_info;
+}
+
static int r600_get_device(struct radeon *r600)
{
struct drm_radeon_info info;
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
new file mode 100644
index 0000000000..7d54ff18fc
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2010 Dave Airlie
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Dave Airlie
+ */
+#include <pipe/p_compiler.h>
+#include <pipe/p_screen.h>
+#include <pipebuffer/pb_bufmgr.h>
+#include "radeon_drm.h"
+#include "r600_priv.h"
+#include "r600d.h"
+
+struct r600_bo *r600_bo(struct radeon *radeon,
+ unsigned size, unsigned alignment, unsigned usage)
+{
+ struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo));
+ struct pb_desc desc;
+ struct pb_manager *man;
+
+ desc.alignment = alignment;
+ desc.usage = usage;
+ ws_bo->size = size;
+
+ if (usage & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
+ man = radeon->cman;
+ else
+ man = radeon->kman;
+
+ ws_bo->pb = man->create_buffer(man, size, &desc);
+ if (ws_bo->pb == NULL) {
+ free(ws_bo);
+ return NULL;
+ }
+
+ pipe_reference_init(&ws_bo->reference, 1);
+ return ws_bo;
+}
+
+struct r600_bo *r600_bo_handle(struct radeon *radeon,
+ unsigned handle, unsigned *array_mode)
+{
+ struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo));
+ struct radeon_bo *bo;
+
+ ws_bo->pb = radeon_bo_pb_create_buffer_from_handle(radeon->kman, handle);
+ if (!ws_bo->pb) {
+ free(ws_bo);
+ return NULL;
+ }
+ bo = radeon_bo_pb_get_bo(ws_bo->pb);
+ ws_bo->size = bo->size;
+ pipe_reference_init(&ws_bo->reference, 1);
+
+ radeon_bo_get_tiling_flags(radeon, bo, &ws_bo->tiling_flags,
+ &ws_bo->kernel_pitch);
+ if (array_mode) {
+ if (ws_bo->tiling_flags) {
+ if (ws_bo->tiling_flags & RADEON_TILING_MICRO)
+ *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
+ if ((ws_bo->tiling_flags & (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) ==
+ (RADEON_TILING_MICRO | RADEON_TILING_MACRO))
+ *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
+ } else {
+ *array_mode = 0;
+ }
+ }
+ return ws_bo;
+}
+
+void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx)
+{
+ return pb_map(bo->pb, usage, ctx);
+}
+
+void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
+{
+ pb_unmap(bo->pb);
+}
+
+static void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
+{
+ if (bo->pb)
+ pb_reference(&bo->pb, NULL);
+ free(bo);
+}
+
+void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
+ struct r600_bo *src)
+{
+ struct r600_bo *old = *dst;
+
+ if (pipe_reference(&(*dst)->reference, &src->reference)) {
+ r600_bo_destroy(radeon, old);
+ }
+ *dst = src;
+}
+
+unsigned r600_bo_get_handle(struct r600_bo *pb_bo)
+{
+ struct radeon_bo *bo;
+
+ bo = radeon_bo_pb_get_bo(pb_bo->pb);
+ if (!bo)
+ return 0;
+
+ return bo->handle;
+}
+
+unsigned r600_bo_get_size(struct r600_bo *pb_bo)
+{
+ struct radeon_bo *bo;
+
+ bo = radeon_bo_pb_get_bo(pb_bo->pb);
+ if (!bo)
+ return 0;
+
+ return bo->size;
+}
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 7a1a762f54..c9de95ffc0 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -25,14 +25,231 @@
* Corbin Simpson <MostAwesomeDude@gmail.com>
* Joakim Sindholt <opensource@zhasha.com>
*/
+#include <stdio.h>
+#include <errno.h>
#include <sys/ioctl.h>
#include "util/u_inlines.h"
#include "util/u_debug.h"
-#include "radeon_priv.h"
+#include <pipebuffer/pb_bufmgr.h>
+#include "r600.h"
+#include "r600_priv.h"
#include "r600_drm_public.h"
+#include "xf86drm.h"
+#include "radeon_drm.h"
+
+#ifndef RADEON_INFO_TILING_CONFIG
+#define RADEON_INFO_TILING_CONFIG 0x6
+#endif
+/*
+ * Query the device id through the DRM_RADEON_INFO ioctl.
+ *
+ * radeon->device is cleared first and its address is handed to the ioctl
+ * as the destination for RADEON_INFO_DEVICE_ID.
+ *
+ * Returns the result of drmCommandWriteRead() (0 on success).
+ */
+static int radeon_get_device(struct radeon *radeon)
+{
+	/* Zero the whole request so no uninitialised bytes reach the kernel. */
+	struct drm_radeon_info info = {0};
+
+	radeon->device = 0;
+	info.request = RADEON_INFO_DEVICE_ID;
+	info.value = (uintptr_t)&radeon->device;
+
+	return drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+				   sizeof(info));
+}
+
+/*
+ * Query RADEON_INFO_TILING_CONFIG from the kernel and decode it into
+ * radeon->tiling_info.
+ *
+ * Decoding, as implemented below:
+ *   bits 1-3 -> num_channels (0..3 map to 1, 2, 4, 8)
+ *   bits 4-5 -> num_banks    (0..1 map to 4, 8)
+ *   bits 6-7 -> group_bytes  (0..1 map to 256, 512)
+ *
+ * Returns 0 on success, the drmCommandWriteRead() error if the ioctl
+ * fails, or -EINVAL when a field holds a value outside the ranges above.
+ * Fields decoded before an -EINVAL is hit stay written in tiling_info.
+ */
+static int radeon_drm_get_tiling(struct radeon *radeon)
+{
+	/* Zero the request and the destination so nothing uninitialised is
+	 * sent to the kernel or decoded afterwards. */
+	struct drm_radeon_info info = {0};
+	uint32_t tiling_config = 0;
+	int r;
+
+	info.request = RADEON_INFO_TILING_CONFIG;
+	info.value = (uintptr_t)&tiling_config;
+	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+				sizeof(info));
+	if (r)
+		return r;
+
+	switch ((tiling_config & 0xe) >> 1) {
+	case 0:
+		radeon->tiling_info.num_channels = 1;
+		break;
+	case 1:
+		radeon->tiling_info.num_channels = 2;
+		break;
+	case 2:
+		radeon->tiling_info.num_channels = 4;
+		break;
+	case 3:
+		radeon->tiling_info.num_channels = 8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch ((tiling_config & 0x30) >> 4) {
+	case 0:
+		radeon->tiling_info.num_banks = 4;
+		break;
+	case 1:
+		radeon->tiling_info.num_banks = 8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch ((tiling_config & 0xc0) >> 6) {
+	case 0:
+		radeon->tiling_info.group_bytes = 256;
+		break;
+	case 1:
+		radeon->tiling_info.group_bytes = 512;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate and initialise a struct radeon for the given DRM fd.
+ *
+ * When fd >= 0 the device id is queried from the kernel and replaces the
+ * device argument; otherwise device is used as given. The chip family and
+ * chip class are derived from the device id, the tiling configuration is
+ * queried for R600/R700 class chips, and the kernel and cache buffer
+ * managers are created.
+ *
+ * Returns the new object (refcount 1) or NULL on failure. Every failure
+ * after the allocation goes through radeon_decref(), which destroys any
+ * buffer manager already created, closes fd when it is >= 0 and frees the
+ * object, so nothing is leaked on the error paths.
+ */
+struct radeon *radeon_new(int fd, unsigned device)
+{
+	struct radeon *radeon;
+	int r;
+
+	radeon = calloc(1, sizeof(*radeon));
+	if (radeon == NULL) {
+		return NULL;
+	}
+	radeon->fd = fd;
+	radeon->device = device;
+	radeon->refcount = 1;
+	if (fd >= 0) {
+		r = radeon_get_device(radeon);
+		if (r) {
+			fprintf(stderr, "Failed to get device id\n");
+			return radeon_decref(radeon);
+		}
+	}
+	radeon->family = radeon_family_from_device(radeon->device);
+	if (radeon->family == CHIP_UNKNOWN) {
+		fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device);
+		return radeon_decref(radeon);
+	}
+
+	/* report families this winsys does not handle (does not abort) */
+	switch (radeon->family) {
+	case CHIP_R600:
+	case CHIP_RV610:
+	case CHIP_RV630:
+	case CHIP_RV670:
+	case CHIP_RV620:
+	case CHIP_RV635:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV770:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_RV740:
+	case CHIP_CEDAR:
+	case CHIP_REDWOOD:
+	case CHIP_JUNIPER:
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		break;
+	default:
+		/* every other family, including the pre-R600 ones
+		 * (CHIP_R100 ... CHIP_R580), ends up here */
+		fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
+			__func__, radeon->device);
+		break;
+	}
+
+	/* setup class */
+	switch (radeon->family) {
+	case CHIP_R600:
+	case CHIP_RV610:
+	case CHIP_RV630:
+	case CHIP_RV670:
+	case CHIP_RV620:
+	case CHIP_RV635:
+	case CHIP_RS780:
+	case CHIP_RS880:
+		radeon->chip_class = R600;
+		break;
+	case CHIP_RV770:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_RV740:
+		radeon->chip_class = R700;
+		break;
+	case CHIP_CEDAR:
+	case CHIP_REDWOOD:
+	case CHIP_JUNIPER:
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		radeon->chip_class = EVERGREEN;
+		break;
+	default:
+		fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
+			__func__, radeon->device);
+		break;
+	}
+
+	/* tiling configuration is only queried for R600/R700 class chips */
+	if (radeon->chip_class == R600 || radeon->chip_class == R700) {
+		if (radeon_drm_get_tiling(radeon))
+			return radeon_decref(radeon);
+	}
+	radeon->kman = radeon_bo_pbmgr_create(radeon);
+	if (!radeon->kman)
+		return radeon_decref(radeon);
+	radeon->cman = pb_cache_manager_create(radeon->kman, 100000);
+	if (!radeon->cman)
+		return radeon_decref(radeon);
+	return radeon;
+}
struct radeon *r600_drm_winsys_create(int drmfd)
{
return radeon_new(drmfd, 0);
}
+/*
+ * Drop one reference on radeon.
+ *
+ * A NULL argument is accepted and ignored. When the count is still above
+ * zero after the decrement nothing else happens. Otherwise the cache
+ * manager and then the kernel manager are destroyed (when set), the DRM fd
+ * is closed (when >= 0) and the object is freed.
+ *
+ * Always returns NULL.
+ */
+struct radeon *radeon_decref(struct radeon *radeon)
+{
+	if (radeon == NULL) {
+		return NULL;
+	}
+
+	radeon->refcount--;
+	if (radeon->refcount > 0) {
+		return NULL;
+	}
+
+	/* last reference gone: tear down in reverse order of creation */
+	if (radeon->cman != NULL) {
+		radeon->cman->destroy(radeon->cman);
+	}
+	if (radeon->kman != NULL) {
+		radeon->kman->destroy(radeon->kman);
+	}
+	if (radeon->fd >= 0) {
+		drmClose(radeon->fd);
+	}
+	free(radeon);
+
+	return NULL;
+}
diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h
index 84f2dce437..cfce8df9c2 100644
--- a/src/gallium/winsys/r600/drm/r600_drm_public.h
+++ b/src/gallium/winsys/r600/drm/r600_drm_public.h
@@ -1,4 +1,28 @@
-
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ */
#ifndef R600_DRM_PUBLIC_H
#define R600_DRM_PUBLIC_H
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
new file mode 100644
index 0000000000..2521ff9647
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ */
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "xf86drm.h"
+#include "r600.h"
+#include "r600d.h"
+#include "radeon_drm.h"
+#include "bof.h"
+#include "pipe/p_compiler.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include <pipebuffer/pb_bufmgr.h>
+#include "r600_priv.h"
+
+#define GROUP_FORCE_NEW_BLOCK 0
+
+/*
+ * Set up the fence state of a context.
+ *
+ * Sets ctx->fence to 1, allocates a 4096 byte buffer object for the fence,
+ * maps it (unsynchronized) into ctx->cfence, writes 0 to the first dword
+ * and initialises the ctx->fenced_bo list head.
+ *
+ * Returns 0 on success or -ENOMEM when the buffer cannot be allocated or
+ * mapped. On a mapping failure the buffer is released again, so
+ * ctx->fence_bo is NULL on return.
+ */
+int r600_context_init_fence(struct r600_context *ctx)
+{
+	ctx->fence = 1;
+	ctx->fence_bo = r600_bo(ctx->radeon, 4096, 0, 0);
+	if (ctx->fence_bo == NULL) {
+		return -ENOMEM;
+	}
+	ctx->cfence = r600_bo_map(ctx->radeon, ctx->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL);
+	if (ctx->cfence == NULL) {
+		/* do not dereference a failed mapping; give the buffer back */
+		r600_bo_reference(ctx->radeon, &ctx->fence_bo, NULL);
+		return -ENOMEM;
+	}
+	*ctx->cfence = 0;
+	LIST_INITHEAD(&ctx->fenced_bo);
+	return 0;
+}
+
+/*
+ * Tag every buffer in ctx->bo[0 .. creloc-1] with the context's current
+ * fence value and queue it at the tail of ctx->fenced_bo.
+ *
+ * A buffer whose fencedlist node is already linked is unlinked first, so
+ * each buffer appears on the list once. The buffer's ctx back-pointer is
+ * set to this context.
+ */
+static void INLINE r600_context_update_fenced_list(struct r600_context *ctx)
+{
+	int idx = 0;
+
+	while (idx < ctx->creloc) {
+		if (!LIST_IS_EMPTY(&ctx->bo[idx]->fencedlist))
+			LIST_DELINIT(&ctx->bo[idx]->fencedlist);
+		LIST_ADDTAIL(&ctx->bo[idx]->fencedlist, &ctx->fenced_bo);
+		ctx->bo[idx]->fence = ctx->fence;
+		ctx->bo[idx]->ctx = ctx;
+		idx++;
+	}
+}
+
+/*
+ * Re-tag the buffers on ctx->fenced_bo.
+ *
+ * Buffers whose fence is not above the value currently stored in
+ * *ctx->cfence are removed from the list and get fence 0; all remaining
+ * buffers get their fence replaced by the given value.
+ * NOTE(review): judging by the name this runs when the fence counter
+ * wraps - confirm against the caller.
+ */
+static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence)
+{
+	struct radeon_bo *bo = NULL;
+	struct radeon_bo *next;
+
+	LIST_FOR_EACH_ENTRY_SAFE(bo, next, &ctx->fenced_bo, fencedlist) {
+		if (bo->fence > *ctx->cfence) {
+			/* still above the stored fence value: keep it listed */
+			bo->fence = fence;
+			continue;
+		}
+		LIST_DELINIT(&bo->fencedlist);
+		bo->fence = 0;
+	}
+}
+
+/*
+ * Split a register table into blocks and register them with the context.
+ *
+ * Walks the nreg entries of reg, groups runs of registers whose offsets are
+ * consecutive (4 bytes apart) into one r600_block each, pre-builds the pm4
+ * packet header for the block and records the block in
+ * ctx->range[..].blocks[..] for every register offset it covers.
+ * Entries whose offset equals GROUP_FORCE_NEW_BLOCK are markers only and
+ * produce no block.
+ *
+ * Returns 0 on success or -ENOMEM when a block cannot be allocated; blocks
+ * created before the failure stay registered in ctx.
+ */
+int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg)
+{
+ struct r600_block *block;
+ struct r600_range *range;
+ int offset;
+
+ /* i indexes the first register of the current run, n is the run length */
+ for (unsigned i = 0, n = 0; i < nreg; i += n) {
+ u32 j;
+
+ /* skip "force new block" marker entries */
+ if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) {
+ n = 1;
+ continue;
+ }
+
+ /* registers that need relocation are in their own group */
+ /* find number of consecutive registers */
+ n = 0;
+ offset = reg[i].offset;
+ while (reg[i + n].offset == offset) {
+ n++;
+ offset += 4;
+ /* stop at the end of the table before reading reg[i + n] again */
+ if ((n + i) >= nreg)
+ break;
+ /* cap the run length; presumably the "- 2" accounts for the two
+ * header dwords stored in pm4[] ahead of the register values */
+ if (n >= (R600_BLOCK_MAX_REG - 2))
+ break;
+ }
+
+ /* allocate new block */
+ block = calloc(1, sizeof(struct r600_block));
+ if (block == NULL) {
+ return -ENOMEM;
+ }
+ ctx->nblocks++;
+ /* make every register offset of the run resolve to this block
+ * (this j shadows the u32 j declared above) */
+ for (int j = 0; j < n; j++) {
+ range = &ctx->range[CTX_RANGE_ID(ctx, reg[i + j].offset)];
+ range->blocks[CTX_BLOCK_ID(ctx, reg[i + j].offset)] = block;
+ }
+
+ /* initialize block */
+ /* pm4 layout: packet header, start offset relative to offset_base
+ * in dwords, then n register value slots (block->reg points at them) */
+ block->start_offset = reg[i].offset;
+ block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n);
+ block->pm4[block->pm4_ndwords++] = (block->start_offset - reg[i].offset_base) >> 2;
+ block->reg = &block->pm4[block->pm4_ndwords];
+ block->pm4_ndwords += n;
+ block->nreg = n;
+ LIST_INITHEAD(&block->list);
+
+ /* for each register that needs a buffer object: assign the next
+ * reloc slot (numbering starts at 1), append a NOP packet with a
+ * zero placeholder dword and remember where that placeholder is */
+ for (j = 0; j < n; j++) {
+ if (reg[i+j].need_bo) {
+ block->nbo++;
+ assert(block->nbo < R600_BLOCK_MAX_BO);
+ block->pm4_bo_index[j] = block->nbo;
+ block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
+ block->pm4[block->pm4_ndwords++] = 0x00000000;
+ block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags;
+ block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask;
+ block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1;
+ }
+ }
+ /* account 7 dwords of flush space per register with flush flags */
+ for (j = 0; j < n; j++) {
+ if (reg[i+j].flush_flags) {
+ block->pm4_flush_ndwords += 7;
+ }
+ }
+ /* check that we stay in limit */
+ assert(block->pm4_ndwords < R600_BLOCK_MAX_REG);
+ }
+ return 0;
+}
+
+/*
+ * R600/R700 configuration registers.
+ *
+ * Every entry uses PKT3_SET_CONFIG_REG with R600_CONFIG_REG_OFFSET and has
+ * its last three fields set to 0.
+ * NOTE(review): initialisers are positional; judging by how
+ * r600_context_add_block() reads struct r600_reg the order appears to be
+ * {opcode, offset_base, offset, need_bo, flush_flags, flush_mask} -
+ * confirm against the struct definition.
+ */
+static const struct r600_reg r600_config_reg_list[] = {
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C00_SQ_CONFIG, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C10_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008C14_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009508_TA_CNTL_AUX, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009714_VC_ENHANCE, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009830_DB_DEBUG, 0, 0, 0},
+ {PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_009838_DB_WATERMARKS, 0, 0, 0},
+};
+
+/*
+ * Control constants: the two SQ_VTX_*_LOC registers, emitted with
+ * PKT3_SET_CTL_CONST relative to R600_CTL_CONST_OFFSET. The last three
+ * fields of both entries are 0.
+ */
+static const struct r600_reg r600_ctl_const_list[] = {
+ {PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0},
+ {PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0},
+};
+
+static const struct r600_reg r600_context_reg_list[] = {
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028350_SX_MISC, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A14_VGT_HOS_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A2C_VGT_GROUP_DECR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A40_VGT_GS_MODE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A4C_PA_SC_MODE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AB0_VGT_STRMOUT_EN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AB4_VGT_REUSE_OFF, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028B20_VGT_STRMOUT_BUFFER_EN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028028_DB_STENCIL_CLEAR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02802C_DB_DEPTH_CLEAR, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028040_CB_COLOR0_BASE, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A0_CB_COLOR0_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028060_CB_COLOR0_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028080_CB_COLOR0_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280E0_CB_COLOR0_FRAG, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280C0_CB_COLOR0_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028100_CB_COLOR0_MASK, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028044_CB_COLOR1_BASE, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A4_CB_COLOR1_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028064_CB_COLOR1_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028084_CB_COLOR1_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280E4_CB_COLOR1_FRAG, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280C4_CB_COLOR1_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028104_CB_COLOR1_MASK, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028048_CB_COLOR2_BASE, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A8_CB_COLOR2_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028068_CB_COLOR2_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028088_CB_COLOR2_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280E8_CB_COLOR2_FRAG, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280C8_CB_COLOR2_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028108_CB_COLOR2_MASK, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02804C_CB_COLOR3_BASE, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280AC_CB_COLOR3_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02806C_CB_COLOR3_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02808C_CB_COLOR3_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280EC_CB_COLOR3_FRAG, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280CC_CB_COLOR3_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02810C_CB_COLOR3_MASK, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028050_CB_COLOR4_BASE, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B0_CB_COLOR4_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028070_CB_COLOR4_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028090_CB_COLOR4_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280F0_CB_COLOR4_FRAG, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D0_CB_COLOR4_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028110_CB_COLOR4_MASK, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028054_CB_COLOR5_BASE, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B4_CB_COLOR5_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028074_CB_COLOR5_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028094_CB_COLOR5_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280F4_CB_COLOR5_FRAG, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D4_CB_COLOR5_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028114_CB_COLOR5_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028058_CB_COLOR6_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B8_CB_COLOR6_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028078_CB_COLOR6_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028098_CB_COLOR6_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280F8_CB_COLOR6_FRAG, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D8_CB_COLOR6_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028118_CB_COLOR6_MASK, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02805C_CB_COLOR7_BASE, 1, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280BC_CB_COLOR7_INFO, 1, 0, 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02807C_CB_COLOR7_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02809C_CB_COLOR7_VIEW, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280FC_CB_COLOR7_FRAG, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280DC_CB_COLOR7_TILE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02811C_CB_COLOR7_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028120_CB_CLEAR_RED, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028124_CB_CLEAR_GREEN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028128_CB_CLEAR_BLUE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02812C_CB_CLEAR_ALPHA, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028940_ALU_CONST_CACHE_PS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028980_ALU_CONST_CACHE_VS_0, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02823C_CB_SHADER_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028238_CB_TARGET_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028414_CB_BLEND_RED, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028418_CB_BLEND_GREEN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02841C_CB_BLEND_BLUE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028420_CB_BLEND_ALPHA, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028424_CB_FOG_RED, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028428_CB_FOG_GREEN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02842C_CB_FOG_BLUE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028430_DB_STENCILREFMASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028434_DB_STENCILREFMASK_BF, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028438_SX_ALPHA_REF, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286DC_SPI_FOG_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286E0_SPI_FOG_FUNC_SCALE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286E4_SPI_FOG_FUNC_BIAS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028780_CB_BLEND0_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028784_CB_BLEND1_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028788_CB_BLEND2_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02878C_CB_BLEND3_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028790_CB_BLEND4_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028794_CB_BLEND5_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028798_CB_BLEND6_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02879C_CB_BLEND7_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0287A0_CB_SHADER_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028800_DB_DEPTH_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028804_CB_BLEND_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028808_CB_COLOR_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02880C_DB_SHADER_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C04_PA_SC_AA_CONFIG, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C30_CB_CLRCMP_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C34_CB_CLRCMP_SRC, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C38_CB_CLRCMP_DST, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C3C_CB_CLRCMP_MSK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C48_PA_SC_AA_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D44_DB_ALPHA_TO_MASK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02800C_DB_DEPTH_BASE, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028000_DB_DEPTH_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028004_DB_DEPTH_VIEW, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028010_DB_DEPTH_INFO, 1, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D0C_DB_RENDER_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D10_DB_RENDER_OVERRIDE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D24_DB_HTILE_SURFACE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D30_DB_PRELOAD_CONTROL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028D34_DB_PREFETCH_LIMIT, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028230_PA_SC_EDGERULE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028810_PA_CL_CLIP_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028818_PA_CL_VTE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028820_PA_CL_NANINF_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A00_PA_SU_POINT_SIZE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A08_PA_SU_LINE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A0C_PA_SC_LINE_STIPPLE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A48_PA_SC_MPASS_PS_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C00_PA_SC_LINE_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E20_PA_CL_UCP0_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E24_PA_CL_UCP0_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E28_PA_CL_UCP0_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E2C_PA_CL_UCP0_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E30_PA_CL_UCP1_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E34_PA_CL_UCP1_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E38_PA_CL_UCP1_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E3C_PA_CL_UCP1_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E40_PA_CL_UCP2_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E44_PA_CL_UCP2_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E48_PA_CL_UCP2_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E4C_PA_CL_UCP2_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E50_PA_CL_UCP3_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E54_PA_CL_UCP3_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E58_PA_CL_UCP3_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E5C_PA_CL_UCP3_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E60_PA_CL_UCP4_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E64_PA_CL_UCP4_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E68_PA_CL_UCP4_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E6C_PA_CL_UCP4_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E70_PA_CL_UCP5_X, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E74_PA_CL_UCP5_Y, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E78_PA_CL_UCP5_Z, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028E7C_PA_CL_UCP5_W, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028614_SPI_VS_OUT_ID_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028618_SPI_VS_OUT_ID_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02861C_SPI_VS_OUT_ID_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028620_SPI_VS_OUT_ID_3, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028624_SPI_VS_OUT_ID_4, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028628_SPI_VS_OUT_ID_5, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02862C_SPI_VS_OUT_ID_6, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028630_SPI_VS_OUT_ID_7, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028634_SPI_VS_OUT_ID_8, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028638_SPI_VS_OUT_ID_9, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028858_SQ_PGM_START_VS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028868_SQ_PGM_RESOURCES_VS, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028894_SQ_PGM_START_FS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288A4_SQ_PGM_RESOURCES_FS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288D0_SQ_PGM_CF_OFFSET_VS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288DC_SQ_PGM_CF_OFFSET_FS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0286D8_SPI_INPUT_Z, 0, 0, 0},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028840_SQ_PGM_START_PS, 1, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+ {0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028850_SQ_PGM_RESOURCES_PS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028854_SQ_PGM_EXPORTS_PS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0288CC_SQ_PGM_CF_OFFSET_PS, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028400_VGT_MAX_VTX_INDX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028404_VGT_MIN_VTX_INDX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028408_VGT_INDX_OFFSET, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A84_VGT_PRIMITIVEID_EN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0, 0, 0},
+ {PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0, 0, 0},
+};
+
+/*
+ * SHADER RESOURCE R600/R700
+ *
+ * Register one block of seven consecutive resource words. The template
+ * below describes RESOURCE0; every register offset is shifted by `offset`
+ * before the block is handed to r600_context_add_block(), whose result is
+ * returned unchanged.
+ */
+static int r600_state_resource_init(struct r600_context *ctx, u32 offset)
+{
+	struct r600_reg regs[] = {
+		{PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038000_RESOURCE0_WORD0, 0, 0, 0},
+		{PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038004_RESOURCE0_WORD1, 0, 0, 0},
+		{PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038008_RESOURCE0_WORD2, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
+		{PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_03800C_RESOURCE0_WORD3, 1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
+		{PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038010_RESOURCE0_WORD4, 0, 0, 0},
+		{PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038014_RESOURCE0_WORD5, 0, 0, 0},
+		{PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET, R_038018_RESOURCE0_WORD6, 0, 0, 0},
+	};
+	const unsigned count = Elements(regs);
+	unsigned idx;
+
+	/* rebase the template onto the requested resource slot */
+	for (idx = 0; idx < count; idx++)
+		regs[idx].offset += offset;
+
+	return r600_context_add_block(ctx, regs, count);
+}
+
+/*
+ * SHADER SAMPLER R600/R700
+ *
+ * Register one block of three sampler words. The template describes
+ * sampler 0; each register offset is shifted by `offset` before the block
+ * is passed to r600_context_add_block(), whose result is returned.
+ */
+static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
+{
+	struct r600_reg regs[] = {
+		{PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET, R_03C000_SQ_TEX_SAMPLER_WORD0_0, 0, 0, 0},
+		{PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET, R_03C004_SQ_TEX_SAMPLER_WORD1_0, 0, 0, 0},
+		{PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET, R_03C008_SQ_TEX_SAMPLER_WORD2_0, 0, 0, 0},
+	};
+	const unsigned count = Elements(regs);
+	unsigned idx;
+
+	/* rebase the template onto the requested sampler slot */
+	for (idx = 0; idx < count; idx++)
+		regs[idx].offset += offset;
+
+	return r600_context_add_block(ctx, regs, count);
+}
+
+/*
+ * SHADER SAMPLER BORDER R600/R700
+ *
+ * Register one block holding the four border colour registers (red, green,
+ * blue, alpha). The template describes PS sampler 0; each register offset
+ * is shifted by `offset` before the block is passed to
+ * r600_context_add_block(), whose result is returned.
+ */
+static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset)
+{
+	struct r600_reg regs[] = {
+		{PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A400_TD_PS_SAMPLER0_BORDER_RED, 0, 0, 0},
+		{PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0, 0},
+		{PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0, 0},
+		{PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0, 0},
+	};
+	const unsigned count = Elements(regs);
+	unsigned idx;
+
+	/* rebase the template onto the requested sampler slot */
+	for (idx = 0; idx < count; idx++)
+		regs[idx].offset += offset;
+
+	return r600_context_add_block(ctx, regs, count);
+}
+
+/*
+ * Register a block of 32 consecutive loop constants. `offset` is the index
+ * of the first constant (the caller uses 0 for PS and 32 for VS); each
+ * constant occupies 4 bytes above R600_LOOP_CONST_OFFSET. Returns the
+ * result of r600_context_add_block().
+ */
+static int r600_loop_const_init(struct r600_context *ctx, u32 offset)
+{
+	struct r600_reg regs[32];
+	const unsigned count = 32;
+	unsigned n;
+
+	for (n = 0; n < count; n++) {
+		struct r600_reg *reg = &regs[n];
+
+		reg->opcode = PKT3_SET_LOOP_CONST;
+		reg->offset_base = R600_LOOP_CONST_OFFSET;
+		reg->offset = R600_LOOP_CONST_OFFSET + ((offset + n) * 4);
+		/* loop constants carry no buffer and need no cache flush */
+		reg->need_bo = 0;
+		reg->flush_flags = 0;
+		reg->flush_mask = 0;
+	}
+
+	return r600_context_add_block(ctx, regs, count);
+}
+
+/*
+ * Tear down a context: free every register block and hash table, the
+ * command-stream arrays, and drop the fence buffer reference. The context
+ * is zeroed on return, so it must go through r600_context_init() again
+ * before reuse.
+ */
+void r600_context_fini(struct r600_context *ctx)
+{
+	struct r600_block *block;
+	struct r600_range *range;
+
+	for (int i = 0; i < 256; i++) {
+		/* nothing to walk if this range table was never allocated */
+		if (ctx->range[i].blocks == NULL)
+			continue;
+		for (int j = 0; j < (1 << ctx->hash_shift); j++) {
+			block = ctx->range[i].blocks[j];
+			if (block) {
+				/* clear the hash slot of each register of the block
+				 * before freeing it, so the block is freed only once
+				 */
+				for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) {
+					range = &ctx->range[CTX_RANGE_ID(ctx, offset)];
+					range->blocks[CTX_BLOCK_ID(ctx, offset)] = NULL;
+				}
+				free(block);
+			}
+		}
+		free(ctx->range[i].blocks);
+	}
+	/* flat block table and bo array are allocated by r600_context_init() */
+	free(ctx->blocks);
+	free(ctx->reloc);
+	free(ctx->bo);
+	free(ctx->pm4);
+	if (ctx->fence_bo) {
+		r600_bo_reference(ctx->radeon, &ctx->fence_bo, NULL);
+	}
+	memset(ctx, 0, sizeof(struct r600_context));
+}
+
+/*
+ * Initialize a context for the given device.
+ *
+ * Builds the register hash (256 ranges), registers every state block
+ * (config/context/control-constant tables, sampler borders, samplers,
+ * resources and loop constants for PS and VS), builds the flat block table
+ * and allocates the relocation, bo and PM4 arrays.
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * allocated so far is released.
+ */
+int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
+{
+	int r;
+
+	memset(ctx, 0, sizeof(struct r600_context));
+	ctx->radeon = radeon;
+	LIST_INITHEAD(&ctx->query_list);
+
+	/* initialize hash */
+	ctx->hash_size = 19;
+	ctx->hash_shift = 11;
+	for (int i = 0; i < 256; i++) {
+		ctx->range[i].start_offset = i << ctx->hash_shift;
+		ctx->range[i].end_offset = ((i + 1) << ctx->hash_shift) - 1;
+		ctx->range[i].blocks = calloc(1 << ctx->hash_shift, sizeof(void*));
+		if (ctx->range[i].blocks == NULL) {
+			/* no block has been registered yet: just release the
+			 * range tables allocated so far instead of leaking them
+			 */
+			while (i-- > 0) {
+				free(ctx->range[i].blocks);
+				ctx->range[i].blocks = NULL;
+			}
+			return -ENOMEM;
+		}
+	}
+
+	/* add blocks */
+	r = r600_context_add_block(ctx, r600_config_reg_list,
+					Elements(r600_config_reg_list));
+	if (r)
+		goto out_err;
+	r = r600_context_add_block(ctx, r600_context_reg_list,
+					Elements(r600_context_reg_list));
+	if (r)
+		goto out_err;
+	r = r600_context_add_block(ctx, r600_ctl_const_list,
+					Elements(r600_ctl_const_list));
+	if (r)
+		goto out_err;
+
+	/* PS SAMPLER BORDER */
+	for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) {
+		r = r600_state_sampler_border_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+
+	/* VS SAMPLER BORDER */
+	for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) {
+		r = r600_state_sampler_border_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+	/* PS SAMPLER */
+	for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) {
+		r = r600_state_sampler_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+	/* VS SAMPLER */
+	for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) {
+		r = r600_state_sampler_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+	/* PS RESOURCE */
+	for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) {
+		r = r600_state_resource_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+	/* VS RESOURCE */
+	for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) {
+		r = r600_state_resource_init(ctx, offset);
+		if (r)
+			goto out_err;
+	}
+
+	/* PS loop const */
+	r = r600_loop_const_init(ctx, 0);
+	if (r)
+		goto out_err;
+	/* VS loop const */
+	r = r600_loop_const_init(ctx, 32);
+	if (r)
+		goto out_err;
+
+	/* setup block table */
+	ctx->blocks = calloc(ctx->nblocks, sizeof(void*));
+	if (ctx->blocks == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	for (int i = 0, c = 0; i < 256; i++) {
+		for (int j = 0, add; j < (1 << ctx->hash_shift); j++) {
+			if (ctx->range[i].blocks[j]) {
+				/* a block appears in the hash once per register,
+				 * only record it the first time it is seen
+				 */
+				add = 1;
+				for (int k = 0; k < c; k++) {
+					if (ctx->blocks[k] == ctx->range[i].blocks[j]) {
+						add = 0;
+						break;
+					}
+				}
+				if (add) {
+					assert(c < ctx->nblocks);
+					ctx->blocks[c++] = ctx->range[i].blocks[j];
+					/* jump past the bytes covered by this block */
+					j += (ctx->range[i].blocks[j]->nreg << 2) - 1;
+				}
+			}
+		}
+	}
+
+	/* allocate cs variables */
+	ctx->nreloc = RADEON_CTX_MAX_PM4;
+	ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
+	if (ctx->reloc == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	ctx->bo = calloc(ctx->nreloc, sizeof(void *));
+	if (ctx->bo == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
+	ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
+	if (ctx->pm4 == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
+	/* save 16 dwords of space for the fence mechanism */
+	ctx->pm4_ndwords -= 16;
+
+	r = r600_context_init_fence(ctx);
+	if (r) {
+		goto out_err;
+	}
+
+	/* init dirty list */
+	LIST_INITHEAD(&ctx->dirty);
+	return 0;
+out_err:
+	r600_context_fini(ctx);
+	return r;
+}
+
+/*
+ * Emit a SURFACE_SYNC packet (7 dwords including the relocation NOP) for
+ * the buffer behind `rbo`, unless every flush requested in `flush_flags`
+ * is already recorded in the buffer's last_flush.
+ *
+ * `flush_mask` selects which flush bits stay recorded afterwards; a mask
+ * of 0 forgets them, so the next request flushes again.
+ *
+ * The caller must have reserved room in ctx->pm4; no space check is done.
+ */
+void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
+				unsigned flush_mask, struct r600_bo *rbo)
+{
+	struct radeon_bo *bo;
+
+	bo = r600_bo_get_bo(rbo);
+	/* skip if all requested flushes were already emitted for this bo;
+	 * extra bits recorded from earlier flushes must not force a re-flush
+	 */
+	if (!(~bo->last_flush & flush_flags)) {
+		bo->last_flush &= flush_mask;
+		return;
+	}
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
+	ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
+	/* size is programmed in 256-byte units, rounded up */
+	ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8;
+	ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
+	ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id;
+	bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
+}
+
+/*
+ * Write the relocation id of the buffer behind `rbo` into `*pm4`.
+ *
+ * The first time a buffer is used in the current command stream a new
+ * relocation entry is appended (read/write in GTT or VRAM) and the context
+ * takes a reference on the buffer in ctx->bo[]; later uses reuse that id.
+ *
+ * NOTE(review): ctx->creloc is not checked against ctx->nreloc here.
+ */
+void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo)
+{
+	struct radeon_bo *bo = r600_bo_get_bo(rbo);
+	struct r600_reloc *entry;
+
+	assert(bo != NULL);
+	if (!bo->reloc) {
+		entry = &ctx->reloc[ctx->creloc];
+		bo->reloc = entry;
+		/* id is the dword offset of the entry inside the reloc chunk */
+		bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4;
+		entry->handle = bo->handle;
+		entry->read_domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+		entry->write_domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+		entry->flags = 0;
+		radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
+		ctx->creloc++;
+	}
+	/* set PKT3 to point to proper reloc */
+	*pm4 = bo->reloc_id;
+}
+
+/*
+ * Apply every register of `state` to the context shadow copy.
+ *
+ * For each register the owning block is looked up through the hash, the
+ * masked bits are replaced by the new value, the buffer attached to the
+ * register (if the register has a relocation slot) is referenced, and the
+ * block is queued on the dirty list if it is not there already.
+ */
+void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state)
+{
+	for (int i = 0; i < state->nregs; i++) {
+		struct r600_range *range;
+		struct r600_block *block;
+		unsigned reg_idx;
+		unsigned reloc_idx;
+
+		range = &ctx->range[CTX_RANGE_ID(ctx, state->regs[i].offset)];
+		block = range->blocks[CTX_BLOCK_ID(ctx, state->regs[i].offset)];
+		reg_idx = (state->regs[i].offset - block->start_offset) >> 2;
+		block->reg[reg_idx] = (block->reg[reg_idx] & ~state->regs[i].mask) | state->regs[i].value;
+
+		/* a non-zero index means this register carries a relocation */
+		reloc_idx = block->pm4_bo_index[reg_idx];
+		if (reloc_idx) {
+			r600_bo_reference(ctx->radeon, &block->reloc[reloc_idx].bo, state->regs[i].bo);
+		}
+
+		if (block->status & R600_BLOCK_STATUS_DIRTY)
+			continue;
+
+		block->status |= R600_BLOCK_STATUS_ENABLED;
+		block->status |= R600_BLOCK_STATUS_DIRTY;
+		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+		LIST_ADDTAIL(&block->list,&ctx->dirty);
+	}
+}
+
+/*
+ * Program (or disable) the 7-word resource block that starts at register
+ * `offset`.
+ *
+ * state == NULL disables the block: its ENABLED/DIRTY flags are cleared,
+ * both buffer references are dropped and it is unlinked from the dirty list.
+ * Otherwise the seven values of `state` are copied into the block, the two
+ * relocation slots are re-pointed and the block is queued as dirty.
+ *
+ * NOTE(review): the disable path does not subtract the block from
+ * ctx->pm4_dirty_cdwords.
+ */
+static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
+{
+ struct r600_range *range;
+ struct r600_block *block;
+
+ range = &ctx->range[CTX_RANGE_ID(ctx, offset)];
+ block = range->blocks[CTX_BLOCK_ID(ctx, offset)];
+ if (state == NULL) {
+ block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
+ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
+ r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
+ LIST_DELINIT(&block->list);
+ return;
+ }
+ block->reg[0] = state->regs[0].value;
+ block->reg[1] = state->regs[1].value;
+ block->reg[2] = state->regs[2].value;
+ block->reg[3] = state->regs[3].value;
+ block->reg[4] = state->regs[4].value;
+ block->reg[5] = state->regs[5].value;
+ block->reg[6] = state->regs[6].value;
+ /* drop the previously attached buffers before attaching the new ones */
+ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
+ r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
+ if (state->regs[0].bo) {
+ /* VERTEX RESOURCE: we pretend there are 2 bos to relocate so
+ * that VERTEX and TEXTURE resources share a single code path
+ */
+ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
+ r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
+ } else {
+ /* TEXTURE RESOURCE: buffers come with words 2 and 3 */
+ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
+ r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
+ }
+ /* queue the block once; its dword cost is accounted when it becomes dirty */
+ if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+ block->status |= R600_BLOCK_STATUS_ENABLED;
+ block->status |= R600_BLOCK_STATUS_DIRTY;
+ ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+ LIST_ADDTAIL(&block->list,&ctx->dirty);
+ }
+}
+
+/* Set (or, with state == NULL, disable) pixel shader resource `rid`;
+ * resources are 0x1C bytes apart starting at R_038000.
+ */
+void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
+{
+	r600_context_pipe_state_set_resource(ctx, state,
+			R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid);
+}
+
+/* Set (or, with state == NULL, disable) vertex shader resource `rid`;
+ * the VS resources start 0x1180 bytes after the PS ones, 0x1C bytes apart.
+ */
+void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
+{
+	r600_context_pipe_state_set_resource(ctx, state,
+			R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid);
+}
+
+/*
+ * Program (or disable) the 3-word sampler block at register `offset`.
+ * state == NULL clears the block's ENABLED/DIRTY flags and unlinks it from
+ * the dirty list; otherwise the first three values of `state` are copied
+ * and the block is queued as dirty.
+ */
+static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
+{
+	struct r600_range *range = &ctx->range[CTX_RANGE_ID(ctx, offset)];
+	struct r600_block *block = range->blocks[CTX_BLOCK_ID(ctx, offset)];
+	unsigned w;
+
+	if (state == NULL) {
+		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
+		LIST_DELINIT(&block->list);
+		return;
+	}
+
+	/* sampler words 0..2 */
+	for (w = 0; w < 3; w++)
+		block->reg[w] = state->regs[w].value;
+
+	if (block->status & R600_BLOCK_STATUS_DIRTY)
+		return;
+
+	block->status |= R600_BLOCK_STATUS_ENABLED;
+	block->status |= R600_BLOCK_STATUS_DIRTY;
+	ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+	LIST_ADDTAIL(&block->list,&ctx->dirty);
+}
+
+/*
+ * Program (or disable) the 4-register border colour block at `offset`.
+ * state == NULL clears the block's ENABLED/DIRTY flags and unlinks it from
+ * the dirty list. A state with 3 registers or fewer carries no border
+ * colour and leaves the block untouched; otherwise registers 3..6 of
+ * `state` are copied and the block is queued as dirty.
+ */
+static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
+{
+	struct r600_range *range = &ctx->range[CTX_RANGE_ID(ctx, offset)];
+	struct r600_block *block = range->blocks[CTX_BLOCK_ID(ctx, offset)];
+	unsigned c;
+
+	if (state == NULL) {
+		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
+		LIST_DELINIT(&block->list);
+		return;
+	}
+	if (state->nregs <= 3) {
+		return;
+	}
+
+	/* border values follow the three sampler words in the state */
+	for (c = 0; c < 4; c++)
+		block->reg[c] = state->regs[3 + c].value;
+
+	if (block->status & R600_BLOCK_STATUS_DIRTY)
+		return;
+
+	block->status |= R600_BLOCK_STATUS_ENABLED;
+	block->status |= R600_BLOCK_STATUS_DIRTY;
+	ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+	LIST_ADDTAIL(&block->list,&ctx->dirty);
+}
+
+/* Set (or, with state == NULL, disable) pixel shader sampler `id`:
+ * first its sampler words (0xc bytes apart from 0x3C000), then its border
+ * colour (0x10 bytes apart from 0xA400).
+ */
+void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
+{
+	const unsigned sampler_offset = 0x0003C000 + id * 0xc;
+	const unsigned border_offset = 0x0000A400 + id * 0x10;
+
+	r600_context_pipe_state_set_sampler(ctx, state, sampler_offset);
+	r600_context_pipe_state_set_sampler_border(ctx, state, border_offset);
+}
+
+/* Set (or, with state == NULL, disable) vertex shader sampler `id`:
+ * first its sampler words (0xc bytes apart from 0x3C0D8), then its border
+ * colour (0x10 bytes apart from 0xA600).
+ */
+void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
+{
+	const unsigned sampler_offset = 0x0003C0D8 + id * 0xc;
+	const unsigned border_offset = 0x0000A600 + id * 0x10;
+
+	r600_context_pipe_state_set_sampler(ctx, state, sampler_offset);
+	r600_context_pipe_state_set_sampler_border(ctx, state, border_offset);
+}
+
+/*
+ * Return the buffer currently attached to the register at `offset`, or
+ * NULL when none is attached. The register must belong to a registered
+ * block.
+ */
+struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset)
+{
+	struct r600_range *range = &ctx->range[CTX_RANGE_ID(ctx, offset)];
+	struct r600_block *block = range->blocks[CTX_BLOCK_ID(ctx, offset)];
+	unsigned reloc_idx;
+
+	/* register index inside the block selects its relocation slot */
+	reloc_idx = block->pm4_bo_index[(offset - block->start_offset) >> 2];
+	return block->reloc[reloc_idx].bo;
+}
+
+/*
+ * Schedule one draw call.
+ *
+ * Computes how many dwords the draw needs, flushes the command stream when
+ * the pending dirty state plus the draw would not fit, emits every dirty
+ * block, then the draw packets, and finally a cache flush for each bound
+ * colour buffer and the depth buffer.
+ *
+ * If the dirty state alone is larger than the command buffer the draw is
+ * dropped with an error message.
+ */
+void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
+{
+ struct r600_bo *cb[8];
+ struct r600_bo *db;
+ /* non-indexed draw: INDEX_TYPE(2) + NUM_INSTANCES(2) + DRAW_INDEX_AUTO(3) + EVENT_WRITE(2) */
+ unsigned ndwords = 9;
+ struct r600_block *dirty_block = NULL;
+ struct r600_block *next_block;
+
+ if (draw->indices) {
+ /* indexed draw: DRAW_INDEX takes 5 dwords plus a 2 dword relocation NOP */
+ ndwords = 13;
+ /* make sure there is enough relocation space before scheduling draw */
+ if (ctx->creloc >= (ctx->nreloc - 1)) {
+ r600_context_flush(ctx);
+ }
+ }
+
+ /* find the bound colour buffers and depth buffer */
+ db = r600_context_reg_bo(ctx, R_02800C_DB_DEPTH_BASE);
+ cb[0] = r600_context_reg_bo(ctx, R_028040_CB_COLOR0_BASE);
+ cb[1] = r600_context_reg_bo(ctx, R_028044_CB_COLOR1_BASE);
+ cb[2] = r600_context_reg_bo(ctx, R_028048_CB_COLOR2_BASE);
+ cb[3] = r600_context_reg_bo(ctx, R_02804C_CB_COLOR3_BASE);
+ cb[4] = r600_context_reg_bo(ctx, R_028050_CB_COLOR4_BASE);
+ cb[5] = r600_context_reg_bo(ctx, R_028054_CB_COLOR5_BASE);
+ cb[6] = r600_context_reg_bo(ctx, R_028058_CB_COLOR6_BASE);
+ cb[7] = r600_context_reg_bo(ctx, R_02805C_CB_COLOR7_BASE);
+ /* each bound buffer costs one 7 dword flush after the draw */
+ for (int i = 0; i < 8; i++) {
+ if (cb[i]) {
+ ndwords += 7;
+ }
+ }
+ if (db)
+ ndwords += 7;
+
+ /* queries need some special values */
+ if (ctx->num_query_running) {
+ if (ctx->radeon->family >= CHIP_RV770) {
+ r600_context_reg(ctx,
+ R_028D0C_DB_RENDER_CONTROL,
+ S_028D0C_R700_PERFECT_ZPASS_COUNTS(1),
+ S_028D0C_R700_PERFECT_ZPASS_COUNTS(1));
+ }
+ r600_context_reg(ctx,
+ R_028D10_DB_RENDER_OVERRIDE,
+ S_028D10_NOOP_CULL_DISABLE(1),
+ S_028D10_NOOP_CULL_DISABLE(1));
+ }
+
+ if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
+ /* need to flush */
+ r600_context_flush(ctx);
+ }
+ /* at this point everything is flushed and ctx->pm4_cdwords = 0 */
+ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
+ R600_ERR("context is too big to be scheduled\n");
+ return;
+ }
+
+ /* enough room to copy packet */
+ LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty,list) {
+ r600_context_block_emit_dirty(ctx, dirty_block);
+ }
+
+ /* draw packet */
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
+ if (draw->indices) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
+ ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
+ ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ /* placeholder, overwritten with the relocation id just below */
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices);
+ } else {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
+ ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
+ ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
+ }
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
+
+ /* flush color buffer; a flush mask of 0 means nothing stays recorded as flushed */
+ for (int i = 0; i < 8; i++) {
+ if (cb[i]) {
+ r600_context_bo_flush(ctx,
+ (S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
+ S_0085F0_CB_ACTION_ENA(1),
+ 0, cb[i]);
+ }
+ }
+ if (db) {
+ r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1), 0, db);
+ }
+
+ /* all dirty state have been scheduled in current cs */
+ ctx->pm4_dirty_cdwords = 0;
+}
+
+/*
+ * Submit the pending command stream to the kernel and restart the context.
+ *
+ * Does nothing when no dword is pending. Otherwise: running queries are
+ * suspended, a fence is appended (using the space reserved at init time),
+ * the IB and relocation chunks are submitted, per-buffer relocation and
+ * flush tracking is reset, queries are resumed, and every enabled block is
+ * marked dirty so that it is re-emitted by the next draw.
+ *
+ * A submission failure is reported but cannot be returned to the caller;
+ * the context is restarted in either case.
+ */
+void r600_context_flush(struct r600_context *ctx)
+{
+	struct drm_radeon_cs drmib;
+	struct drm_radeon_cs_chunk chunks[2];
+	uint64_t chunk_array[2];
+	unsigned fence;
+	int r;
+
+	if (!ctx->pm4_cdwords)
+		return;
+
+	/* suspend queries */
+	r600_context_queries_suspend(ctx);
+
+	radeon_bo_pbmgr_flush_maps(ctx->radeon->kman);
+
+	/* emit fence */
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT | (5 << 8);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);
+	ctx->pm4[ctx->pm4_cdwords++] = ctx->fence;
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->fence_bo);
+
+#if 1
+	/* emit cs */
+	/* do not hand uninitialized stack bytes to the kernel */
+	memset(&drmib, 0, sizeof(drmib));
+	drmib.num_chunks = 2;
+	drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
+	chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
+	chunks[0].length_dw = ctx->pm4_cdwords;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
+	chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
+	chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
+	chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
+	chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
+	r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
+				sizeof(struct drm_radeon_cs));
+	if (r) {
+		R600_ERR("command stream submission failed\n");
+	}
+#endif
+
+	r600_context_update_fenced_list(ctx);
+
+	fence = ctx->fence + 1;
+	if (fence < ctx->fence) {
+		/* wrap around */
+		fence = 1;
+		r600_context_fence_wraparound(ctx, fence);
+	}
+	ctx->fence = fence;
+
+	/* restart: forget relocation/flush tracking and drop the references
+	 * taken by r600_context_bo_reloc()
+	 */
+	for (int i = 0; i < ctx->creloc; i++) {
+		ctx->bo[i]->reloc = NULL;
+		ctx->bo[i]->last_flush = 0;
+		radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
+	}
+	ctx->creloc = 0;
+	ctx->pm4_dirty_cdwords = 0;
+	ctx->pm4_cdwords = 0;
+
+	/* resume queries */
+	r600_context_queries_resume(ctx);
+
+	/* set all valid groups as dirty so they get re-emitted on
+	 * next draw command
+	 */
+	for (int i = 0; i < ctx->nblocks; i++) {
+		if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) {
+			if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) {
+				LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty);
+			}
+			ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords;
+			ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY;
+		}
+	}
+}
+
+/*
+ * Dump the pending command stream to `file` as a bof object containing:
+ *   "device_id" - the device id of the radeon instance
+ *   "reloc"     - the relocation table
+ *   "pm4"       - the PM4 dwords emitted so far
+ *   "bo"        - an array with size, handle and contents of every buffer
+ *                 referenced by a relocation
+ *
+ * Any failure silently aborts the dump. Every temporary is set back to
+ * NULL once it has been attached to its parent, so the shared exit path
+ * below only releases what is still pending.
+ */
+void r600_context_dump_bof(struct r600_context *ctx, const char *file)
+{
+ bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
+ unsigned i;
+
+ root = device_id = bcs = blob = array = bo = size = handle = NULL;
+ root = bof_object();
+ if (root == NULL)
+ goto out_err;
+ device_id = bof_int32(ctx->radeon->device);
+ if (device_id == NULL)
+ goto out_err;
+ if (bof_object_set(root, "device_id", device_id))
+ goto out_err;
+ bof_decref(device_id);
+ device_id = NULL;
+ /* dump relocs */
+ /* NOTE(review): 16 is presumably sizeof(struct r600_reloc) - confirm */
+ blob = bof_blob(ctx->creloc * 16, ctx->reloc);
+ if (blob == NULL)
+ goto out_err;
+ if (bof_object_set(root, "reloc", blob))
+ goto out_err;
+ bof_decref(blob);
+ blob = NULL;
+ /* dump cs */
+ blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4);
+ if (blob == NULL)
+ goto out_err;
+ if (bof_object_set(root, "pm4", blob))
+ goto out_err;
+ bof_decref(blob);
+ blob = NULL;
+ /* dump bo */
+ array = bof_array();
+ if (array == NULL)
+ goto out_err;
+ for (i = 0; i < ctx->creloc; i++) {
+ struct radeon_bo *rbo = ctx->bo[i];
+ bo = bof_object();
+ if (bo == NULL)
+ goto out_err;
+ size = bof_int32(rbo->size);
+ if (size == NULL)
+ goto out_err;
+ if (bof_object_set(bo, "size", size))
+ goto out_err;
+ bof_decref(size);
+ size = NULL;
+ handle = bof_int32(rbo->handle);
+ if (handle == NULL)
+ goto out_err;
+ if (bof_object_set(bo, "handle", handle))
+ goto out_err;
+ bof_decref(handle);
+ handle = NULL;
+ /* the buffer is mapped only while its contents are copied into the blob */
+ radeon_bo_map(ctx->radeon, rbo);
+ blob = bof_blob(rbo->size, rbo->data);
+ radeon_bo_unmap(ctx->radeon, rbo);
+ if (blob == NULL)
+ goto out_err;
+ if (bof_object_set(bo, "data", blob))
+ goto out_err;
+ bof_decref(blob);
+ blob = NULL;
+ if (bof_array_append(array, bo))
+ goto out_err;
+ bof_decref(bo);
+ bo = NULL;
+ }
+ if (bof_object_set(root, "bo", array))
+ goto out_err;
+ bof_dump_file(root, file);
+ /* the success path falls through: the label is the common cleanup */
+out_err:
+ bof_decref(blob);
+ bof_decref(array);
+ bof_decref(bo);
+ bof_decref(size);
+ bof_decref(handle);
+ bof_decref(device_id);
+ bof_decref(root);
+}
+
+/*
+ * Accumulate the results stored in the query buffer into query->result and
+ * reset query->num_results.
+ *
+ * The buffer is read as groups of four dwords: a 64-bit start value
+ * followed by a 64-bit end value. A pair is counted only when bit 63 is
+ * set in both values.
+ *
+ * NOTE(review): num_results is used here as a dword count, while
+ * r600_query_begin()/r600_query_end() add it to the buffer address as a
+ * byte offset - confirm the intended unit.
+ * NOTE(review): the pointer returned by r600_bo_map() is not checked.
+ */
+static void r600_query_result(struct r600_context *ctx, struct r600_query *query)
+{
+ u64 start, end;
+ u32 *results;
+ int i;
+
+ results = r600_bo_map(ctx->radeon, query->buffer, 0, NULL);
+ for (i = 0; i < query->num_results; i += 4) {
+ /* rebuild each 64-bit value from its low and high dword */
+ start = (u64)results[i] | (u64)results[i + 1] << 32;
+ end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
+ if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
+ query->result += end - start;
+ }
+ }
+ r600_bo_unmap(ctx->radeon, query->buffer);
+ query->num_results = 0;
+}
+
+/*
+ * Start (or restart) a query: emit the ZPASS_DONE event that writes the
+ * start value at the current position of the query buffer.
+ *
+ * The command stream is flushed first when it cannot hold both the begin
+ * and the matching end packet. When the result buffer is considered full,
+ * the stream is flushed and the results collected so far are accumulated.
+ *
+ * NOTE(review): num_results is added to the buffer address as a byte
+ * offset here but compared against a dword count in the fullness test and
+ * in r600_query_result() - confirm the intended unit.
+ */
+void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
+{
+	/* query request needs 6 dwords for begin + 6 dwords for end */
+	if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
+		/* need to flush */
+		r600_context_flush(ctx);
+	}
+
+	/* if query buffer is full force a flush */
+	if (query->num_results >= ((query->buffer_size >> 2) - 2)) {
+		r600_context_flush(ctx);
+		r600_query_result(ctx, query);
+	}
+
+	/* emit begin query */
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
+	ctx->pm4[ctx->pm4_cdwords++] = query->num_results + r600_bo_offset(query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+
+	query->state |= R600_QUERY_STATE_STARTED;
+	/* clear (not toggle) ENDED: a query that never ended must not end up
+	 * flagged as both started and ended
+	 */
+	query->state &= ~R600_QUERY_STATE_ENDED;
+	ctx->num_query_running++;
+}
+
+/*
+ * End a running query: emit the ZPASS_DONE event that writes the end value
+ * 8 bytes after the start value written by r600_query_begin(), then
+ * advance num_results by 16 to the next slot.
+ *
+ * No space check is done here; r600_query_begin() reserves room for the
+ * end packet.
+ */
+void r600_query_end(struct r600_context *ctx, struct r600_query *query)
+{
+	/* emit end query */
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
+	ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8 + r600_bo_offset(query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = 0;
+	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+
+	query->num_results += 16;
+	/* clear (not toggle) STARTED so that ending a query can never leave
+	 * it flagged as started
+	 */
+	query->state &= ~R600_QUERY_STATE_STARTED;
+	query->state |= R600_QUERY_STATE_ENDED;
+	ctx->num_query_running--;
+}
+
+/*
+ * Create a query object with a 4096 byte result buffer and link it into
+ * the context query list. Only PIPE_QUERY_OCCLUSION_COUNTER is supported.
+ * Returns NULL for any other type or when an allocation fails.
+ */
+struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
+{
+	struct r600_query *query;
+
+	if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
+		return NULL;
+
+	query = calloc(1, sizeof(*query));
+	if (!query)
+		return NULL;
+
+	query->type = query_type;
+	query->buffer_size = 4096;
+	query->buffer = r600_bo(ctx->radeon, query->buffer_size, 1, 0);
+	if (query->buffer) {
+		LIST_ADDTAIL(&query->list, &ctx->query_list);
+		return query;
+	}
+
+	/* no result buffer: give the query object back */
+	free(query);
+	return NULL;
+}
+
+/* Unlink a query from the context, release its result buffer and free it. */
+void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
+{
+	LIST_DELINIT(&query->list);
+	r600_bo_reference(ctx->radeon, &query->buffer, NULL);
+	free(query);
+}
+
+/*
+ * Store the accumulated value of a query as a uint64_t in `vresult` and
+ * reset the accumulator. The command stream is flushed first when results
+ * are still pending. `wait` is not used; the function always returns TRUE.
+ */
+boolean r600_context_query_result(struct r600_context *ctx,
+				struct r600_query *query,
+				boolean wait, void *vresult)
+{
+	uint64_t *out = (uint64_t*)vresult;
+
+	/* pending results only become readable once the stream is submitted */
+	if (query->num_results != 0) {
+		r600_context_flush(ctx);
+	}
+	r600_query_result(ctx, query);
+
+	*out = query->result;
+	query->result = 0;
+	return TRUE;
+}
+
+/* End every started query and flag it as suspended so that
+ * r600_context_queries_resume() can restart it.
+ */
+void r600_context_queries_suspend(struct r600_context *ctx)
+{
+	struct r600_query *query;
+
+	LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
+		if (!(query->state & R600_QUERY_STATE_STARTED))
+			continue;
+		r600_query_end(ctx, query);
+		query->state |= R600_QUERY_STATE_SUSPENDED;
+	}
+}
+
+/* Restart every query flagged as suspended and flip its SUSPENDED flag. */
+void r600_context_queries_resume(struct r600_context *ctx)
+{
+	struct r600_query *query;
+
+	LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
+		if (!(query->state & R600_QUERY_STATE_SUSPENDED))
+			continue;
+		r600_query_begin(ctx, query);
+		query->state ^= R600_QUERY_STATE_SUSPENDED;
+	}
+}
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 189644f31c..b5bd7bd92c 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -30,49 +30,157 @@
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
+#include <pipebuffer/pb_bufmgr.h>
+#include "util/u_double_list.h"
#include "r600.h"
-
struct radeon {
int fd;
int refcount;
unsigned device;
unsigned family;
enum chip_class chip_class;
- boolean use_mem_constant; /* true for evergreen */
+ struct pb_manager *kman; /* kernel bo manager */
+ struct pb_manager *cman; /* cached bo manager */
+ struct r600_tiling_info tiling_info;
};
struct radeon *r600_new(int fd, unsigned device);
void r600_delete(struct radeon *r600);
struct r600_reg {
+ unsigned opcode;
+ unsigned offset_base;
+ unsigned offset;
unsigned need_bo;
unsigned flush_flags;
- unsigned offset;
+ unsigned flush_mask;
+};
+
+struct radeon_bo { /* object type returned by radeon_bo() and radeon_bo_pb_get_bo() */
+ struct pipe_reference reference;
+ unsigned handle;
+ unsigned size;
+ unsigned alignment;
+ unsigned map_count; /* ++ in radeon_bo_map(), -- in radeon_bo_unmap(); unsigned, so it can never compare below zero */
+ void *data;
+ struct list_head fencedlist; /* presumably links the bo into a list of fenced buffers -- TODO confirm */
+ unsigned fence;
+ struct r600_context *ctx;
+ boolean shared;
+ struct r600_reloc *reloc;
+ unsigned reloc_id;
+ unsigned last_flush;
+};
+
+struct r600_bo { /* wrapper around a pb_buffer; see r600_bo_get_bo() */
+ struct pipe_reference reference;
+ struct pb_buffer *pb; /* handed to radeon_bo_pb_get_bo() to reach the struct radeon_bo */
+ unsigned size;
+ unsigned tiling_flags;
+ unsigned kernel_pitch;
};
+
/* radeon_pciid.c */
unsigned radeon_family_from_device(unsigned device);
+/* r600_drm.c */
+struct radeon *radeon_decref(struct radeon *radeon);
+
+/* radeon_bo.c */
+struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
+ unsigned size, unsigned alignment, void *ptr);
+void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
+ struct radeon_bo *src);
+int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
+int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain);
+void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr);
+int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo);
+int radeon_bo_get_tiling_flags(struct radeon *radeon,
+ struct radeon_bo *bo,
+ uint32_t *tiling_flags,
+ uint32_t *pitch);
+
+/* radeon_bo_pb.c */
+struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
+struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon);
+struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr,
+ uint32_t handle);
+
+/* r600_hw_context.c */
+int r600_context_init_fence(struct r600_context *ctx);
+void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo);
+void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
+ unsigned flush_mask, struct r600_bo *rbo);
+struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset);
+int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg);
+
+/* r600_bo.c */
+unsigned r600_bo_get_handle(struct r600_bo *bo);
+unsigned r600_bo_get_size(struct r600_bo *bo);
+/* Return the radeon_bo obtained from the pb_buffer held in bo->pb. */
+static INLINE struct radeon_bo *r600_bo_get_bo(struct r600_bo *bo)
+{
+	struct pb_buffer *pb = bo->pb;
+
+	return radeon_bo_pb_get_bo(pb);
+}
+
+#define CTX_RANGE_ID(ctx, offset) (((offset) >> (ctx)->hash_shift) & 255) /* index into ctx->range[]: offset bits above hash_shift, limited to 0..255 */
+#define CTX_BLOCK_ID(ctx, offset) ((offset) & ((1 << (ctx)->hash_shift) - 1)) /* index into range->blocks[]: the low hash_shift bits of offset */
-static void inline r600_context_reg(struct r600_context *ctx, unsigned group_id,
+static void inline r600_context_reg(struct r600_context *ctx,
unsigned offset, unsigned value,
unsigned mask)
{
- struct r600_group *group = &ctx->groups[group_id];
- struct r600_group_block *block;
+ struct r600_range *range;
+ struct r600_block *block;
unsigned id;
- id = group->offset_block_id[(offset - group->start_offset) >> 2];
- block = &group->blocks[id];
+ range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; /* the block owning a register is found from its offset alone: range first ... */
+ block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; /* ... then the block pointer stored for that offset inside the range */
id = (offset - block->start_offset) >> 2;
block->reg[id] &= ~mask;
block->reg[id] |= value;
if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
ctx->pm4_dirty_cdwords += block->pm4_ndwords;
+ block->status |= R600_BLOCK_STATUS_ENABLED; /* status bits are set only when the block was not already dirty */
+ block->status |= R600_BLOCK_STATUS_DIRTY;
+ LIST_ADDTAIL(&block->list,&ctx->dirty); /* queue the block on ctx->dirty once, on the clean-to-dirty transition */
+ }
+}
+
+static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) /* append one block's PM4 words to ctx->pm4 after handling its relocations */
+{
+ int id;
+
+ for (int j = 0; j < block->nreg; j++) {
+ if (block->pm4_bo_index[j]) { /* a non-zero entry means register j has a relocation */
+ /* find relocation */
+ id = block->pm4_bo_index[j]; /* the same entry doubles as the index into block->reloc[] */
+ r600_context_bo_reloc(ctx,
+ &block->pm4[block->reloc[id].bo_pm4_index],
+ block->reloc[id].bo);
+ r600_context_bo_flush(ctx,
+ block->reloc[id].flush_flags,
+ block->reloc[id].flush_mask,
+ block->reloc[id].bo);
+ }
}
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
+ memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); /* pm4_ndwords counts dwords, hence the * 4 for the byte length */
+ ctx->pm4_cdwords += block->pm4_ndwords;
+ block->status ^= R600_BLOCK_STATUS_DIRTY; /* XOR toggles the bit: it clears DIRTY only if DIRTY was set on entry */
+ LIST_DELINIT(&block->list); /* unlink from the list block->list is on (r600_context_reg() adds it to ctx->dirty) */
+}
+
+/* Count one more mapping of bo by bumping bo->map_count; always returns 0. */
+static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo)
+{
+	bo->map_count += 1;
+	return 0;
+}
+
+/*
+ * Drop one mapping of bo by decrementing bo->map_count.
+ *
+ * map_count is unsigned, so a ">= 0" check made after the decrement can
+ * never fail and an unbalanced unmap would silently wrap the counter.
+ * The imbalance is caught instead by asserting that the counter is
+ * non-zero before it is decremented.  With assertions compiled out the
+ * behaviour is unchanged.
+ */
+static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo)
+{
+	assert(bo->map_count > 0);
+	bo->map_count--;
}
#endif
diff --git a/src/gallium/winsys/r600/drm/r600_state.c b/src/gallium/winsys/r600/drm/r600_state.c
deleted file mode 100644
index 25dd8fe7d8..0000000000
--- a/src/gallium/winsys/r600/drm/r600_state.c
+++ /dev/null
@@ -1,662 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include "radeon_priv.h"
-#include "r600d.h"
-
-#include "util/u_memory.h"
-
-static int r600_state_pm4_resource(struct radeon_state *state);
-static int r600_state_pm4_cb0(struct radeon_state *state);
-static int r600_state_pm4_vgt(struct radeon_state *state);
-static int r600_state_pm4_db(struct radeon_state *state);
-static int r600_state_pm4_shader(struct radeon_state *state);
-static int r600_state_pm4_draw(struct radeon_state *state);
-static int r600_state_pm4_config(struct radeon_state *state);
-static int r600_state_pm4_generic(struct radeon_state *state);
-static int r600_state_pm4_query_begin(struct radeon_state *state);
-static int r600_state_pm4_query_end(struct radeon_state *state);
-static int r700_state_pm4_config(struct radeon_state *state);
-static int r600_state_pm4_db_flush(struct radeon_state *state);
-static int r600_state_pm4_cb_flush(struct radeon_state *state);
-
-static int eg_state_pm4_vgt(struct radeon_state *state);
-
-#include "r600_states.h"
-#include "eg_states.h"
-
-
-#define SUB_NONE(param) { { 0, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) } }
-#define SUB_PS(param) { R600_SHADER_PS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
-#define SUB_VS(param) { R600_SHADER_VS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
-#define SUB_GS(param) { R600_SHADER_GS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
-#define SUB_FS(param) { R600_SHADER_FS, R600_names_##param, (sizeof(R600_names_##param)/sizeof(struct radeon_register)) }
-
-#define EG_SUB_NONE(param) { { 0, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) } }
-#define EG_SUB_PS(param) { R600_SHADER_PS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) }
-#define EG_SUB_VS(param) { R600_SHADER_VS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) }
-#define EG_SUB_GS(param) { R600_SHADER_GS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) }
-#define EG_SUB_FS(param) { R600_SHADER_FS, EG_names_##param, (sizeof(EG_names_##param)/sizeof(struct radeon_register)) }
-
-/* some of these are overriden at runtime for R700 */
-struct radeon_stype_info r600_stypes[] = {
- { R600_STATE_CONFIG, 1, 0, r600_state_pm4_config, SUB_NONE(CONFIG), },
- { R600_STATE_CB_CNTL, 1, 0, r600_state_pm4_generic, SUB_NONE(CB_CNTL) },
- { R600_STATE_RASTERIZER, 1, 0, r600_state_pm4_generic, SUB_NONE(RASTERIZER) },
- { R600_STATE_VIEWPORT, 1, 0, r600_state_pm4_generic, SUB_NONE(VIEWPORT) },
- { R600_STATE_SCISSOR, 1, 0, r600_state_pm4_generic, SUB_NONE(SCISSOR) },
- { R600_STATE_BLEND, 1, 0, r600_state_pm4_generic, SUB_NONE(BLEND), },
- { R600_STATE_DSA, 1, 0, r600_state_pm4_generic, SUB_NONE(DSA), },
- { R600_STATE_SHADER, 1, 0, r600_state_pm4_shader, { SUB_PS(PS_SHADER), SUB_VS(VS_SHADER) } },
- { R600_STATE_CBUF, 1, 0, r600_state_pm4_shader, { SUB_PS(PS_CBUF), SUB_VS(VS_CBUF) } },
- { R600_STATE_CONSTANT, 256, 0x10, r600_state_pm4_generic, { SUB_PS(PS_CONSTANT), SUB_VS(VS_CONSTANT) } },
- { R600_STATE_RESOURCE, 160, 0x1c, r600_state_pm4_resource, { SUB_PS(PS_RESOURCE), SUB_VS(VS_RESOURCE), SUB_GS(GS_RESOURCE), SUB_FS(FS_RESOURCE)} },
- { R600_STATE_SAMPLER, 18, 0xc, r600_state_pm4_generic, { SUB_PS(PS_SAMPLER), SUB_VS(VS_SAMPLER), SUB_GS(GS_SAMPLER) } },
- { R600_STATE_SAMPLER_BORDER, 18, 0x10, r600_state_pm4_generic, { SUB_PS(PS_SAMPLER_BORDER), SUB_VS(VS_SAMPLER_BORDER), SUB_GS(GS_SAMPLER_BORDER) } },
- { R600_STATE_CB0, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB0) },
- { R600_STATE_CB1, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB1) },
- { R600_STATE_CB2, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB2) },
- { R600_STATE_CB3, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB3) },
- { R600_STATE_CB4, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB4) },
- { R600_STATE_CB5, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB5) },
- { R600_STATE_CB6, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB6) },
- { R600_STATE_CB7, 1, 0, r600_state_pm4_cb0, SUB_NONE(CB7) },
- { R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, SUB_NONE(VGT_EVENT) },
- { R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, SUB_NONE(VGT_EVENT) },
- { R600_STATE_DB, 1, 0, r600_state_pm4_db, SUB_NONE(DB) },
- { R600_STATE_UCP, 1, 0, r600_state_pm4_generic, SUB_NONE(UCP) },
- { R600_STATE_VGT, 1, 0, r600_state_pm4_vgt, SUB_NONE(VGT) },
- { R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, SUB_NONE(DRAW) },
- { R600_STATE_CB_FLUSH, 1, 0, r600_state_pm4_cb_flush, SUB_NONE(CB_FLUSH) },
- { R600_STATE_DB_FLUSH, 1, 0, r600_state_pm4_db_flush, SUB_NONE(DB_FLUSH) },
-};
-#define STYPES_SIZE Elements(r600_stypes)
-
-struct radeon_stype_info eg_stypes[] = {
- { R600_STATE_CONFIG, 1, 0, r700_state_pm4_config, EG_SUB_NONE(CONFIG), },
- { R600_STATE_CB_CNTL, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(CB_CNTL) },
- { R600_STATE_RASTERIZER, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(RASTERIZER) },
- { R600_STATE_VIEWPORT, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(VIEWPORT) },
- { R600_STATE_SCISSOR, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(SCISSOR) },
- { R600_STATE_BLEND, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(BLEND), },
- { R600_STATE_DSA, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(DSA), },
- { R600_STATE_SHADER, 1, 0, r600_state_pm4_shader, { EG_SUB_PS(PS_SHADER), EG_SUB_VS(VS_SHADER) } },
- { R600_STATE_CBUF, 1, 0, r600_state_pm4_shader, { EG_SUB_PS(PS_CBUF), EG_SUB_VS(VS_CBUF) } },
- { R600_STATE_RESOURCE, 176, 0x20, r600_state_pm4_resource, { EG_SUB_PS(PS_RESOURCE), EG_SUB_VS(VS_RESOURCE), EG_SUB_GS(GS_RESOURCE), EG_SUB_FS(FS_RESOURCE)} },
- { R600_STATE_SAMPLER, 18, 0xc, r600_state_pm4_generic, { EG_SUB_PS(PS_SAMPLER), EG_SUB_VS(VS_SAMPLER), EG_SUB_GS(GS_SAMPLER) } },
- { R600_STATE_SAMPLER_BORDER, 18, 0, r600_state_pm4_generic, { EG_SUB_PS(PS_SAMPLER_BORDER), EG_SUB_VS(VS_SAMPLER_BORDER), EG_SUB_GS(GS_SAMPLER_BORDER) } },
- { R600_STATE_CB0, 11, 0x3c, r600_state_pm4_generic, EG_SUB_NONE(CB) },
- { R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, EG_SUB_NONE(VGT_EVENT) },
- { R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, EG_SUB_NONE(VGT_EVENT) },
- { R600_STATE_DB, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(DB) },
- { R600_STATE_UCP, 1, 0, r600_state_pm4_generic, EG_SUB_NONE(UCP) },
- { R600_STATE_VGT, 1, 0, eg_state_pm4_vgt, EG_SUB_NONE(VGT) },
- { R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, EG_SUB_NONE(DRAW) },
- { R600_STATE_CB_FLUSH, 1, 0, r600_state_pm4_cb_flush, EG_SUB_NONE(CB_FLUSH) },
- { R600_STATE_DB_FLUSH, 1, 0, r600_state_pm4_db_flush, EG_SUB_NONE(DB_FLUSH) },
-
-};
-#define EG_STYPES_SIZE Elements(eg_stypes)
-
-static const struct radeon_register *get_regs(struct radeon_state *state)
-{
- return state->stype->reginfo[state->shader_index].regs;
-}
-
-/*
- * r600/r700 state functions
- */
-static int r600_state_pm4_bytecode(struct radeon_state *state, unsigned offset, unsigned id, unsigned nreg)
-{
- const struct radeon_register *regs = get_regs(state);
- unsigned i;
- int r;
-
- if (!offset) {
- fprintf(stderr, "%s invalid register for state %d %d\n",
- __func__, state->stype->stype, id);
- return -EINVAL;
- }
- if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, nreg);
- state->pm4[state->cpm4++] = (offset - R600_CONFIG_REG_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- for (i = 0; i < nreg; i++) {
- if (regs[id + i].need_reloc) {
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]);
- }
- }
- return 0;
- }
- if (offset >= R600_CONTEXT_REG_OFFSET && offset < R600_CONTEXT_REG_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONTEXT_REG, nreg);
- state->pm4[state->cpm4++] = (offset - R600_CONTEXT_REG_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- for (i = 0; i < nreg; i++) {
- if (regs[id + i].need_reloc) {
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]);
- }
- }
- return 0;
- }
- if (offset >= R600_ALU_CONST_OFFSET && offset < R600_ALU_CONST_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_ALU_CONST, nreg);
- state->pm4[state->cpm4++] = (offset - R600_ALU_CONST_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- return 0;
- }
- if (offset >= R600_SAMPLER_OFFSET && offset < R600_SAMPLER_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_SAMPLER, nreg);
- state->pm4[state->cpm4++] = (offset - R600_SAMPLER_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- return 0;
- }
- fprintf(stderr, "%s unsupported offset 0x%08X\n", __func__, offset);
- return -EINVAL;
-}
-
-static int eg_state_pm4_bytecode(struct radeon_state *state, unsigned offset, unsigned id, unsigned nreg)
-{
- const struct radeon_register *regs = get_regs(state);
- unsigned i;
- int r;
-
- if (!offset) {
- fprintf(stderr, "%s invalid register for state %d %d\n",
- __func__, state->stype->stype, id);
- return -EINVAL;
- }
- if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, nreg);
- state->pm4[state->cpm4++] = (offset - R600_CONFIG_REG_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- for (i = 0; i < nreg; i++) {
- if (regs[id + i].need_reloc) {
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]);
- }
- }
- return 0;
- }
- if (offset >= R600_CONTEXT_REG_OFFSET && offset < R600_CONTEXT_REG_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONTEXT_REG, nreg);
- state->pm4[state->cpm4++] = (offset - R600_CONTEXT_REG_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- for (i = 0; i < nreg; i++) {
- if (regs[id + i].need_reloc) {
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, regs[id + i].bo_id);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[regs[id + i].bo_id]);
- }
- }
- return 0;
- }
- if (offset >= EG_RESOURCE_OFFSET && offset < EG_RESOURCE_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_RESOURCE, nreg);
- state->pm4[state->cpm4++] = (offset - EG_RESOURCE_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- return 0;
- }
- if (offset >= R600_SAMPLER_OFFSET && offset < R600_SAMPLER_END) {
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_SAMPLER, nreg);
- state->pm4[state->cpm4++] = (offset - R600_SAMPLER_OFFSET) >> 2;
- for (i = 0; i < nreg; i++) {
- state->pm4[state->cpm4++] = state->states[id + i];
- }
- return 0;
- }
- fprintf(stderr, "%s unsupported offset 0x%08X\n", __func__, offset);
- return -EINVAL;
-}
-
-
-static int r600_state_pm4_generic(struct radeon_state *state)
-{
- const struct radeon_register *regs = get_regs(state);
- unsigned i, offset, nreg, coffset, loffset, soffset;
- unsigned start;
- int r;
-
- if (!state->nstates)
- return 0;
- soffset = state->id * state->stype->stride;
- offset = loffset = regs[0].offset + soffset;
- start = 0;
- for (i = 1, nreg = 1; i < state->nstates; i++) {
- coffset = regs[i].offset + soffset;
- if (coffset == (loffset + 4)) {
- nreg++;
- loffset = coffset;
- } else {
- if (state->radeon->family >= CHIP_CEDAR)
- r = eg_state_pm4_bytecode(state, offset, start, nreg);
- else
- r = r600_state_pm4_bytecode(state, offset, start, nreg);
- if (r) {
- fprintf(stderr, "%s invalid 0x%08X %d\n", __func__, start, nreg);
- return r;
- }
- offset = loffset = coffset;
- nreg = 1;
- start = i;
- }
- }
- if (state->radeon->family >= CHIP_CEDAR)
- r = eg_state_pm4_bytecode(state, offset, start, nreg);
- else
- r = r600_state_pm4_bytecode(state, offset, start, nreg);
- return r;
-}
-
-static void r600_state_pm4_with_flush(struct radeon_state *state, u32 flags, int bufs_are_cbs)
-{
- unsigned i, j, add, size;
- uint32_t flags_cb;
-
- state->nreloc = 0;
- for (i = 0; i < state->nbo; i++) {
- for (j = 0, add = 1; j < state->nreloc; j++) {
- if (state->bo[state->reloc_bo_id[j]] == state->bo[i]) {
- add = 0;
- break;
- }
- }
- if (add) {
- state->reloc_bo_id[state->nreloc++] = i;
- }
- }
- for (i = 0; i < state->nreloc; i++) {
- flags_cb = flags;
- size = (radeon_ws_bo_get_size(state->bo[state->reloc_bo_id[i]]) + 255) >> 8;
- state->pm4[state->cpm4++] = PKT3(PKT3_SURFACE_SYNC, 3);
- if (bufs_are_cbs)
- flags_cb |= S_0085F0_CB0_DEST_BASE_ENA(1 << i);
- state->pm4[state->cpm4++] = flags_cb;
- state->pm4[state->cpm4++] = size;
- state->pm4[state->cpm4++] = 0x00000000;
- state->pm4[state->cpm4++] = 0x0000000A;
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- state->reloc_pm4_id[i] = state->cpm4;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[state->reloc_bo_id[i]]);
- }
-}
-
-static int r600_state_pm4_cb0(struct radeon_state *state)
-{
- int r;
- uint32_t sbu;
- r = r600_state_pm4_generic(state);
- if (r)
- return r;
-
- sbu = (2 << (state->stype->stype - R600_STATE_CB0));
- state->pm4[state->cpm4++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0);
- state->pm4[state->cpm4++] = sbu;
- return 0;
-}
-
-static int r600_state_pm4_db(struct radeon_state *state)
-{
- int r;
-
- r = r600_state_pm4_generic(state);
- if (r)
- return r;
- state->pm4[state->cpm4++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0);
- state->pm4[state->cpm4++] = 0x00000001;
- return 0;
-}
-
-static int r600_state_pm4_config(struct radeon_state *state)
-{
- state->pm4[state->cpm4++] = PKT3(PKT3_START_3D_CMDBUF, 0);
- state->pm4[state->cpm4++] = 0x00000000;
- state->pm4[state->cpm4++] = PKT3(PKT3_CONTEXT_CONTROL, 1);
- state->pm4[state->cpm4++] = 0x80000000;
- state->pm4[state->cpm4++] = 0x80000000;
- state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0);
- state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, 1);
- state->pm4[state->cpm4++] = 0x00000010;
- state->pm4[state->cpm4++] = 0x00028000;
- return r600_state_pm4_generic(state);
-}
-
-static int r600_state_pm4_query_begin(struct radeon_state *state)
-{
- int r;
-
- state->cpm4 = 0;
- state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 2);
- state->pm4[state->cpm4++] = EVENT_TYPE_ZPASS_DONE;
- state->pm4[state->cpm4++] = state->states[0];
- state->pm4[state->cpm4++] = 0x0;
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, 0);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]);
- return 0;
-}
-
-static int r600_state_pm4_query_end(struct radeon_state *state)
-{
- int r;
-
- state->cpm4 = 0;
- state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 2);
- state->pm4[state->cpm4++] = EVENT_TYPE_ZPASS_DONE;
- state->pm4[state->cpm4++] = state->states[0];
- state->pm4[state->cpm4++] = 0x0;
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, 0);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]);
- return 0;
-}
-
-static int r700_state_pm4_config(struct radeon_state *state)
-{
- state->pm4[state->cpm4++] = PKT3(PKT3_CONTEXT_CONTROL, 1);
- state->pm4[state->cpm4++] = 0x80000000;
- state->pm4[state->cpm4++] = 0x80000000;
- state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0);
- state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_CONFIG_REG, 1);
- state->pm4[state->cpm4++] = 0x00000010;
- state->pm4[state->cpm4++] = 0x00028000;
- return r600_state_pm4_generic(state);
-}
-
-static int r600_state_pm4_shader(struct radeon_state *state)
-{
- r600_state_pm4_with_flush(state, S_0085F0_SH_ACTION_ENA(1), 0);
- return r600_state_pm4_generic(state);
-}
-
-static int eg_state_pm4_vgt(struct radeon_state *state)
-{
- int r;
- r = eg_state_pm4_bytecode(state, R_028400_VGT_MAX_VTX_INDX, EG_VGT__VGT_MAX_VTX_INDX, 1);
- if (r)
- return r;
- r = eg_state_pm4_bytecode(state, R_028404_VGT_MIN_VTX_INDX, EG_VGT__VGT_MIN_VTX_INDX, 1);
- if (r)
- return r;
- r = eg_state_pm4_bytecode(state, R_028408_VGT_INDX_OFFSET, EG_VGT__VGT_INDX_OFFSET, 1);
- if (r)
- return r;
- r = eg_state_pm4_bytecode(state, R_008958_VGT_PRIMITIVE_TYPE, EG_VGT__VGT_PRIMITIVE_TYPE, 1);
- if (r)
- return r;
- state->pm4[state->cpm4++] = PKT3(PKT3_INDEX_TYPE, 0);
- state->pm4[state->cpm4++] = state->states[EG_VGT__VGT_DMA_INDEX_TYPE];
- state->pm4[state->cpm4++] = PKT3(PKT3_NUM_INSTANCES, 0);
- state->pm4[state->cpm4++] = state->states[EG_VGT__VGT_DMA_NUM_INSTANCES];
- return 0;
-}
-
-static int r600_state_pm4_vgt(struct radeon_state *state)
-{
- int r;
-
- r = r600_state_pm4_bytecode(state, R_028400_VGT_MAX_VTX_INDX, R600_VGT__VGT_MAX_VTX_INDX, 1);
- if (r)
- return r;
- r = r600_state_pm4_bytecode(state, R_028404_VGT_MIN_VTX_INDX, R600_VGT__VGT_MIN_VTX_INDX, 1);
- if (r)
- return r;
- r = r600_state_pm4_bytecode(state, R_028408_VGT_INDX_OFFSET, R600_VGT__VGT_INDX_OFFSET, 1);
- if (r)
- return r;
- r = r600_state_pm4_bytecode(state, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX, 1);
- if (r)
- return r;
- r = r600_state_pm4_bytecode(state, R_008958_VGT_PRIMITIVE_TYPE, R600_VGT__VGT_PRIMITIVE_TYPE, 1);
- if (r)
- return r;
- state->pm4[state->cpm4++] = PKT3(PKT3_INDEX_TYPE, 0);
- state->pm4[state->cpm4++] = state->states[R600_VGT__VGT_DMA_INDEX_TYPE];
- state->pm4[state->cpm4++] = PKT3(PKT3_NUM_INSTANCES, 0);
- state->pm4[state->cpm4++] = state->states[R600_VGT__VGT_DMA_NUM_INSTANCES];
- return 0;
-}
-
-static int r600_state_pm4_draw(struct radeon_state *state)
-{
- int r;
-
- if (state->nbo) {
- state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX, 3);
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DMA_BASE];
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DMA_BASE_HI];
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR];
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, 0);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]);
- } else {
- state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];
- state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR];
- }
- state->pm4[state->cpm4++] = PKT3(PKT3_EVENT_WRITE, 0);
- state->pm4[state->cpm4++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
-
- return 0;
-}
-
-static int r600_state_pm4_cb_flush(struct radeon_state *state)
-{
- if (!state->nbo)
- return 0;
-
- r600_state_pm4_with_flush(state, S_0085F0_CB_ACTION_ENA(1), 1);
-
- return 0;
-}
-
-static int r600_state_pm4_db_flush(struct radeon_state *state)
-{
- if (!state->nbo)
- return 0;
-
- r600_state_pm4_with_flush(state, S_0085F0_DB_ACTION_ENA(1) |
- S_0085F0_DB_DEST_BASE_ENA(1), 0);
-
- return 0;
-}
-
-static int r600_state_pm4_resource(struct radeon_state *state)
-{
- u32 flags, type, nbo, offset, soffset;
- int r, nres;
- const struct radeon_register *regs = get_regs(state);
-
- soffset = state->id * state->stype->stride;
- if (state->radeon->family >= CHIP_CEDAR)
- type = G_038018_TYPE(state->states[7]);
- else
- type = G_038018_TYPE(state->states[6]);
-
- switch (type) {
- case 2:
- flags = S_0085F0_TC_ACTION_ENA(1);
- nbo = 2;
- break;
- case 3:
- flags = S_0085F0_VC_ACTION_ENA(1);
- nbo = 1;
- break;
- default:
- return 0;
- }
- if (state->nbo != nbo) {
- fprintf(stderr, "%s need %d bo got %d\n", __func__, nbo, state->nbo);
- return -EINVAL;
- }
- r600_state_pm4_with_flush(state, flags, 0);
- offset = regs[0].offset + soffset;
- if (state->radeon->family >= CHIP_CEDAR)
- nres = 8;
- else
- nres = 7;
- state->pm4[state->cpm4++] = PKT3(PKT3_SET_RESOURCE, nres);
- if (state->radeon->family >= CHIP_CEDAR)
- state->pm4[state->cpm4++] = (offset - EG_RESOURCE_OFFSET) >> 2;
- else
- state->pm4[state->cpm4++] = (offset - R_038000_SQ_TEX_RESOURCE_WORD0_0) >> 2;
- state->pm4[state->cpm4++] = state->states[0];
- state->pm4[state->cpm4++] = state->states[1];
- state->pm4[state->cpm4++] = state->states[2];
- state->pm4[state->cpm4++] = state->states[3];
- state->pm4[state->cpm4++] = state->states[4];
- state->pm4[state->cpm4++] = state->states[5];
- state->pm4[state->cpm4++] = state->states[6];
- if (state->radeon->family >= CHIP_CEDAR)
- state->pm4[state->cpm4++] = state->states[7];
-
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, 0);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[0]);
- if (type == 2) {
- state->pm4[state->cpm4++] = PKT3(PKT3_NOP, 0);
- r = radeon_state_reloc(state, state->cpm4, 1);
- if (r)
- return r;
- state->pm4[state->cpm4++] = radeon_ws_bo_get_handle(state->bo[1]);
- }
- return 0;
-}
-
-
-static void r600_modify_type_array(struct radeon *radeon)
-{
- int i;
- switch (radeon->family) {
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- break;
- default:
- return;
- }
-
- /* r700 needs some mods */
- for (i = 0; i < radeon->nstype; i++) {
- struct radeon_stype_info *info = &radeon->stype[i];
-
- switch(info->stype) {
- case R600_STATE_CONFIG:
- info->pm4 = r700_state_pm4_config;
- break;
- case R600_STATE_CB0:
- info->pm4 = r600_state_pm4_generic;
- break;
- case R600_STATE_DB:
- info->pm4 = r600_state_pm4_generic;
- break;
- };
- }
-}
-
-static void build_types_array(struct radeon *radeon, struct radeon_stype_info *types, int size)
-{
- int i, j;
- int id = 0;
-
- for (i = 0; i < size; i++) {
- types[i].base_id = id;
- types[i].npm4 = 128;
- if (types[i].reginfo[0].shader_type == 0) {
- id += types[i].num;
- } else {
- for (j = 0; j < R600_SHADER_MAX; j++) {
- if (types[i].reginfo[j].shader_type)
- id += types[i].num;
- }
- }
- }
- radeon->max_states = id;
- radeon->stype = types;
- radeon->nstype = size;
-}
-
-static void r600_build_types_array(struct radeon *radeon)
-{
- build_types_array(radeon, r600_stypes, STYPES_SIZE);
- r600_modify_type_array(radeon);
-}
-
-static void eg_build_types_array(struct radeon *radeon)
-{
- build_types_array(radeon, eg_stypes, EG_STYPES_SIZE);
-}
-
-int r600_init(struct radeon *radeon)
-{
- if (radeon->family >= CHIP_CEDAR)
- eg_build_types_array(radeon);
- else
- r600_build_types_array(radeon);
- return 0;
-}
diff --git a/src/gallium/winsys/r600/drm/r600_state2.c b/src/gallium/winsys/r600/drm/r600_state2.c
deleted file mode 100644
index 97aecc7a42..0000000000
--- a/src/gallium/winsys/r600/drm/r600_state2.c
+++ /dev/null
@@ -1,1317 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "xf86drm.h"
-#include "r600.h"
-#include "r600d.h"
-#include "r600_priv.h"
-#include "radeon_drm.h"
-#include "bof.h"
-#include "pipe/p_compiler.h"
-#include "util/u_inlines.h"
-#include <pipebuffer/pb_bufmgr.h>
-
-#define GROUP_FORCE_NEW_BLOCK 0
-struct radeon_ws_bo {
- struct pipe_reference reference;
- struct pb_buffer *pb;
-};
-
-struct radeon_bo {
- struct pipe_reference reference;
- unsigned handle;
- unsigned size;
- unsigned alignment;
- unsigned map_count;
- void *data;
-};
-struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
-int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo);
-void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo);
-void radeon_bo_reference(struct radeon *radeon,
- struct radeon_bo **dst,
- struct radeon_bo *src);
-
-unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo);
-
-/* queries */
-static void r600_context_queries_suspend(struct r600_context *ctx);
-static void r600_context_queries_resume(struct r600_context *ctx);
-
-static int r600_group_id_register_offset(struct r600_context *ctx, unsigned offset)
-{
- for (int i = 0; i < ctx->ngroups; i++) {
- if (offset >= ctx->groups[i].start_offset && offset < ctx->groups[i].end_offset) {
- return i;
- }
- }
- return -1;
-}
-
-int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode)
-{
- struct r600_group_block *block, *tmp;
- struct r600_group *group;
- int group_id, id;
-
- for (unsigned i = 0, n = 0; i < nreg; i += n) {
- u32 j, r;
-
- /* register that need relocation are in their own group */
- n = 1;
- if (!reg[i].need_bo) {
- /* find number of consecutive registers */
- for (j = i + 1, r = reg[i].offset + 4, n = 1; j < (nreg - i); j++, n++, r+=4) {
- if (reg[i].need_bo || r != reg[j].offset) {
- break;
- }
- }
- }
-
- /* ignore new block balise */
- if (reg[i].offset == GROUP_FORCE_NEW_BLOCK)
- continue;
-
- /* find into which group this block is */
- group_id = r600_group_id_register_offset(ctx, reg[i].offset);
- assert(group_id >= 0);
- group = &ctx->groups[group_id];
-
- /* allocate new block */
- tmp = realloc(group->blocks, (group->nblocks + 1) * sizeof(struct r600_group_block));
- if (tmp == NULL) {
- return -ENOMEM;
- }
- /* update reg pointer */
- if (tmp != group->blocks) {
- for (int j = 0; j < group->nblocks; j++) {
- tmp[j].reg = &tmp[j].pm4[2];
- }
- }
- group->blocks = tmp;
- block = &group->blocks[group->nblocks++];
- for (int j = 0; j < n; j++) {
- group->offset_block_id[((reg[i].offset - group->start_offset) >> 2) + j] = group->nblocks - 1;
- }
-
- /* initialize block */
- memset(block, 0, sizeof(struct r600_group_block));
- block->start_offset = reg[i].offset;
- block->pm4[block->pm4_ndwords++] = PKT3(opcode, n);
- block->pm4[block->pm4_ndwords++] = (block->start_offset - group->start_offset) >> 2;
- block->reg = &block->pm4[block->pm4_ndwords];
- block->pm4_ndwords += n;
- block->nreg = n;
- for (j = 0; j < n; j++) {
- if (reg[i+j].need_bo) {
- block->nbo++;
- assert(block->nbo < R600_BLOCK_MAX_BO);
- block->pm4_bo_index[j] = block->nbo;
- block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
- block->pm4[block->pm4_ndwords++] = 0x00000000;
- block->reloc[block->nbo].bo_pm4_index[block->reloc[block->nbo].nreloc++] = block->pm4_ndwords - 1;
- }
- }
- for (j = 0; j < n; j++) {
- if (reg[i+j].flush_flags) {
- block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
- block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags;
- block->pm4[block->pm4_ndwords++] = 0xFFFFFFFF;
- block->pm4[block->pm4_ndwords++] = 0x00000000;
- block->pm4[block->pm4_ndwords++] = 0x0000000A;
- block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
- block->pm4[block->pm4_ndwords++] = 0x00000000;
- id = block->pm4_bo_index[j];
- block->reloc[id].bo_pm4_index[block->reloc[id].nreloc++] = block->pm4_ndwords - 1;
- }
- }
- /* check that we stay in limit */
- assert(block->pm4_ndwords < R600_BLOCK_MAX_REG);
- }
- return 0;
-}
-
-int r600_group_init(struct r600_group *group, unsigned start_offset, unsigned end_offset)
-{
- group->start_offset = start_offset;
- group->end_offset = end_offset;
- group->nblocks = 0;
- group->blocks = NULL;
- group->offset_block_id = calloc((end_offset - start_offset) >> 2, sizeof(unsigned));
- if (group->offset_block_id == NULL)
- return -ENOMEM;
- return 0;
-}
-
-static void r600_group_fini(struct r600_group *group)
-{
- free(group->offset_block_id);
- free(group->blocks);
-}
-
-/* R600/R700 configuration */
-static const struct r600_reg r600_config_reg_list[] = {
- {0, 0, R_008958_VGT_PRIMITIVE_TYPE},
- {0, 0, R_008C00_SQ_CONFIG},
- {0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1},
- {0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2},
- {0, 0, R_008C0C_SQ_THREAD_RESOURCE_MGMT},
- {0, 0, R_008C10_SQ_STACK_RESOURCE_MGMT_1},
- {0, 0, R_008C14_SQ_STACK_RESOURCE_MGMT_2},
- {0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ},
- {0, 0, R_009508_TA_CNTL_AUX},
- {0, 0, R_009714_VC_ENHANCE},
- {0, 0, R_009830_DB_DEBUG},
- {0, 0, R_009838_DB_WATERMARKS},
-};
-
-static const struct r600_reg r600_context_reg_list[] = {
- {0, 0, R_028350_SX_MISC},
- {0, 0, R_0286C8_SPI_THREAD_GROUPING},
- {0, 0, R_0288A8_SQ_ESGS_RING_ITEMSIZE},
- {0, 0, R_0288AC_SQ_GSVS_RING_ITEMSIZE},
- {0, 0, R_0288B0_SQ_ESTMP_RING_ITEMSIZE},
- {0, 0, R_0288B4_SQ_GSTMP_RING_ITEMSIZE},
- {0, 0, R_0288B8_SQ_VSTMP_RING_ITEMSIZE},
- {0, 0, R_0288BC_SQ_PSTMP_RING_ITEMSIZE},
- {0, 0, R_0288C0_SQ_FBUF_RING_ITEMSIZE},
- {0, 0, R_0288C4_SQ_REDUC_RING_ITEMSIZE},
- {0, 0, R_0288C8_SQ_GS_VERT_ITEMSIZE},
- {0, 0, R_028A10_VGT_OUTPUT_PATH_CNTL},
- {0, 0, R_028A14_VGT_HOS_CNTL},
- {0, 0, R_028A18_VGT_HOS_MAX_TESS_LEVEL},
- {0, 0, R_028A1C_VGT_HOS_MIN_TESS_LEVEL},
- {0, 0, R_028A20_VGT_HOS_REUSE_DEPTH},
- {0, 0, R_028A24_VGT_GROUP_PRIM_TYPE},
- {0, 0, R_028A28_VGT_GROUP_FIRST_DECR},
- {0, 0, R_028A2C_VGT_GROUP_DECR},
- {0, 0, R_028A30_VGT_GROUP_VECT_0_CNTL},
- {0, 0, R_028A34_VGT_GROUP_VECT_1_CNTL},
- {0, 0, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL},
- {0, 0, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL},
- {0, 0, R_028A40_VGT_GS_MODE},
- {0, 0, R_028A4C_PA_SC_MODE_CNTL},
- {0, 0, R_028AB0_VGT_STRMOUT_EN},
- {0, 0, R_028AB4_VGT_REUSE_OFF},
- {0, 0, R_028AB8_VGT_VTX_CNT_EN},
- {0, 0, R_028B20_VGT_STRMOUT_BUFFER_EN},
- {0, 0, R_028028_DB_STENCIL_CLEAR},
- {0, 0, R_02802C_DB_DEPTH_CLEAR},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028040_CB_COLOR0_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280A0_CB_COLOR0_INFO},
- {0, 0, R_028060_CB_COLOR0_SIZE},
- {0, 0, R_028080_CB_COLOR0_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280E0_CB_COLOR0_FRAG},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280C0_CB_COLOR0_TILE},
- {0, 0, R_028100_CB_COLOR0_MASK},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028044_CB_COLOR1_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280A4_CB_COLOR1_INFO},
- {0, 0, R_028064_CB_COLOR1_SIZE},
- {0, 0, R_028084_CB_COLOR1_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280E4_CB_COLOR1_FRAG},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280C4_CB_COLOR1_TILE},
- {0, 0, R_028104_CB_COLOR1_MASK},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028048_CB_COLOR2_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280A8_CB_COLOR2_INFO},
- {0, 0, R_028068_CB_COLOR2_SIZE},
- {0, 0, R_028088_CB_COLOR2_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280E8_CB_COLOR2_FRAG},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280C8_CB_COLOR2_TILE},
- {0, 0, R_028108_CB_COLOR2_MASK},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_02804C_CB_COLOR3_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280AC_CB_COLOR3_INFO},
- {0, 0, R_02806C_CB_COLOR3_SIZE},
- {0, 0, R_02808C_CB_COLOR3_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280EC_CB_COLOR3_FRAG},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280CC_CB_COLOR3_TILE},
- {0, 0, R_02810C_CB_COLOR3_MASK},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028050_CB_COLOR4_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280B0_CB_COLOR4_INFO},
- {0, 0, R_028070_CB_COLOR4_SIZE},
- {0, 0, R_028090_CB_COLOR4_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280F0_CB_COLOR4_FRAG},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280D0_CB_COLOR4_TILE},
- {0, 0, R_028110_CB_COLOR4_MASK},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028054_CB_COLOR5_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280B4_CB_COLOR5_INFO},
- {0, 0, R_028074_CB_COLOR5_SIZE},
- {0, 0, R_028094_CB_COLOR5_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280F4_CB_COLOR5_FRAG},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280D4_CB_COLOR5_TILE},
- {0, 0, R_028114_CB_COLOR5_MASK},
- {1, 0, R_028058_CB_COLOR6_BASE},
- {1, 0, R_0280B8_CB_COLOR6_INFO},
- {0, 0, R_028078_CB_COLOR6_SIZE},
- {0, 0, R_028098_CB_COLOR6_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280F8_CB_COLOR6_FRAG},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280D8_CB_COLOR6_TILE},
- {0, 0, R_028118_CB_COLOR6_MASK},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_02805C_CB_COLOR7_BASE},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_0280BC_CB_COLOR7_INFO},
- {0, 0, R_02807C_CB_COLOR7_SIZE},
- {0, 0, R_02809C_CB_COLOR7_VIEW},
- {1, 0, R_0280FC_CB_COLOR7_FRAG},
- {1, 0, R_0280DC_CB_COLOR7_TILE},
- {0, 0, R_02811C_CB_COLOR7_MASK},
- {0, 0, R_028120_CB_CLEAR_RED},
- {0, 0, R_028124_CB_CLEAR_GREEN},
- {0, 0, R_028128_CB_CLEAR_BLUE},
- {0, 0, R_02812C_CB_CLEAR_ALPHA},
- {0, 0, R_02823C_CB_SHADER_MASK},
- {0, 0, R_028238_CB_TARGET_MASK},
- {0, 0, R_028410_SX_ALPHA_TEST_CONTROL},
- {0, 0, R_028414_CB_BLEND_RED},
- {0, 0, R_028418_CB_BLEND_GREEN},
- {0, 0, R_02841C_CB_BLEND_BLUE},
- {0, 0, R_028420_CB_BLEND_ALPHA},
- {0, 0, R_028424_CB_FOG_RED},
- {0, 0, R_028428_CB_FOG_GREEN},
- {0, 0, R_02842C_CB_FOG_BLUE},
- {0, 0, R_028430_DB_STENCILREFMASK},
- {0, 0, R_028434_DB_STENCILREFMASK_BF},
- {0, 0, R_028438_SX_ALPHA_REF},
- {0, 0, R_0286DC_SPI_FOG_CNTL},
- {0, 0, R_0286E0_SPI_FOG_FUNC_SCALE},
- {0, 0, R_0286E4_SPI_FOG_FUNC_BIAS},
- {0, 0, R_028780_CB_BLEND0_CONTROL},
- {0, 0, R_028784_CB_BLEND1_CONTROL},
- {0, 0, R_028788_CB_BLEND2_CONTROL},
- {0, 0, R_02878C_CB_BLEND3_CONTROL},
- {0, 0, R_028790_CB_BLEND4_CONTROL},
- {0, 0, R_028794_CB_BLEND5_CONTROL},
- {0, 0, R_028798_CB_BLEND6_CONTROL},
- {0, 0, R_02879C_CB_BLEND7_CONTROL},
- {0, 0, R_0287A0_CB_SHADER_CONTROL},
- {0, 0, R_028800_DB_DEPTH_CONTROL},
- {0, 0, R_028804_CB_BLEND_CONTROL},
- {0, 0, R_028808_CB_COLOR_CONTROL},
- {0, 0, R_02880C_DB_SHADER_CONTROL},
- {0, 0, R_028C04_PA_SC_AA_CONFIG},
- {0, 0, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX},
- {0, 0, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX},
- {0, 0, R_028C30_CB_CLRCMP_CONTROL},
- {0, 0, R_028C34_CB_CLRCMP_SRC},
- {0, 0, R_028C38_CB_CLRCMP_DST},
- {0, 0, R_028C3C_CB_CLRCMP_MSK},
- {0, 0, R_028C48_PA_SC_AA_MASK},
- {0, 0, R_028D2C_DB_SRESULTS_COMPARE_STATE1},
- {0, 0, R_028D44_DB_ALPHA_TO_MASK},
- {1, 0, R_02800C_DB_DEPTH_BASE},
- {0, 0, R_028000_DB_DEPTH_SIZE},
- {0, 0, R_028004_DB_DEPTH_VIEW},
- {0, 0, GROUP_FORCE_NEW_BLOCK},
- {1, 0, R_028010_DB_DEPTH_INFO},
- {0, 0, R_028D0C_DB_RENDER_CONTROL},
- {0, 0, R_028D10_DB_RENDER_OVERRIDE},
- {0, 0, R_028D24_DB_HTILE_SURFACE},
- {0, 0, R_028D30_DB_PRELOAD_CONTROL},
- {0, 0, R_028D34_DB_PREFETCH_LIMIT},
- {0, 0, R_028030_PA_SC_SCREEN_SCISSOR_TL},
- {0, 0, R_028034_PA_SC_SCREEN_SCISSOR_BR},
- {0, 0, R_028200_PA_SC_WINDOW_OFFSET},
- {0, 0, R_028204_PA_SC_WINDOW_SCISSOR_TL},
- {0, 0, R_028208_PA_SC_WINDOW_SCISSOR_BR},
- {0, 0, R_02820C_PA_SC_CLIPRECT_RULE},
- {0, 0, R_028210_PA_SC_CLIPRECT_0_TL},
- {0, 0, R_028214_PA_SC_CLIPRECT_0_BR},
- {0, 0, R_028218_PA_SC_CLIPRECT_1_TL},
- {0, 0, R_02821C_PA_SC_CLIPRECT_1_BR},
- {0, 0, R_028220_PA_SC_CLIPRECT_2_TL},
- {0, 0, R_028224_PA_SC_CLIPRECT_2_BR},
- {0, 0, R_028228_PA_SC_CLIPRECT_3_TL},
- {0, 0, R_02822C_PA_SC_CLIPRECT_3_BR},
- {0, 0, R_028230_PA_SC_EDGERULE},
- {0, 0, R_028240_PA_SC_GENERIC_SCISSOR_TL},
- {0, 0, R_028244_PA_SC_GENERIC_SCISSOR_BR},
- {0, 0, R_028250_PA_SC_VPORT_SCISSOR_0_TL},
- {0, 0, R_028254_PA_SC_VPORT_SCISSOR_0_BR},
- {0, 0, R_0282D0_PA_SC_VPORT_ZMIN_0},
- {0, 0, R_0282D4_PA_SC_VPORT_ZMAX_0},
- {0, 0, R_02843C_PA_CL_VPORT_XSCALE_0},
- {0, 0, R_028440_PA_CL_VPORT_XOFFSET_0},
- {0, 0, R_028444_PA_CL_VPORT_YSCALE_0},
- {0, 0, R_028448_PA_CL_VPORT_YOFFSET_0},
- {0, 0, R_02844C_PA_CL_VPORT_ZSCALE_0},
- {0, 0, R_028450_PA_CL_VPORT_ZOFFSET_0},
- {0, 0, R_0286D4_SPI_INTERP_CONTROL_0},
- {0, 0, R_028810_PA_CL_CLIP_CNTL},
- {0, 0, R_028814_PA_SU_SC_MODE_CNTL},
- {0, 0, R_028818_PA_CL_VTE_CNTL},
- {0, 0, R_02881C_PA_CL_VS_OUT_CNTL},
- {0, 0, R_028820_PA_CL_NANINF_CNTL},
- {0, 0, R_028A00_PA_SU_POINT_SIZE},
- {0, 0, R_028A04_PA_SU_POINT_MINMAX},
- {0, 0, R_028A08_PA_SU_LINE_CNTL},
- {0, 0, R_028A0C_PA_SC_LINE_STIPPLE},
- {0, 0, R_028A48_PA_SC_MPASS_PS_CNTL},
- {0, 0, R_028C00_PA_SC_LINE_CNTL},
- {0, 0, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ},
- {0, 0, R_028C10_PA_CL_GB_VERT_DISC_ADJ},
- {0, 0, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ},
- {0, 0, R_028C18_PA_CL_GB_HORZ_DISC_ADJ},
- {0, 0, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL},
- {0, 0, R_028DFC_PA_SU_POLY_OFFSET_CLAMP},
- {0, 0, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE},
- {0, 0, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET},
- {0, 0, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE},
- {0, 0, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET},
- {0, 0, R_028E20_PA_CL_UCP0_X},
- {0, 0, R_028E24_PA_CL_UCP0_Y},
- {0, 0, R_028E28_PA_CL_UCP0_Z},
- {0, 0, R_028E2C_PA_CL_UCP0_W},
- {0, 0, R_028E30_PA_CL_UCP1_X},
- {0, 0, R_028E34_PA_CL_UCP1_Y},
- {0, 0, R_028E38_PA_CL_UCP1_Z},
- {0, 0, R_028E3C_PA_CL_UCP1_W},
- {0, 0, R_028E40_PA_CL_UCP2_X},
- {0, 0, R_028E44_PA_CL_UCP2_Y},
- {0, 0, R_028E48_PA_CL_UCP2_Z},
- {0, 0, R_028E4C_PA_CL_UCP2_W},
- {0, 0, R_028E50_PA_CL_UCP3_X},
- {0, 0, R_028E54_PA_CL_UCP3_Y},
- {0, 0, R_028E58_PA_CL_UCP3_Z},
- {0, 0, R_028E5C_PA_CL_UCP3_W},
- {0, 0, R_028E60_PA_CL_UCP4_X},
- {0, 0, R_028E64_PA_CL_UCP4_Y},
- {0, 0, R_028E68_PA_CL_UCP4_Z},
- {0, 0, R_028E6C_PA_CL_UCP4_W},
- {0, 0, R_028E70_PA_CL_UCP5_X},
- {0, 0, R_028E74_PA_CL_UCP5_Y},
- {0, 0, R_028E78_PA_CL_UCP5_Z},
- {0, 0, R_028E7C_PA_CL_UCP5_W},
- {0, 0, R_028380_SQ_VTX_SEMANTIC_0},
- {0, 0, R_028384_SQ_VTX_SEMANTIC_1},
- {0, 0, R_028388_SQ_VTX_SEMANTIC_2},
- {0, 0, R_02838C_SQ_VTX_SEMANTIC_3},
- {0, 0, R_028390_SQ_VTX_SEMANTIC_4},
- {0, 0, R_028394_SQ_VTX_SEMANTIC_5},
- {0, 0, R_028398_SQ_VTX_SEMANTIC_6},
- {0, 0, R_02839C_SQ_VTX_SEMANTIC_7},
- {0, 0, R_0283A0_SQ_VTX_SEMANTIC_8},
- {0, 0, R_0283A4_SQ_VTX_SEMANTIC_9},
- {0, 0, R_0283A8_SQ_VTX_SEMANTIC_10},
- {0, 0, R_0283AC_SQ_VTX_SEMANTIC_11},
- {0, 0, R_0283B0_SQ_VTX_SEMANTIC_12},
- {0, 0, R_0283B4_SQ_VTX_SEMANTIC_13},
- {0, 0, R_0283B8_SQ_VTX_SEMANTIC_14},
- {0, 0, R_0283BC_SQ_VTX_SEMANTIC_15},
- {0, 0, R_0283C0_SQ_VTX_SEMANTIC_16},
- {0, 0, R_0283C4_SQ_VTX_SEMANTIC_17},
- {0, 0, R_0283C8_SQ_VTX_SEMANTIC_18},
- {0, 0, R_0283CC_SQ_VTX_SEMANTIC_19},
- {0, 0, R_0283D0_SQ_VTX_SEMANTIC_20},
- {0, 0, R_0283D4_SQ_VTX_SEMANTIC_21},
- {0, 0, R_0283D8_SQ_VTX_SEMANTIC_22},
- {0, 0, R_0283DC_SQ_VTX_SEMANTIC_23},
- {0, 0, R_0283E0_SQ_VTX_SEMANTIC_24},
- {0, 0, R_0283E4_SQ_VTX_SEMANTIC_25},
- {0, 0, R_0283E8_SQ_VTX_SEMANTIC_26},
- {0, 0, R_0283EC_SQ_VTX_SEMANTIC_27},
- {0, 0, R_0283F0_SQ_VTX_SEMANTIC_28},
- {0, 0, R_0283F4_SQ_VTX_SEMANTIC_29},
- {0, 0, R_0283F8_SQ_VTX_SEMANTIC_30},
- {0, 0, R_0283FC_SQ_VTX_SEMANTIC_31},
- {0, 0, R_028614_SPI_VS_OUT_ID_0},
- {0, 0, R_028618_SPI_VS_OUT_ID_1},
- {0, 0, R_02861C_SPI_VS_OUT_ID_2},
- {0, 0, R_028620_SPI_VS_OUT_ID_3},
- {0, 0, R_028624_SPI_VS_OUT_ID_4},
- {0, 0, R_028628_SPI_VS_OUT_ID_5},
- {0, 0, R_02862C_SPI_VS_OUT_ID_6},
- {0, 0, R_028630_SPI_VS_OUT_ID_7},
- {0, 0, R_028634_SPI_VS_OUT_ID_8},
- {0, 0, R_028638_SPI_VS_OUT_ID_9},
- {0, 0, R_0286C4_SPI_VS_OUT_CONFIG},
- {1, 0, R_028858_SQ_PGM_START_VS},
- {0, S_0085F0_SH_ACTION_ENA(1), R_028868_SQ_PGM_RESOURCES_VS},
- {1, 0, R_028894_SQ_PGM_START_FS},
- {0, S_0085F0_SH_ACTION_ENA(1), R_0288A4_SQ_PGM_RESOURCES_FS},
- {0, 0, R_0288D0_SQ_PGM_CF_OFFSET_VS},
- {0, 0, R_0288DC_SQ_PGM_CF_OFFSET_FS},
- {0, 0, R_028644_SPI_PS_INPUT_CNTL_0},
- {0, 0, R_028648_SPI_PS_INPUT_CNTL_1},
- {0, 0, R_02864C_SPI_PS_INPUT_CNTL_2},
- {0, 0, R_028650_SPI_PS_INPUT_CNTL_3},
- {0, 0, R_028654_SPI_PS_INPUT_CNTL_4},
- {0, 0, R_028658_SPI_PS_INPUT_CNTL_5},
- {0, 0, R_02865C_SPI_PS_INPUT_CNTL_6},
- {0, 0, R_028660_SPI_PS_INPUT_CNTL_7},
- {0, 0, R_028664_SPI_PS_INPUT_CNTL_8},
- {0, 0, R_028668_SPI_PS_INPUT_CNTL_9},
- {0, 0, R_02866C_SPI_PS_INPUT_CNTL_10},
- {0, 0, R_028670_SPI_PS_INPUT_CNTL_11},
- {0, 0, R_028674_SPI_PS_INPUT_CNTL_12},
- {0, 0, R_028678_SPI_PS_INPUT_CNTL_13},
- {0, 0, R_02867C_SPI_PS_INPUT_CNTL_14},
- {0, 0, R_028680_SPI_PS_INPUT_CNTL_15},
- {0, 0, R_028684_SPI_PS_INPUT_CNTL_16},
- {0, 0, R_028688_SPI_PS_INPUT_CNTL_17},
- {0, 0, R_02868C_SPI_PS_INPUT_CNTL_18},
- {0, 0, R_028690_SPI_PS_INPUT_CNTL_19},
- {0, 0, R_028694_SPI_PS_INPUT_CNTL_20},
- {0, 0, R_028698_SPI_PS_INPUT_CNTL_21},
- {0, 0, R_02869C_SPI_PS_INPUT_CNTL_22},
- {0, 0, R_0286A0_SPI_PS_INPUT_CNTL_23},
- {0, 0, R_0286A4_SPI_PS_INPUT_CNTL_24},
- {0, 0, R_0286A8_SPI_PS_INPUT_CNTL_25},
- {0, 0, R_0286AC_SPI_PS_INPUT_CNTL_26},
- {0, 0, R_0286B0_SPI_PS_INPUT_CNTL_27},
- {0, 0, R_0286B4_SPI_PS_INPUT_CNTL_28},
- {0, 0, R_0286B8_SPI_PS_INPUT_CNTL_29},
- {0, 0, R_0286BC_SPI_PS_INPUT_CNTL_30},
- {0, 0, R_0286C0_SPI_PS_INPUT_CNTL_31},
- {0, 0, R_0286CC_SPI_PS_IN_CONTROL_0},
- {0, 0, R_0286D0_SPI_PS_IN_CONTROL_1},
- {0, 0, R_0286D8_SPI_INPUT_Z},
- {1, S_0085F0_SH_ACTION_ENA(1), R_028840_SQ_PGM_START_PS},
- {0, 0, R_028850_SQ_PGM_RESOURCES_PS},
- {0, 0, R_028854_SQ_PGM_EXPORTS_PS},
- {0, 0, R_0288CC_SQ_PGM_CF_OFFSET_PS},
- {0, 0, R_028400_VGT_MAX_VTX_INDX},
- {0, 0, R_028404_VGT_MIN_VTX_INDX},
- {0, 0, R_028408_VGT_INDX_OFFSET},
- {0, 0, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX},
- {0, 0, R_028A84_VGT_PRIMITIVEID_EN},
- {0, 0, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN},
- {0, 0, R_028AA0_VGT_INSTANCE_STEP_RATE_0},
- {0, 0, R_028AA4_VGT_INSTANCE_STEP_RATE_1},
-};
-
-/* SHADER CONSTANT R600/R700 */
-static int r600_state_constant_init(struct r600_context *ctx, u32 offset)
-{
- struct r600_reg r600_shader_constant[] = {
- {0, 0, R_030000_SQ_ALU_CONSTANT0_0},
- {0, 0, R_030004_SQ_ALU_CONSTANT1_0},
- {0, 0, R_030008_SQ_ALU_CONSTANT2_0},
- {0, 0, R_03000C_SQ_ALU_CONSTANT3_0},
- };
- unsigned nreg = sizeof(r600_shader_constant)/sizeof(struct r600_reg);
-
- for (int i = 0; i < nreg; i++) {
- r600_shader_constant[i].offset += offset;
- }
- return r600_context_add_block(ctx, r600_shader_constant, nreg, PKT3_SET_ALU_CONST);
-}
-
-/* SHADER RESOURCE R600/R700 */
-static int r600_state_resource_init(struct r600_context *ctx, u32 offset)
-{
- struct r600_reg r600_shader_resource[] = {
- {0, 0, R_038000_RESOURCE0_WORD0},
- {0, 0, R_038004_RESOURCE0_WORD1},
- {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_038008_RESOURCE0_WORD2},
- {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_03800C_RESOURCE0_WORD3},
- {0, 0, R_038010_RESOURCE0_WORD4},
- {0, 0, R_038014_RESOURCE0_WORD5},
- {0, 0, R_038018_RESOURCE0_WORD6},
- };
- unsigned nreg = sizeof(r600_shader_resource)/sizeof(struct r600_reg);
-
- for (int i = 0; i < nreg; i++) {
- r600_shader_resource[i].offset += offset;
- }
- return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE);
-}
-
-/* SHADER SAMPLER R600/R700 */
-static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
-{
- struct r600_reg r600_shader_sampler[] = {
- {0, 0, R_03C000_SQ_TEX_SAMPLER_WORD0_0},
- {0, 0, R_03C004_SQ_TEX_SAMPLER_WORD1_0},
- {0, 0, R_03C008_SQ_TEX_SAMPLER_WORD2_0},
- };
- unsigned nreg = sizeof(r600_shader_sampler)/sizeof(struct r600_reg);
-
- for (int i = 0; i < nreg; i++) {
- r600_shader_sampler[i].offset += offset;
- }
- return r600_context_add_block(ctx, r600_shader_sampler, nreg, PKT3_SET_SAMPLER);
-}
-
-/* SHADER SAMPLER BORDER R600/R700 */
-static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset)
-{
- struct r600_reg r600_shader_sampler_border[] = {
- {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_RED},
- {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN},
- {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE},
- {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA},
- };
- unsigned nreg = sizeof(r600_shader_sampler_border)/sizeof(struct r600_reg);
-
- for (int i = 0; i < nreg; i++) {
- r600_shader_sampler_border[i].offset += offset;
- }
- return r600_context_add_block(ctx, r600_shader_sampler_border, nreg, PKT3_SET_CONFIG_REG);
-}
-
-/* initialize */
-void r600_context_fini(struct r600_context *ctx)
-{
- for (int i = 0; i < ctx->ngroups; i++) {
- r600_group_fini(&ctx->groups[i]);
- }
- free(ctx->reloc);
- free(ctx->pm4);
- memset(ctx, 0, sizeof(struct r600_context));
-}
-
-int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
-{
- int r;
-
- memset(ctx, 0, sizeof(struct r600_context));
- ctx->radeon = radeon;
- LIST_INITHEAD(&ctx->query_list);
- /* initialize groups */
- r = r600_group_init(&ctx->groups[R600_GROUP_CONFIG], R600_CONFIG_REG_OFFSET, R600_CONFIG_REG_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[R600_GROUP_CTL_CONST], R600_CTL_CONST_OFFSET, R600_CTL_CONST_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[R600_GROUP_LOOP_CONST], R600_LOOP_CONST_OFFSET, R600_LOOP_CONST_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[R600_GROUP_BOOL_CONST], R600_BOOL_CONST_OFFSET, R600_BOOL_CONST_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[R600_GROUP_SAMPLER], R600_SAMPLER_OFFSET, R600_SAMPLER_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[R600_GROUP_RESOURCE], R600_RESOURCE_OFFSET, R600_RESOURCE_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[R600_GROUP_ALU_CONST], R600_ALU_CONST_OFFSET, R600_ALU_CONST_END);
- if (r) {
- goto out_err;
- }
- r = r600_group_init(&ctx->groups[R600_GROUP_CONTEXT], R600_CONTEXT_REG_OFFSET, R600_CONTEXT_REG_END);
- if (r) {
- goto out_err;
- }
- ctx->ngroups = R600_NGROUPS;
-
- /* add blocks */
- r = r600_context_add_block(ctx, r600_config_reg_list,
- sizeof(r600_config_reg_list)/sizeof(struct r600_reg),
- PKT3_SET_CONFIG_REG);
- if (r)
- goto out_err;
- r = r600_context_add_block(ctx, r600_context_reg_list,
- sizeof(r600_context_reg_list)/sizeof(struct r600_reg),
- PKT3_SET_CONTEXT_REG);
- if (r)
- goto out_err;
-
- /* PS SAMPLER BORDER */
- for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) {
- r = r600_state_sampler_border_init(ctx, offset);
- if (r)
- goto out_err;
- }
-
- /* VS SAMPLER BORDER */
- for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) {
- r = r600_state_sampler_border_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* PS SAMPLER */
- for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) {
- r = r600_state_sampler_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* VS SAMPLER */
- for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) {
- r = r600_state_sampler_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* PS RESOURCE */
- for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) {
- r = r600_state_resource_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* VS RESOURCE */
- for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) {
- r = r600_state_resource_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* PS CONSTANT */
- for (int j = 0, offset = 0; j < 256; j++, offset += 0x10) {
- r = r600_state_constant_init(ctx, offset);
- if (r)
- goto out_err;
- }
- /* VS CONSTANT */
- for (int j = 0, offset = 0x1000; j < 256; j++, offset += 0x10) {
- r = r600_state_constant_init(ctx, offset);
- if (r)
- goto out_err;
- }
-
- /* allocate cs variables */
- ctx->nreloc = RADEON_CTX_MAX_PM4;
- ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
- if (ctx->reloc == NULL) {
- r = -ENOMEM;
- goto out_err;
- }
- ctx->bo = calloc(ctx->nreloc, sizeof(void *));
- if (ctx->bo == NULL) {
- r = -ENOMEM;
- goto out_err;
- }
- ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
- ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
- if (ctx->pm4 == NULL) {
- r = -ENOMEM;
- goto out_err;
- }
- return 0;
-out_err:
- r600_context_fini(ctx);
- return r;
-}
-
-void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct radeon_bo *bo)
-{
- int i, reloc_id;
-
- assert(bo != NULL);
- for (i = 0, reloc_id = -1; i < ctx->creloc; i++) {
- if (ctx->reloc[i].handle == bo->handle) {
- reloc_id = i * sizeof(struct r600_reloc) / 4;
- /* set PKT3 to point to proper reloc */
- *pm4 = reloc_id;
- }
- }
- if (reloc_id == -1) {
- /* add new relocation */
- if (ctx->creloc >= ctx->nreloc) {
- r600_context_flush(ctx);
- }
- reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4;
- ctx->reloc[ctx->creloc].handle = bo->handle;
- ctx->reloc[ctx->creloc].read_domain = RADEON_GEM_DOMAIN_GTT;
- ctx->reloc[ctx->creloc].write_domain = RADEON_GEM_DOMAIN_GTT;
- ctx->reloc[ctx->creloc].flags = 0;
- radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
- ctx->creloc++;
- /* set PKT3 to point to proper reloc */
- *pm4 = reloc_id;
- }
-}
-
-void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state)
-{
- struct r600_group *group;
- struct r600_group_block *block;
-
- for (int i = 0; i < state->nregs; i++) {
- unsigned id;
- group = &ctx->groups[state->regs[i].group_id];
- id = group->offset_block_id[(state->regs[i].offset - group->start_offset) >> 2];
- block = &group->blocks[id];
- id = (state->regs[i].offset - block->start_offset) >> 2;
- block->reg[id] &= ~state->regs[i].mask;
- block->reg[id] |= state->regs[i].value;
- if (block->pm4_bo_index[id]) {
- /* find relocation */
- id = block->pm4_bo_index[id];
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo);
- }
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
- }
-}
-
-static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
-{
- struct r600_group_block *block;
- unsigned id;
-
- offset -= ctx->groups[R600_GROUP_RESOURCE].start_offset;
- id = ctx->groups[R600_GROUP_RESOURCE].offset_block_id[offset >> 2];
- block = &ctx->groups[R600_GROUP_RESOURCE].blocks[id];
- if (state == NULL) {
- block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
- radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
- return;
- }
- block->reg[0] = state->regs[0].value;
- block->reg[1] = state->regs[1].value;
- block->reg[2] = state->regs[2].value;
- block->reg[3] = state->regs[3].value;
- block->reg[4] = state->regs[4].value;
- block->reg[5] = state->regs[5].value;
- block->reg[6] = state->regs[6].value;
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
- radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
- if (state->regs[0].bo) {
- /* VERTEX RESOURCE, we preted there is 2 bo to relocate so
- * we have single case btw VERTEX & TEXTURE resource
- */
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
- } else {
- /* TEXTURE RESOURCE */
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
- radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
- }
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-}
-
-void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid;
-
- r600_context_pipe_state_set_resource(ctx, state, offset);
-}
-
-void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
-{
- unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid;
-
- r600_context_pipe_state_set_resource(ctx, state, offset);
-}
-
-static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
-{
- struct r600_group_block *block;
- unsigned id;
-
- offset -= ctx->groups[R600_GROUP_SAMPLER].start_offset;
- id = ctx->groups[R600_GROUP_SAMPLER].offset_block_id[offset >> 2];
- block = &ctx->groups[R600_GROUP_SAMPLER].blocks[id];
- if (state == NULL) {
- block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
- return;
- }
- block->reg[0] = state->regs[0].value;
- block->reg[1] = state->regs[1].value;
- block->reg[2] = state->regs[2].value;
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-}
-
-static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
-{
- struct r600_group_block *block;
- unsigned id;
-
- offset -= ctx->groups[R600_GROUP_CONFIG].start_offset;
- id = ctx->groups[R600_GROUP_CONFIG].offset_block_id[offset >> 2];
- block = &ctx->groups[R600_GROUP_CONFIG].blocks[id];
- if (state == NULL) {
- block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
- return;
- }
- if (state->nregs <= 3) {
- return;
- }
- block->reg[0] = state->regs[3].value;
- block->reg[1] = state->regs[4].value;
- block->reg[2] = state->regs[5].value;
- block->reg[3] = state->regs[6].value;
- block->status |= R600_BLOCK_STATUS_ENABLED;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-}
-
-void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
-{
- unsigned offset;
-
- offset = 0x0003C000 + id * 0xc;
- r600_context_pipe_state_set_sampler(ctx, state, offset);
- offset = 0x0000A400 + id * 0x10;
- r600_context_pipe_state_set_sampler_border(ctx, state, offset);
-}
-
-void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
-{
- unsigned offset;
-
- offset = 0x0003C0D8 + id * 0xc;
- r600_context_pipe_state_set_sampler(ctx, state, offset);
- offset = 0x0000A600 + id * 0x10;
- r600_context_pipe_state_set_sampler_border(ctx, state, offset);
-}
-
-void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group)
-{
- struct radeon_bo *bo;
- int id;
-
- for (int i = 0; i < group->nblocks; i++) {
- struct r600_group_block *block = &group->blocks[i];
- if (block->status & R600_BLOCK_STATUS_DIRTY) {
- for (int j = 0; j < block->nreg; j++) {
- if (block->pm4_bo_index[j]) {
- /* find relocation */
- id = block->pm4_bo_index[j];
- bo = radeon_bo_pb_get_bo(block->reloc[id].bo->pb);
- for (int k = 0; k < block->reloc[id].nreloc; k++) {
- r600_context_bo_reloc(ctx, &block->pm4[block->reloc[id].bo_pm4_index[k]], bo);
- }
- }
- }
-
- memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4);
- ctx->pm4_cdwords += block->pm4_ndwords;
- block->status ^= R600_BLOCK_STATUS_DIRTY;
- }
- }
-}
-
-struct radeon_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned group_id, unsigned offset)
-{
- struct r600_group_block *block;
- unsigned id;
-
- id = ctx->groups[group_id].offset_block_id[(offset - ctx->groups[group_id].start_offset) >> 2];
- block = &ctx->groups[group_id].blocks[id];
- offset -= block->start_offset;
- id = block->pm4_bo_index[offset >> 2];
- if (block->reloc[id].bo) {
- return radeon_bo_pb_get_bo(block->reloc[id].bo->pb);
- }
- return NULL;
-}
-
-void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
-{
- struct radeon_bo *cb[8];
- unsigned ndwords = 9;
-
- if (draw->indices) {
- ndwords = 13;
- /* make sure there is enough relocation space before scheduling draw */
- if (ctx->creloc >= (ctx->nreloc - 1)) {
- r600_context_flush(ctx);
- }
- }
-
- /* find number of color buffer */
- for (int i = 0; i < 8; i++) {
- cb[i] = r600_context_reg_bo(ctx, R600_GROUP_CONTEXT, R_028040_CB_COLOR0_BASE + (i << 2));
- if (cb[i]) {
- ndwords += 7;
- }
- }
-
- /* queries need some special values */
- if (ctx->num_query_running) {
- if (ctx->radeon->family >= CHIP_RV770) {
- r600_context_reg(ctx, R600_GROUP_CONTEXT,
- R_028D0C_DB_RENDER_CONTROL,
- S_028D0C_R700_PERFECT_ZPASS_COUNTS(1),
- S_028D0C_R700_PERFECT_ZPASS_COUNTS(1));
- }
- r600_context_reg(ctx, R600_GROUP_CONTEXT,
- R_028D10_DB_RENDER_OVERRIDE,
- S_028D10_NOOP_CULL_DISABLE(1),
- S_028D10_NOOP_CULL_DISABLE(1));
- }
-
- if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
- /* need to flush */
- r600_context_flush(ctx);
- }
- /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
- if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
- R600_ERR("context is too big to be scheduled\n");
- return;
- }
-
- /* enough room to copy packet */
- r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_RESOURCE]);
- r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_SAMPLER]);
-
- /* draw packet */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
- if (draw->indices) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
- ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset;
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(draw->indices->pb));
- } else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
- ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- }
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
- ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
-
- /* flush color buffer */
- for (int i = 0; i < 8; i++) {
- if (cb[i]) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
- ctx->pm4[ctx->pm4_cdwords++] = (S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
- S_0085F0_CB_ACTION_ENA(1);
- ctx->pm4[ctx->pm4_cdwords++] = (cb[i]->size + 255) >> 8;
- ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
- ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], cb[i]);
- }
- }
-
- /* all dirty state have been scheduled in current cs */
- ctx->pm4_dirty_cdwords = 0;
-}
-
-void r600_context_flush(struct r600_context *ctx)
-{
- struct drm_radeon_cs drmib;
- struct drm_radeon_cs_chunk chunks[2];
- uint64_t chunk_array[2];
- struct r600_group_block *block;
- int r;
-
- if (!ctx->pm4_cdwords)
- return;
-
- /* suspend queries */
- r600_context_queries_suspend(ctx);
-
-#if 1
- /* emit cs */
- drmib.num_chunks = 2;
- drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
- chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
- chunks[0].length_dw = ctx->pm4_cdwords;
- chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
- chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
- chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4;
- chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
- chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
- chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
- r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
- sizeof(struct drm_radeon_cs));
-#endif
- /* restart */
- for (int i = 0; i < ctx->creloc; i++) {
- radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
- }
- ctx->creloc = 0;
- ctx->pm4_dirty_cdwords = 0;
- ctx->pm4_cdwords = 0;
-
- /* resume queries */
- r600_context_queries_resume(ctx);
-
- /* set all valid group as dirty so they get reemited on
- * next draw command
- */
- for (int i = 0; i < ctx->ngroups; i++) {
- for (int j = 0; j < ctx->groups[i].nblocks; j++) {
- /* mark enabled block as dirty */
- block = &ctx->groups[i].blocks[j];
- if (block->status & R600_BLOCK_STATUS_ENABLED) {
- ctx->pm4_dirty_cdwords += block->pm4_ndwords;
- block->status |= R600_BLOCK_STATUS_DIRTY;
- }
- }
- }
-}
-
-void r600_context_dump_bof(struct r600_context *ctx, const char *file)
-{
- bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
- unsigned i;
-
- root = device_id = bcs = blob = array = bo = size = handle = NULL;
- root = bof_object();
- if (root == NULL)
- goto out_err;
- device_id = bof_int32(ctx->radeon->device);
- if (device_id == NULL)
- goto out_err;
- if (bof_object_set(root, "device_id", device_id))
- goto out_err;
- bof_decref(device_id);
- device_id = NULL;
- /* dump relocs */
- blob = bof_blob(ctx->creloc * 16, ctx->reloc);
- if (blob == NULL)
- goto out_err;
- if (bof_object_set(root, "reloc", blob))
- goto out_err;
- bof_decref(blob);
- blob = NULL;
- /* dump cs */
- blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4);
- if (blob == NULL)
- goto out_err;
- if (bof_object_set(root, "pm4", blob))
- goto out_err;
- bof_decref(blob);
- blob = NULL;
- /* dump bo */
- array = bof_array();
- if (array == NULL)
- goto out_err;
- for (i = 0; i < ctx->creloc; i++) {
- struct radeon_bo *rbo = ctx->bo[i];
- bo = bof_object();
- if (bo == NULL)
- goto out_err;
- size = bof_int32(rbo->size);
- if (size == NULL)
- goto out_err;
- if (bof_object_set(bo, "size", size))
- goto out_err;
- bof_decref(size);
- size = NULL;
- handle = bof_int32(rbo->handle);
- if (handle == NULL)
- goto out_err;
- if (bof_object_set(bo, "handle", handle))
- goto out_err;
- bof_decref(handle);
- handle = NULL;
- radeon_bo_map(ctx->radeon, rbo);
- blob = bof_blob(rbo->size, rbo->data);
- radeon_bo_unmap(ctx->radeon, rbo);
- if (blob == NULL)
- goto out_err;
- if (bof_object_set(bo, "data", blob))
- goto out_err;
- bof_decref(blob);
- blob = NULL;
- if (bof_array_append(array, bo))
- goto out_err;
- bof_decref(bo);
- bo = NULL;
- }
- if (bof_object_set(root, "bo", array))
- goto out_err;
- bof_dump_file(root, file);
-out_err:
- bof_decref(blob);
- bof_decref(array);
- bof_decref(bo);
- bof_decref(size);
- bof_decref(handle);
- bof_decref(device_id);
- bof_decref(root);
-}
-
-static void r600_query_result(struct r600_context *ctx, struct r600_query *query)
-{
- u64 start, end;
- u32 *results;
- int i;
-
- results = radeon_ws_bo_map(ctx->radeon, query->buffer, 0, NULL);
- for (i = 0; i < query->num_results; i += 4) {
- start = (u64)results[i] | (u64)results[i + 1] << 32;
- end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
- if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
- query->result += end - start;
- }
- }
- radeon_ws_bo_unmap(ctx->radeon, query->buffer);
- query->num_results = 0;
-}
-
-void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
-{
- /* query request needs 6 dwords for begin + 6 dwords for end */
- if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
- /* need to flush */
- r600_context_flush(ctx);
- }
-
- /* if query buffer is full force a flush */
- if (query->num_results >= ((query->buffer_size >> 2) - 2)) {
- r600_context_flush(ctx);
- r600_query_result(ctx, query);
- }
-
- /* emit begin query */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
- ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results;
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
-
- query->state |= R600_QUERY_STATE_STARTED;
- query->state ^= R600_QUERY_STATE_ENDED;
- ctx->num_query_running++;
-}
-
-void r600_query_end(struct r600_context *ctx, struct r600_query *query)
-{
- /* emit begin query */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
- ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
- ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8;
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
- ctx->pm4[ctx->pm4_cdwords++] = 0;
- r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
-
- query->num_results += 16;
- query->state ^= R600_QUERY_STATE_STARTED;
- query->state |= R600_QUERY_STATE_ENDED;
- ctx->num_query_running--;
-}
-
-struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
-{
- struct r600_query *query;
-
- if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
- return NULL;
-
- query = calloc(1, sizeof(struct r600_query));
- if (query == NULL)
- return NULL;
-
- query->type = query_type;
- query->buffer_size = 4096;
-
- query->buffer = radeon_ws_bo(ctx->radeon, query->buffer_size, 1, 0);
- if (!query->buffer) {
- free(query);
- return NULL;
- }
-
- LIST_ADDTAIL(&query->list, &ctx->query_list);
-
- return query;
-}
-
-void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
-{
- radeon_ws_bo_reference(ctx->radeon, &query->buffer, NULL);
- LIST_DEL(&query->list);
- free(query);
-}
-
-boolean r600_context_query_result(struct r600_context *ctx,
- struct r600_query *query,
- boolean wait, void *vresult)
-{
- uint64_t *result = (uint64_t*)vresult;
-
- if (query->num_results) {
- r600_context_flush(ctx);
- }
- r600_query_result(ctx, query);
- *result = query->result;
- query->result = 0;
- return TRUE;
-}
-
-static void r600_context_queries_suspend(struct r600_context *ctx)
-{
- struct r600_query *query;
-
- LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
- if (query->state & R600_QUERY_STATE_STARTED) {
- r600_query_end(ctx, query);
- query->state |= R600_QUERY_STATE_SUSPENDED;
- }
- }
-}
-
-static void r600_context_queries_resume(struct r600_context *ctx)
-{
- struct r600_query *query;
-
- LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
- if (query->state & R600_QUERY_STATE_SUSPENDED) {
- r600_query_begin(ctx, query);
- query->state ^= R600_QUERY_STATE_SUSPENDED;
- }
- }
-}
diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h
deleted file mode 100644
index 76e185ac03..0000000000
--- a/src/gallium/winsys/r600/drm/r600_states.h
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-#ifndef R600_STATES_H
-#define R600_STATES_H
-
-static const struct radeon_register R600_names_CONFIG[] = {
- {0x00008C00, 0, 0, "SQ_CONFIG"},
- {0x00008C04, 0, 0, "SQ_GPR_RESOURCE_MGMT_1"},
- {0x00008C08, 0, 0, "SQ_GPR_RESOURCE_MGMT_2"},
- {0x00008C0C, 0, 0, "SQ_THREAD_RESOURCE_MGMT"},
- {0x00008C10, 0, 0, "SQ_STACK_RESOURCE_MGMT_1"},
- {0x00008C14, 0, 0, "SQ_STACK_RESOURCE_MGMT_2"},
- {0x00008D8C, 0, 0, "SQ_DYN_GPR_CNTL_PS_FLUSH_REQ"},
- {0x00009508, 0, 0, "TA_CNTL_AUX"},
- {0x00009714, 0, 0, "VC_ENHANCE"},
- {0x00009830, 0, 0, "DB_DEBUG"},
- {0x00009838, 0, 0, "DB_WATERMARKS"},
- {0x00028350, 0, 0, "SX_MISC"},
- {0x000286C8, 0, 0, "SPI_THREAD_GROUPING"},
- {0x000288A8, 0, 0, "SQ_ESGS_RING_ITEMSIZE"},
- {0x000288AC, 0, 0, "SQ_GSVS_RING_ITEMSIZE"},
- {0x000288B0, 0, 0, "SQ_ESTMP_RING_ITEMSIZE"},
- {0x000288B4, 0, 0, "SQ_GSTMP_RING_ITEMSIZE"},
- {0x000288B8, 0, 0, "SQ_VSTMP_RING_ITEMSIZE"},
- {0x000288BC, 0, 0, "SQ_PSTMP_RING_ITEMSIZE"},
- {0x000288C0, 0, 0, "SQ_FBUF_RING_ITEMSIZE"},
- {0x000288C4, 0, 0, "SQ_REDUC_RING_ITEMSIZE"},
- {0x000288C8, 0, 0, "SQ_GS_VERT_ITEMSIZE"},
- {0x00028A10, 0, 0, "VGT_OUTPUT_PATH_CNTL"},
- {0x00028A14, 0, 0, "VGT_HOS_CNTL"},
- {0x00028A18, 0, 0, "VGT_HOS_MAX_TESS_LEVEL"},
- {0x00028A1C, 0, 0, "VGT_HOS_MIN_TESS_LEVEL"},
- {0x00028A20, 0, 0, "VGT_HOS_REUSE_DEPTH"},
- {0x00028A24, 0, 0, "VGT_GROUP_PRIM_TYPE"},
- {0x00028A28, 0, 0, "VGT_GROUP_FIRST_DECR"},
- {0x00028A2C, 0, 0, "VGT_GROUP_DECR"},
- {0x00028A30, 0, 0, "VGT_GROUP_VECT_0_CNTL"},
- {0x00028A34, 0, 0, "VGT_GROUP_VECT_1_CNTL"},
- {0x00028A38, 0, 0, "VGT_GROUP_VECT_0_FMT_CNTL"},
- {0x00028A3C, 0, 0, "VGT_GROUP_VECT_1_FMT_CNTL"},
- {0x00028A40, 0, 0, "VGT_GS_MODE"},
- {0x00028A4C, 0, 0, "PA_SC_MODE_CNTL"},
- {0x00028AB0, 0, 0, "VGT_STRMOUT_EN"},
- {0x00028AB4, 0, 0, "VGT_REUSE_OFF"},
- {0x00028AB8, 0, 0, "VGT_VTX_CNT_EN"},
- {0x00028B20, 0, 0, "VGT_STRMOUT_BUFFER_EN"},
-};
-
-static const struct radeon_register R600_names_CB_CNTL[] = {
- {0x00028120, 0, 0, "CB_CLEAR_RED"},
- {0x00028124, 0, 0, "CB_CLEAR_GREEN"},
- {0x00028128, 0, 0, "CB_CLEAR_BLUE"},
- {0x0002812C, 0, 0, "CB_CLEAR_ALPHA"},
- {0x0002823C, 0, 0, "CB_SHADER_MASK"},
- {0x00028238, 0, 0, "CB_TARGET_MASK"},
- {0x00028424, 0, 0, "CB_FOG_RED"},
- {0x00028428, 0, 0, "CB_FOG_GREEN"},
- {0x0002842C, 0, 0, "CB_FOG_BLUE"},
- {0x00028808, 0, 0, "CB_COLOR_CONTROL"},
- {0x00028C04, 0, 0, "PA_SC_AA_CONFIG"},
- {0x00028C1C, 0, 0, "PA_SC_AA_SAMPLE_LOCS_MCTX"},
- {0x00028C20, 0, 0, "PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX"},
- {0x00028C30, 0, 0, "CB_CLRCMP_CONTROL"},
- {0x00028C34, 0, 0, "CB_CLRCMP_SRC"},
- {0x00028C38, 0, 0, "CB_CLRCMP_DST"},
- {0x00028C3C, 0, 0, "CB_CLRCMP_MSK"},
- {0x00028C48, 0, 0, "PA_SC_AA_MASK"},
- {0x000287A0, 0, 0, "CB_SHADER_CONTROL"},
-};
-
-static const struct radeon_register R600_names_RASTERIZER[] = {
- {0x000286D4, 0, 0, "SPI_INTERP_CONTROL_0"},
- {0x00028810, 0, 0, "PA_CL_CLIP_CNTL"},
- {0x00028814, 0, 0, "PA_SU_SC_MODE_CNTL"},
- {0x0002881C, 0, 0, "PA_CL_VS_OUT_CNTL"},
- {0x00028820, 0, 0, "PA_CL_NANINF_CNTL"},
- {0x00028A00, 0, 0, "PA_SU_POINT_SIZE"},
- {0x00028A04, 0, 0, "PA_SU_POINT_MINMAX"},
- {0x00028A08, 0, 0, "PA_SU_LINE_CNTL"},
- {0x00028A0C, 0, 0, "PA_SC_LINE_STIPPLE"},
- {0x00028A48, 0, 0, "PA_SC_MPASS_PS_CNTL"},
- {0x00028C00, 0, 0, "PA_SC_LINE_CNTL"},
- {0x00028C0C, 0, 0, "PA_CL_GB_VERT_CLIP_ADJ"},
- {0x00028C10, 0, 0, "PA_CL_GB_VERT_DISC_ADJ"},
- {0x00028C14, 0, 0, "PA_CL_GB_HORZ_CLIP_ADJ"},
- {0x00028C18, 0, 0, "PA_CL_GB_HORZ_DISC_ADJ"},
- {0x00028DF8, 0, 0, "PA_SU_POLY_OFFSET_DB_FMT_CNTL"},
- {0x00028DFC, 0, 0, "PA_SU_POLY_OFFSET_CLAMP"},
- {0x00028E00, 0, 0, "PA_SU_POLY_OFFSET_FRONT_SCALE"},
- {0x00028E04, 0, 0, "PA_SU_POLY_OFFSET_FRONT_OFFSET"},
- {0x00028E08, 0, 0, "PA_SU_POLY_OFFSET_BACK_SCALE"},
- {0x00028E0C, 0, 0, "PA_SU_POLY_OFFSET_BACK_OFFSET"},
-};
-
-static const struct radeon_register R600_names_VIEWPORT[] = {
- {0x000282D0, 0, 0, "PA_SC_VPORT_ZMIN_0"},
- {0x000282D4, 0, 0, "PA_SC_VPORT_ZMAX_0"},
- {0x0002843C, 0, 0, "PA_CL_VPORT_XSCALE_0"},
- {0x00028444, 0, 0, "PA_CL_VPORT_YSCALE_0"},
- {0x0002844C, 0, 0, "PA_CL_VPORT_ZSCALE_0"},
- {0x00028440, 0, 0, "PA_CL_VPORT_XOFFSET_0"},
- {0x00028448, 0, 0, "PA_CL_VPORT_YOFFSET_0"},
- {0x00028450, 0, 0, "PA_CL_VPORT_ZOFFSET_0"},
- {0x00028818, 0, 0, "PA_CL_VTE_CNTL"},
-};
-
-static const struct radeon_register R600_names_SCISSOR[] = {
- {0x00028030, 0, 0, "PA_SC_SCREEN_SCISSOR_TL"},
- {0x00028034, 0, 0, "PA_SC_SCREEN_SCISSOR_BR"},
- {0x00028200, 0, 0, "PA_SC_WINDOW_OFFSET"},
- {0x00028204, 0, 0, "PA_SC_WINDOW_SCISSOR_TL"},
- {0x00028208, 0, 0, "PA_SC_WINDOW_SCISSOR_BR"},
- {0x0002820C, 0, 0, "PA_SC_CLIPRECT_RULE"},
- {0x00028210, 0, 0, "PA_SC_CLIPRECT_0_TL"},
- {0x00028214, 0, 0, "PA_SC_CLIPRECT_0_BR"},
- {0x00028218, 0, 0, "PA_SC_CLIPRECT_1_TL"},
- {0x0002821C, 0, 0, "PA_SC_CLIPRECT_1_BR"},
- {0x00028220, 0, 0, "PA_SC_CLIPRECT_2_TL"},
- {0x00028224, 0, 0, "PA_SC_CLIPRECT_2_BR"},
- {0x00028228, 0, 0, "PA_SC_CLIPRECT_3_TL"},
- {0x0002822C, 0, 0, "PA_SC_CLIPRECT_3_BR"},
- {0x00028230, 0, 0, "PA_SC_EDGERULE"},
- {0x00028240, 0, 0, "PA_SC_GENERIC_SCISSOR_TL"},
- {0x00028244, 0, 0, "PA_SC_GENERIC_SCISSOR_BR"},
- {0x00028250, 0, 0, "PA_SC_VPORT_SCISSOR_0_TL"},
- {0x00028254, 0, 0, "PA_SC_VPORT_SCISSOR_0_BR"},
-};
-
-static const struct radeon_register R600_names_BLEND[] = {
- {0x00028414, 0, 0, "CB_BLEND_RED"},
- {0x00028418, 0, 0, "CB_BLEND_GREEN"},
- {0x0002841C, 0, 0, "CB_BLEND_BLUE"},
- {0x00028420, 0, 0, "CB_BLEND_ALPHA"},
- {0x00028780, 0, 0, "CB_BLEND0_CONTROL"},
- {0x00028784, 0, 0, "CB_BLEND1_CONTROL"},
- {0x00028788, 0, 0, "CB_BLEND2_CONTROL"},
- {0x0002878C, 0, 0, "CB_BLEND3_CONTROL"},
- {0x00028790, 0, 0, "CB_BLEND4_CONTROL"},
- {0x00028794, 0, 0, "CB_BLEND5_CONTROL"},
- {0x00028798, 0, 0, "CB_BLEND6_CONTROL"},
- {0x0002879C, 0, 0, "CB_BLEND7_CONTROL"},
- {0x00028804, 0, 0, "CB_BLEND_CONTROL"},
-};
-
-static const struct radeon_register R600_names_DSA[] = {
- {0x00028028, 0, 0, "DB_STENCIL_CLEAR"},
- {0x0002802C, 0, 0, "DB_DEPTH_CLEAR"},
- {0x00028410, 0, 0, "SX_ALPHA_TEST_CONTROL"},
- {0x00028430, 0, 0, "DB_STENCILREFMASK"},
- {0x00028434, 0, 0, "DB_STENCILREFMASK_BF"},
- {0x00028438, 0, 0, "SX_ALPHA_REF"},
- {0x000286E0, 0, 0, "SPI_FOG_FUNC_SCALE"},
- {0x000286E4, 0, 0, "SPI_FOG_FUNC_BIAS"},
- {0x000286DC, 0, 0, "SPI_FOG_CNTL"},
- {0x00028800, 0, 0, "DB_DEPTH_CONTROL"},
- {0x0002880C, 0, 0, "DB_SHADER_CONTROL"},
- {0x00028D0C, 0, 0, "DB_RENDER_CONTROL"},
- {0x00028D10, 0, 0, "DB_RENDER_OVERRIDE"},
- {0x00028D2C, 0, 0, "DB_SRESULTS_COMPARE_STATE1"},
- {0x00028D30, 0, 0, "DB_PRELOAD_CONTROL"},
- {0x00028D44, 0, 0, "DB_ALPHA_TO_MASK"},
-};
-
-static const struct radeon_register R600_names_VS_SHADER[] = {
- {0x00028380, 0, 0, "SQ_VTX_SEMANTIC_0"},
- {0x00028384, 0, 0, "SQ_VTX_SEMANTIC_1"},
- {0x00028388, 0, 0, "SQ_VTX_SEMANTIC_2"},
- {0x0002838C, 0, 0, "SQ_VTX_SEMANTIC_3"},
- {0x00028390, 0, 0, "SQ_VTX_SEMANTIC_4"},
- {0x00028394, 0, 0, "SQ_VTX_SEMANTIC_5"},
- {0x00028398, 0, 0, "SQ_VTX_SEMANTIC_6"},
- {0x0002839C, 0, 0, "SQ_VTX_SEMANTIC_7"},
- {0x000283A0, 0, 0, "SQ_VTX_SEMANTIC_8"},
- {0x000283A4, 0, 0, "SQ_VTX_SEMANTIC_9"},
- {0x000283A8, 0, 0, "SQ_VTX_SEMANTIC_10"},
- {0x000283AC, 0, 0, "SQ_VTX_SEMANTIC_11"},
- {0x000283B0, 0, 0, "SQ_VTX_SEMANTIC_12"},
- {0x000283B4, 0, 0, "SQ_VTX_SEMANTIC_13"},
- {0x000283B8, 0, 0, "SQ_VTX_SEMANTIC_14"},
- {0x000283BC, 0, 0, "SQ_VTX_SEMANTIC_15"},
- {0x000283C0, 0, 0, "SQ_VTX_SEMANTIC_16"},
- {0x000283C4, 0, 0, "SQ_VTX_SEMANTIC_17"},
- {0x000283C8, 0, 0, "SQ_VTX_SEMANTIC_18"},
- {0x000283CC, 0, 0, "SQ_VTX_SEMANTIC_19"},
- {0x000283D0, 0, 0, "SQ_VTX_SEMANTIC_20"},
- {0x000283D4, 0, 0, "SQ_VTX_SEMANTIC_21"},
- {0x000283D8, 0, 0, "SQ_VTX_SEMANTIC_22"},
- {0x000283DC, 0, 0, "SQ_VTX_SEMANTIC_23"},
- {0x000283E0, 0, 0, "SQ_VTX_SEMANTIC_24"},
- {0x000283E4, 0, 0, "SQ_VTX_SEMANTIC_25"},
- {0x000283E8, 0, 0, "SQ_VTX_SEMANTIC_26"},
- {0x000283EC, 0, 0, "SQ_VTX_SEMANTIC_27"},
- {0x000283F0, 0, 0, "SQ_VTX_SEMANTIC_28"},
- {0x000283F4, 0, 0, "SQ_VTX_SEMANTIC_29"},
- {0x000283F8, 0, 0, "SQ_VTX_SEMANTIC_30"},
- {0x000283FC, 0, 0, "SQ_VTX_SEMANTIC_31"},
- {0x00028614, 0, 0, "SPI_VS_OUT_ID_0"},
- {0x00028618, 0, 0, "SPI_VS_OUT_ID_1"},
- {0x0002861C, 0, 0, "SPI_VS_OUT_ID_2"},
- {0x00028620, 0, 0, "SPI_VS_OUT_ID_3"},
- {0x00028624, 0, 0, "SPI_VS_OUT_ID_4"},
- {0x00028628, 0, 0, "SPI_VS_OUT_ID_5"},
- {0x0002862C, 0, 0, "SPI_VS_OUT_ID_6"},
- {0x00028630, 0, 0, "SPI_VS_OUT_ID_7"},
- {0x00028634, 0, 0, "SPI_VS_OUT_ID_8"},
- {0x00028638, 0, 0, "SPI_VS_OUT_ID_9"},
- {0x000286C4, 0, 0, "SPI_VS_OUT_CONFIG"},
- {0x00028858, 1, 0, "SQ_PGM_START_VS"},
- {0x00028868, 0, 0, "SQ_PGM_RESOURCES_VS"},
- {0x00028894, 1, 1, "SQ_PGM_START_FS"},
- {0x000288A4, 0, 0, "SQ_PGM_RESOURCES_FS"},
- {0x000288D0, 0, 0, "SQ_PGM_CF_OFFSET_VS"},
- {0x000288DC, 0, 0, "SQ_PGM_CF_OFFSET_FS"},
-};
-
-static const struct radeon_register R600_names_PS_SHADER[] = {
- {0x00028644, 0, 0, "SPI_PS_INPUT_CNTL_0"},
- {0x00028648, 0, 0, "SPI_PS_INPUT_CNTL_1"},
- {0x0002864C, 0, 0, "SPI_PS_INPUT_CNTL_2"},
- {0x00028650, 0, 0, "SPI_PS_INPUT_CNTL_3"},
- {0x00028654, 0, 0, "SPI_PS_INPUT_CNTL_4"},
- {0x00028658, 0, 0, "SPI_PS_INPUT_CNTL_5"},
- {0x0002865C, 0, 0, "SPI_PS_INPUT_CNTL_6"},
- {0x00028660, 0, 0, "SPI_PS_INPUT_CNTL_7"},
- {0x00028664, 0, 0, "SPI_PS_INPUT_CNTL_8"},
- {0x00028668, 0, 0, "SPI_PS_INPUT_CNTL_9"},
- {0x0002866C, 0, 0, "SPI_PS_INPUT_CNTL_10"},
- {0x00028670, 0, 0, "SPI_PS_INPUT_CNTL_11"},
- {0x00028674, 0, 0, "SPI_PS_INPUT_CNTL_12"},
- {0x00028678, 0, 0, "SPI_PS_INPUT_CNTL_13"},
- {0x0002867C, 0, 0, "SPI_PS_INPUT_CNTL_14"},
- {0x00028680, 0, 0, "SPI_PS_INPUT_CNTL_15"},
- {0x00028684, 0, 0, "SPI_PS_INPUT_CNTL_16"},
- {0x00028688, 0, 0, "SPI_PS_INPUT_CNTL_17"},
- {0x0002868C, 0, 0, "SPI_PS_INPUT_CNTL_18"},
- {0x00028690, 0, 0, "SPI_PS_INPUT_CNTL_19"},
- {0x00028694, 0, 0, "SPI_PS_INPUT_CNTL_20"},
- {0x00028698, 0, 0, "SPI_PS_INPUT_CNTL_21"},
- {0x0002869C, 0, 0, "SPI_PS_INPUT_CNTL_22"},
- {0x000286A0, 0, 0, "SPI_PS_INPUT_CNTL_23"},
- {0x000286A4, 0, 0, "SPI_PS_INPUT_CNTL_24"},
- {0x000286A8, 0, 0, "SPI_PS_INPUT_CNTL_25"},
- {0x000286AC, 0, 0, "SPI_PS_INPUT_CNTL_26"},
- {0x000286B0, 0, 0, "SPI_PS_INPUT_CNTL_27"},
- {0x000286B4, 0, 0, "SPI_PS_INPUT_CNTL_28"},
- {0x000286B8, 0, 0, "SPI_PS_INPUT_CNTL_29"},
- {0x000286BC, 0, 0, "SPI_PS_INPUT_CNTL_30"},
- {0x000286C0, 0, 0, "SPI_PS_INPUT_CNTL_31"},
- {0x000286CC, 0, 0, "SPI_PS_IN_CONTROL_0"},
- {0x000286D0, 0, 0, "SPI_PS_IN_CONTROL_1"},
- {0x000286D8, 0, 0, "SPI_INPUT_Z"},
- {0x00028840, 1, 0, "SQ_PGM_START_PS"},
- {0x00028850, 0, 0, "SQ_PGM_RESOURCES_PS"},
- {0x00028854, 0, 0, "SQ_PGM_EXPORTS_PS"},
- {0x000288CC, 0, 0, "SQ_PGM_CF_OFFSET_PS"},
-};
-
-static const struct radeon_register R600_names_VS_CBUF[] = {
- {0x00028180, 0, 0, "ALU_CONST_BUFFER_SIZE_VS_0"},
- {0x00028980, 1, 0, "ALU_CONST_CACHE_VS_0"},
-};
-
-static const struct radeon_register R600_names_PS_CBUF[] = {
- {0x00028140, 0, 0, "ALU_CONST_BUFFER_SIZE_PS_0"},
- {0x00028940, 1, 0, "ALU_CONST_CACHE_PS_0"},
-};
-
-static const struct radeon_register R600_names_PS_CONSTANT[] = {
- {0x00030000, 0, 0, "SQ_ALU_CONSTANT0_0"},
- {0x00030004, 0, 0, "SQ_ALU_CONSTANT1_0"},
- {0x00030008, 0, 0, "SQ_ALU_CONSTANT2_0"},
- {0x0003000C, 0, 0, "SQ_ALU_CONSTANT3_0"},
-};
-
-static const struct radeon_register R600_names_VS_CONSTANT[] = {
- {0x00031000, 0, 0, "SQ_ALU_CONSTANT0_256"},
- {0x00031004, 0, 0, "SQ_ALU_CONSTANT1_256"},
- {0x00031008, 0, 0, "SQ_ALU_CONSTANT2_256"},
- {0x0003100C, 0, 0, "SQ_ALU_CONSTANT3_256"},
-};
-
-static const struct radeon_register R600_names_UCP[] = {
- {0x00028E20, 0, 0, "PA_CL_UCP0_X"},
- {0x00028E24, 0, 0, "PA_CL_UCP0_Y"},
- {0x00028E28, 0, 0, "PA_CL_UCP0_Z"},
- {0x00028E2C, 0, 0, "PA_CL_UCP0_W"},
- {0x00028E30, 0, 0, "PA_CL_UCP1_X"},
- {0x00028E34, 0, 0, "PA_CL_UCP1_Y"},
- {0x00028E38, 0, 0, "PA_CL_UCP1_Z"},
- {0x00028E3C, 0, 0, "PA_CL_UCP1_W"},
- {0x00028E40, 0, 0, "PA_CL_UCP2_X"},
- {0x00028E44, 0, 0, "PA_CL_UCP2_Y"},
- {0x00028E48, 0, 0, "PA_CL_UCP2_Z"},
- {0x00028E4C, 0, 0, "PA_CL_UCP2_W"},
- {0x00028E50, 0, 0, "PA_CL_UCP3_X"},
- {0x00028E54, 0, 0, "PA_CL_UCP3_Y"},
- {0x00028E58, 0, 0, "PA_CL_UCP3_Z"},
- {0x00028E5C, 0, 0, "PA_CL_UCP3_W"},
- {0x00028E60, 0, 0, "PA_CL_UCP4_X"},
- {0x00028E64, 0, 0, "PA_CL_UCP4_Y"},
- {0x00028E68, 0, 0, "PA_CL_UCP4_Z"},
- {0x00028E6C, 0, 0, "PA_CL_UCP4_W"},
- {0x00028E70, 0, 0, "PA_CL_UCP5_X"},
- {0x00028E74, 0, 0, "PA_CL_UCP5_Y"},
- {0x00028E78, 0, 0, "PA_CL_UCP5_Z"},
- {0x00028E7C, 0, 0, "PA_CL_UCP5_W"},
-};
-
-static const struct radeon_register R600_names_PS_RESOURCE[] = {
- {0x00038000, 0, 0, "RESOURCE0_WORD0"},
- {0x00038004, 0, 0, "RESOURCE0_WORD1"},
- {0x00038008, 0, 0, "RESOURCE0_WORD2"},
- {0x0003800C, 0, 0, "RESOURCE0_WORD3"},
- {0x00038010, 0, 0, "RESOURCE0_WORD4"},
- {0x00038014, 0, 0, "RESOURCE0_WORD5"},
- {0x00038018, 0, 0, "RESOURCE0_WORD6"},
-};
-
-static const struct radeon_register R600_names_VS_RESOURCE[] = {
- {0x00039180, 0, 0, "RESOURCE160_WORD0"},
- {0x00039184, 0, 0, "RESOURCE160_WORD1"},
- {0x00039188, 0, 0, "RESOURCE160_WORD2"},
- {0x0003918C, 0, 0, "RESOURCE160_WORD3"},
- {0x00039190, 0, 0, "RESOURCE160_WORD4"},
- {0x00039194, 0, 0, "RESOURCE160_WORD5"},
- {0x00039198, 0, 0, "RESOURCE160_WORD6"},
-};
-
-static const struct radeon_register R600_names_FS_RESOURCE[] = {
- {0x0003A300, 0, 0, "RESOURCE320_WORD0"},
- {0x0003A304, 0, 0, "RESOURCE320_WORD1"},
- {0x0003A308, 0, 0, "RESOURCE320_WORD2"},
- {0x0003A30C, 0, 0, "RESOURCE320_WORD3"},
- {0x0003A310, 0, 0, "RESOURCE320_WORD4"},
- {0x0003A314, 0, 0, "RESOURCE320_WORD5"},
- {0x0003A318, 0, 0, "RESOURCE320_WORD6"},
-};
-
-static const struct radeon_register R600_names_GS_RESOURCE[] = {
- {0x0003A4C0, 0, 0, "RESOURCE336_WORD0"},
- {0x0003A4C4, 0, 0, "RESOURCE336_WORD1"},
- {0x0003A4C8, 0, 0, "RESOURCE336_WORD2"},
- {0x0003A4CC, 0, 0, "RESOURCE336_WORD3"},
- {0x0003A4D0, 0, 0, "RESOURCE336_WORD4"},
- {0x0003A4D4, 0, 0, "RESOURCE336_WORD5"},
- {0x0003A4D8, 0, 0, "RESOURCE336_WORD6"},
-};
-
-static const struct radeon_register R600_names_PS_SAMPLER[] = {
- {0x0003C000, 0, 0, "SQ_TEX_SAMPLER_WORD0_0"},
- {0x0003C004, 0, 0, "SQ_TEX_SAMPLER_WORD1_0"},
- {0x0003C008, 0, 0, "SQ_TEX_SAMPLER_WORD2_0"},
-};
-
-static const struct radeon_register R600_names_VS_SAMPLER[] = {
- {0x0003C0D8, 0, 0, "SQ_TEX_SAMPLER_WORD0_18"},
- {0x0003C0DC, 0, 0, "SQ_TEX_SAMPLER_WORD1_18"},
- {0x0003C0E0, 0, 0, "SQ_TEX_SAMPLER_WORD2_18"},
-};
-
-static const struct radeon_register R600_names_GS_SAMPLER[] = {
- {0x0003C1B0, 0, 0, "SQ_TEX_SAMPLER_WORD0_36"},
- {0x0003C1B4, 0, 0, "SQ_TEX_SAMPLER_WORD1_36"},
- {0x0003C1B8, 0, 0, "SQ_TEX_SAMPLER_WORD2_36"},
-};
-
-static const struct radeon_register R600_names_PS_SAMPLER_BORDER[] = {
- {0x0000A400, 0, 0, "TD_PS_SAMPLER0_BORDER_RED"},
- {0x0000A404, 0, 0, "TD_PS_SAMPLER0_BORDER_GREEN"},
- {0x0000A408, 0, 0, "TD_PS_SAMPLER0_BORDER_BLUE"},
- {0x0000A40C, 0, 0, "TD_PS_SAMPLER0_BORDER_ALPHA"},
-};
-
-static const struct radeon_register R600_names_VS_SAMPLER_BORDER[] = {
- {0x0000A600, 0, 0, "TD_VS_SAMPLER0_BORDER_RED"},
- {0x0000A604, 0, 0, "TD_VS_SAMPLER0_BORDER_GREEN"},
- {0x0000A608, 0, 0, "TD_VS_SAMPLER0_BORDER_BLUE"},
- {0x0000A60C, 0, 0, "TD_VS_SAMPLER0_BORDER_ALPHA"},
-};
-
-static const struct radeon_register R600_names_GS_SAMPLER_BORDER[] = {
- {0x0000A800, 0, 0, "TD_GS_SAMPLER0_BORDER_RED"},
- {0x0000A804, 0, 0, "TD_GS_SAMPLER0_BORDER_GREEN"},
- {0x0000A808, 0, 0, "TD_GS_SAMPLER0_BORDER_BLUE"},
- {0x0000A80C, 0, 0, "TD_GS_SAMPLER0_BORDER_ALPHA"},
-};
-
-static const struct radeon_register R600_names_CB0[] = {
- {0x00028040, 1, 0, "CB_COLOR0_BASE"},
- {0x000280A0, 1, 0, "CB_COLOR0_INFO"},
- {0x00028060, 0, 0, "CB_COLOR0_SIZE"},
- {0x00028080, 0, 0, "CB_COLOR0_VIEW"},
- {0x000280E0, 1, 0, "CB_COLOR0_FRAG"},
- {0x000280C0, 1, 0, "CB_COLOR0_TILE"},
- {0x00028100, 0, 0, "CB_COLOR0_MASK"},
-};
-
-static const struct radeon_register R600_names_CB1[] = {
- {0x00028044, 1, 0, "CB_COLOR1_BASE"},
- {0x000280A4, 1, 0, "CB_COLOR1_INFO"},
- {0x00028064, 0, 0, "CB_COLOR1_SIZE"},
- {0x00028084, 0, 0, "CB_COLOR1_VIEW"},
- {0x000280E4, 1, 0, "CB_COLOR1_FRAG"},
- {0x000280C4, 1, 0, "CB_COLOR1_TILE"},
- {0x00028104, 0, 0, "CB_COLOR1_MASK"},
-};
-
-static const struct radeon_register R600_names_CB2[] = {
- {0x00028048, 1, 0, "CB_COLOR2_BASE"},
- {0x000280A8, 1, 0, "CB_COLOR2_INFO"},
- {0x00028068, 0, 0, "CB_COLOR2_SIZE"},
- {0x00028088, 0, 0, "CB_COLOR2_VIEW"},
- {0x000280E8, 1, 0, "CB_COLOR2_FRAG"},
- {0x000280C8, 1, 0, "CB_COLOR2_TILE"},
- {0x00028108, 0, 0, "CB_COLOR2_MASK"},
-};
-
-static const struct radeon_register R600_names_CB3[] = {
- {0x0002804C, 1, 0, "CB_COLOR3_BASE"},
- {0x000280AC, 1, 0, "CB_COLOR3_INFO"},
- {0x0002806C, 0, 0, "CB_COLOR3_SIZE"},
- {0x0002808C, 0, 0, "CB_COLOR3_VIEW"},
- {0x000280EC, 1, 0, "CB_COLOR3_FRAG"},
- {0x000280CC, 1, 0, "CB_COLOR3_TILE"},
- {0x0002810C, 0, 0, "CB_COLOR3_MASK"},
-};
-
-static const struct radeon_register R600_names_CB4[] = {
- {0x00028050, 1, 0, "CB_COLOR4_BASE"},
- {0x000280B0, 1, 0, "CB_COLOR4_INFO"},
- {0x00028070, 0, 0, "CB_COLOR4_SIZE"},
- {0x00028090, 0, 0, "CB_COLOR4_VIEW"},
- {0x000280F0, 1, 0, "CB_COLOR4_FRAG"},
- {0x000280D0, 1, 0, "CB_COLOR4_TILE"},
- {0x00028110, 0, 0, "CB_COLOR4_MASK"},
-};
-
-static const struct radeon_register R600_names_CB5[] = {
- {0x00028054, 1, 0, "CB_COLOR5_BASE"},
- {0x000280B4, 1, 0, "CB_COLOR5_INFO"},
- {0x00028074, 0, 0, "CB_COLOR5_SIZE"},
- {0x00028094, 0, 0, "CB_COLOR5_VIEW"},
- {0x000280F4, 1, 0, "CB_COLOR5_FRAG"},
- {0x000280D4, 1, 0, "CB_COLOR5_TILE"},
- {0x00028114, 0, 0, "CB_COLOR5_MASK"},
-};
-
-static const struct radeon_register R600_names_CB6[] = {
- {0x00028058, 1, 0, "CB_COLOR6_BASE"},
- {0x000280B8, 1, 0, "CB_COLOR6_INFO"},
- {0x00028078, 0, 0, "CB_COLOR6_SIZE"},
- {0x00028098, 0, 0, "CB_COLOR6_VIEW"},
- {0x000280F8, 1, 0, "CB_COLOR6_FRAG"},
- {0x000280D8, 1, 0, "CB_COLOR6_TILE"},
- {0x00028118, 0, 0, "CB_COLOR6_MASK"},
-};
-
-static const struct radeon_register R600_names_CB7[] = {
- {0x0002805C, 1, 0, "CB_COLOR7_BASE"},
- {0x000280BC, 1, 0, "CB_COLOR7_INFO"},
- {0x0002807C, 0, 0, "CB_COLOR7_SIZE"},
- {0x0002809C, 0, 0, "CB_COLOR7_VIEW"},
- {0x000280FC, 1, 0, "CB_COLOR7_FRAG"},
- {0x000280DC, 1, 0, "CB_COLOR7_TILE"},
- {0x0002811C, 0, 0, "CB_COLOR7_MASK"},
-};
-
-static const struct radeon_register R600_names_DB[] = {
- {0x0002800C, 1, 0, "DB_DEPTH_BASE"},
- {0x00028000, 0, 0, "DB_DEPTH_SIZE"},
- {0x00028004, 0, 0, "DB_DEPTH_VIEW"},
- {0x00028010, 1, 0, "DB_DEPTH_INFO"},
- {0x00028D24, 0, 0, "DB_HTILE_SURFACE"},
- {0x00028D34, 0, 0, "DB_PREFETCH_LIMIT"},
-};
-
-static const struct radeon_register R600_names_VGT[] = {
- {0x00008958, 0, 0, "VGT_PRIMITIVE_TYPE"},
- {0x00028400, 0, 0, "VGT_MAX_VTX_INDX"},
- {0x00028404, 0, 0, "VGT_MIN_VTX_INDX"},
- {0x00028408, 0, 0, "VGT_INDX_OFFSET"},
- {0x0002840C, 0, 0, "VGT_MULTI_PRIM_IB_RESET_INDX"},
- {0x00028A7C, 0, 0, "VGT_DMA_INDEX_TYPE"},
- {0x00028A84, 0, 0, "VGT_PRIMITIVEID_EN"},
- {0x00028A88, 0, 0, "VGT_DMA_NUM_INSTANCES"},
- {0x00028A94, 0, 0, "VGT_MULTI_PRIM_IB_RESET_EN"},
- {0x00028AA0, 0, 0, "VGT_INSTANCE_STEP_RATE_0"},
- {0x00028AA4, 0, 0, "VGT_INSTANCE_STEP_RATE_1"},
-};
-
-static const struct radeon_register R600_names_DRAW[] = {
- {0x00008970, 0, 0, "VGT_NUM_INDICES"},
- {0x000287E4, 0, 0, "VGT_DMA_BASE_HI"},
- {0x000287E8, 1, 0, "VGT_DMA_BASE"},
- {0x000287F0, 0, 0, "VGT_DRAW_INITIATOR"},
-};
-
-static const struct radeon_register R600_names_VGT_EVENT[] = {
- {0x00028A90, 1, 0, "VGT_EVENT_INITIATOR"},
-};
-
-static const struct radeon_register R600_names_CB_FLUSH[] = {
-};
-
-static const struct radeon_register R600_names_DB_FLUSH[] = {
-};
-
-#endif
diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h
index fcce2934d3..d91f7737af 100644
--- a/src/gallium/winsys/r600/drm/r600d.h
+++ b/src/gallium/winsys/r600/drm/r600d.h
@@ -91,6 +91,7 @@
#define PKT3_SET_CTL_CONST 0x6F
#define PKT3_SURFACE_BASE_UPDATE 0x73
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE 0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
@@ -2200,4 +2201,12 @@
#define R_038014_RESOURCE0_WORD5 0x038014
#define R_038018_RESOURCE0_WORD6 0x038018
+#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140
+#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180
+#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940
+#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980
+
+#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0
+#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4
+
#endif
diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c
deleted file mode 100644
index f7e3e354de..0000000000
--- a/src/gallium/winsys/r600/drm/radeon.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "xf86drm.h"
-#include "radeon_priv.h"
-#include "radeon_drm.h"
-
-enum radeon_family radeon_get_family(struct radeon *radeon)
-{
- return radeon->family;
-}
-
-enum chip_class radeon_get_family_class(struct radeon *radeon)
-{
- return radeon->chip_class;
-}
-
-void radeon_set_mem_constant(struct radeon *radeon, boolean state)
-{
- radeon->use_mem_constant = state;
-}
-
-static int radeon_get_device(struct radeon *radeon)
-{
- struct drm_radeon_info info;
- int r;
-
- radeon->device = 0;
- info.request = RADEON_INFO_DEVICE_ID;
- info.value = (uintptr_t)&radeon->device;
- r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
- sizeof(struct drm_radeon_info));
- return r;
-}
-
-struct radeon *radeon_new(int fd, unsigned device)
-{
- struct radeon *radeon;
- int r;
-
- radeon = calloc(1, sizeof(*radeon));
- if (radeon == NULL) {
- return NULL;
- }
- radeon->fd = fd;
- radeon->device = device;
- radeon->refcount = 1;
- if (fd >= 0) {
- r = radeon_get_device(radeon);
- if (r) {
- fprintf(stderr, "Failed to get device id\n");
- return radeon_decref(radeon);
- }
- }
- radeon->family = radeon_family_from_device(radeon->device);
- if (radeon->family == CHIP_UNKNOWN) {
- fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device);
- return radeon_decref(radeon);
- }
- switch (radeon->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- case CHIP_CEDAR:
- case CHIP_REDWOOD:
- case CHIP_JUNIPER:
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- if (r600_init(radeon)) {
- return radeon_decref(radeon);
- }
- break;
- case CHIP_R100:
- case CHIP_RV100:
- case CHIP_RS100:
- case CHIP_RV200:
- case CHIP_RS200:
- case CHIP_R200:
- case CHIP_RV250:
- case CHIP_RS300:
- case CHIP_RV280:
- case CHIP_R300:
- case CHIP_R350:
- case CHIP_RV350:
- case CHIP_RV380:
- case CHIP_R420:
- case CHIP_R423:
- case CHIP_RV410:
- case CHIP_RS400:
- case CHIP_RS480:
- case CHIP_RS600:
- case CHIP_RS690:
- case CHIP_RS740:
- case CHIP_RV515:
- case CHIP_R520:
- case CHIP_RV530:
- case CHIP_RV560:
- case CHIP_RV570:
- case CHIP_R580:
- default:
- fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
- __func__, radeon->device);
- break;
- }
-
- /* setup class */
- switch (radeon->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- radeon->chip_class = R600;
- break;
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- radeon->chip_class = R700;
- break;
- case CHIP_CEDAR:
- case CHIP_REDWOOD:
- case CHIP_JUNIPER:
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- radeon->chip_class = EVERGREEN;
- break;
- default:
- fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
- __func__, radeon->device);
- break;
- }
-
- radeon->mman = pb_malloc_bufmgr_create();
- if (!radeon->mman)
- return NULL;
- radeon->kman = radeon_bo_pbmgr_create(radeon);
- if (!radeon->kman)
- return NULL;
- radeon->cman = pb_cache_manager_create(radeon->kman, 100000);
- if (!radeon->cman)
- return NULL;
- return radeon;
-}
-
-struct radeon *radeon_incref(struct radeon *radeon)
-{
- if (radeon == NULL)
- return NULL;
- radeon->refcount++;
- return radeon;
-}
-
-struct radeon *radeon_decref(struct radeon *radeon)
-{
- if (radeon == NULL)
- return NULL;
- if (--radeon->refcount > 0) {
- return NULL;
- }
-
- radeon->mman->destroy(radeon->mman);
- radeon->cman->destroy(radeon->cman);
- radeon->kman->destroy(radeon->kman);
- drmClose(radeon->fd);
- free(radeon);
- return NULL;
-}
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 51ce864974..9d664b7e53 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -29,10 +29,45 @@
#include <string.h>
#include <sys/mman.h>
#include <errno.h>
-#include "radeon_priv.h"
+#include "r600_priv.h"
#include "xf86drm.h"
#include "radeon_drm.h"
+static int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo)
+{
+ struct drm_radeon_gem_mmap args;
+ void *ptr;
+ int r;
+
+ /* Zero out args to make valgrind happy */
+ memset(&args, 0, sizeof(args));
+ args.handle = bo->handle;
+ args.offset = 0;
+ args.size = (uint64_t)bo->size;
+ r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP,
+ &args, sizeof(args));
+ if (r) {
+ fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n",
+ bo, bo->handle, r);
+ return r;
+ }
+ ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr);
+ if (ptr == MAP_FAILED) {
+ fprintf(stderr, "%s failed to map bo\n", __func__);
+ return -errno;
+ }
+ bo->data = ptr;
+
+ bo->map_count++;
+ return 0;
+}
+
+static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo)
+{
+ munmap(bo->data, bo->size);
+ bo->data = NULL;
+}
+
struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
unsigned size, unsigned alignment, void *ptr)
{
@@ -60,6 +95,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
}
bo->handle = open_arg.handle;
bo->size = open_arg.size;
+ bo->shared = TRUE;
} else {
struct drm_radeon_gem_create args;
@@ -79,65 +115,24 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
return NULL;
}
}
+ if (radeon_bo_fixed_map(radeon, bo)) {
+ R600_ERR("failed to map bo\n");
+ radeon_bo_reference(radeon, &bo, NULL);
+ return bo;
+ }
if (ptr) {
- if (radeon_bo_map(radeon, bo)) {
- fprintf(stderr, "%s failed to copy data into bo\n", __func__);
- radeon_bo_reference(radeon, &bo, NULL);
- return bo;
- }
memcpy(bo->data, ptr, size);
- radeon_bo_unmap(radeon, bo);
}
+ LIST_INITHEAD(&bo->fencedlist);
return bo;
}
-int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo)
-{
- struct drm_radeon_gem_mmap args;
- void *ptr;
- int r;
-
- if (bo->map_count != 0) {
- goto success;
- }
- /* Zero out args to make valgrind happy */
- memset(&args, 0, sizeof(args));
- args.handle = bo->handle;
- args.offset = 0;
- args.size = (uint64_t)bo->size;
- r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP,
- &args, sizeof(args));
- if (r) {
- fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n",
- bo, bo->handle, r);
- return r;
- }
- ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr);
- if (ptr == MAP_FAILED) {
- fprintf(stderr, "%s failed to map bo\n", __func__);
- return -errno;
- }
- bo->data = ptr;
-
-success:
- bo->map_count++;
-
- return 0;
-}
-
-void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo)
-{
- if (--bo->map_count > 0) {
- return;
- }
- munmap(bo->data, bo->size);
- bo->data = NULL;
-}
-
static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo)
{
struct drm_gem_close args;
+ LIST_DEL(&bo->fencedlist);
+ radeon_bo_fixed_unmap(radeon, bo);
memset(&args, 0, sizeof(args));
args.handle = bo->handle;
drmIoctl(radeon->fd, DRM_IOCTL_GEM_CLOSE, &args);
@@ -161,6 +156,15 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo)
struct drm_radeon_gem_wait_idle args;
int ret;
+ if (!bo->fence && !bo->shared)
+ return 0;
+
+ if (bo->fence <= *bo->ctx->cfence) {
+ LIST_DELINIT(&bo->fencedlist);
+ bo->fence = 0;
+ return 0;
+ }
+
/* Zero out args to make valgrind happy */
memset(&args, 0, sizeof(args));
args.handle = bo->handle;
@@ -173,16 +177,45 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo)
int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain)
{
- struct drm_radeon_gem_busy args;
- int ret;
+ struct drm_radeon_gem_busy args;
+ int ret;
- memset(&args, 0, sizeof(args));
- args.handle = bo->handle;
- args.domain = 0;
+ if (!bo->shared) {
+ if (!bo->fence)
+ return 0;
+ if (bo->fence <= *bo->ctx->cfence) {
+ LIST_DELINIT(&bo->fencedlist);
+ bo->fence = 0;
+ return 0;
+ }
+ }
- ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY,
- &args, sizeof(args));
+ memset(&args, 0, sizeof(args));
+ args.handle = bo->handle;
+ args.domain = 0;
+
+ ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY,
+ &args, sizeof(args));
+
+ *domain = args.domain;
+ return ret;
+}
- *domain = args.domain;
- return ret;
+int radeon_bo_get_tiling_flags(struct radeon *radeon,
+ struct radeon_bo *bo,
+ uint32_t *tiling_flags,
+ uint32_t *pitch)
+{
+ struct drm_radeon_gem_get_tiling args;
+ int ret;
+
+ args.handle = bo->handle;
+ ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING,
+ &args, sizeof(args));
+ if (ret)
+ return ret;
+
+ *tiling_flags = args.tiling_flags;
+ *pitch = args.pitch;
+ return ret;
}
diff --git a/src/gallium/winsys/r600/drm/radeon_bo_pb.c b/src/gallium/winsys/r600/drm/radeon_bo_pb.c
index aac3d7b604..a3452027f2 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo_pb.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo_pb.c
@@ -1,10 +1,34 @@
-#include "radeon_priv.h"
-
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "util/u_double_list.h"
-#include "pipebuffer/pb_buffer.h"
-#include "pipebuffer/pb_bufmgr.h"
+/*
+ * Copyright 2010 Dave Airlie
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Dave Airlie
+ */
+#include <util/u_inlines.h>
+#include <util/u_memory.h>
+#include <util/u_double_list.h>
+#include <pipebuffer/pb_buffer.h>
+#include <pipebuffer/pb_bufmgr.h>
+#include "r600_priv.h"
struct radeon_bo_pb {
struct pb_buffer b;
@@ -76,6 +100,10 @@ radeon_bo_pb_map_internal(struct pb_buffer *_buf,
uint32_t domain;
if (radeon_bo_busy(buf->mgr->radeon, buf->bo, &domain))
return NULL;
+ if (radeon_bo_map(buf->mgr->radeon, buf->bo)) {
+ return NULL;
+ }
+ goto out;
}
if (buf->bo->data != NULL) {
@@ -91,7 +119,7 @@ radeon_bo_pb_map_internal(struct pb_buffer *_buf,
return NULL;
}
}
-
+out:
LIST_DELINIT(&buf->maplist);
return buf->bo->data;
}
diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c
deleted file mode 100644
index 7ccb524590..0000000000
--- a/src/gallium/winsys/r600/drm/radeon_ctx.c
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "radeon_priv.h"
-#include "radeon_drm.h"
-#include "bof.h"
-
-static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_ws_bo *bo)
-{
- if (ctx->nbo >= RADEON_CTX_MAX_PM4)
- return -EBUSY;
- /* take a reference to the kernel bo */
- radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->nbo], radeon_bo_pb_get_bo(bo->pb));
- ctx->nbo++;
- return 0;
-}
-
-static void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement)
-{
- struct radeon_cs_reloc *greloc;
- unsigned i;
-
- placement[0] = 0;
- placement[1] = 0;
- greloc = (void *)(((u8 *)ctx->reloc) + reloc * 4);
- for (i = 0; i < ctx->nbo; i++) {
- if (ctx->bo[i]->handle == greloc->handle) {
- placement[0] = greloc->read_domain | greloc->write_domain;
- placement[1] = placement[0];
- return;
- }
- }
-}
-
-void radeon_ctx_clear(struct radeon_ctx *ctx)
-{
- for (int i = 0; i < ctx->nbo; i++) {
- radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
- }
- ctx->ndwords = RADEON_CTX_MAX_PM4;
- ctx->cdwords = 0;
- ctx->nreloc = 0;
- ctx->nbo = 0;
-}
-
-struct radeon_ctx *radeon_ctx_init(struct radeon *radeon)
-{
- struct radeon_ctx *ctx;
- if (radeon == NULL)
- return NULL;
- ctx = calloc(1, sizeof(struct radeon_ctx));
- ctx->radeon = radeon_incref(radeon);
- radeon_ctx_clear(ctx);
- ctx->pm4 = malloc(RADEON_CTX_MAX_PM4 * 4);
- if (ctx->pm4 == NULL) {
- radeon_ctx_fini(ctx);
- return NULL;
- }
- ctx->reloc = malloc(sizeof(struct radeon_cs_reloc) * RADEON_CTX_MAX_PM4);
- if (ctx->reloc == NULL) {
- radeon_ctx_fini(ctx);
- return NULL;
- }
- ctx->bo = calloc(sizeof(void *), RADEON_CTX_MAX_PM4);
- if (ctx->bo == NULL) {
- radeon_ctx_fini(ctx);
- return NULL;
- }
- return ctx;
-}
-
-void radeon_ctx_fini(struct radeon_ctx *ctx)
-{
- unsigned i;
-
- if (ctx == NULL)
- return;
-
- for (i = 0; i < ctx->nbo; i++) {
- radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
- }
- ctx->radeon = radeon_decref(ctx->radeon);
- free(ctx->bo);
- free(ctx->pm4);
- free(ctx->reloc);
- free(ctx);
-}
-
-static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state)
-{
- unsigned i, j;
- int r;
- struct radeon_bo *state_bo;
- if (state == NULL)
- return 0;
- for (i = 0; i < state->nbo; i++) {
- for (j = 0; j < ctx->nbo; j++) {
- state_bo = radeon_bo_pb_get_bo(state->bo[i]->pb);
- if (state_bo == ctx->bo[j])
- break;
- }
- if (j == ctx->nbo) {
- r = radeon_ctx_set_bo_new(ctx, state->bo[i]);
- if (r)
- return r;
- }
- }
- return 0;
-}
-
-
-int radeon_ctx_submit(struct radeon_ctx *ctx)
-{
- struct drm_radeon_cs drmib;
- struct drm_radeon_cs_chunk chunks[2];
- uint64_t chunk_array[2];
- int r = 0;
-
- if (!ctx->cdwords)
- return 0;
-
- radeon_bo_pbmgr_flush_maps(ctx->radeon->kman);
-#if 0
- for (r = 0; r < ctx->cdwords; r++) {
- fprintf(stderr, "0x%08X\n", ctx->pm4[r]);
- }
-#endif
- drmib.num_chunks = 2;
- drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
- chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
- chunks[0].length_dw = ctx->cdwords;
- chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
- chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
- chunks[1].length_dw = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4;
- chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
- chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
- chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
-#if 1
- r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
- sizeof(struct drm_radeon_cs));
-#endif
- return r;
-}
-
-static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_ws_bo *bo,
- unsigned id, unsigned *placement)
-{
- unsigned i;
- unsigned bo_handle = radeon_ws_bo_get_handle(bo);
-
- for (i = 0; i < ctx->nreloc; i++) {
- if (ctx->reloc[i].handle == bo_handle) {
- ctx->pm4[id] = i * sizeof(struct radeon_cs_reloc) / 4;
- return 0;
- }
- }
- if (ctx->nreloc >= RADEON_CTX_MAX_PM4) {
- return -EBUSY;
- }
- ctx->reloc[ctx->nreloc].handle = bo_handle;
- ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1];
- ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1];
- ctx->reloc[ctx->nreloc].flags = 0;
- ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4;
- ctx->nreloc++;
- return 0;
-}
-
-static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state *state)
-{
- unsigned i, rid, bid, cid;
- int r;
-
- if (state == NULL)
- return 0;
- if (state->cpm4 > ctx->ndwords) {
- return -EBUSY;
- }
- memcpy(&ctx->pm4[ctx->cdwords], state->pm4, state->cpm4 * 4);
- for (i = 0; i < state->nreloc; i++) {
- rid = state->reloc_pm4_id[i];
- bid = state->reloc_bo_id[i];
- cid = ctx->cdwords + rid;
- r = radeon_ctx_reloc(ctx, state->bo[bid], cid,
- &state->placement[bid * 2]);
- if (r) {
- fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->stype->stype);
- return r;
- }
- }
- ctx->cdwords += state->cpm4;
- ctx->ndwords -= state->cpm4;
- return 0;
-}
-
-int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state)
-{
- int r = 0;
-
- /* !!! ONLY ACCEPT QUERY STATE HERE !!! */
- r = radeon_state_pm4(state);
- if (r)
- return r;
- /* BEGIN/END query are balanced in the same cs so account for END
- * END query when scheduling BEGIN query
- */
- switch (state->stype->stype) {
- case R600_STATE_QUERY_BEGIN:
- /* is there enough place for begin & end */
- if ((state->cpm4 * 2) > ctx->ndwords)
- return -EBUSY;
- ctx->ndwords -= state->cpm4;
- break;
- case R600_STATE_QUERY_END:
- ctx->ndwords += state->cpm4;
- break;
- default:
- return -EINVAL;
- }
- return radeon_ctx_state_schedule(ctx, state);
-}
-
-int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw)
-{
- unsigned previous_cdwords;
- int r = 0;
- int i;
-
- for (i = 0; i < ctx->radeon->max_states; i++) {
- r = radeon_ctx_state_bo(ctx, draw->state[i]);
- if (r)
- return r;
- }
- previous_cdwords = ctx->cdwords;
- for (i = 0; i < ctx->radeon->max_states; i++) {
- if (draw->state[i]) {
- r = radeon_ctx_state_schedule(ctx, draw->state[i]);
- if (r) {
- ctx->cdwords = previous_cdwords;
- return r;
- }
- }
- }
-
- return 0;
-}
-
-#if 0
-int radeon_ctx_pm4(struct radeon_ctx *ctx)
-{
- unsigned i;
- int r;
-
- free(ctx->pm4);
- ctx->cpm4 = 0;
- ctx->pm4 = malloc(ctx->draw_cpm4 * 4);
- if (ctx->pm4 == NULL)
- return -EINVAL;
- for (i = 0, ctx->id = 0; i < ctx->nstate; i++) {
- }
- if (ctx->id != ctx->draw_cpm4) {
- fprintf(stderr, "%s miss predicted pm4 size %d for %d\n",
- __func__, ctx->draw_cpm4, ctx->id);
- return -EINVAL;
- }
- ctx->cpm4 = ctx->draw_cpm4;
- return 0;
-}
-#endif
-
-void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file)
-{
- bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
- unsigned i;
- unsigned bo_size;
- root = device_id = bcs = blob = array = bo = size = handle = NULL;
- root = bof_object();
- if (root == NULL)
- goto out_err;
- device_id = bof_int32(ctx->radeon->device);
- if (device_id == NULL)
- return;
- if (bof_object_set(root, "device_id", device_id))
- goto out_err;
- bof_decref(device_id);
- device_id = NULL;
- /* dump relocs */
- blob = bof_blob(ctx->nreloc * 16, ctx->reloc);
- if (blob == NULL)
- goto out_err;
- if (bof_object_set(root, "reloc", blob))
- goto out_err;
- bof_decref(blob);
- blob = NULL;
- /* dump cs */
- blob = bof_blob(ctx->cdwords * 4, ctx->pm4);
- if (blob == NULL)
- goto out_err;
- if (bof_object_set(root, "pm4", blob))
- goto out_err;
- bof_decref(blob);
- blob = NULL;
- /* dump bo */
- array = bof_array();
- if (array == NULL)
- goto out_err;
- for (i = 0; i < ctx->nbo; i++) {
- bo = bof_object();
- if (bo == NULL)
- goto out_err;
- bo_size = ctx->bo[i]->size;
- size = bof_int32(bo_size);
- if (size == NULL)
- goto out_err;
- if (bof_object_set(bo, "size", size))
- goto out_err;
- bof_decref(size);
- size = NULL;
- handle = bof_int32(ctx->bo[i]->handle);
- if (handle == NULL)
- goto out_err;
- if (bof_object_set(bo, "handle", handle))
- goto out_err;
- bof_decref(handle);
- handle = NULL;
- radeon_bo_map(ctx->radeon, ctx->bo[i]);
- blob = bof_blob(bo_size, ctx->bo[i]->data);
- radeon_bo_unmap(ctx->radeon, ctx->bo[i]);
- if (blob == NULL)
- goto out_err;
- if (bof_object_set(bo, "data", blob))
- goto out_err;
- bof_decref(blob);
- blob = NULL;
- if (bof_array_append(array, bo))
- goto out_err;
- bof_decref(bo);
- bo = NULL;
- }
- if (bof_object_set(root, "bo", array))
- goto out_err;
- bof_dump_file(root, file);
-out_err:
- bof_decref(blob);
- bof_decref(array);
- bof_decref(bo);
- bof_decref(size);
- bof_decref(handle);
- bof_decref(device_id);
- bof_decref(root);
-}
diff --git a/src/gallium/winsys/r600/drm/radeon_draw.c b/src/gallium/winsys/r600/drm/radeon_draw.c
deleted file mode 100644
index a126901495..0000000000
--- a/src/gallium/winsys/r600/drm/radeon_draw.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include "radeon_priv.h"
-
-/*
- * draw functions
- */
-int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon)
-{
- draw->radeon = radeon;
- draw->state = calloc(radeon->max_states, sizeof(void*));
- if (draw->state == NULL)
- return -ENOMEM;
- return 0;
-}
-
-void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state)
-{
- if (state == NULL)
- return;
- draw->state[state->state_id] = state;
-}
-
-void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state)
-{
- if (state == NULL)
- return;
- if (draw->state[state->state_id] == state) {
- draw->state[state->state_id] = NULL;
- }
-}
diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c
index dd6156d585..08cc1c41e3 100644
--- a/src/gallium/winsys/r600/drm/radeon_pciid.c
+++ b/src/gallium/winsys/r600/drm/radeon_pciid.c
@@ -24,7 +24,7 @@
* Jerome Glisse
*/
#include <stdlib.h>
-#include "radeon_priv.h"
+#include "r600.h"
struct pci_id {
unsigned vendor;
diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h
deleted file mode 100644
index e780cfd96a..0000000000
--- a/src/gallium/winsys/r600/drm/radeon_priv.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-#ifndef RADEON_PRIV_H
-#define RADEON_PRIV_H
-
-#include <stdint.h>
-#include "xf86drm.h"
-#include "xf86drmMode.h"
-#include <errno.h>
-#include "radeon.h"
-
-#include "pipe/p_compiler.h"
-#include "util/u_inlines.h"
-#include "pipe/p_defines.h"
-
-struct radeon;
-struct radeon_ctx;
-
-
-/*
- * radeon functions
- */
-typedef int (*radeon_state_pm4_t)(struct radeon_state *state);
-struct radeon_register {
- unsigned offset;
- unsigned need_reloc;
- unsigned bo_id;
- char name[64];
-};
-
-struct radeon_bo {
- struct pipe_reference reference;
- unsigned handle;
- unsigned size;
- unsigned alignment;
- unsigned map_count;
- void *data;
-};
-
-struct radeon_sub_type {
- int shader_type;
- const struct radeon_register *regs;
- unsigned nstates;
-};
-
-struct radeon_stype_info {
- unsigned stype;
- unsigned num;
- unsigned stride;
- radeon_state_pm4_t pm4;
- struct radeon_sub_type reginfo[R600_SHADER_MAX];
- unsigned base_id;
- unsigned npm4;
-};
-
-struct radeon_ctx {
- struct radeon *radeon;
- u32 *pm4;
- int cdwords;
- int ndwords;
- unsigned nreloc;
- struct radeon_cs_reloc *reloc;
- unsigned nbo;
- struct radeon_bo **bo;
-};
-
-struct radeon {
- int fd;
- int refcount;
- unsigned device;
- unsigned family;
- enum chip_class chip_class;
- boolean use_mem_constant; /* true for evergreen */
- unsigned nstype;
- struct radeon_stype_info *stype;
- unsigned max_states;
- struct pb_manager *mman; /* malloc manager */
- struct pb_manager *kman; /* kernel bo manager */
- struct pb_manager *cman; /* cached bo manager */
-};
-
-struct radeon_ws_bo {
- struct pipe_reference reference;
- struct pb_buffer *pb;
-};
-
-extern struct radeon *radeon_new(int fd, unsigned device);
-extern struct radeon *radeon_incref(struct radeon *radeon);
-extern struct radeon *radeon_decref(struct radeon *radeon);
-extern unsigned radeon_family_from_device(unsigned device);
-extern int radeon_is_family_compatible(unsigned family1, unsigned family2);
-
-/*
- * r600/r700 context functions
- */
-extern int r600_init(struct radeon *radeon);
-extern int r600_ctx_draw(struct radeon_ctx *ctx);
-extern int r600_ctx_next_reloc(struct radeon_ctx *ctx, unsigned *reloc);
-
-/*
- * radeon state functions
- */
-extern u32 radeon_state_register_get(struct radeon_state *state, unsigned offset);
-extern int radeon_state_register_set(struct radeon_state *state, unsigned offset, u32 value);
-extern struct radeon_state *radeon_state_duplicate(struct radeon_state *state);
-extern int radeon_state_replace_always(struct radeon_state *ostate, struct radeon_state *nstate);
-extern int radeon_state_pm4_generic(struct radeon_state *state);
-extern int radeon_state_reloc(struct radeon_state *state, unsigned id, unsigned bo_id);
-
-/*
- * radeon draw functions
- */
-extern int radeon_draw_pm4(struct radeon_draw *draw);
-
-/* ws bo winsys only */
-unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *bo);
-unsigned radeon_ws_bo_get_size(struct radeon_ws_bo *bo);
-
-/* bo */
-struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
- unsigned size, unsigned alignment, void *ptr);
-int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo);
-void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo);
-void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
- struct radeon_bo *src);
-int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
-int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain);
-
-/* pipebuffer kernel bo manager */
-struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon);
-struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
-void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr);
-struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr,
- uint32_t handle);
-
-#endif
diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c
deleted file mode 100644
index c7aa73c8d4..0000000000
--- a/src/gallium/winsys/r600/drm/radeon_state.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include "radeon_priv.h"
-
-/*
- * state core functions
- */
-int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 stype, u32 id, u32 shader_type)
-{
- struct radeon_stype_info *found = NULL;
- int i, j, shader_index = -1;
-
- /* traverse the stype array */
- for (i = 0; i < radeon->nstype; i++) {
- /* if the type doesn't match, if the shader doesn't match */
- if (stype != radeon->stype[i].stype)
- continue;
- if (shader_type) {
- for (j = 0; j < 4; j++) {
- if (radeon->stype[i].reginfo[j].shader_type == shader_type) {
- shader_index = j;
- break;
- }
- }
- if (shader_index == -1)
- continue;
- } else {
- if (radeon->stype[i].reginfo[0].shader_type)
- continue;
- else
- shader_index = 0;
- }
- if (id > radeon->stype[i].num)
- continue;
-
- found = &radeon->stype[i];
- break;
- }
-
- if (!found) {
- fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type);
- return -EINVAL;
- }
-
- memset(state, 0, sizeof(struct radeon_state));
- state->stype = found;
- state->state_id = state->stype->num * shader_index + state->stype->base_id + id;
- state->radeon = radeon;
- state->id = id;
- state->shader_index = shader_index;
- state->refcount = 1;
- state->npm4 = found->npm4;
- state->nstates = found->reginfo[shader_index].nstates;
- return 0;
-}
-
-int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type)
-{
- struct radeon_stype_info *found = NULL;
- int i, j, shader_index = -1;
-
- if (state == NULL)
- return 0;
- /* traverse the stype array */
- for (i = 0; i < state->radeon->nstype; i++) {
- /* if the type doesn't match, if the shader doesn't match */
- if (stype != state->radeon->stype[i].stype)
- continue;
- if (shader_type) {
- for (j = 0; j < 4; j++) {
- if (state->radeon->stype[i].reginfo[j].shader_type == shader_type) {
- shader_index = j;
- break;
- }
- }
- if (shader_index == -1)
- continue;
- } else {
- if (state->radeon->stype[i].reginfo[0].shader_type)
- continue;
- else
- shader_index = 0;
- }
- if (id > state->radeon->stype[i].num)
- continue;
-
- found = &state->radeon->stype[i];
- break;
- }
-
- if (!found) {
- fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type);
- return -EINVAL;
- }
-
- if (found->reginfo[shader_index].nstates != state->nstates) {
- fprintf(stderr, "invalid type change from (%d %d %d) to (%d %d %d)\n",
- state->stype->stype, state->id, state->shader_index, stype, id, shader_index);
- }
-
- state->stype = found;
- state->id = id;
- state->shader_index = shader_index;
- state->state_id = state->stype->num * shader_index + state->stype->base_id + id;
- return radeon_state_pm4(state);
-}
-
-void radeon_state_fini(struct radeon_state *state)
-{
- unsigned i;
-
- if (state == NULL)
- return;
- for (i = 0; i < state->nbo; i++) {
- radeon_ws_bo_reference(state->radeon, &state->bo[i], NULL);
- }
- memset(state, 0, sizeof(struct radeon_state));
-}
-
-int radeon_state_replace_always(struct radeon_state *ostate,
- struct radeon_state *nstate)
-{
- return 1;
-}
-
-int radeon_state_pm4_generic(struct radeon_state *state)
-{
- return -EINVAL;
-}
-
-static u32 crc32(void *d, size_t len)
-{
- u16 *data = (uint16_t*)d;
- u32 sum1 = 0xffff, sum2 = 0xffff;
-
- len = len >> 1;
- while (len) {
- unsigned tlen = len > 360 ? 360 : len;
- len -= tlen;
- do {
- sum1 += *data++;
- sum2 += sum1;
- } while (--tlen);
- sum1 = (sum1 & 0xffff) + (sum1 >> 16);
- sum2 = (sum2 & 0xffff) + (sum2 >> 16);
- }
- /* Second reduction step to reduce sums to 16 bits */
- sum1 = (sum1 & 0xffff) + (sum1 >> 16);
- sum2 = (sum2 & 0xffff) + (sum2 >> 16);
- return sum2 << 16 | sum1;
-}
-
-int radeon_state_pm4(struct radeon_state *state)
-{
- int r;
-
- if (state == NULL)
- return 0;
- state->cpm4 = 0;
- r = state->stype->pm4(state);
- if (r) {
- fprintf(stderr, "%s failed to build PM4 for state(%d %d)\n",
- __func__, state->stype->stype, state->id);
- return r;
- }
- state->pm4_crc = crc32(state->pm4, state->cpm4 * 4);
- return 0;
-}
-
-int radeon_state_reloc(struct radeon_state *state, unsigned id, unsigned bo_id)
-{
- state->reloc_pm4_id[state->nreloc] = id;
- state->reloc_bo_id[state->nreloc] = bo_id;
- state->nreloc++;
- return 0;
-}
diff --git a/src/gallium/winsys/r600/drm/radeon_ws_bo.c b/src/gallium/winsys/r600/drm/radeon_ws_bo.c
deleted file mode 100644
index 4a64be23a2..0000000000
--- a/src/gallium/winsys/r600/drm/radeon_ws_bo.c
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <pipe/p_compiler.h>
-#include <pipe/p_screen.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "radeon_priv.h"
-
-struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon,
- unsigned size, unsigned alignment, unsigned usage)
-{
- struct radeon_ws_bo *ws_bo = calloc(1, sizeof(struct radeon_ws_bo));
- struct pb_desc desc;
- struct pb_manager *man;
-
- desc.alignment = alignment;
- desc.usage = usage;
-
- if (!radeon->use_mem_constant && (usage & PIPE_BIND_CONSTANT_BUFFER)) {
- man = radeon->mman;
- } else if (usage & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- man = radeon->cman;
- else
- man = radeon->kman;
-
- ws_bo->pb = man->create_buffer(man, size, &desc);
- if (ws_bo->pb == NULL) {
- free(ws_bo);
- return NULL;
- }
-
- pipe_reference_init(&ws_bo->reference, 1);
- return ws_bo;
-}
-
-struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon,
- unsigned handle)
-{
- struct radeon_ws_bo *ws_bo = calloc(1, sizeof(struct radeon_ws_bo));
-
- ws_bo->pb = radeon_bo_pb_create_buffer_from_handle(radeon->kman, handle);
- if (!ws_bo->pb) {
- free(ws_bo);
- return NULL;
- }
- pipe_reference_init(&ws_bo->reference, 1);
- return ws_bo;
-}
-
-void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx)
-{
- return pb_map(bo->pb, usage, ctx);
-}
-
-void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo)
-{
- pb_unmap(bo->pb);
-}
-
-static void radeon_ws_bo_destroy(struct radeon *radeon, struct radeon_ws_bo *bo)
-{
- if (bo->pb)
- pb_reference(&bo->pb, NULL);
- free(bo);
-}
-
-void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst,
- struct radeon_ws_bo *src)
-{
- struct radeon_ws_bo *old = *dst;
-
- if (pipe_reference(&(*dst)->reference, &src->reference)) {
- radeon_ws_bo_destroy(radeon, old);
- }
- *dst = src;
-}
-
-unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo)
-{
- struct radeon_bo *bo;
-
- bo = radeon_bo_pb_get_bo(pb_bo->pb);
- if (!bo)
- return 0;
-
- return bo->handle;
-}
-
-unsigned radeon_ws_bo_get_size(struct radeon_ws_bo *pb_bo)
-{
- struct radeon_bo *bo;
-
- bo = radeon_bo_pb_get_bo(pb_bo->pb);
- if (!bo)
- return 0;
-
- return bo->size;
-}
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
index 7bd4407e9f..258084a1f1 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
@@ -30,7 +30,6 @@
#include "util/u_format.h"
#include "vmw_screen.h"
-#include "vmw_screen.h"
#include "vmw_surface.h"
#include "svga_drm_public.h"
diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
index 3a76098b65..bc2623e7b7 100644
--- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
+++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
@@ -272,7 +272,7 @@ wsw_destroy(struct sw_winsys *ws)
}
struct sw_winsys *
-wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen)
+wrapper_sw_winsys_wrap_pipe_screen(struct pipe_screen *screen)
{
struct wrapper_sw_winsys *wsw = CALLOC_STRUCT(wrapper_sw_winsys);
@@ -304,3 +304,16 @@ err_free:
err:
return NULL;
}
+
+struct pipe_screen *
+wrapper_sw_winsys_dewrap_pipe_screen(struct sw_winsys *ws)
+{
+ struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws);
+ struct pipe_screen *screen = wsw->screen;
+
+ wsw->pipe->destroy(wsw->pipe);
+ /* Don't destroy the screen; it's needed later on. */
+
+ FREE(wsw);
+ return screen;
+}
diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h
index b5c25a3c50..ae0196c432 100644
--- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h
+++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h
@@ -30,6 +30,15 @@
struct sw_winsys;
struct pipe_screen;
-struct sw_winsys *wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen);
+/*
+ * Wrap a pipe screen.
+ */
+struct sw_winsys *wrapper_sw_winsys_wrap_pipe_screen(struct pipe_screen *screen);
+
+/*
+ * Destroy the sw_winsys and return the wrapped pipe_screen.
+ * Unlike sw_winsys::destroy, this does not destroy the wrapped pipe_screen.
+ */
+struct pipe_screen *wrapper_sw_winsys_dewrap_pipe_screen(struct sw_winsys *sw_winsys);
#endif