summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/galahad/Makefile11
-rw-r--r--src/gallium/drivers/galahad/SConscript13
-rw-r--r--src/gallium/drivers/galahad/glhd_context.c997
-rw-r--r--src/gallium/drivers/galahad/glhd_context.h (renamed from src/gallium/drivers/identity/id_drm.c)77
-rw-r--r--src/gallium/drivers/galahad/glhd_objects.c187
-rw-r--r--src/gallium/drivers/galahad/glhd_objects.h175
-rw-r--r--src/gallium/drivers/galahad/glhd_public.h (renamed from src/gallium/drivers/identity/id_drm.h)12
-rw-r--r--src/gallium/drivers/galahad/glhd_screen.c334
-rw-r--r--src/gallium/drivers/galahad/glhd_screen.h (renamed from src/gallium/drivers/trace/tr_drm.h)23
-rw-r--r--src/gallium/drivers/i915/Makefile2
-rw-r--r--src/gallium/drivers/i915/SConscript2
-rw-r--r--src/gallium/drivers/i915/i915_batch.h14
-rw-r--r--src/gallium/drivers/i915/i915_batchbuffer.h9
-rw-r--r--src/gallium/drivers/i915/i915_blit.c19
-rw-r--r--src/gallium/drivers/i915/i915_context.h5
-rw-r--r--src/gallium/drivers/i915/i915_debug.c89
-rw-r--r--src/gallium/drivers/i915/i915_debug.h89
-rw-r--r--src/gallium/drivers/i915/i915_debug_fp.c1
-rw-r--r--src/gallium/drivers/i915/i915_debug_private.h45
-rw-r--r--src/gallium/drivers/i915/i915_flush.c27
-rw-r--r--src/gallium/drivers/i915/i915_prim_vbuf.c219
-rw-r--r--src/gallium/drivers/i915/i915_public.h13
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c17
-rw-r--r--src/gallium/drivers/i915/i915_screen.c4
-rw-r--r--src/gallium/drivers/i915/i915_state.h20
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c54
-rw-r--r--src/gallium/drivers/i915/i915_state_dynamic.c217
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c41
-rw-r--r--src/gallium/drivers/i915/i915_state_fpc.c59
-rw-r--r--src/gallium/drivers/i915/i915_state_immediate.c91
-rw-r--r--src/gallium/drivers/i915/i915_state_sampler.c113
-rw-r--r--src/gallium/drivers/i915/i915_state_static.c47
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h7
-rw-r--r--src/gallium/drivers/i965/brw_public.h13
-rw-r--r--src/gallium/drivers/i965/brw_screen.c9
-rw-r--r--src/gallium/drivers/i965/brw_winsys.h7
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c2
-rw-r--r--src/gallium/drivers/identity/Makefile3
-rw-r--r--src/gallium/drivers/identity/SConscript1
-rw-r--r--src/gallium/drivers/identity/id_objects.c7
-rw-r--r--src/gallium/drivers/identity/id_objects.h1
-rw-r--r--src/gallium/drivers/llvmpipe/.gitignore2
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile5
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c41
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c132
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h26
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.c45
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.h40
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.c39
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c208
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h85
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h87
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c179
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h238
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c61
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c26
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c148
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c616
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_vbuf.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c286
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c84
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_so.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c46
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c240
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_round.c277
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_sincos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c147
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_image.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py247
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c7
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h6
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h4
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c5
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c32
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragprog.c5
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.c4
-rw-r--r--src/gallium/drivers/r300/Makefile2
-rw-r--r--src/gallium/drivers/r300/SConscript2
-rw-r--r--src/gallium/drivers/r300/r300_blit.c110
-rw-r--r--src/gallium/drivers/r300/r300_cb.h3
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c7
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h2
-rw-r--r--src/gallium/drivers/r300/r300_context.c244
-rw-r--r--src/gallium/drivers/r300/r300_context.h171
-rw-r--r--src/gallium/drivers/r300/r300_cs.h73
-rw-r--r--src/gallium/drivers/r300/r300_debug.c10
-rw-r--r--src/gallium/drivers/r300/r300_defines.h5
-rw-r--r--src/gallium/drivers/r300/r300_emit.c443
-rw-r--r--src/gallium/drivers/r300/r300_emit.h18
-rw-r--r--src/gallium/drivers/r300/r300_flush.c11
-rw-r--r--src/gallium/drivers/r300/r300_fs.c10
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c22
-rw-r--r--src/gallium/drivers/r300/r300_public.h9
-rw-r--r--src/gallium/drivers/r300/r300_query.c10
-rw-r--r--src/gallium/drivers/r300/r300_reg.h24
-rw-r--r--src/gallium/drivers/r300/r300_render.c193
-rw-r--r--src/gallium/drivers/r300/r300_render_stencilref.c8
-rw-r--r--src/gallium/drivers/r300/r300_screen.c32
-rw-r--r--src/gallium/drivers/r300/r300_screen.h25
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c149
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.h22
-rw-r--r--src/gallium/drivers/r300/r300_state.c315
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c63
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c111
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.h31
-rw-r--r--src/gallium/drivers/r300/r300_texture.c618
-rw-r--r--src/gallium/drivers/r300/r300_texture.h9
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c465
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.h57
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c2
-rw-r--r--src/gallium/drivers/r300/r300_transfer.c68
-rw-r--r--src/gallium/drivers/r300/r300_vs_draw.c2
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h311
-rw-r--r--src/gallium/drivers/r600/Makefile7
-rw-r--r--src/gallium/drivers/r600/SConscript7
-rw-r--r--src/gallium/drivers/r600/r600_asm.c468
-rw-r--r--src/gallium/drivers/r600/r600_asm.h141
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c2
-rw-r--r--src/gallium/drivers/r600/r600_compiler.c446
-rw-r--r--src/gallium/drivers/r600/r600_compiler.h331
-rw-r--r--src/gallium/drivers/r600/r600_compiler_dump.c267
-rw-r--r--src/gallium/drivers/r600/r600_compiler_r600.c891
-rw-r--r--src/gallium/drivers/r600/r600_compiler_r700.c214
-rw-r--r--src/gallium/drivers/r600/r600_compiler_tgsi.c730
-rw-r--r--src/gallium/drivers/r600/r600_context.c284
-rw-r--r--src/gallium/drivers/r600/r600_context.h5
-rw-r--r--src/gallium/drivers/r600/r600_helper.c11
-rw-r--r--src/gallium/drivers/r600/r600_public.h9
-rw-r--r--src/gallium/drivers/r600/r600_screen.c11
-rw-r--r--src/gallium/drivers/r600/r600_shader.c982
-rw-r--r--src/gallium/drivers/r600/r600_shader.h242
-rw-r--r--src/gallium/drivers/r600/r600_sq.h42
-rw-r--r--src/gallium/drivers/r600/r600_state.c4
-rw-r--r--src/gallium/drivers/r600/r600_texture.c2
-rw-r--r--src/gallium/drivers/r600/r600d.h75
-rw-r--r--src/gallium/drivers/r600/r700_asm.c70
-rw-r--r--src/gallium/drivers/r600/r700_sq.h22
-rw-r--r--src/gallium/drivers/r600/radeon.h4
-rw-r--r--src/gallium/drivers/rbug/rbug_context.c8
-rw-r--r--src/gallium/drivers/rbug/rbug_core.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c210
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c37
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c40
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_fs.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c3
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.c24
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c41
-rw-r--r--src/gallium/drivers/svga/svga_public.h42
-rw-r--r--src/gallium/drivers/svga/svga_screen.c1
-rw-r--r--src/gallium/drivers/svga/svga_winsys.h3
-rw-r--r--src/gallium/drivers/trace/Makefile1
-rw-r--r--src/gallium/drivers/trace/SConscript1
-rw-r--r--src/gallium/drivers/trace/tr_drm.c101
177 files changed, 9419 insertions, 6700 deletions
diff --git a/src/gallium/drivers/galahad/Makefile b/src/gallium/drivers/galahad/Makefile
new file mode 100644
index 0000000000..e9c4f7e28c
--- /dev/null
+++ b/src/gallium/drivers/galahad/Makefile
@@ -0,0 +1,11 @@
+# Makefile for the galahad gallium driver (LIBNAME below).
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = galahad
+
+# C sources of this driver; presumably consumed by Makefile.template below.
+C_SOURCES = \
+ glhd_objects.c \
+ glhd_context.c \
+ glhd_screen.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/galahad/SConscript b/src/gallium/drivers/galahad/SConscript
new file mode 100644
index 0000000000..b398a3f061
--- /dev/null
+++ b/src/gallium/drivers/galahad/SConscript
@@ -0,0 +1,13 @@
+Import('*')
+
+env = env.Clone()
+
+galahad = env.ConvenienceLibrary(
+ target = 'identity',
+ source = [
+ 'glhd_context.c',
+ 'glhd_objects.c',
+ 'glhd_screen.c',
+ ])
+
+Export('galahad')
diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c
new file mode 100644
index 0000000000..ab6f17b3ab
--- /dev/null
+++ b/src/gallium/drivers/galahad/glhd_context.c
@@ -0,0 +1,997 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_context.h"
+
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "glhd_context.h"
+#include "glhd_objects.h"
+
+
+/**
+ * Destroy the wrapped pipe context, then free the galahad wrapper itself.
+ */
+static void
+galahad_destroy(struct pipe_context *_pipe)
+{
+   struct galahad_context *wrapper = galahad_context(_pipe);
+   struct pipe_context *inner = wrapper->pipe;
+
+   /* Inner context goes first; the wrapper is released last. */
+   inner->destroy(inner);
+   FREE(wrapper);
+}
+
+/**
+ * Forward a draw_arrays call to the wrapped context unchanged.
+ */
+static void
+galahad_draw_arrays(struct pipe_context *_pipe,
+                    unsigned prim,
+                    unsigned start,
+                    unsigned count)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->draw_arrays(inner, prim, start, count);
+}
+
+/**
+ * Forward a draw_elements call to the wrapped context, replacing the
+ * galahad index resource with the resource it wraps.
+ */
+static void
+galahad_draw_elements(struct pipe_context *_pipe,
+                      struct pipe_resource *_indexResource,
+                      unsigned indexSize,
+                      int indexBias,
+                      unsigned prim,
+                      unsigned start,
+                      unsigned count)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+   struct pipe_resource *index_res = galahad_resource(_indexResource)->resource;
+
+   inner->draw_elements(inner, index_res, indexSize, indexBias,
+                        prim, start, count);
+}
+
+/**
+ * Forward a draw_range_elements call to the wrapped context, replacing the
+ * galahad index resource with the resource it wraps.
+ */
+static void
+galahad_draw_range_elements(struct pipe_context *_pipe,
+                            struct pipe_resource *_indexResource,
+                            unsigned indexSize,
+                            int indexBias,
+                            unsigned minIndex,
+                            unsigned maxIndex,
+                            unsigned mode,
+                            unsigned start,
+                            unsigned count)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+   struct pipe_resource *index_res = galahad_resource(_indexResource)->resource;
+
+   inner->draw_range_elements(inner, index_res, indexSize, indexBias,
+                              minIndex, maxIndex, mode, start, count);
+}
+
+/**
+ * Create a query on the wrapped context.
+ *
+ * Before forwarding, report an error if an occlusion or timer query is
+ * requested while the wrapped screen reports the matching capability as
+ * unsupported.  The query is created regardless of the check's outcome.
+ */
+static struct pipe_query *
+galahad_create_query(struct pipe_context *_pipe,
+                     unsigned query_type)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   if (query_type == PIPE_QUERY_OCCLUSION_COUNTER) {
+      if (!inner->screen->get_param(inner->screen, PIPE_CAP_OCCLUSION_QUERY))
+         glhd_error("Occlusion query requested but not supported");
+   }
+
+   if (query_type == PIPE_QUERY_TIME_ELAPSED) {
+      if (!inner->screen->get_param(inner->screen, PIPE_CAP_TIMER_QUERY))
+         glhd_error("Timer query requested but not supported");
+   }
+
+   return inner->create_query(inner, query_type);
+}
+
+/**
+ * Forward destroy_query to the wrapped context; the query is passed as-is.
+ */
+static void
+galahad_destroy_query(struct pipe_context *_pipe,
+                      struct pipe_query *query)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->destroy_query(inner, query);
+}
+
+/**
+ * Forward begin_query to the wrapped context; the query is passed as-is.
+ */
+static void
+galahad_begin_query(struct pipe_context *_pipe,
+                    struct pipe_query *query)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->begin_query(inner, query);
+}
+
+/**
+ * Forward end_query to the wrapped context; the query is passed as-is.
+ */
+static void
+galahad_end_query(struct pipe_context *_pipe,
+                  struct pipe_query *query)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->end_query(inner, query);
+}
+
+/**
+ * Forward get_query_result to the wrapped context and return its result.
+ */
+static boolean
+galahad_get_query_result(struct pipe_context *_pipe,
+                         struct pipe_query *query,
+                         boolean wait,
+                         void *result)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   return inner->get_query_result(inner, query, wait, result);
+}
+
+/**
+ * Create a blend state object on the wrapped context.
+ *
+ * Emits a warning when logic ops are enabled together with blending on
+ * render target 0; the state is forwarded unchanged either way.
+ */
+static void *
+galahad_create_blend_state(struct pipe_context *_pipe,
+                           const struct pipe_blend_state *blend)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   if (blend->logicop_enable && blend->rt[0].blend_enable) {
+      glhd_warn("Blending enabled for render target 0, but logicops "
+                "are enabled");
+   }
+
+   return inner->create_blend_state(inner, blend);
+}
+
+/**
+ * Forward bind_blend_state to the wrapped context.
+ */
+static void
+galahad_bind_blend_state(struct pipe_context *_pipe,
+                         void *blend)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_blend_state(inner, blend);
+}
+
+/**
+ * Forward delete_blend_state to the wrapped context.
+ */
+static void
+galahad_delete_blend_state(struct pipe_context *_pipe,
+                           void *blend)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->delete_blend_state(inner, blend);
+}
+
+/**
+ * Forward create_sampler_state to the wrapped context and return the
+ * object it produces.
+ */
+static void *
+galahad_create_sampler_state(struct pipe_context *_pipe,
+                             const struct pipe_sampler_state *sampler)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   return inner->create_sampler_state(inner, sampler);
+}
+
+/**
+ * Forward bind_fragment_sampler_states to the wrapped context; the sampler
+ * array is passed through untouched.
+ */
+static void
+galahad_bind_fragment_sampler_states(struct pipe_context *_pipe,
+                                     unsigned num_samplers,
+                                     void **samplers)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_fragment_sampler_states(inner, num_samplers, samplers);
+}
+
+/**
+ * Forward bind_vertex_sampler_states to the wrapped context; the sampler
+ * array is passed through untouched.
+ */
+static void
+galahad_bind_vertex_sampler_states(struct pipe_context *_pipe,
+                                   unsigned num_samplers,
+                                   void **samplers)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_vertex_sampler_states(inner, num_samplers, samplers);
+}
+
+/**
+ * Forward delete_sampler_state to the wrapped context.
+ */
+static void
+galahad_delete_sampler_state(struct pipe_context *_pipe,
+                             void *sampler)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->delete_sampler_state(inner, sampler);
+}
+
+/**
+ * Create a rasterizer state object on the wrapped context.
+ *
+ * Warns about two combinations before forwarding the state unchanged:
+ * point smoothing together with point-quad rasterization, and sprite
+ * coordinates enabled without point-quad rasterization.
+ */
+static void *
+galahad_create_rasterizer_state(struct pipe_context *_pipe,
+                                const struct pipe_rasterizer_state *rasterizer)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   if (rasterizer->point_quad_rasterization && rasterizer->point_smooth) {
+      glhd_warn("Point smoothing requested but ignored");
+   } else if (!rasterizer->point_quad_rasterization &&
+              rasterizer->sprite_coord_enable) {
+      glhd_warn("Point sprites requested but ignored");
+   }
+
+   return inner->create_rasterizer_state(inner, rasterizer);
+}
+
+/**
+ * Forward bind_rasterizer_state to the wrapped context.
+ */
+static void
+galahad_bind_rasterizer_state(struct pipe_context *_pipe,
+                              void *rasterizer)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_rasterizer_state(inner, rasterizer);
+}
+
+/**
+ * Forward delete_rasterizer_state to the wrapped context.
+ */
+static void
+galahad_delete_rasterizer_state(struct pipe_context *_pipe,
+                                void *rasterizer)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->delete_rasterizer_state(inner, rasterizer);
+}
+
+/**
+ * Forward create_depth_stencil_alpha_state to the wrapped context and
+ * return the object it produces.
+ */
+static void *
+galahad_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
+                                         const struct pipe_depth_stencil_alpha_state *depth_stencil_alpha)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   return inner->create_depth_stencil_alpha_state(inner, depth_stencil_alpha);
+}
+
+/**
+ * Forward bind_depth_stencil_alpha_state to the wrapped context.
+ */
+static void
+galahad_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
+                                       void *depth_stencil_alpha)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_depth_stencil_alpha_state(inner, depth_stencil_alpha);
+}
+
+/**
+ * Forward delete_depth_stencil_alpha_state to the wrapped context.
+ */
+static void
+galahad_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
+                                         void *depth_stencil_alpha)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->delete_depth_stencil_alpha_state(inner, depth_stencil_alpha);
+}
+
+/**
+ * Forward create_fs_state to the wrapped context and return the object
+ * it produces.
+ */
+static void *
+galahad_create_fs_state(struct pipe_context *_pipe,
+                        const struct pipe_shader_state *fs)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   return inner->create_fs_state(inner, fs);
+}
+
+/**
+ * Forward bind_fs_state to the wrapped context.
+ */
+static void
+galahad_bind_fs_state(struct pipe_context *_pipe,
+                      void *fs)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_fs_state(inner, fs);
+}
+
+/**
+ * Forward delete_fs_state to the wrapped context.
+ */
+static void
+galahad_delete_fs_state(struct pipe_context *_pipe,
+                        void *fs)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->delete_fs_state(inner, fs);
+}
+
+/**
+ * Forward create_vs_state to the wrapped context and return the object
+ * it produces.
+ */
+static void *
+galahad_create_vs_state(struct pipe_context *_pipe,
+                        const struct pipe_shader_state *vs)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   return inner->create_vs_state(inner, vs);
+}
+
+/**
+ * Forward bind_vs_state to the wrapped context.
+ */
+static void
+galahad_bind_vs_state(struct pipe_context *_pipe,
+                      void *vs)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_vs_state(inner, vs);
+}
+
+/**
+ * Forward delete_vs_state to the wrapped context.
+ */
+static void
+galahad_delete_vs_state(struct pipe_context *_pipe,
+                        void *vs)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->delete_vs_state(inner, vs);
+}
+
+
+/**
+ * Forward create_vertex_elements_state to the wrapped context and return
+ * the object it produces.
+ */
+static void *
+galahad_create_vertex_elements_state(struct pipe_context *_pipe,
+                                     unsigned num_elements,
+                                     const struct pipe_vertex_element *vertex_elements)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   return inner->create_vertex_elements_state(inner, num_elements,
+                                              vertex_elements);
+}
+
+/**
+ * Forward bind_vertex_elements_state to the wrapped context.
+ */
+static void
+galahad_bind_vertex_elements_state(struct pipe_context *_pipe,
+                                   void *velems)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->bind_vertex_elements_state(inner, velems);
+}
+
+/**
+ * Forward delete_vertex_elements_state to the wrapped context.
+ */
+static void
+galahad_delete_vertex_elements_state(struct pipe_context *_pipe,
+                                     void *velems)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->delete_vertex_elements_state(inner, velems);
+}
+
+/**
+ * Forward set_blend_color to the wrapped context.
+ */
+static void
+galahad_set_blend_color(struct pipe_context *_pipe,
+                        const struct pipe_blend_color *blend_color)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->set_blend_color(inner, blend_color);
+}
+
+/**
+ * Forward set_stencil_ref to the wrapped context.
+ */
+static void
+galahad_set_stencil_ref(struct pipe_context *_pipe,
+                        const struct pipe_stencil_ref *stencil_ref)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->set_stencil_ref(inner, stencil_ref);
+}
+
+/**
+ * Forward set_clip_state to the wrapped context.
+ */
+static void
+galahad_set_clip_state(struct pipe_context *_pipe,
+                       const struct pipe_clip_state *clip)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->set_clip_state(inner, clip);
+}
+
+/**
+ * Forward set_sample_mask to the wrapped context.
+ */
+static void
+galahad_set_sample_mask(struct pipe_context *_pipe,
+                        unsigned sample_mask)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->set_sample_mask(inner, sample_mask);
+}
+
+/**
+ * Forward set_constant_buffer to the wrapped context.
+ *
+ * A non-NULL resource is unwrapped with galahad_resource_unwrap() first;
+ * a NULL resource is forwarded as NULL.
+ */
+static void
+galahad_set_constant_buffer(struct pipe_context *_pipe,
+                            uint shader,
+                            uint index,
+                            struct pipe_resource *_resource)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+   struct pipe_resource *unwrapped = NULL;
+
+   /* XXX hmm? unwrap the input state */
+   if (_resource)
+      unwrapped = galahad_resource_unwrap(_resource);
+
+   inner->set_constant_buffer(inner, shader, index, unwrapped);
+}
+
+/**
+ * Validate framebuffer state and forward it to the wrapped context.
+ *
+ * Reports an error when more color buffers are bound than
+ * PIPE_MAX_COLOR_BUFS, or a warning when the count exceeds what the wrapped
+ * screen returns for PIPE_CAP_MAX_RENDER_TARGETS.  The surfaces in the state
+ * are unwrapped into a local copy, which is what gets forwarded.
+ *
+ * A NULL _state is forwarded as NULL without being dereferenced.  When
+ * nr_cbufs exceeds PIPE_MAX_COLOR_BUFS the forwarded copy is clamped to
+ * PIPE_MAX_COLOR_BUFS so the cbufs[] arrays are never indexed beyond the
+ * bound the clearing loop below already assumes.
+ */
+static void
+galahad_set_framebuffer_state(struct pipe_context *_pipe,
+                              const struct pipe_framebuffer_state *_state)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_framebuffer_state unwrapped_state;
+   struct pipe_framebuffer_state *state = NULL;
+   unsigned nr_cbufs;
+   unsigned i;
+
+   if (_state) {
+      nr_cbufs = _state->nr_cbufs;
+
+      if (_state->nr_cbufs > PIPE_MAX_COLOR_BUFS) {
+         glhd_error("%d render targets bound, but only %d are permitted by API",
+                    _state->nr_cbufs, PIPE_MAX_COLOR_BUFS);
+         /* Keep reporting the caller's count, but never walk past the array. */
+         nr_cbufs = PIPE_MAX_COLOR_BUFS;
+      } else if (_state->nr_cbufs >
+                 pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS)) {
+         glhd_warn("%d render targets bound, but only %d are supported",
+                   _state->nr_cbufs,
+                   pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS));
+      }
+
+      /* unwrap the input state */
+      memcpy(&unwrapped_state, _state, sizeof(unwrapped_state));
+      unwrapped_state.nr_cbufs = nr_cbufs;
+      for (i = 0; i < nr_cbufs; i++)
+         unwrapped_state.cbufs[i] = galahad_surface_unwrap(_state->cbufs[i]);
+      /* Unused slots are explicitly cleared in the forwarded copy. */
+      for (; i < PIPE_MAX_COLOR_BUFS; i++)
+         unwrapped_state.cbufs[i] = NULL;
+      unwrapped_state.zsbuf = galahad_surface_unwrap(_state->zsbuf);
+      state = &unwrapped_state;
+   }
+
+   pipe->set_framebuffer_state(pipe,
+                               state);
+}
+
+/**
+ * Forward set_polygon_stipple to the wrapped context.
+ */
+static void
+galahad_set_polygon_stipple(struct pipe_context *_pipe,
+                            const struct pipe_poly_stipple *poly_stipple)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->set_polygon_stipple(inner, poly_stipple);
+}
+
+/**
+ * Forward set_scissor_state to the wrapped context.
+ */
+static void
+galahad_set_scissor_state(struct pipe_context *_pipe,
+                          const struct pipe_scissor_state *scissor)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->set_scissor_state(inner, scissor);
+}
+
+/**
+ * Forward set_viewport_state to the wrapped context.
+ */
+static void
+galahad_set_viewport_state(struct pipe_context *_pipe,
+                           const struct pipe_viewport_state *viewport)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->set_viewport_state(inner, viewport);
+}
+
+/**
+ * Unwrap the fragment sampler views and forward them to the wrapped context.
+ *
+ * The views are unwrapped into a local array of PIPE_MAX_SAMPLERS entries;
+ * slots past num are set to NULL.  A request for more than PIPE_MAX_SAMPLERS
+ * views is reported as an error and clamped, because it would otherwise
+ * write past the end of that local array.  A NULL _views pointer is
+ * forwarded as NULL.
+ */
+static void
+galahad_set_fragment_sampler_views(struct pipe_context *_pipe,
+                                   unsigned num,
+                                   struct pipe_sampler_view **_views)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_sampler_view *unwrapped_views[PIPE_MAX_SAMPLERS];
+   struct pipe_sampler_view **views = NULL;
+   unsigned i;
+
+   if (num > PIPE_MAX_SAMPLERS) {
+      glhd_error("%u fragment sampler views bound, but only %u are permitted",
+                 num, (unsigned) PIPE_MAX_SAMPLERS);
+      /* unwrapped_views[] cannot hold more than this. */
+      num = PIPE_MAX_SAMPLERS;
+   }
+
+   if (_views) {
+      for (i = 0; i < num; i++)
+         unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]);
+      for (; i < PIPE_MAX_SAMPLERS; i++)
+         unwrapped_views[i] = NULL;
+
+      views = unwrapped_views;
+   }
+
+   pipe->set_fragment_sampler_views(pipe, num, views);
+}
+
+/**
+ * Unwrap the vertex sampler views and forward them to the wrapped context.
+ *
+ * The views are unwrapped into a local array of PIPE_MAX_VERTEX_SAMPLERS
+ * entries; slots past num are set to NULL.  A request for more than
+ * PIPE_MAX_VERTEX_SAMPLERS views is reported as an error and clamped,
+ * because it would otherwise write past the end of that local array.
+ * A NULL _views pointer is forwarded as NULL.
+ */
+static void
+galahad_set_vertex_sampler_views(struct pipe_context *_pipe,
+                                 unsigned num,
+                                 struct pipe_sampler_view **_views)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_sampler_view *unwrapped_views[PIPE_MAX_VERTEX_SAMPLERS];
+   struct pipe_sampler_view **views = NULL;
+   unsigned i;
+
+   if (num > PIPE_MAX_VERTEX_SAMPLERS) {
+      glhd_error("%u vertex sampler views bound, but only %u are permitted",
+                 num, (unsigned) PIPE_MAX_VERTEX_SAMPLERS);
+      /* unwrapped_views[] cannot hold more than this. */
+      num = PIPE_MAX_VERTEX_SAMPLERS;
+   }
+
+   if (_views) {
+      for (i = 0; i < num; i++)
+         unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]);
+      for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++)
+         unwrapped_views[i] = NULL;
+
+      views = unwrapped_views;
+   }
+
+   pipe->set_vertex_sampler_views(pipe, num, views);
+}
+
+/**
+ * Unwrap the vertex buffer resources and forward them to the wrapped context.
+ *
+ * The buffer descriptions are copied into a local array of
+ * PIPE_MAX_SHADER_INPUTS entries and each .buffer is replaced by the result
+ * of galahad_resource_unwrap().  A request for more buffers than the local
+ * array holds is reported as an error and clamped, because the copy would
+ * otherwise overflow the array.  With num_buffers == 0 a NULL array is
+ * forwarded.
+ */
+static void
+galahad_set_vertex_buffers(struct pipe_context *_pipe,
+                           unsigned num_buffers,
+                           const struct pipe_vertex_buffer *_buffers)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_vertex_buffer unwrapped_buffers[PIPE_MAX_SHADER_INPUTS];
+   struct pipe_vertex_buffer *buffers = NULL;
+   unsigned i;
+
+   if (num_buffers > PIPE_MAX_SHADER_INPUTS) {
+      glhd_error("%u vertex buffers bound, but at most %u can be unwrapped",
+                 num_buffers, (unsigned) PIPE_MAX_SHADER_INPUTS);
+      /* unwrapped_buffers[] cannot hold more than this. */
+      num_buffers = PIPE_MAX_SHADER_INPUTS;
+   }
+
+   if (num_buffers) {
+      memcpy(unwrapped_buffers, _buffers, num_buffers * sizeof(*_buffers));
+      for (i = 0; i < num_buffers; i++)
+         unwrapped_buffers[i].buffer = galahad_resource_unwrap(_buffers[i].buffer);
+      buffers = unwrapped_buffers;
+   }
+
+   pipe->set_vertex_buffers(pipe,
+                            num_buffers,
+                            buffers);
+}
+/**
+ * Forward resource_copy_region to the wrapped context using the resources
+ * wrapped by _dst and _src.
+ *
+ * Warns when the source and destination formats differ; the copy is
+ * forwarded regardless.
+ */
+static void
+galahad_resource_copy_region(struct pipe_context *_pipe,
+                             struct pipe_resource *_dst,
+                             struct pipe_subresource subdst,
+                             unsigned dstx,
+                             unsigned dsty,
+                             unsigned dstz,
+                             struct pipe_resource *_src,
+                             struct pipe_subresource subsrc,
+                             unsigned srcx,
+                             unsigned srcy,
+                             unsigned srcz,
+                             unsigned width,
+                             unsigned height)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+   struct pipe_resource *real_dst = galahad_resource(_dst)->resource;
+   struct pipe_resource *real_src = galahad_resource(_src)->resource;
+
+   if (_dst->format != _src->format) {
+      glhd_warn("Format mismatch: Source is %s, destination is %s",
+                util_format_short_name(_src->format),
+                util_format_short_name(_dst->format));
+   }
+
+   inner->resource_copy_region(inner,
+                               real_dst, subdst, dstx, dsty, dstz,
+                               real_src, subsrc, srcx, srcy, srcz,
+                               width, height);
+}
+
+/**
+ * Forward clear to the wrapped context unchanged.
+ */
+static void
+galahad_clear(struct pipe_context *_pipe,
+              unsigned buffers,
+              const float *rgba,
+              double depth,
+              unsigned stencil)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->clear(inner, buffers, rgba, depth, stencil);
+}
+
+/**
+ * Forward clear_render_target to the wrapped context, using the surface
+ * wrapped by _dst.
+ */
+static void
+galahad_clear_render_target(struct pipe_context *_pipe,
+                            struct pipe_surface *_dst,
+                            const float *rgba,
+                            unsigned dstx, unsigned dsty,
+                            unsigned width, unsigned height)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+   struct pipe_surface *real_dst = galahad_surface(_dst)->surface;
+
+   inner->clear_render_target(inner, real_dst, rgba,
+                              dstx, dsty, width, height);
+}
+/**
+ * Forward clear_depth_stencil to the wrapped context, using the surface
+ * wrapped by _dst.
+ */
+static void
+galahad_clear_depth_stencil(struct pipe_context *_pipe,
+                            struct pipe_surface *_dst,
+                            unsigned clear_flags,
+                            double depth,
+                            unsigned stencil,
+                            unsigned dstx, unsigned dsty,
+                            unsigned width, unsigned height)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+   struct pipe_surface *real_dst = galahad_surface(_dst)->surface;
+
+   inner->clear_depth_stencil(inner, real_dst, clear_flags, depth, stencil,
+                              dstx, dsty, width, height);
+}
+
+/**
+ * Forward flush to the wrapped context; the fence pointer is passed as-is.
+ */
+static void
+galahad_flush(struct pipe_context *_pipe,
+              unsigned flags,
+              struct pipe_fence_handle **fence)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+
+   inner->flush(inner, flags, fence);
+}
+
+/**
+ * Forward is_resource_referenced to the wrapped context, using the resource
+ * wrapped by _resource, and return the wrapped context's answer.
+ */
+static unsigned int
+galahad_is_resource_referenced(struct pipe_context *_pipe,
+                               struct pipe_resource *_resource,
+                               unsigned face,
+                               unsigned level)
+{
+   struct pipe_context *inner = galahad_context(_pipe)->pipe;
+   struct pipe_resource *real_res = galahad_resource(_resource)->resource;
+
+   return inner->is_resource_referenced(inner, real_res, face, level);
+}
+
+/**
+ * Create a sampler view on the wrapped context for the resource wrapped by
+ * _resource, then hand the result to galahad_sampler_view_create().
+ *
+ * Returns NULL when the wrapped context fails to create the view.
+ */
+static struct pipe_sampler_view *
+galahad_context_create_sampler_view(struct pipe_context *_pipe,
+                                    struct pipe_resource *_resource,
+                                    const struct pipe_sampler_view *templ)
+{
+   struct galahad_context *glhd_ctx = galahad_context(_pipe);
+   struct galahad_resource *glhd_res = galahad_resource(_resource);
+   struct pipe_context *inner = glhd_ctx->pipe;
+   struct pipe_sampler_view *inner_view;
+
+   inner_view = inner->create_sampler_view(inner, glhd_res->resource, templ);
+   if (!inner_view)
+      return NULL;
+
+   return galahad_sampler_view_create(glhd_ctx, glhd_res, inner_view);
+}
+
+/**
+ * Destroy a galahad sampler view by delegating to
+ * galahad_sampler_view_destroy().
+ */
+static void
+galahad_context_sampler_view_destroy(struct pipe_context *_pipe,
+                                     struct pipe_sampler_view *_view)
+{
+   struct galahad_context *glhd_ctx = galahad_context(_pipe);
+
+   galahad_sampler_view_destroy(glhd_ctx, galahad_sampler_view(_view));
+}
+
+/**
+ * Obtain a transfer from the wrapped context for the resource wrapped by
+ * _resource, then hand the result to galahad_transfer_create().
+ *
+ * Returns NULL when the wrapped context does not return a transfer.
+ */
+static struct pipe_transfer *
+galahad_context_get_transfer(struct pipe_context *_context,
+                             struct pipe_resource *_resource,
+                             struct pipe_subresource sr,
+                             unsigned usage,
+                             const struct pipe_box *box)
+{
+   struct galahad_context *glhd_ctx = galahad_context(_context);
+   struct galahad_resource *glhd_res = galahad_resource(_resource);
+   struct pipe_context *inner = glhd_ctx->pipe;
+   struct pipe_transfer *inner_xfer;
+
+   inner_xfer = inner->get_transfer(inner, glhd_res->resource, sr, usage, box);
+   if (!inner_xfer)
+      return NULL;
+
+   return galahad_transfer_create(glhd_ctx, glhd_res, inner_xfer);
+}
+
+/**
+ * pipe_context::transfer_destroy hook: tear down a wrapped transfer.
+ */
+static void
+galahad_context_transfer_destroy(struct pipe_context *_pipe,
+                                 struct pipe_transfer *_transfer)
+{
+   struct galahad_context *glhd = galahad_context(_pipe);
+   struct galahad_transfer *wrapper = galahad_transfer(_transfer);
+
+   galahad_transfer_destroy(glhd, wrapper);
+}
+
+/**
+ * Map the wrapped transfer through the wrapped context and return
+ * whatever pointer it hands back.
+ */
+static void *
+galahad_context_transfer_map(struct pipe_context *_context,
+                             struct pipe_transfer *_transfer)
+{
+   struct pipe_context *inner_ctx = galahad_context(_context)->pipe;
+   struct pipe_transfer *inner = galahad_transfer(_transfer)->transfer;
+
+   return inner_ctx->transfer_map(inner_ctx, inner);
+}
+
+
+
+/**
+ * Forward a flush of the given box of a transfer to the wrapped context.
+ */
+static void
+galahad_context_transfer_flush_region(struct pipe_context *_context,
+                                      struct pipe_transfer *_transfer,
+                                      const struct pipe_box *box)
+{
+   struct pipe_context *inner_ctx = galahad_context(_context)->pipe;
+   struct pipe_transfer *inner = galahad_transfer(_transfer)->transfer;
+
+   inner_ctx->transfer_flush_region(inner_ctx, inner, box);
+}
+
+
+/**
+ * Unmap the wrapped transfer through the wrapped context.
+ */
+static void
+galahad_context_transfer_unmap(struct pipe_context *_context,
+                               struct pipe_transfer *_transfer)
+{
+   struct pipe_context *inner_ctx = galahad_context(_context)->pipe;
+   struct pipe_transfer *inner = galahad_transfer(_transfer)->transfer;
+
+   inner_ctx->transfer_unmap(inner_ctx, inner);
+}
+
+
+/**
+ * Forward an inline write to the wrapped context, substituting the
+ * wrapped resource for the galahad one; all other arguments pass through
+ * unchanged.
+ */
+static void
+galahad_context_transfer_inline_write(struct pipe_context *_context,
+                                      struct pipe_resource *_resource,
+                                      struct pipe_subresource sr,
+                                      unsigned usage,
+                                      const struct pipe_box *box,
+                                      const void *data,
+                                      unsigned stride,
+                                      unsigned slice_stride)
+{
+   struct pipe_context *inner_ctx = galahad_context(_context)->pipe;
+   struct pipe_resource *inner_res = galahad_resource(_resource)->resource;
+
+   inner_ctx->transfer_inline_write(inner_ctx, inner_res, sr, usage, box,
+                                    data, stride, slice_stride);
+}
+
+
+/**
+ * Wrap a driver context in a galahad context.
+ *
+ * \param _screen  screen stored as the wrapper's screen
+ * \param pipe     driver context to wrap; it is stored in the wrapper and
+ *                 its priv pointer is exposed through the wrapper
+ * \return the wrapper's base pipe_context, or NULL if the wrapper cannot
+ *         be allocated.  On that failure the wrapped context is destroyed
+ *         so it is not leaked.
+ */
+struct pipe_context *
+galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
+{
+   struct galahad_context *glhd_pipe;
+   (void)galahad_screen(_screen);
+
+   glhd_pipe = CALLOC_STRUCT(galahad_context);
+   if (!glhd_pipe) {
+      /* The caller drops its pointer to the wrapped context when we return
+       * NULL, so release it here instead of leaking it. */
+      pipe->destroy(pipe);
+      return NULL;
+   }
+
+   glhd_pipe->base.winsys = NULL;
+   glhd_pipe->base.screen = _screen;
+   glhd_pipe->base.priv = pipe->priv; /* expose wrapped data */
+   glhd_pipe->base.draw = NULL;
+
+   /* Install the galahad entry points. */
+   glhd_pipe->base.destroy = galahad_destroy;
+   glhd_pipe->base.draw_arrays = galahad_draw_arrays;
+   glhd_pipe->base.draw_elements = galahad_draw_elements;
+   glhd_pipe->base.draw_range_elements = galahad_draw_range_elements;
+   glhd_pipe->base.create_query = galahad_create_query;
+   glhd_pipe->base.destroy_query = galahad_destroy_query;
+   glhd_pipe->base.begin_query = galahad_begin_query;
+   glhd_pipe->base.end_query = galahad_end_query;
+   glhd_pipe->base.get_query_result = galahad_get_query_result;
+   glhd_pipe->base.create_blend_state = galahad_create_blend_state;
+   glhd_pipe->base.bind_blend_state = galahad_bind_blend_state;
+   glhd_pipe->base.delete_blend_state = galahad_delete_blend_state;
+   glhd_pipe->base.create_sampler_state = galahad_create_sampler_state;
+   glhd_pipe->base.bind_fragment_sampler_states = galahad_bind_fragment_sampler_states;
+   glhd_pipe->base.bind_vertex_sampler_states = galahad_bind_vertex_sampler_states;
+   glhd_pipe->base.delete_sampler_state = galahad_delete_sampler_state;
+   glhd_pipe->base.create_rasterizer_state = galahad_create_rasterizer_state;
+   glhd_pipe->base.bind_rasterizer_state = galahad_bind_rasterizer_state;
+   glhd_pipe->base.delete_rasterizer_state = galahad_delete_rasterizer_state;
+   glhd_pipe->base.create_depth_stencil_alpha_state = galahad_create_depth_stencil_alpha_state;
+   glhd_pipe->base.bind_depth_stencil_alpha_state = galahad_bind_depth_stencil_alpha_state;
+   glhd_pipe->base.delete_depth_stencil_alpha_state = galahad_delete_depth_stencil_alpha_state;
+   glhd_pipe->base.create_fs_state = galahad_create_fs_state;
+   glhd_pipe->base.bind_fs_state = galahad_bind_fs_state;
+   glhd_pipe->base.delete_fs_state = galahad_delete_fs_state;
+   glhd_pipe->base.create_vs_state = galahad_create_vs_state;
+   glhd_pipe->base.bind_vs_state = galahad_bind_vs_state;
+   glhd_pipe->base.delete_vs_state = galahad_delete_vs_state;
+   glhd_pipe->base.create_vertex_elements_state = galahad_create_vertex_elements_state;
+   glhd_pipe->base.bind_vertex_elements_state = galahad_bind_vertex_elements_state;
+   glhd_pipe->base.delete_vertex_elements_state = galahad_delete_vertex_elements_state;
+   glhd_pipe->base.set_blend_color = galahad_set_blend_color;
+   glhd_pipe->base.set_stencil_ref = galahad_set_stencil_ref;
+   glhd_pipe->base.set_clip_state = galahad_set_clip_state;
+   glhd_pipe->base.set_sample_mask = galahad_set_sample_mask;
+   glhd_pipe->base.set_constant_buffer = galahad_set_constant_buffer;
+   glhd_pipe->base.set_framebuffer_state = galahad_set_framebuffer_state;
+   glhd_pipe->base.set_polygon_stipple = galahad_set_polygon_stipple;
+   glhd_pipe->base.set_scissor_state = galahad_set_scissor_state;
+   glhd_pipe->base.set_viewport_state = galahad_set_viewport_state;
+   glhd_pipe->base.set_fragment_sampler_views = galahad_set_fragment_sampler_views;
+   glhd_pipe->base.set_vertex_sampler_views = galahad_set_vertex_sampler_views;
+   glhd_pipe->base.set_vertex_buffers = galahad_set_vertex_buffers;
+   glhd_pipe->base.resource_copy_region = galahad_resource_copy_region;
+   glhd_pipe->base.clear = galahad_clear;
+   glhd_pipe->base.clear_render_target = galahad_clear_render_target;
+   glhd_pipe->base.clear_depth_stencil = galahad_clear_depth_stencil;
+   glhd_pipe->base.flush = galahad_flush;
+   glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced;
+   glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view;
+   glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy;
+   glhd_pipe->base.get_transfer = galahad_context_get_transfer;
+   glhd_pipe->base.transfer_destroy = galahad_context_transfer_destroy;
+   glhd_pipe->base.transfer_map = galahad_context_transfer_map;
+   glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap;
+   glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region;
+   glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write;
+
+   glhd_pipe->pipe = pipe;
+
+   return &glhd_pipe->base;
+}
diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/galahad/glhd_context.h
index 15d01519f8..4e71753ac3 100644
--- a/src/gallium/drivers/identity/id_drm.c
+++ b/src/gallium/drivers/galahad/glhd_context.h
@@ -25,69 +25,40 @@
*
**************************************************************************/
-#include "state_tracker/drm_api.h"
+#ifndef GLHD_CONTEXT_H
+#define GLHD_CONTEXT_H
-#include "util/u_memory.h"
-#include "id_drm.h"
-#include "id_screen.h"
-#include "id_public.h"
+#include <stdio.h>
-struct identity_drm_api
-{
- struct drm_api base;
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
- struct drm_api *api;
-};
-static INLINE struct identity_drm_api *
-identity_drm_api(struct drm_api *_api)
-{
- return (struct identity_drm_api *)_api;
-}
+struct galahad_context {
+ struct pipe_context base; /**< base class */
-static struct pipe_screen *
-identity_drm_create_screen(struct drm_api *_api, int fd)
-{
- struct identity_drm_api *id_api = identity_drm_api(_api);
- struct drm_api *api = id_api->api;
- struct pipe_screen *screen;
+ struct pipe_context *pipe;
+};
- screen = api->create_screen(api, fd);
- return identity_screen_create(screen);
-}
+struct pipe_context *
+galahad_context_create(struct pipe_screen *screen, struct pipe_context *pipe);
-static void
-identity_drm_destroy(struct drm_api *_api)
-{
- struct identity_drm_api *id_api = identity_drm_api(_api);
- struct drm_api *api = id_api->api;
- api->destroy(api);
- FREE(id_api);
-}
-
-struct drm_api *
-identity_drm_create(struct drm_api *api)
+static INLINE struct galahad_context *
+galahad_context(struct pipe_context *pipe)
{
- struct identity_drm_api *id_api;
-
- if (!api)
- goto error;
-
- id_api = CALLOC_STRUCT(identity_drm_api);
-
- if (!id_api)
- goto error;
+ return (struct galahad_context *)pipe;
+}
- id_api->base.name = api->name;
- id_api->base.driver_name = api->driver_name;
- id_api->base.create_screen = identity_drm_create_screen;
- id_api->base.destroy = identity_drm_destroy;
- id_api->api = api;
+/*
+ * Print a printf-style message to stderr, prefixed with "galahad: " and
+ * the name of the calling function, and terminated with a newline.
+ * Wrapped in do { } while (0) so it behaves as a single statement.
+ */
+#define glhd_warn(...) \
+do { \
+ fprintf(stderr, "galahad: %s: ", __FUNCTION__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+} while (0)
- return &id_api->base;
+/*
+ * Report an error; currently identical to glhd_warn().  The expansion
+ * deliberately has no trailing semicolon so that
+ * "if (x) glhd_error(...); else ..." parses as a single statement.
+ */
+#define glhd_error(...) \
+   glhd_warn(__VA_ARGS__)
-error:
- return api;
-}
+#endif /* GLHD_CONTEXT_H */
diff --git a/src/gallium/drivers/galahad/glhd_objects.c b/src/gallium/drivers/galahad/glhd_objects.c
new file mode 100644
index 0000000000..6c5a21ae70
--- /dev/null
+++ b/src/gallium/drivers/galahad/glhd_objects.c
@@ -0,0 +1,187 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "glhd_screen.h"
+#include "glhd_objects.h"
+#include "glhd_context.h"
+
+
+
+/**
+ * Wrap a driver resource in a galahad_resource.
+ *
+ * The wrapper's base is a copy of the driver resource with its own
+ * reference count of one and its screen set to the galahad screen.  If
+ * the resource is NULL or the wrapper cannot be allocated, the driver
+ * resource reference is released and NULL is returned.
+ */
+struct pipe_resource *
+galahad_resource_create(struct galahad_screen *glhd_screen,
+                        struct pipe_resource *resource)
+{
+   struct galahad_resource *wrapper = NULL;
+
+   if (resource) {
+      assert(resource->screen == glhd_screen->screen);
+      wrapper = CALLOC_STRUCT(galahad_resource);
+   }
+
+   if (!wrapper) {
+      /* nothing to wrap, or no memory: drop the driver resource */
+      pipe_resource_reference(&resource, NULL);
+      return NULL;
+   }
+
+   memcpy(&wrapper->base, resource, sizeof(struct pipe_resource));
+
+   pipe_reference_init(&wrapper->base.reference, 1);
+   wrapper->base.screen = &glhd_screen->base;
+   wrapper->resource = resource;
+
+   return &wrapper->base;
+}
+
+/**
+ * Destroy a resource wrapper: drop its reference on the wrapped driver
+ * resource, then free the wrapper itself.
+ */
+void
+galahad_resource_destroy(struct galahad_resource *glhd_resource)
+{
+ pipe_resource_reference(&glhd_resource->resource, NULL);
+ FREE(glhd_resource);
+}
+
+
+/**
+ * Wrap a driver surface in a galahad_surface.
+ *
+ * The wrapper's base is a copy of the driver surface with its own
+ * reference count of one and a texture pointer that references the
+ * galahad resource wrapper.  If the surface is NULL or the wrapper cannot
+ * be allocated, the driver surface reference is released and NULL is
+ * returned.
+ */
+struct pipe_surface *
+galahad_surface_create(struct galahad_resource *glhd_resource,
+                       struct pipe_surface *surface)
+{
+   struct galahad_surface *wrapper = NULL;
+
+   if (surface) {
+      assert(surface->texture == glhd_resource->resource);
+      wrapper = CALLOC_STRUCT(galahad_surface);
+   }
+
+   if (!wrapper) {
+      /* nothing to wrap, or no memory: drop the driver surface */
+      pipe_surface_reference(&surface, NULL);
+      return NULL;
+   }
+
+   memcpy(&wrapper->base, surface, sizeof(struct pipe_surface));
+
+   pipe_reference_init(&wrapper->base.reference, 1);
+   /* clear the copied pointer before taking a reference on the wrapper */
+   wrapper->base.texture = NULL;
+   pipe_resource_reference(&wrapper->base.texture, &glhd_resource->base);
+   wrapper->surface = surface;
+
+   return &wrapper->base;
+}
+
+/**
+ * Destroy a surface wrapper: drop the reference on the texture held by
+ * the wrapper's base, drop the reference on the wrapped driver surface,
+ * then free the wrapper.
+ */
+void
+galahad_surface_destroy(struct galahad_surface *glhd_surface)
+{
+ pipe_resource_reference(&glhd_surface->base.texture, NULL);
+ pipe_surface_reference(&glhd_surface->surface, NULL);
+ FREE(glhd_surface);
+}
+
+
+/**
+ * Wrap a driver sampler view in a galahad_sampler_view.
+ *
+ * \param glhd_context   galahad context whose wrapped context owns the view
+ * \param glhd_resource  wrapper of the resource the view was created from
+ * \param view           driver sampler view to wrap
+ * \return the wrapper's base view, or NULL if \p view is NULL or the
+ *         wrapper cannot be allocated.  On allocation failure the driver
+ *         view is destroyed through the wrapped context so it is not
+ *         leaked.
+ */
+struct pipe_sampler_view *
+galahad_sampler_view_create(struct galahad_context *glhd_context,
+                            struct galahad_resource *glhd_resource,
+                            struct pipe_sampler_view *view)
+{
+   struct galahad_sampler_view *glhd_view;
+
+   if (!view)
+      return NULL;
+
+   assert(view->texture == glhd_resource->resource);
+
+   glhd_view = CALLOC_STRUCT(galahad_sampler_view);
+   if (!glhd_view) {
+      /* cannot wrap it, so hand the driver view back to its context */
+      glhd_context->pipe->sampler_view_destroy(glhd_context->pipe, view);
+      return NULL;
+   }
+
+   glhd_view->base = *view;
+   glhd_view->base.reference.count = 1;
+   /* clear the copied pointer before taking our own reference */
+   glhd_view->base.texture = NULL;
+   pipe_resource_reference(&glhd_view->base.texture, glhd_resource->resource);
+   glhd_view->base.context = glhd_context->pipe;
+   glhd_view->sampler_view = view;
+
+   return &glhd_view->base;
+}
+
+/**
+ * Destroy a sampler view wrapper: drop the texture reference held by the
+ * wrapper's base, destroy the wrapped view through the wrapped context,
+ * then free the wrapper.
+ */
+void
+galahad_sampler_view_destroy(struct galahad_context *glhd_context,
+                             struct galahad_sampler_view *glhd_view)
+{
+   struct pipe_context *inner_ctx;
+
+   pipe_resource_reference(&glhd_view->base.texture, NULL);
+
+   inner_ctx = glhd_context->pipe;
+   inner_ctx->sampler_view_destroy(inner_ctx, glhd_view->sampler_view);
+
+   FREE(glhd_view);
+}
+
+
+/**
+ * Wrap a driver transfer in a galahad_transfer.
+ *
+ * \param glhd_context   galahad context whose wrapped context owns the
+ *                       transfer
+ * \param glhd_resource  wrapper of the resource being transferred
+ * \param transfer       driver transfer to wrap
+ * \return the wrapper's base transfer, or NULL if \p transfer is NULL or
+ *         the wrapper cannot be allocated.  On allocation failure the
+ *         driver transfer is destroyed through the wrapped context.
+ */
+struct pipe_transfer *
+galahad_transfer_create(struct galahad_context *glhd_context,
+                        struct galahad_resource *glhd_resource,
+                        struct pipe_transfer *transfer)
+{
+   struct galahad_transfer *glhd_transfer;
+
+   if(!transfer)
+      goto error;
+
+   assert(transfer->resource == glhd_resource->resource);
+
+   glhd_transfer = CALLOC_STRUCT(galahad_transfer);
+   if(!glhd_transfer)
+      goto error;
+
+   memcpy(&glhd_transfer->base, transfer, sizeof(struct pipe_transfer));
+
+   /* clear the copied pointer before taking a reference on the wrapper */
+   glhd_transfer->base.resource = NULL;
+   glhd_transfer->transfer = transfer;
+
+   pipe_resource_reference(&glhd_transfer->base.resource, &glhd_resource->base);
+   assert(glhd_transfer->base.resource == &glhd_resource->base);
+
+   return &glhd_transfer->base;
+
+error:
+   /* Only hand a real transfer back to the driver; this label is also
+    * reached when the caller passed NULL. */
+   if (transfer)
+      glhd_context->pipe->transfer_destroy(glhd_context->pipe, transfer);
+   return NULL;
+}
+
+/**
+ * Destroy a transfer wrapper: drop the resource reference held by the
+ * wrapper's base, destroy the wrapped transfer through the wrapped
+ * context, then free the wrapper.
+ */
+void
+galahad_transfer_destroy(struct galahad_context *glhd_context,
+                         struct galahad_transfer *glhd_transfer)
+{
+   struct pipe_context *inner_ctx;
+
+   pipe_resource_reference(&glhd_transfer->base.resource, NULL);
+
+   inner_ctx = glhd_context->pipe;
+   inner_ctx->transfer_destroy(inner_ctx, glhd_transfer->transfer);
+
+   FREE(glhd_transfer);
+}
diff --git a/src/gallium/drivers/galahad/glhd_objects.h b/src/gallium/drivers/galahad/glhd_objects.h
new file mode 100644
index 0000000000..935803915d
--- /dev/null
+++ b/src/gallium/drivers/galahad/glhd_objects.h
@@ -0,0 +1,175 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GLHD_OBJECTS_H
+#define GLHD_OBJECTS_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "glhd_screen.h"
+
+struct galahad_context;
+
+
+/**
+ * Wrapper around a driver pipe_resource.  The base member comes first so
+ * a pipe_resource pointer can be cast to the wrapper type.
+ */
+struct galahad_resource
+{
+ struct pipe_resource base; /**< resource handed out by galahad */
+
+ struct pipe_resource *resource; /**< wrapped driver resource */
+};
+
+
+/**
+ * Wrapper around a driver pipe_sampler_view.  The base member comes first
+ * so a pipe_sampler_view pointer can be cast to the wrapper type.
+ */
+struct galahad_sampler_view
+{
+ struct pipe_sampler_view base; /**< view handed out by galahad */
+
+ struct pipe_sampler_view *sampler_view; /**< wrapped driver view */
+};
+
+
+/**
+ * Wrapper around a driver pipe_surface.  The base member comes first so a
+ * pipe_surface pointer can be cast to the wrapper type.
+ */
+struct galahad_surface
+{
+ struct pipe_surface base; /**< surface handed out by galahad */
+
+ struct pipe_surface *surface; /**< wrapped driver surface */
+};
+
+
+/**
+ * Wrapper around a driver pipe_transfer.  The base member comes first so
+ * a pipe_transfer pointer can be cast to the wrapper type.
+ */
+struct galahad_transfer
+{
+ struct pipe_transfer base; /**< transfer handed out by galahad */
+
+ struct pipe_transfer *transfer; /**< wrapped driver transfer */
+};
+
+
+/** Cast a pipe_resource to its galahad wrapper; NULL stays NULL. */
+static INLINE struct galahad_resource *
+galahad_resource(struct pipe_resource *_resource)
+{
+   if (_resource) {
+      (void)galahad_screen(_resource->screen);
+      return (struct galahad_resource *)_resource;
+   }
+
+   return NULL;
+}
+
+/** Cast a pipe_sampler_view to its galahad wrapper; NULL stays NULL. */
+static INLINE struct galahad_sampler_view *
+galahad_sampler_view(struct pipe_sampler_view *_sampler_view)
+{
+   if (_sampler_view)
+      return (struct galahad_sampler_view *)_sampler_view;
+
+   return NULL;
+}
+
+/** Cast a pipe_surface to its galahad wrapper; NULL stays NULL. */
+static INLINE struct galahad_surface *
+galahad_surface(struct pipe_surface *_surface)
+{
+   if (_surface) {
+      (void)galahad_resource(_surface->texture);
+      return (struct galahad_surface *)_surface;
+   }
+
+   return NULL;
+}
+
+/** Cast a pipe_transfer to its galahad wrapper; NULL stays NULL. */
+static INLINE struct galahad_transfer *
+galahad_transfer(struct pipe_transfer *_transfer)
+{
+   if (_transfer) {
+      (void)galahad_resource(_transfer->resource);
+      return (struct galahad_transfer *)_transfer;
+   }
+
+   return NULL;
+}
+
+/** Return the driver resource behind a galahad resource, or NULL for NULL. */
+static INLINE struct pipe_resource *
+galahad_resource_unwrap(struct pipe_resource *_resource)
+{
+   return _resource ? galahad_resource(_resource)->resource : NULL;
+}
+
+/** Return the driver view behind a galahad sampler view, or NULL for NULL. */
+static INLINE struct pipe_sampler_view *
+galahad_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view)
+{
+   return _sampler_view ? galahad_sampler_view(_sampler_view)->sampler_view
+                        : NULL;
+}
+
+/** Return the driver surface behind a galahad surface, or NULL for NULL. */
+static INLINE struct pipe_surface *
+galahad_surface_unwrap(struct pipe_surface *_surface)
+{
+   return _surface ? galahad_surface(_surface)->surface : NULL;
+}
+
+/** Return the driver transfer behind a galahad transfer, or NULL for NULL. */
+static INLINE struct pipe_transfer *
+galahad_transfer_unwrap(struct pipe_transfer *_transfer)
+{
+   return _transfer ? galahad_transfer(_transfer)->transfer : NULL;
+}
+
+
+/** Wrap a driver resource; returns the wrapper's base or NULL on failure. */
+struct pipe_resource *
+galahad_resource_create(struct galahad_screen *glhd_screen,
+ struct pipe_resource *resource);
+
+/** Release the wrapped resource reference and free the wrapper. */
+void
+galahad_resource_destroy(struct galahad_resource *glhd_resource);
+
+/** Wrap a driver surface; returns the wrapper's base or NULL on failure. */
+struct pipe_surface *
+galahad_surface_create(struct galahad_resource *glhd_resource,
+ struct pipe_surface *surface);
+
+/** Release the references held by a surface wrapper and free it. */
+void
+galahad_surface_destroy(struct galahad_surface *glhd_surface);
+
+/** Wrap a driver sampler view; returns NULL when given a NULL view. */
+struct pipe_sampler_view *
+galahad_sampler_view_create(struct galahad_context *glhd_context,
+ struct galahad_resource *glhd_resource,
+ struct pipe_sampler_view *view);
+
+/** Destroy the wrapped view through the wrapped context and free the wrapper. */
+void
+galahad_sampler_view_destroy(struct galahad_context *glhd_context,
+ struct galahad_sampler_view *glhd_sampler_view);
+
+/** Wrap a driver transfer; returns the wrapper's base or NULL on failure. */
+struct pipe_transfer *
+galahad_transfer_create(struct galahad_context *glhd_context,
+ struct galahad_resource *glhd_resource,
+ struct pipe_transfer *transfer);
+
+/** Destroy the wrapped transfer through the wrapped context and free the wrapper. */
+void
+galahad_transfer_destroy(struct galahad_context *glhd_context,
+ struct galahad_transfer *glhd_transfer);
+
+
+#endif /* GLHD_OBJECTS_H */
diff --git a/src/gallium/drivers/identity/id_drm.h b/src/gallium/drivers/galahad/glhd_public.h
index cf2ad2ce07..77a380196a 100644
--- a/src/gallium/drivers/identity/id_drm.h
+++ b/src/gallium/drivers/galahad/glhd_public.h
@@ -25,11 +25,13 @@
*
**************************************************************************/
-#ifndef ID_DRM_H
-#define ID_DRM_H
+#ifndef GLHD_PUBLIC_H
+#define GLHD_PUBLIC_H
-struct drm_api;
+struct pipe_screen;
+struct pipe_context;
-struct drm_api* identity_drm_create(struct drm_api *api);
+struct pipe_screen *
+galahad_screen_create(struct pipe_screen *screen);
-#endif /* ID_DRM_H */
+#endif /* GLHD_PUBLIC_H */
diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c
new file mode 100644
index 0000000000..4117485702
--- /dev/null
+++ b/src/gallium/drivers/galahad/glhd_screen.c
@@ -0,0 +1,334 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * 2010 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "glhd_public.h"
+#include "glhd_screen.h"
+#include "glhd_context.h"
+#include "glhd_objects.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(galahad, "GALLIUM_GALAHAD", FALSE)
+
+/**
+ * Destroy the wrapped screen, then free the galahad wrapper.
+ */
+static void
+galahad_screen_destroy(struct pipe_screen *_screen)
+{
+   struct galahad_screen *wrapper = galahad_screen(_screen);
+   struct pipe_screen *inner = wrapper->screen;
+
+   inner->destroy(inner);
+   FREE(wrapper);
+}
+
+/** Return the wrapped screen's name. */
+static const char *
+galahad_screen_get_name(struct pipe_screen *_screen)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   return inner->get_name(inner);
+}
+
+/** Return the wrapped screen's vendor string. */
+static const char *
+galahad_screen_get_vendor(struct pipe_screen *_screen)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   return inner->get_vendor(inner);
+}
+
+/** Query an integer capability from the wrapped screen. */
+static int
+galahad_screen_get_param(struct pipe_screen *_screen,
+                         enum pipe_cap param)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   return inner->get_param(inner, param);
+}
+
+/** Query a floating-point capability from the wrapped screen. */
+static float
+galahad_screen_get_paramf(struct pipe_screen *_screen,
+                          enum pipe_cap param)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   return inner->get_paramf(inner, param);
+}
+
+/**
+ * Forward a format-support query to the wrapped screen.
+ *
+ * A texture target at or beyond PIPE_MAX_TEXTURE_TYPES triggers a warning,
+ * but the query is still forwarded unchanged.
+ */
+static boolean
+galahad_screen_is_format_supported(struct pipe_screen *_screen,
+                                   enum pipe_format format,
+                                   enum pipe_texture_target target,
+                                   unsigned sample_count,
+                                   unsigned tex_usage,
+                                   unsigned geom_flags)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   if (target >= PIPE_MAX_TEXTURE_TYPES) {
+      glhd_warn("Received bogus texture target %d", target);
+   }
+
+   return inner->is_format_supported(inner, format, target, sample_count,
+                                     tex_usage, geom_flags);
+}
+
+/**
+ * Create a context on the wrapped screen and wrap it in a galahad
+ * context.  Returns NULL when the wrapped screen creates no context.
+ */
+static struct pipe_context *
+galahad_screen_context_create(struct pipe_screen *_screen,
+                              void *priv)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+   struct pipe_context *inner_ctx = inner->context_create(inner, priv);
+
+   if (!inner_ctx)
+      return NULL;
+
+   return galahad_context_create(_screen, inner_ctx);
+}
+
+/**
+ * Create a resource on the wrapped screen and wrap it.  Returns NULL when
+ * the wrapped screen creates no resource.
+ */
+static struct pipe_resource *
+galahad_screen_resource_create(struct pipe_screen *_screen,
+                               const struct pipe_resource *templat)
+{
+   struct galahad_screen *wrapper = galahad_screen(_screen);
+   struct pipe_screen *inner = wrapper->screen;
+   struct pipe_resource *inner_res = inner->resource_create(inner, templat);
+
+   if (!inner_res)
+      return NULL;
+
+   return galahad_resource_create(wrapper, inner_res);
+}
+
+/**
+ * Import a resource from a winsys handle through the wrapped screen and
+ * wrap the result.  The result is handed to galahad_resource_create()
+ * without a NULL check here.
+ */
+static struct pipe_resource *
+galahad_screen_resource_from_handle(struct pipe_screen *_screen,
+                                    const struct pipe_resource *templ,
+                                    struct winsys_handle *handle)
+{
+   struct galahad_screen *wrapper = galahad_screen(_screen);
+   struct pipe_screen *inner = wrapper->screen;
+   struct pipe_resource *imported;
+
+   /* TODO trace call */
+
+   imported = inner->resource_from_handle(inner, templ, handle);
+
+   return galahad_resource_create(wrapper, imported);
+}
+
+/**
+ * Export a winsys handle for the wrapped resource via the wrapped screen.
+ */
+static boolean
+galahad_screen_resource_get_handle(struct pipe_screen *_screen,
+                                   struct pipe_resource *_resource,
+                                   struct winsys_handle *handle)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+   struct pipe_resource *inner_res = galahad_resource(_resource)->resource;
+
+   /* TODO trace call */
+
+   return inner->resource_get_handle(inner, inner_res, handle);
+}
+
+
+
+/**
+ * pipe_screen::resource_destroy hook: tear down a resource wrapper.
+ * The screen argument is not used.
+ */
+static void
+galahad_screen_resource_destroy(struct pipe_screen *screen,
+                                struct pipe_resource *_resource)
+{
+   struct galahad_resource *wrapper = galahad_resource(_resource);
+
+   galahad_resource_destroy(wrapper);
+}
+
+/**
+ * Get a surface for the wrapped resource from the wrapped screen and
+ * wrap it.  Returns NULL when the wrapped screen returns no surface.
+ */
+static struct pipe_surface *
+galahad_screen_get_tex_surface(struct pipe_screen *_screen,
+                               struct pipe_resource *_resource,
+                               unsigned face,
+                               unsigned level,
+                               unsigned zslice,
+                               unsigned usage)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+   struct galahad_resource *res_wrapper = galahad_resource(_resource);
+   struct pipe_surface *inner_surf;
+
+   inner_surf = inner->get_tex_surface(inner, res_wrapper->resource,
+                                       face, level, zslice, usage);
+   if (!inner_surf)
+      return NULL;
+
+   return galahad_surface_create(res_wrapper, inner_surf);
+}
+
+/** pipe_screen::tex_surface_destroy hook: tear down a surface wrapper. */
+static void
+galahad_screen_tex_surface_destroy(struct pipe_surface *_surface)
+{
+   struct galahad_surface *wrapper = galahad_surface(_surface);
+
+   galahad_surface_destroy(wrapper);
+}
+
+
+
+/**
+ * Create a user buffer on the wrapped screen and wrap it.  Returns NULL
+ * when the wrapped screen creates no buffer.
+ */
+static struct pipe_resource *
+galahad_screen_user_buffer_create(struct pipe_screen *_screen,
+                                  void *ptr,
+                                  unsigned bytes,
+                                  unsigned usage)
+{
+   struct galahad_screen *wrapper = galahad_screen(_screen);
+   struct pipe_screen *inner = wrapper->screen;
+   struct pipe_resource *inner_buf;
+
+   inner_buf = inner->user_buffer_create(inner, ptr, bytes, usage);
+   if (!inner_buf)
+      return NULL;
+
+   return galahad_resource_create(wrapper, inner_buf);
+}
+
+
+
+/**
+ * Forward a front-buffer flush to the wrapped screen using the wrapped
+ * surface.
+ */
+static void
+galahad_screen_flush_frontbuffer(struct pipe_screen *_screen,
+                                 struct pipe_surface *_surface,
+                                 void *context_private)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+   struct pipe_surface *inner_surf = galahad_surface(_surface)->surface;
+
+   inner->flush_frontbuffer(inner, inner_surf, context_private);
+}
+
+/** Forward a fence reference update to the wrapped screen. */
+static void
+galahad_screen_fence_reference(struct pipe_screen *_screen,
+                               struct pipe_fence_handle **ptr,
+                               struct pipe_fence_handle *fence)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   inner->fence_reference(inner, ptr, fence);
+}
+
+/** Forward a fence-signalled query to the wrapped screen. */
+static int
+galahad_screen_fence_signalled(struct pipe_screen *_screen,
+                               struct pipe_fence_handle *fence,
+                               unsigned flags)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   return inner->fence_signalled(inner, fence, flags);
+}
+
+/** Forward a fence-finish request to the wrapped screen. */
+static int
+galahad_screen_fence_finish(struct pipe_screen *_screen,
+                            struct pipe_fence_handle *fence,
+                            unsigned flags)
+{
+   struct pipe_screen *inner = galahad_screen(_screen)->screen;
+
+   return inner->fence_finish(inner, fence, flags);
+}
+
+/**
+ * Optionally wrap a driver screen in a galahad screen.
+ *
+ * \param screen  driver screen to wrap; may be NULL
+ * \return \p screen itself when it is NULL, when the GALLIUM_GALAHAD debug
+ *         option is off, or when the wrapper cannot be allocated;
+ *         otherwise the galahad wrapper's base screen.
+ */
+struct pipe_screen *
+galahad_screen_create(struct pipe_screen *screen)
+{
+   struct galahad_screen *glhd_screen;
+
+   /* Never wrap a failed screen creation: a non-NULL wrapper around NULL
+    * would hide the failure from the caller and crash on first use. */
+   if (!screen)
+      return NULL;
+
+   if (!debug_get_option_galahad())
+      return screen;
+
+   glhd_screen = CALLOC_STRUCT(galahad_screen);
+   if (!glhd_screen) {
+      /* fall back to the unwrapped screen rather than failing */
+      return screen;
+   }
+
+   glhd_screen->base.winsys = NULL;
+
+   glhd_screen->base.destroy = galahad_screen_destroy;
+   glhd_screen->base.get_name = galahad_screen_get_name;
+   glhd_screen->base.get_vendor = galahad_screen_get_vendor;
+   glhd_screen->base.get_param = galahad_screen_get_param;
+   glhd_screen->base.get_paramf = galahad_screen_get_paramf;
+   glhd_screen->base.is_format_supported = galahad_screen_is_format_supported;
+   glhd_screen->base.context_create = galahad_screen_context_create;
+   glhd_screen->base.resource_create = galahad_screen_resource_create;
+   glhd_screen->base.resource_from_handle = galahad_screen_resource_from_handle;
+   glhd_screen->base.resource_get_handle = galahad_screen_resource_get_handle;
+   glhd_screen->base.resource_destroy = galahad_screen_resource_destroy;
+   glhd_screen->base.get_tex_surface = galahad_screen_get_tex_surface;
+   glhd_screen->base.tex_surface_destroy = galahad_screen_tex_surface_destroy;
+   glhd_screen->base.user_buffer_create = galahad_screen_user_buffer_create;
+   glhd_screen->base.flush_frontbuffer = galahad_screen_flush_frontbuffer;
+   glhd_screen->base.fence_reference = galahad_screen_fence_reference;
+   glhd_screen->base.fence_signalled = galahad_screen_fence_signalled;
+   glhd_screen->base.fence_finish = galahad_screen_fence_finish;
+
+   glhd_screen->screen = screen;
+
+   return &glhd_screen->base;
+}
diff --git a/src/gallium/drivers/trace/tr_drm.h b/src/gallium/drivers/galahad/glhd_screen.h
index 845c66a32a..7862f4af2b 100644
--- a/src/gallium/drivers/trace/tr_drm.h
+++ b/src/gallium/drivers/galahad/glhd_screen.h
@@ -25,11 +25,24 @@
*
**************************************************************************/
-#ifndef TR_DRM_H
-#define TR_DRM_H
+#ifndef GLHD_SCREEN_H
+#define GLHD_SCREEN_H
-struct drm_api;
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
-struct drm_api* trace_drm_create(struct drm_api *api);
-#endif /* ID_DRM_H */
+/**
+ * Wrapper around a driver pipe_screen.  The base member comes first so a
+ * pipe_screen pointer can be cast to the wrapper type.
+ */
+struct galahad_screen {
+ struct pipe_screen base; /**< screen handed out by galahad */
+
+ struct pipe_screen *screen; /**< wrapped driver screen */
+};
+
+
+/** Cast a pipe_screen pointer to its galahad wrapper type. */
+static INLINE struct galahad_screen *
+galahad_screen(struct pipe_screen *screen)
+{
+   struct galahad_screen *wrapper = (struct galahad_screen *)screen;
+
+   return wrapper;
+}
+
+#endif /* GLHD_SCREEN_H */
diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile
index 2cefe70850..b3f387f933 100644
--- a/src/gallium/drivers/i915/Makefile
+++ b/src/gallium/drivers/i915/Makefile
@@ -15,7 +15,9 @@ C_SOURCES = \
i915_state_dynamic.c \
i915_state_derived.c \
i915_state_emit.c \
+ i915_state_fpc.c \
i915_state_sampler.c \
+ i915_state_static.c \
i915_screen.c \
i915_prim_emit.c \
i915_prim_vbuf.c \
diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript
index d6e7a8dbd3..d4bf6fef13 100644
--- a/src/gallium/drivers/i915/SConscript
+++ b/src/gallium/drivers/i915/SConscript
@@ -24,9 +24,11 @@ i915 = env.ConvenienceLibrary(
'i915_state.c',
'i915_state_derived.c',
'i915_state_dynamic.c',
+ 'i915_state_fpc.c',
'i915_state_emit.c',
'i915_state_immediate.c',
'i915_state_sampler.c',
+ 'i915_state_static.c',
'i915_surface.c',
'i915_resource.c',
'i915_resource_texture.c',
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index f0086695d1..c411b84ccd 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -30,6 +30,7 @@
#include "i915_batchbuffer.h"
+
#define BEGIN_BATCH(dwords, relocs) \
(i915_winsys_batchbuffer_check(i915->batch, dwords, relocs))
@@ -39,9 +40,14 @@
#define OUT_RELOC(buf, usage, offset) \
i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset)
-#define FLUSH_BATCH(fence) do { \
- i915_winsys_batchbuffer_flush(i915->batch, fence); \
- i915->hardware_dirty = ~0; \
-} while (0)
+#define FLUSH_BATCH(fence) \
+ i915_flush(i915, fence)
+
+
+/************************************************************************
+ * i915_flush.c
+ */
+void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence);
+
#endif
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index 27ccaa6b1f..c1cd314e7b 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -30,6 +30,8 @@
#include "i915_winsys.h"
+struct i915_context;
+
static INLINE boolean
i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
size_t dwords,
@@ -77,11 +79,4 @@ i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset);
}
-static INLINE void
-i915_winsys_batchbuffer_flush(struct i915_winsys_batchbuffer *batch,
- struct pipe_fence_handle **fence)
-{
- batch->iws->batchbuffer_flush(batch, fence);
-}
-
#endif
diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index c5b5979bf9..cdf20c0055 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -31,7 +31,6 @@
#include "i915_batch.h"
#include "i915_debug.h"
-#define FILE_DEBUG_FLAG DEBUG_BLIT
void
i915_fill_blit(struct i915_context *i915,
@@ -47,10 +46,8 @@ i915_fill_blit(struct i915_context *i915,
unsigned BR13, CMD;
- I915_DBG(i915,
- "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
- __FUNCTION__,
- dst_buffer, dst_pitch, dst_offset, x, y, w, h);
+ I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+ __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
switch (cpp) {
case 1:
@@ -79,7 +76,6 @@ i915_fill_blit(struct i915_context *i915,
OUT_BATCH(((y + h) << 16) | (x + w));
OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
OUT_BATCH(color);
- FLUSH_BATCH(NULL);
}
void
@@ -100,11 +96,11 @@ i915_copy_blit(struct i915_context *i915,
int dst_x2 = dst_x + w;
- I915_DBG(i915,
- "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
- __FUNCTION__,
- src_buffer, src_pitch, src_offset, src_x, src_y,
- dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
+ I915_DBG(DBG_BLIT,
+ "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+ __FUNCTION__,
+ src_buffer, src_pitch, src_offset, src_x, src_y,
+ dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
switch (cpp) {
case 1:
@@ -146,5 +142,4 @@ i915_copy_blit(struct i915_context *i915,
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(((int) src_pitch & 0xffff));
OUT_RELOC(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
- FLUSH_BATCH(NULL);
}
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index acc0ffe037..b210cb130d 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -237,8 +237,6 @@ struct i915_context
struct i915_state current;
unsigned hardware_dirty;
-
- unsigned debug;
};
/* A flag for each state_tracker state object:
@@ -318,8 +316,6 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen,
void *priv);
-
-
/***********************************************************************
* Inline conversion functions. These are better-typed than the
* macros used previously:
@@ -331,5 +327,4 @@ i915_context( struct pipe_context *pipe )
}
-
#endif
diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index 663fac3055..57d3390dea 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -27,11 +27,37 @@
#include "i915_reg.h"
#include "i915_context.h"
+#include "i915_screen.h"
#include "i915_debug.h"
+#include "i915_debug_private.h"
#include "i915_batch.h"
#include "util/u_debug.h"
+
+static const struct debug_named_value debug_options[] = {
+ {"blit", DBG_BLIT, "Print when using the 2d blitter"},
+ {"emit", DBG_EMIT, "State emit information"},
+ {"atoms", DBG_ATOMS, "Print dirty state atoms"},
+ {"flush", DBG_FLUSH, "Flushing information"},
+ {"texture", DBG_TEXTURE, "Texture information"},
+ {"constants", DBG_CONSTANTS, "Constant buffers"},
+ DEBUG_NAMED_VALUE_END
+};
+
+unsigned i915_debug = 0;
+
+void i915_debug_init(struct i915_screen *screen) /* `screen` is not read by this function */
+{
+ i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); /* decode the "I915_DEBUG" option via the debug_options table */
+}
+
+
+
+/***********************************************************************
+ * Batchbuffer dumping
+ */
+
static void
PRINTF(
struct debug_stream *stream,
@@ -896,3 +922,66 @@ i915_dump_batchbuffer( struct i915_winsys_batchbuffer *batch )
}
+
+/***********************************************************************
+ * Dirty state atom dumping
+ */
+
+void
+i915_dump_dirty(struct i915_context *i915, const char *func) /* prints "<func>: " followed by the name of every flag set in i915->dirty */
+{
+ struct {
+ unsigned dirty;
+ const char *name;
+ } l[] = {
+ {I915_NEW_VIEWPORT, "viewport"},
+ {I915_NEW_RASTERIZER, "rasterizer"},
+ {I915_NEW_FS, "fs"},
+ {I915_NEW_BLEND, "blend"},
+ {I915_NEW_CLIP, "clip"},
+ {I915_NEW_SCISSOR, "scissor"},
+ {I915_NEW_STIPPLE, "stipple"},
+ {I915_NEW_FRAMEBUFFER, "framebuffer"},
+ {I915_NEW_ALPHA_TEST, "alpha_test"},
+ {I915_NEW_DEPTH_STENCIL, "depth_stencil"},
+ {I915_NEW_SAMPLER, "sampler"},
+ {I915_NEW_SAMPLER_VIEW, "sampler_view"},
+ {I915_NEW_CONSTANTS, "constants"},
+ {I915_NEW_VBO, "vbo"},
+ {I915_NEW_VS, "vs"},
+ {0, NULL}, /* sentinel: the NULL name ends the loop below */
+ };
+ int i;
+
+ debug_printf("%s: ", func);
+ for (i = 0; l[i].name; i++) /* walk the table until the sentinel */
+ if (i915->dirty & l[i].dirty)
+ debug_printf("%s ", l[i].name);
+ debug_printf("\n");
+}
+
+void
+i915_dump_hardware_dirty(struct i915_context *i915, const char *func) /* prints "<func>: " followed by the name of every flag set in i915->hardware_dirty */
+{
+ struct {
+ unsigned dirty;
+ const char *name;
+ } l[] = {
+ {I915_HW_STATIC, "static"},
+ {I915_HW_DYNAMIC, "dynamic"},
+ {I915_HW_SAMPLER, "sampler"},
+ {I915_HW_MAP, "map"},
+ {I915_HW_PROGRAM, "program"},
+ {I915_HW_CONSTANTS, "constants"},
+ {I915_HW_IMMEDIATE, "immediate"},
+ {I915_HW_INVARIENT, "invarient"},
+ {0, NULL}, /* sentinel: the NULL name ends the loop below */
+ };
+ int i;
+
+ debug_printf("%s: ", func);
+ for (i = 0; l[i].name; i++) /* walk the table until the sentinel */
+ if (i915->hardware_dirty & l[i].dirty)
+ debug_printf("%s ", l[i].name);
+ debug_printf("\n");
+}
diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h
index 67b8d9c2f6..fa60799d0c 100644
--- a/src/gallium/drivers/i915/i915_debug.h
+++ b/src/gallium/drivers/i915/i915_debug.h
@@ -26,89 +26,56 @@
**************************************************************************/
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ * Jakob Bornecrantz <wallbraker@gmail.com>
*/
#ifndef I915_DEBUG_H
#define I915_DEBUG_H
-#include <stdarg.h>
+#include "util/u_debug.h"
+struct i915_screen;
struct i915_context;
+struct i915_winsys_batchbuffer;
-struct debug_stream
-{
- unsigned offset; /* current gtt offset */
- char *ptr; /* pointer to gtt offset zero */
- char *end; /* pointer to gtt offset zero */
- unsigned print_addresses;
-};
-
-
-/* Internal functions
- */
-void i915_disassemble_program(struct debug_stream *stream,
- const unsigned *program, unsigned sz);
-
-void i915_print_ureg(const char *msg, unsigned ureg);
-
-
-#define DEBUG_BATCH 0x1
-#define DEBUG_BLIT 0x2
-#define DEBUG_BUFFER 0x4
-#define DEBUG_CONSTANTS 0x8
-#define DEBUG_CONTEXT 0x10
-#define DEBUG_DRAW 0x20
-#define DEBUG_DYNAMIC 0x40
-#define DEBUG_FLUSH 0x80
-#define DEBUG_MAP 0x100
-#define DEBUG_PROGRAM 0x200
-#define DEBUG_REGIONS 0x400
-#define DEBUG_SAMPLER 0x800
-#define DEBUG_STATIC 0x1000
-#define DEBUG_SURFACE 0x2000
-#define DEBUG_WINSYS 0x4000
-
-#include "pipe/p_compiler.h"
+#define DBG_BLIT 0x1
+#define DBG_EMIT 0x2
+#define DBG_ATOMS 0x4
+#define DBG_FLUSH 0x8
+#define DBG_TEXTURE 0x10
+#define DBG_CONSTANTS 0x20
-#if defined(DEBUG) && defined(FILE_DEBUG_FLAG)
+extern unsigned i915_debug;
-#include "util/u_simple_screen.h"
+#ifdef DEBUG
+static INLINE boolean
+I915_DBG_ON(unsigned flags) /* non-zero when any bit of `flags` is set in i915_debug */
+{
+ return (i915_debug & flags) != 0; /* normalise to 0/1: a boolean narrower than unsigned would drop high flag bits */
+}
static INLINE void
-I915_DBG(
- struct i915_context *i915,
- const char *fmt,
- ... )
+I915_DBG(unsigned flags, const char *fmt, ...)
{
- if ((i915)->debug & FILE_DEBUG_FLAG) {
+ if (I915_DBG_ON(flags)) {
va_list args;
- va_start( args, fmt );
- debug_vprintf( fmt, args );
- va_end( args );
+ va_start(args, fmt);
+ debug_vprintf(fmt, args);
+ va_end(args);
}
}
-
#else
-
-static INLINE void
-I915_DBG(
- struct i915_context *i915,
- const char *fmt,
- ... )
-{
- (void) i915;
- (void) fmt;
-}
-
+#define I915_DBG_ON(flags) (0)
+static INLINE void I915_DBG(unsigned flags, const char *fmt, ...) {}
#endif
+void i915_debug_init(struct i915_screen *screen); /* parameter named after its type, matching the definition in i915_debug.c */
-struct i915_winsys_batchbuffer;
-
-void i915_dump_batchbuffer( struct i915_winsys_batchbuffer *i915 );
+void i915_dump_batchbuffer(struct i915_winsys_batchbuffer *batch); /* parameter named after its type, matching the definition in i915_debug.c */
-void i915_debug_init( struct i915_context *i915 );
+void i915_dump_dirty(struct i915_context *i915, const char *func);
+void i915_dump_hardware_dirty(struct i915_context *i915, const char *func);
#endif
diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c
index f41c51f299..50f49c540f 100644
--- a/src/gallium/drivers/i915/i915_debug_fp.c
+++ b/src/gallium/drivers/i915/i915_debug_fp.c
@@ -28,6 +28,7 @@
#include "i915_reg.h"
#include "i915_debug.h"
+#include "i915_debug_private.h"
#include "util/u_debug.h"
diff --git a/src/gallium/drivers/i915/i915_debug_private.h b/src/gallium/drivers/i915/i915_debug_private.h
new file mode 100644
index 0000000000..b3668d0848
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_debug_private.h
@@ -0,0 +1,45 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef I915_DEBUG_PRIVATE_H
+#define I915_DEBUG_PRIVATE_H
+
+struct debug_stream
+{
+ unsigned offset; /* current gtt offset */
+ char *ptr; /* pointer to gtt offset zero */
+ char *end; /* NOTE(review): comment was a copy of `ptr`'s; presumably the end of the range being dumped -- confirm */
+ unsigned print_addresses;
+};
+
+void i915_disassemble_program(struct debug_stream *stream,
+ const unsigned *program, unsigned sz);
+
+#endif
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index 1582168eba..a2c70b1199 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -35,11 +35,12 @@
#include "i915_context.h"
#include "i915_reg.h"
#include "i915_batch.h"
+#include "i915_debug.h"
-static void i915_flush( struct pipe_context *pipe,
- unsigned flags,
- struct pipe_fence_handle **fence )
+static void i915_flush_pipe( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence )
{
struct i915_context *i915 = i915_context(pipe);
@@ -66,21 +67,31 @@ static void i915_flush( struct pipe_context *pipe,
}
#endif
-#if 0
if (i915->batch->map == i915->batch->ptr) {
return;
}
-#endif
/* If there are no flags, just flush pending commands to hardware:
*/
FLUSH_BATCH(fence);
i915->vbo_flushed = 1;
-}
-
+ I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__);
+}
void i915_init_flush_functions( struct i915_context *i915 )
{
- i915->base.flush = i915_flush;
+ i915->base.flush = i915_flush_pipe;
+}
+
+/**
+ * Here we handle all the notifications that need to go out on a flush.
+ * XXX might move above function to i915_pipe_flush.c and leave this here.
+ */
+void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
+{
+ struct i915_winsys_batchbuffer *batch = i915->batch;
+
+ batch->iws->batchbuffer_flush(batch, fence); /* hand the batch to the winsys flush hook */
+ i915->hardware_dirty = ~0; /* set every hardware_dirty bit after the flush */
}
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index f8665acbe1..bd046bd905 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -52,8 +52,7 @@
#include "i915_state.h"
-#undef VBUF_USE_FIFO
-#undef VBUF_MAP_BUFFER
+#define VBUF_MAP_BUFFER
/**
* Primitive renderer for i915.
@@ -79,23 +78,18 @@ struct i915_vbuf_render {
struct i915_winsys_buffer *vbo;
size_t vbo_size; /**< current size of allocated buffer */
size_t vbo_alloc_size; /**< minimum buffer size to allocate */
- size_t vbo_offset;
+ size_t vbo_hw_offset; /**< offset that we program the hardware with */
+ size_t vbo_sw_offset; /**< offset that we work with */
+ size_t vbo_index; /**< index offset to be added to all indices */
void *vbo_ptr;
size_t vbo_max_used;
+ size_t vbo_max_index; /**< max_index recorded by the latest unmap_vertices call */
#ifndef VBUF_MAP_BUFFER
size_t map_used_start;
size_t map_used_end;
size_t map_size;
#endif
-
-#ifdef VBUF_USE_FIFO
- /* Stuff for the pool */
- struct util_fifo *pool_fifo;
- unsigned pool_used;
- unsigned pool_buffer_size;
- boolean pool_not_used;
-#endif
};
@@ -109,6 +103,35 @@ i915_vbuf_render(struct vbuf_render *render)
return (struct i915_vbuf_render *)render;
}
+/**
+ * If vbo state differs between renderer and context
+ * push state to the context. This function pushes
+ * hw_offset to i915->vbo_offset and vbo to i915->vbo.
+ *
+ * Side effects:
+ * May update the context's vbo_offset and vbo fields and set I915_NEW_VBO.
+ */
+static void
+i915_vbuf_update_vbo_state(struct vbuf_render *render)
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+
+ if (i915->vbo != i915_render->vbo ||
+ i915->vbo_offset != i915_render->vbo_hw_offset) {
+ i915->vbo = i915_render->vbo;
+ i915->vbo_offset = i915_render->vbo_hw_offset;
+ i915->dirty |= I915_NEW_VBO;
+ }
+}
+
+/**
+ * Callback exported to the draw module.
+ * Returns the current vertex_info.
+ *
+ * Side effects:
+ * If state is dirty update derived state.
+ */
static const struct vertex_info *
i915_vbuf_render_get_vertex_info(struct vbuf_render *render)
{
@@ -123,12 +146,18 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render)
return &i915->current.vertex_info;
}
+/**
+ * Reserve space in the vbo for vertices.
+ *
+ * Side effects:
+ * None.
+ */
static boolean
i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
{
struct i915_context *i915 = i915_render->i915;
- if (i915_render->vbo_size < size + i915_render->vbo_offset)
+ if (i915_render->vbo_size < size + i915_render->vbo_sw_offset)
return FALSE;
if (i915->vbo_flushed)
@@ -137,28 +166,28 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
return TRUE;
}
+/**
+ * Allocate a new vbo buffer should there not be enough space for
+ * the requested number of vertices by the draw module.
+ *
+ * Side effects:
+ * Updates hw_offset, sw_offset, index and allocates a new buffer.
+ */
static void
i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
{
struct i915_context *i915 = i915_render->i915;
struct i915_winsys *iws = i915->iws;
- if (i915_render->vbo) {
-#ifdef VBUF_USE_FIFO
- if (i915_render->pool_not_used)
- iws->buffer_destroy(iws, i915_render->vbo);
- else
- u_fifo_add(i915_render->pool_fifo, i915_render->vbo);
- i915_render->vbo = NULL;
-#else
+ if (i915_render->vbo)
iws->buffer_destroy(iws, i915_render->vbo);
-#endif
- }
i915->vbo_flushed = 0;
i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
- i915_render->vbo_offset = 0;
+ i915_render->vbo_hw_offset = 0;
+ i915_render->vbo_sw_offset = 0;
+ i915_render->vbo_index = 0;
#ifndef VBUF_MAP_BUFFER
if (i915_render->vbo_size > i915_render->map_size) {
@@ -168,52 +197,51 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
}
#endif
-#ifdef VBUF_USE_FIFO
- if (i915_render->vbo_size != i915_render->pool_buffer_size) {
- i915_render->pool_not_used = TRUE;
- i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
- I915_NEW_VERTEX);
- } else {
- i915_render->pool_not_used = FALSE;
-
- if (i915_render->pool_used >= 2) {
- FLUSH_BATCH(NULL);
- i915->vbo_flushed = 0;
- i915_render->pool_used = 0;
- }
- u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo);
- }
-#else
i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size,
64, I915_NEW_VERTEX);
-#endif
}
+/**
+ * Callback exported to the draw module.
+ *
+ * Side effects:
+ * Updates hw_offset, sw_offset, index and may allocate
+ * a new buffer. May also update the vbo state
+ * on the i915 context.
+ */
static boolean
i915_vbuf_render_allocate_vertices(struct vbuf_render *render,
ushort vertex_size,
ushort nr_vertices)
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
- struct i915_context *i915 = i915_render->i915;
size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+ size_t offset;
- /* FIXME: handle failure */
- assert(!i915->vbo);
+ /*
+ * Align sw_offset with first multiple of vertex size from hw_offset.
+ * Set index to the number of vertices between hw_offset and sw_offset.
+ * If i915_vbuf_render_new_buf allocates a new buffer below, it resets
+ * index, sw_offset and hw_offset, which is also correct.
+ */
+ {
+ offset = i915_render->vbo_sw_offset - i915_render->vbo_hw_offset;
+ offset = util_align_npot(offset, vertex_size);
+ i915_render->vbo_sw_offset = i915_render->vbo_hw_offset + offset;
+ i915_render->vbo_index = offset / vertex_size;
+ }
- if (!i915_vbuf_render_reserve(i915_render, size)) {
-#ifdef VBUF_USE_FIFO
- /* incase we flushed reset the number of pool buffers used */
- if (i915->vbo_flushed)
- i915_render->pool_used = 0;
-#endif
+ if (!i915_vbuf_render_reserve(i915_render, size))
i915_vbuf_render_new_buf(i915_render, size);
- }
+
+ /*
+ * If a new buffer has been allocated, sw_offset,
+ * hw_offset & index will have been reset by new_buf
+ */
i915_render->vertex_size = vertex_size;
- i915->vbo = i915_render->vbo;
- i915->vbo_offset = i915_render->vbo_offset;
- i915->dirty |= I915_NEW_VBO;
+
+ i915_vbuf_update_vbo_state(render);
if (!i915_render->vbo)
return FALSE;
@@ -232,7 +260,7 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render)
#ifdef VBUF_MAP_BUFFER
i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
- return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_offset;
+ return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset;
#else
(void)iws;
return (unsigned char *)i915_render->vbo_ptr;
@@ -248,6 +276,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render,
struct i915_context *i915 = i915_render->i915;
struct i915_winsys *iws = i915->iws;
+ i915_render->vbo_max_index = max_index;
i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1));
#ifdef VBUF_MAP_BUFFER
iws->buffer_unmap(iws, i915_render->vbo);
@@ -255,13 +284,36 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render,
i915_render->map_used_start = i915_render->vertex_size * min_index;
i915_render->map_used_end = i915_render->vertex_size * (max_index + 1);
iws->buffer_write(iws, i915_render->vbo,
- i915_render->map_used_start + i915_render->vbo_offset,
+ i915_render->map_used_start + i915_render->vbo_sw_offset,
i915_render->map_used_end - i915_render->map_used_start,
(unsigned char *)i915_render->vbo_ptr + i915_render->map_used_start);
#endif
}
+/**
+ * Ensure that max_index, once offset by vbo_index, still fits in a ushort
+ * (indices are emitted as two 16-bit values per batch dword).
+ * If it does not fit, advance hw_offset to the same position in the vbo
+ * as sw_offset and set index to zero.
+ * Side effects:
+ * On overflow, updates hw_offset and index and may dirty the context vbo state.
+ */
+static void
+i915_vbuf_ensure_index_bounds(struct vbuf_render *render,
+ unsigned max_index)
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+
+ if (max_index + i915_render->vbo_index <= 0xffff) /* ushort max; was (1 << 17) - 1 */
+ return;
+
+ i915_render->vbo_hw_offset = i915_render->vbo_sw_offset;
+ i915_render->vbo_index = 0;
+
+ i915_vbuf_update_vbo_state(render);
+}
+
static boolean
i915_vbuf_render_set_primitive(struct vbuf_render *render,
unsigned prim)
@@ -327,7 +379,9 @@ draw_arrays_generate_indices(struct vbuf_render *render,
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
unsigned i;
- unsigned end = start + nr;
+ unsigned end = start + nr + i915_render->vbo_index;
+ start += i915_render->vbo_index;
+
switch(type) {
case 0:
for (i = start; i+1 < end; i += 2)
@@ -391,16 +445,18 @@ draw_arrays_fallback(struct vbuf_render *render,
struct i915_context *i915 = i915_render->i915;
unsigned nr_indices;
+ nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback);
+ if (!nr_indices)
+ return;
+
+ i915_vbuf_ensure_index_bounds(render, start + nr_indices);
+
if (i915->dirty)
i915_update_derived(i915);
if (i915->hardware_dirty)
i915_emit_hardware_state(i915);
- nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback);
- if (!nr_indices)
- return;
-
if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
FLUSH_BATCH(NULL);
@@ -415,6 +471,7 @@ draw_arrays_fallback(struct vbuf_render *render,
goto out;
}
}
+
OUT_BATCH(_3DPRIMITIVE |
PRIM_INDIRECT |
i915_render->hwprim |
@@ -440,6 +497,9 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
return;
}
+ i915_vbuf_ensure_index_bounds(render, start + nr);
+ start += i915_render->vbo_index;
+
if (i915->dirty)
i915_update_derived(i915);
@@ -485,35 +545,36 @@ draw_generate_indices(struct vbuf_render *render,
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
unsigned i;
+ unsigned o = i915_render->vbo_index;
switch(type) {
case 0:
for (i = 0; i + 1 < nr_indices; i += 2) {
- OUT_BATCH(indices[i] | indices[i+1] << 16);
+ OUT_BATCH((o+indices[i]) | (o+indices[i+1]) << 16);
}
if (i < nr_indices) {
- OUT_BATCH(indices[i]);
+ OUT_BATCH((o+indices[i]));
}
break;
case PIPE_PRIM_LINE_LOOP:
if (nr_indices >= 2) {
for (i = 1; i < nr_indices; i++)
- OUT_BATCH(indices[i-1] | indices[i] << 16);
- OUT_BATCH(indices[i-1] | indices[0] << 16);
+ OUT_BATCH((o+indices[i-1]) | (o+indices[i]) << 16);
+ OUT_BATCH((o+indices[i-1]) | (o+indices[0]) << 16);
}
break;
case PIPE_PRIM_QUADS:
for (i = 0; i + 3 < nr_indices; i += 4) {
- OUT_BATCH(indices[i+0] | indices[i+1] << 16);
- OUT_BATCH(indices[i+3] | indices[i+1] << 16);
- OUT_BATCH(indices[i+2] | indices[i+3] << 16);
+ OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16);
+ OUT_BATCH((o+indices[i+3]) | (o+indices[i+1]) << 16);
+ OUT_BATCH((o+indices[i+2]) | (o+indices[i+3]) << 16);
}
break;
case PIPE_PRIM_QUAD_STRIP:
for (i = 0; i + 3 < nr_indices; i += 2) {
- OUT_BATCH(indices[i+0] | indices[i+1] << 16);
- OUT_BATCH(indices[i+3] | indices[i+2] << 16);
- OUT_BATCH(indices[i+0] | indices[i+3] << 16);
+ OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16);
+ OUT_BATCH((o+indices[i+3]) | (o+indices[i+2]) << 16);
+ OUT_BATCH((o+indices[i+0]) | (o+indices[i+3]) << 16);
}
break;
default:
@@ -558,6 +619,8 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render,
if (!nr_indices)
return;
+ i915_vbuf_ensure_index_bounds(render, i915_render->vbo_max_index);
+
if (i915->dirty)
i915_update_derived(i915);
@@ -597,14 +660,15 @@ static void
i915_vbuf_render_release_vertices(struct vbuf_render *render)
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
- struct i915_context *i915 = i915_render->i915;
-
- assert(i915->vbo);
- i915_render->vbo_offset += i915_render->vbo_max_used;
+ i915_render->vbo_sw_offset += i915_render->vbo_max_used;
i915_render->vbo_max_used = 0;
- i915->vbo = NULL;
- i915->dirty |= I915_NEW_VBO;
+
+ /*
+ * Micro optimization: by calling update here, the offset change
+ * will be picked up on the next pipe_context::draw_*.
+ */
+ i915_vbuf_update_vbo_state(render);
}
static void
@@ -652,7 +716,8 @@ i915_vbuf_render_create(struct i915_context *i915)
i915_render->vbo = NULL;
i915_render->vbo_ptr = NULL;
i915_render->vbo_size = 0;
- i915_render->vbo_offset = 0;
+ i915_render->vbo_hw_offset = 0;
+ i915_render->vbo_sw_offset = 0;
i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4;
#ifdef VBUF_USE_POOL
diff --git a/src/gallium/drivers/i915/i915_public.h b/src/gallium/drivers/i915/i915_public.h
new file mode 100644
index 0000000000..588654d608
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_public.h
@@ -0,0 +1,13 @@
+
+#ifndef I915_PUBLIC_H
+#define I915_PUBLIC_H
+
+struct i915_winsys;
+struct pipe_screen;
+
+/**
+ * Create i915 pipe_screen.
+ */
+struct pipe_screen * i915_screen_create(struct i915_winsys *iws);
+
+#endif
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index 17fcdee379..752ddaae7b 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -42,6 +42,7 @@
#include "i915_resource.h"
#include "i915_screen.h"
#include "i915_winsys.h"
+#include "i915_debug.h"
#define DEBUG_TEXTURES 0
@@ -800,12 +801,10 @@ i915_texture_create(struct pipe_screen *screen,
ws->buffer_unmap(ws, tex->buffer);
#endif
-#if DEBUG_TEXTURES
- debug_printf("%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__,
- tex, (unsigned int)tex_size, tex->stride,
- tex->stride / util_format_get_blocksize(tex->b.b.format),
- tex->total_nblocksy);
-#endif
+ I915_DBG(DBG_TEXTURE, "%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__,
+ tex, (unsigned int)tex_size, tex->stride,
+ tex->stride / util_format_get_blocksize(tex->b.b.format),
+ tex->total_nblocksy);
return &tex->b.b;
@@ -846,12 +845,18 @@ i915_texture_from_handle(struct pipe_screen * screen,
tex->b.b.screen = screen;
tex->stride = stride;
+ tex->total_nblocksy = align_nblocksy(tex->b.b.format, tex->b.b.height0, 8);
i915_texture_set_level_info(tex, 0, 1);
i915_texture_set_image_offset(tex, 0, 0, 0, 0);
tex->buffer = buffer;
+ I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%ux%u)\n", __func__,
+ tex, tex->stride,
+ tex->stride / util_format_get_blocksize(tex->b.b.format),
+ tex->total_nblocksy);
+
return &tex->b.b;
}
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index f82426520c..77345d5f71 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -31,11 +31,13 @@
#include "util/u_string.h"
#include "i915_reg.h"
+#include "i915_debug.h"
#include "i915_context.h"
#include "i915_screen.h"
#include "i915_surface.h"
#include "i915_resource.h"
#include "i915_winsys.h"
+#include "i915_public.h"
/*
@@ -330,5 +332,7 @@ i915_screen_create(struct i915_winsys *iws)
i915_init_screen_resource_functions(is);
i915_init_screen_surface_functions(is);
+ i915_debug_init(is);
+
return &is->base;
}
diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h
index 86c6b0027d..b4074dc35b 100644
--- a/src/gallium/drivers/i915/i915_state.h
+++ b/src/gallium/drivers/i915/i915_state.h
@@ -35,16 +35,22 @@ struct i915_context;
struct i915_tracked_state {
+ const char *name;
+ void (*update)(struct i915_context *);
unsigned dirty;
- void (*update)( struct i915_context * );
};
-void i915_update_immediate( struct i915_context *i915 );
-void i915_update_dynamic( struct i915_context *i915 );
-void i915_update_derived( struct i915_context *i915 );
-void i915_update_samplers( struct i915_context *i915 );
-void i915_update_textures(struct i915_context *i915);
+extern struct i915_tracked_state i915_update_vertex_layout;
-void i915_emit_hardware_state( struct i915_context *i915 );
+extern struct i915_tracked_state i915_hw_samplers;
+extern struct i915_tracked_state i915_hw_sampler_views;
+extern struct i915_tracked_state i915_hw_immediate;
+extern struct i915_tracked_state i915_hw_dynamic;
+extern struct i915_tracked_state i915_hw_fs;
+extern struct i915_tracked_state i915_hw_framebuffer;
+extern struct i915_tracked_state i915_hw_constants;
+
+void i915_update_derived(struct i915_context *i915);
+void i915_emit_hardware_state(struct i915_context *i915);
#endif
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 4da46772b5..1d4026a214 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -32,15 +32,16 @@
#include "draw/draw_vertex.h"
#include "i915_context.h"
#include "i915_state.h"
+#include "i915_debug.h"
#include "i915_reg.h"
-/**
+/***********************************************************************
* Determine the hardware vertex layout.
* Depends on vertex/fragment shader state.
*/
-static void calculate_vertex_layout( struct i915_context *i915 )
+static void calculate_vertex_layout(struct i915_context *i915)
{
const struct i915_fragment_shader *fs = i915->fs;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
@@ -146,37 +147,38 @@ static void calculate_vertex_layout( struct i915_context *i915 )
}
}
+struct i915_tracked_state i915_update_vertex_layout = {
+ "vertex_layout",
+ calculate_vertex_layout,
+ I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS
+};
-/* Hopefully this will remain quite simple, otherwise need to pull in
- * something like the state tracker mechanism.
+/***********************************************************************
*/
-void i915_update_derived( struct i915_context *i915 )
+static struct i915_tracked_state *atoms[] = {
+ &i915_update_vertex_layout,
+ &i915_hw_samplers,
+ &i915_hw_sampler_views,
+ &i915_hw_immediate,
+ &i915_hw_dynamic,
+ &i915_hw_fs,
+ &i915_hw_framebuffer,
+ &i915_hw_constants,
+ NULL,
+};
+
+void i915_update_derived(struct i915_context *i915)
{
- if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS))
- calculate_vertex_layout( i915 );
+ int i;
- if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW))
- i915_update_samplers(i915);
+ if (I915_DBG_ON(DBG_ATOMS))
+ i915_dump_dirty(i915, __FUNCTION__);
- if (i915->dirty & I915_NEW_SAMPLER_VIEW)
- i915_update_textures(i915);
-
- if (i915->dirty)
- i915_update_immediate( i915 );
-
- if (i915->dirty)
- i915_update_dynamic( i915 );
-
- if (i915->dirty & I915_NEW_FS) {
- i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */
- }
-
- /* HW emit currently references framebuffer state directly:
- */
- if (i915->dirty & I915_NEW_FRAMEBUFFER)
- i915->hardware_dirty |= I915_HW_STATIC;
+ for (i = 0; atoms[i]; i++)
+ if (atoms[i]->dirty & i915->dirty)
+ atoms[i]->update(i915);
i915->dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index 9c6723b391..d61a8c3407 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
+ *
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
#include "i915_batch.h"
@@ -30,14 +30,13 @@
#include "i915_context.h"
#include "i915_reg.h"
#include "i915_state.h"
-#include "util/u_math.h"
+
#include "util/u_memory.h"
#include "util/u_pack_color.h"
-#define FILE_DEBUG_FLAG DEBUG_STATE
/* State that we have chosen to store in the DYNAMIC segment of the
- * i915 indirect state mechanism.
+ * i915 indirect state mechanism.
*
* Can't cache these in the way we do the static state, as there is no
* start/size in the command packet, instead an 'end' value that gets
@@ -47,13 +46,16 @@
* (active) state every time a 4kb boundary is crossed.
*/
-static INLINE void set_dynamic_indirect( struct i915_context *i915,
- unsigned offset,
- const unsigned *src,
- unsigned dwords )
+static INLINE void set_dynamic_indirect(struct i915_context *i915,
+ unsigned offset,
+ const unsigned *src,
+ unsigned dwords)
{
unsigned i;
+ if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4))
+ return;
+
for (i = 0; i < dwords; i++)
i915->current.dynamic[offset + i] = src[i];
@@ -61,38 +63,41 @@ static INLINE void set_dynamic_indirect( struct i915_context *i915,
}
+
/***********************************************************************
- * Modes4: stencil masks and logicop
+ * Modes4: stencil masks and logicop
*/
-static void upload_MODES4( struct i915_context *i915 )
+static void upload_MODES4(struct i915_context *i915)
{
unsigned modes4 = 0;
- /* I915_NEW_STENCIL */
+ /* I915_NEW_STENCIL
+ */
modes4 |= i915->depth_stencil->stencil_modes4;
- /* I915_NEW_BLEND */
+
+ /* I915_NEW_BLEND
+ */
modes4 |= i915->blend->modes4;
- /* Always, so that we know when state is in-active:
+ /* Always, so that we know when state is in-active:
*/
- set_dynamic_indirect( i915,
- I915_DYNAMIC_MODES4,
- &modes4,
- 1 );
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_MODES4,
+ &modes4,
+ 1);
}
const struct i915_tracked_state i915_upload_MODES4 = {
- I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL,
- upload_MODES4
+ "MODES4",
+ upload_MODES4,
+ I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL
};
-
/***********************************************************************
*/
-
-static void upload_BFO( struct i915_context *i915 )
+static void upload_BFO(struct i915_context *i915)
{
unsigned bfo[2];
bfo[0] = i915->depth_stencil->bfo[0];
@@ -101,88 +106,89 @@ static void upload_BFO( struct i915_context *i915 )
if (bfo[0] & BFO_ENABLE_STENCIL_REF) {
bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT;
}
- set_dynamic_indirect( i915,
- I915_DYNAMIC_BFO_0,
- &(bfo[0]),
- 2 );
+
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_BFO_0,
+ &(bfo[0]),
+ 2);
}
const struct i915_tracked_state i915_upload_BFO = {
- I915_NEW_DEPTH_STENCIL,
- upload_BFO
+ "BFO",
+ upload_BFO,
+ I915_NEW_DEPTH_STENCIL
};
+
/***********************************************************************
*/
-
-
-static void upload_BLENDCOLOR( struct i915_context *i915 )
+static void upload_BLENDCOLOR(struct i915_context *i915)
{
unsigned bc[2];
- memset( bc, 0, sizeof(bc) );
+ memset(bc, 0, sizeof(bc));
- /* I915_NEW_BLEND {_COLOR}
+ /* I915_NEW_BLEND
*/
{
const float *color = i915->blend_color.color;
bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
- bc[1] = pack_ui32_float4( color[0],
- color[1],
- color[2],
- color[3] );
+ bc[1] = pack_ui32_float4(color[0],
+ color[1],
+ color[2],
+ color[3]);
}
- set_dynamic_indirect( i915,
- I915_DYNAMIC_BC_0,
- bc,
- 2 );
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_BC_0,
+ bc,
+ 2);
}
const struct i915_tracked_state i915_upload_BLENDCOLOR = {
- I915_NEW_BLEND,
- upload_BLENDCOLOR
+ "BLENDCOLOR",
+ upload_BLENDCOLOR,
+ I915_NEW_BLEND
};
-/***********************************************************************
- */
-static void upload_IAB( struct i915_context *i915 )
+/***********************************************************************
+ */
+static void upload_IAB(struct i915_context *i915)
{
unsigned iab = i915->blend->iab;
-
- set_dynamic_indirect( i915,
- I915_DYNAMIC_IAB,
- &iab,
- 1 );
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_IAB,
+ &iab,
+ 1);
}
const struct i915_tracked_state i915_upload_IAB = {
- I915_NEW_BLEND,
- upload_IAB
+ "IAB",
+ upload_IAB,
+ I915_NEW_BLEND
};
+
/***********************************************************************
*/
-
-
-
-static void upload_DEPTHSCALE( struct i915_context *i915 )
+static void upload_DEPTHSCALE(struct i915_context *i915)
{
- set_dynamic_indirect( i915,
- I915_DYNAMIC_DEPTHSCALE_0,
- &(i915->rasterizer->ds[0].u),
- 2 );
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_DEPTHSCALE_0,
+ &(i915->rasterizer->ds[0].u),
+ 2);
}
const struct i915_tracked_state i915_upload_DEPTHSCALE = {
- I915_NEW_RASTERIZER,
- upload_DEPTHSCALE
+ "DEPTHSCALE",
+ upload_DEPTHSCALE,
+ I915_NEW_RASTERIZER
};
@@ -196,10 +202,9 @@ const struct i915_tracked_state i915_upload_DEPTHSCALE = {
* XXX: does stipple pattern need to be adjusted according to
* the window position?
*
- * XXX: possibly need workaround for conform paths test.
+ * XXX: possibly need workaround for conform paths test.
*/
-
-static void upload_STIPPLE( struct i915_context *i915 )
+static void upload_STIPPLE(struct i915_context *i915)
{
unsigned st[2];
@@ -210,7 +215,6 @@ static void upload_STIPPLE( struct i915_context *i915 )
*/
st[1] |= i915->rasterizer->st;
-
/* I915_NEW_STIPPLE
*/
{
@@ -225,73 +229,75 @@ static void upload_STIPPLE( struct i915_context *i915 )
/* Not sure what to do about fallbacks, so for now just dont:
*/
st[1] |= ((p[0] << 0) |
- (p[1] << 4) |
- (p[2] << 8) |
- (p[3] << 12));
+ (p[1] << 4) |
+ (p[2] << 8) |
+ (p[3] << 12));
}
-
- set_dynamic_indirect( i915,
- I915_DYNAMIC_STP_0,
- &st[0],
- 2 );
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_STP_0,
+ &st[0],
+ 2);
}
-
const struct i915_tracked_state i915_upload_STIPPLE = {
- I915_NEW_RASTERIZER | I915_NEW_STIPPLE,
- upload_STIPPLE
+ "STIPPLE",
+ upload_STIPPLE,
+ I915_NEW_RASTERIZER | I915_NEW_STIPPLE
};
/***********************************************************************
- * Scissor.
+ * Scissor enable
*/
static void upload_SCISSOR_ENABLE( struct i915_context *i915 )
{
- set_dynamic_indirect( i915,
- I915_DYNAMIC_SC_ENA_0,
- &(i915->rasterizer->sc[0]),
- 1 );
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_SC_ENA_0,
+ &(i915->rasterizer->sc[0]),
+ 1);
}
const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = {
- I915_NEW_RASTERIZER,
- upload_SCISSOR_ENABLE
+ "SCISSOR ENABLE",
+ upload_SCISSOR_ENABLE,
+ I915_NEW_RASTERIZER
};
-static void upload_SCISSOR_RECT( struct i915_context *i915 )
+/***********************************************************************
+ * Scissor rect
+ */
+static void upload_SCISSOR_RECT(struct i915_context *i915)
{
unsigned x1 = i915->scissor.minx;
unsigned y1 = i915->scissor.miny;
unsigned x2 = i915->scissor.maxx;
unsigned y2 = i915->scissor.maxy;
unsigned sc[3];
-
+
sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD;
sc[1] = (y1 << 16) | (x1 & 0xffff);
sc[2] = (y2 << 16) | (x2 & 0xffff);
- set_dynamic_indirect( i915,
- I915_DYNAMIC_SC_RECT_0,
- &sc[0],
- 3 );
+ set_dynamic_indirect(i915,
+ I915_DYNAMIC_SC_RECT_0,
+ &sc[0],
+ 3);
}
-
const struct i915_tracked_state i915_upload_SCISSOR_RECT = {
- I915_NEW_SCISSOR,
- upload_SCISSOR_RECT
+ "SCISSOR RECT",
+ upload_SCISSOR_RECT,
+ I915_NEW_SCISSOR
};
-
-
-
+/***********************************************************************
+ */
static const struct i915_tracked_state *atoms[] = {
&i915_upload_MODES4,
&i915_upload_BFO,
@@ -306,12 +312,17 @@ static const struct i915_tracked_state *atoms[] = {
/* These will be dynamic indirect state commands, but for now just end
* up on the batch buffer with everything else.
*/
-void i915_update_dynamic( struct i915_context *i915 )
+static void update_dynamic(struct i915_context *i915)
{
int i;
for (i = 0; i < Elements(atoms); i++)
if (i915->dirty & atoms[i]->dirty)
- atoms[i]->update( i915 );
+ atoms[i]->update(i915);
}
+struct i915_tracked_state i915_hw_dynamic = {
+ "dynamic",
+ update_dynamic,
+ ~0 /* all state atoms, because we do internal checking */
+};
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 22082fece8..7bb7893d93 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -29,6 +29,7 @@
#include "i915_reg.h"
#include "i915_context.h"
#include "i915_batch.h"
+#include "i915_debug.h"
#include "i915_reg.h"
#include "i915_resource.h"
@@ -111,15 +112,20 @@ i915_emit_hardware_state(struct i915_context *i915 )
3
) * 3/2; /* plus 50% margin */
-#if 0
- debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs);
-#endif
-
+ uintptr_t save_ptr;
+ size_t save_relocs;
+
+ if (I915_DBG_ON(DBG_ATOMS))
+ i915_dump_hardware_dirty(i915, __FUNCTION__);
+
if(!BEGIN_BATCH(dwords, relocs)) {
FLUSH_BATCH(NULL);
assert(BEGIN_BATCH(dwords, relocs));
}
+ save_ptr = (uintptr_t)i915->batch->ptr;
+ save_relocs = i915->batch->relocs;
+
/* 14 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_INVARIENT)
{
@@ -169,7 +175,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
OUT_BATCH(0);
}
-
+
/* 7 dwords, 1 relocs */
if (i915->hardware_dirty & I915_HW_IMMEDIATE)
{
@@ -195,7 +201,8 @@ i915_emit_hardware_state(struct i915_context *i915 )
OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]);
OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]);
}
-
+
+#if 01
/* I915_MAX_DYNAMIC dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_DYNAMIC)
{
@@ -204,7 +211,9 @@ i915_emit_hardware_state(struct i915_context *i915 )
OUT_BATCH(i915->current.dynamic[i]);
}
}
-
+#endif
+
+#if 01
/* 8 dwords, 2 relocs */
if (i915->hardware_dirty & I915_HW_STATIC)
{
@@ -253,10 +262,10 @@ i915_emit_hardware_state(struct i915_context *i915 )
I915_USAGE_RENDER,
depth_surface->offset);
}
-
+
{
unsigned cformat, zformat = 0;
-
+
if (cbuf_surface)
cformat = cbuf_surface->format;
else
@@ -275,6 +284,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
zformat );
}
}
+#endif
#if 01
/* texture images */
@@ -314,7 +324,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
{
if (i915->current.sampler_enable_nr) {
int i;
-
+
OUT_BATCH( _3DSTATE_SAMPLER_STATE |
(3 * i915->current.sampler_enable_nr) );
@@ -331,9 +341,10 @@ i915_emit_hardware_state(struct i915_context *i915 )
}
#endif
+#if 01
/* constants */
/* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_PROGRAM)
+ if (i915->hardware_dirty & I915_HW_CONSTANTS)
{
/* Collate the user-defined constants with the fragment shader's
* immediates according to the constant_flags[] array.
@@ -370,7 +381,9 @@ i915_emit_hardware_state(struct i915_context *i915 )
}
}
}
+#endif
+#if 01
/* Fragment program */
/* i915->current.program_len dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_PROGRAM)
@@ -382,7 +395,9 @@ i915_emit_hardware_state(struct i915_context *i915 )
OUT_BATCH(i915->fs->program[i]);
}
}
+#endif
+#if 01
/* drawing surface size */
/* 6 dwords, 0 relocs */
{
@@ -398,7 +413,11 @@ i915_emit_hardware_state(struct i915_context *i915 )
OUT_BATCH(0);
OUT_BATCH(0);
}
+#endif
+ I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__,
+ ((uintptr_t)i915->batch->ptr - save_ptr) / 4,
+ i915->batch->relocs - save_relocs);
i915->hardware_dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_state_fpc.c b/src/gallium/drivers/i915/i915_state_fpc.c
new file mode 100644
index 0000000000..ec7cec0e47
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_state_fpc.c
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright © 2010 Jakob Bornecrantz
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_state.h"
+
+
+
+/***********************************************************************
+ */
+static void update_hw_constants(struct i915_context *i915)
+{
+ i915->hardware_dirty |= I915_HW_CONSTANTS;
+}
+
+struct i915_tracked_state i915_hw_constants = {
+ "hw_constants",
+ update_hw_constants,
+ I915_NEW_CONSTANTS | I915_NEW_FS
+};
+
+
+
+/***********************************************************************
+ */
+static void update_fs(struct i915_context *i915)
+{
+ i915->hardware_dirty |= I915_HW_PROGRAM;
+}
+
+struct i915_tracked_state i915_hw_fs = {
+ "fs",
+ update_fs,
+ I915_NEW_FS
+};
diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c
index 8cec699285..f9ade7077f 100644
--- a/src/gallium/drivers/i915/i915_state_immediate.c
+++ b/src/gallium/drivers/i915/i915_state_immediate.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
+ *
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,13 +22,13 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
+
#include "i915_state_inlines.h"
#include "i915_context.h"
#include "i915_state.h"
@@ -46,30 +46,31 @@
/***********************************************************************
- * S0,S1: Vertex buffer state.
+ * S0,S1: Vertex buffer state.
*/
static void upload_S0S1(struct i915_context *i915)
{
unsigned LIS0, LIS1;
- /* I915_NEW_VBO */
- /* TODO: re-use vertex buffers here? */
+ /* I915_NEW_VBO
+ */
LIS0 = i915->vbo_offset;
- /* I915_NEW_VERTEX_SIZE -- do this where the vertex size is calculated!
+ /* I915_NEW_VERTEX_SIZE
*/
+ /* XXX do this where the vertex size is calculated! */
{
unsigned vertex_size = i915->current.vertex_info.size;
LIS1 = ((vertex_size << 24) |
- (vertex_size << 16));
+ (vertex_size << 16));
}
- /* I915_NEW_VBO */
- /* TODO: use a vertex generation number to track vbo changes */
+ /* I915_NEW_VBO
+ */
if (1 ||
i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 ||
- i915->current.immediate[I915_IMMEDIATE_S1] != LIS1)
+ i915->current.immediate[I915_IMMEDIATE_S1] != LIS1)
{
i915->current.immediate[I915_IMMEDIATE_S0] = LIS0;
i915->current.immediate[I915_IMMEDIATE_S1] = LIS1;
@@ -78,13 +79,13 @@ static void upload_S0S1(struct i915_context *i915)
}
const struct i915_tracked_state i915_upload_S0S1 = {
- I915_NEW_VBO | I915_NEW_VERTEX_FORMAT,
- upload_S0S1
+ "imm S0 S1",
+ upload_S0S1,
+ I915_NEW_VBO | I915_NEW_VERTEX_FORMAT
};
-
/***********************************************************************
* S4: Vertex format, rasterization state
*/
@@ -92,7 +93,8 @@ static void upload_S2S4(struct i915_context *i915)
{
unsigned LIS2, LIS4;
- /* I915_NEW_VERTEX_FORMAT */
+ /* I915_NEW_VERTEX_FORMAT
+ */
{
LIS2 = i915->current.vertex_info.hwfmt[1];
LIS4 = i915->current.vertex_info.hwfmt[0];
@@ -113,35 +115,38 @@ static void upload_S2S4(struct i915_context *i915)
}
}
-
const struct i915_tracked_state i915_upload_S2S4 = {
- I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT,
- upload_S2S4
+ "imm S2 S4",
+ upload_S2S4,
+ I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT
};
/***********************************************************************
- *
*/
-static void upload_S5( struct i915_context *i915 )
+static void upload_S5(struct i915_context *i915)
{
unsigned LIS5 = 0;
+ /* I915_NEW_DEPTH_STENCIL
+ */
LIS5 |= i915->depth_stencil->stencil_LIS5;
/* hope it's safe to set stencil ref value even if stencil test is disabled? */
LIS5 |= i915->stencil_ref.ref_value[0] << S5_STENCIL_REF_SHIFT;
+ /* I915_NEW_BLEND
+ */
LIS5 |= i915->blend->LIS5;
#if 0
- /* I915_NEW_RASTERIZER */
+ /* I915_NEW_RASTERIZER
+ */
if (i915->state.Polygon->OffsetFill) {
LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE;
}
#endif
-
if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) {
i915->current.immediate[I915_IMMEDIATE_S5] = LIS5;
i915->hardware_dirty |= I915_HW_IMMEDIATE;
@@ -149,14 +154,16 @@ static void upload_S5( struct i915_context *i915 )
}
const struct i915_tracked_state i915_upload_S5 = {
- (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER),
- upload_S5
+ "imm S5",
+ upload_S5,
+ I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER
};
+
/***********************************************************************
*/
-static void upload_S6( struct i915_context *i915 )
+static void upload_S6(struct i915_context *i915)
{
unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT);
@@ -180,14 +187,16 @@ static void upload_S6( struct i915_context *i915 )
}
const struct i915_tracked_state i915_upload_S6 = {
- I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER,
- upload_S6
+ "imm s6",
+ upload_S6,
+ I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER
};
+
/***********************************************************************
*/
-static void upload_S7( struct i915_context *i915 )
+static void upload_S7(struct i915_context *i915)
{
unsigned LIS7;
@@ -202,11 +211,15 @@ static void upload_S7( struct i915_context *i915 )
}
const struct i915_tracked_state i915_upload_S7 = {
- I915_NEW_RASTERIZER,
- upload_S7
+ "imm S7",
+ upload_S7,
+ I915_NEW_RASTERIZER
};
+
+/***********************************************************************
+ */
static const struct i915_tracked_state *atoms[] = {
&i915_upload_S0S1,
&i915_upload_S2S4,
@@ -215,13 +228,17 @@ static const struct i915_tracked_state *atoms[] = {
&i915_upload_S7
};
-/*
- */
-void i915_update_immediate( struct i915_context *i915 )
+static void update_immediate(struct i915_context *i915)
{
int i;
for (i = 0; i < Elements(atoms); i++)
if (i915->dirty & atoms[i]->dirty)
- atoms[i]->update( i915 );
+ atoms[i]->update(i915);
}
+
+struct i915_tracked_state i915_hw_immediate = {
+ "immediate",
+ update_immediate,
+ ~0 /* all state atoms, because we do internal checking */
+};
diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c
index 77b9bccbb7..4667e0b78d 100644
--- a/src/gallium/drivers/i915/i915_state_sampler.c
+++ b/src/gallium/drivers/i915/i915_state_sampler.c
@@ -53,17 +53,23 @@
*
* So we need to update the map state when we change samplers and
* we need to be change the sampler state when map state is changed.
- * The first part is done by calling i915_update_texture in
- * i915_update_samplers and the second part is done else where in
- * code tracking the state changes.
+ * The first part is done by calling update_map in update_samplers
+ * and the second part is done elsewhere, in the code tracking the
+ * state changes.
+ */
+
+static void update_map(struct i915_context *i915,
+ uint unit,
+ const struct i915_texture *tex,
+ const struct i915_sampler_state *sampler,
+ uint state[2]);
+
+
+
+/***********************************************************************
+ * Samplers
*/
-static void
-i915_update_texture(struct i915_context *i915,
- uint unit,
- const struct i915_texture *tex,
- const struct i915_sampler_state *sampler,
- uint state[6]);
/**
* Compute i915 texture sampling state.
*
@@ -74,16 +80,13 @@ i915_update_texture(struct i915_context *i915,
*/
static void update_sampler(struct i915_context *i915,
uint unit,
- const struct i915_sampler_state *sampler,
- const struct i915_texture *tex,
- unsigned state[3] )
+ const struct i915_sampler_state *sampler,
+ const struct i915_texture *tex,
+ unsigned state[3])
{
const struct pipe_resource *pt = &tex->b.b;
unsigned minlod, lastlod;
- /* Need to do this after updating the maps, which call the
- * intel_finalize_mipmap_tree and hence can update firstLevel:
- */
state[0] = sampler->state[0];
state[1] = sampler->state[1];
state[2] = sampler->state[2];
@@ -118,7 +121,7 @@ static void update_sampler(struct i915_context *i915,
wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) {
if (i915->conformance_mode > 0) {
assert(0);
- /* sampler->fallback = true; */
+ /* sampler->fallback = true; */
/* TODO */
}
}
@@ -137,8 +140,7 @@ static void update_sampler(struct i915_context *i915,
state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
}
-
-void i915_update_samplers( struct i915_context *i915 )
+static void update_samplers(struct i915_context *i915)
{
uint unit;
@@ -152,29 +154,38 @@ void i915_update_samplers( struct i915_context *i915 )
if (i915->fragment_sampler_views[unit]) {
struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
- update_sampler( i915,
- unit,
- i915->sampler[unit], /* sampler state */
- texture, /* texture */
- i915->current.sampler[unit] /* the result */
- );
- i915_update_texture( i915,
- unit,
- texture, /* texture */
- i915->sampler[unit], /* sampler state */
- i915->current.texbuffer[unit] );
-
- i915->current.sampler_enable_nr++;
- i915->current.sampler_enable_flags |= (1 << unit);
+ update_sampler(i915,
+ unit,
+ i915->sampler[unit], /* sampler state */
+ texture, /* texture */
+ i915->current.sampler[unit]); /* the result */
+ update_map(i915,
+ unit,
+ texture, /* texture */
+ i915->sampler[unit], /* sampler state */
+ i915->current.texbuffer[unit]); /* the result */
+
+ i915->current.sampler_enable_nr++;
+ i915->current.sampler_enable_flags |= (1 << unit);
}
}
i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP;
}
+struct i915_tracked_state i915_hw_samplers = {
+ "samplers",
+ update_samplers,
+ I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW
+};
-static uint
-translate_texture_format(enum pipe_format pipeFormat)
+
+
+/***********************************************************************
+ * Sampler views
+ */
+
+static uint translate_texture_format(enum pipe_format pipeFormat)
{
switch (pipeFormat) {
case PIPE_FORMAT_L8_UNORM:
@@ -226,19 +237,17 @@ translate_texture_format(enum pipe_format pipeFormat)
return (MAPSURF_32BIT | MT_32BIT_xI824);
default:
debug_printf("i915: translate_texture_format() bad image format %x\n",
- pipeFormat);
+ pipeFormat);
assert(0);
return 0;
}
}
-
-static void
-i915_update_texture(struct i915_context *i915,
- uint unit,
- const struct i915_texture *tex,
- const struct i915_sampler_state *sampler,
- uint state[6])
+static void update_map(struct i915_context *i915,
+ uint unit,
+ const struct i915_texture *tex,
+ const struct i915_sampler_state *sampler,
+ uint state[2])
{
const struct pipe_resource *pt = &tex->b.b;
uint format, pitch;
@@ -287,9 +296,7 @@ i915_update_texture(struct i915_context *i915,
| ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
}
-
-void
-i915_update_textures(struct i915_context *i915)
+static void update_maps(struct i915_context *i915)
{
uint unit;
@@ -300,13 +307,19 @@ i915_update_textures(struct i915_context *i915)
if (i915->fragment_sampler_views[unit]) {
struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
- i915_update_texture( i915,
- unit,
- texture, /* texture */
- i915->sampler[unit], /* sampler state */
- i915->current.texbuffer[unit] );
+ update_map(i915,
+ unit,
+ texture, /* texture */
+ i915->sampler[unit], /* sampler state */
+ i915->current.texbuffer[unit]);
}
}
i915->hardware_dirty |= I915_HW_MAP;
}
+
+struct i915_tracked_state i915_hw_sampler_views = {
+ "sampler_views",
+ update_maps,
+ I915_NEW_SAMPLER_VIEW
+};
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
new file mode 100644
index 0000000000..dc9a4c1e2f
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright © 2010 Jakob Bornecrantz
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_state.h"
+
+
+
+/***********************************************************************
+ * Update framebuffer state
+ */
+static void update_framebuffer(struct i915_context *i915)
+{
+ /* HW emit currently references framebuffer state directly:
+ */
+ i915->hardware_dirty |= I915_HW_STATIC;
+}
+
+struct i915_tracked_state i915_hw_framebuffer = {
+ "framebuffer",
+ update_framebuffer,
+ I915_NEW_FRAMEBUFFER
+};
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index 3aba19fe6a..5385e403d2 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -222,11 +222,4 @@ struct i915_winsys {
void (*destroy)(struct i915_winsys *iws);
};
-
-/**
- * Create i915 pipe_screen.
- */
-struct pipe_screen *i915_screen_create(struct i915_winsys *iws);
-
-
#endif
diff --git a/src/gallium/drivers/i965/brw_public.h b/src/gallium/drivers/i965/brw_public.h
new file mode 100644
index 0000000000..be2cd6b5c4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_public.h
@@ -0,0 +1,13 @@
+
+#ifndef BRW_PUBLIC_H
+#define BRW_PUBLIC_H
+
+struct brw_winsys_screen;
+struct pipe_screen;
+
+/**
+ * Create brw AKA i965 pipe_screen.
+ */
+struct pipe_screen * brw_screen_create(struct brw_winsys_screen *bws);
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 50a446db91..bdfead73cc 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -34,6 +34,7 @@
#include "brw_context.h"
#include "brw_screen.h"
#include "brw_winsys.h"
+#include "brw_public.h"
#include "brw_debug.h"
#include "brw_resource.h"
@@ -350,7 +351,7 @@ brw_destroy_screen(struct pipe_screen *screen)
* Create a new brw_screen object
*/
struct pipe_screen *
-brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
+brw_screen_create(struct brw_winsys_screen *sws)
{
struct brw_screen *bscreen;
struct brw_chipset chipset;
@@ -365,9 +366,9 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
memset(&chipset, 0, sizeof chipset);
- chipset.pci_id = pci_id;
+ chipset.pci_id = sws->pci_id;
- switch (pci_id) {
+ switch (chipset.pci_id) {
case PCI_CHIP_I965_G:
case PCI_CHIP_I965_Q:
case PCI_CHIP_I965_G_1:
@@ -393,7 +394,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
default:
debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
- __FUNCTION__, pci_id);
+ __FUNCTION__, chipset.pci_id);
return NULL;
}
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index f30c7f1813..a06f8bb7d6 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -147,6 +147,7 @@ static INLINE void make_reloc(struct brw_winsys_reloc *reloc,
struct brw_winsys_screen {
+ unsigned pci_id;
/**
* Buffer functions.
@@ -261,12 +262,6 @@ bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf)
}
-/**
- * Create brw pipe_screen.
- */
-struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id);
-
-
/*************************************************************************
* Cooperative dumping between winsys and driver. TODO: make this
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 9c67759ad0..f7ee55cc1c 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -678,7 +678,7 @@ static void precalc_tex( struct brw_wm_compile *c,
struct brw_fp_src src0,
struct brw_fp_src sampler )
{
- struct brw_fp_src coord = src_undef();
+ struct brw_fp_src coord;
struct brw_fp_dst tmp = dst_undef();
assert(unit < BRW_MAX_TEX_UNIT);
diff --git a/src/gallium/drivers/identity/Makefile b/src/gallium/drivers/identity/Makefile
index e32b9102e5..74692d9761 100644
--- a/src/gallium/drivers/identity/Makefile
+++ b/src/gallium/drivers/identity/Makefile
@@ -6,7 +6,6 @@ LIBNAME = identity
C_SOURCES = \
id_objects.c \
id_context.c \
- id_screen.c \
- id_drm.c
+ id_screen.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/identity/SConscript b/src/gallium/drivers/identity/SConscript
index 2a68891c28..b364e0acc8 100644
--- a/src/gallium/drivers/identity/SConscript
+++ b/src/gallium/drivers/identity/SConscript
@@ -6,7 +6,6 @@ identity = env.ConvenienceLibrary(
target = 'identity',
source = [
'id_context.c',
- 'id_drm.c',
'id_objects.c',
'id_screen.c',
])
diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c
index ca4743f9ef..593928f399 100644
--- a/src/gallium/drivers/identity/id_objects.c
+++ b/src/gallium/drivers/identity/id_objects.c
@@ -120,13 +120,14 @@ identity_sampler_view_create(struct identity_context *id_context,
assert(view->texture == id_resource->resource);
- id_view = MALLOC(sizeof(struct identity_sampler_view));
+ id_view = CALLOC_STRUCT(identity_sampler_view);
id_view->base = *view;
id_view->base.reference.count = 1;
id_view->base.texture = NULL;
pipe_resource_reference(&id_view->base.texture, id_resource->resource);
id_view->base.context = id_context->pipe;
+ id_view->sampler_view = view;
return &id_view->base;
error:
@@ -180,8 +181,8 @@ identity_transfer_destroy(struct identity_context *id_context,
struct identity_transfer *id_transfer)
{
pipe_resource_reference(&id_transfer->base.resource, NULL);
- id_transfer->pipe->transfer_destroy(id_context->pipe,
- id_transfer->transfer);
+ id_context->pipe->transfer_destroy(id_context->pipe,
+ id_transfer->transfer);
FREE(id_transfer);
}
diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h
index 5eea10b0b5..e8deabf4fc 100644
--- a/src/gallium/drivers/identity/id_objects.h
+++ b/src/gallium/drivers/identity/id_objects.h
@@ -65,7 +65,6 @@ struct identity_transfer
{
struct pipe_transfer base;
- struct pipe_context *pipe;
struct pipe_transfer *transfer;
};
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore
index a1b6f56e0d..6ebd2b8a63 100644
--- a/src/gallium/drivers/llvmpipe/.gitignore
+++ b/src/gallium/drivers/llvmpipe/.gitignore
@@ -3,3 +3,5 @@ lp_test_blend
lp_test_conv
lp_test_format
lp_test_printf
+lp_test_round
+lp_test_sincos
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ee28179c30..2892b62920 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -18,6 +18,7 @@ C_SOURCES = \
lp_fence.c \
lp_flush.c \
lp_jit.c \
+ lp_memory.c \
lp_perf.c \
lp_query.c \
lp_rast.c \
@@ -53,8 +54,12 @@ PROGS := lp_test_format \
lp_test_blend \
lp_test_conv \
lp_test_printf \
+ lp_test_round \
lp_test_sincos
+# Need this for the lp_test_*.o files
+CLEAN_EXTRA = *.o
+
lp_test_sincos.o : sse_mathfun.h
PROGS_DEPS := ../../auxiliary/libgallium.a
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index a1ef71da89..fd6ba1561e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -1,3 +1,5 @@
+import distutils.version
+
Import('*')
if not env['llvm']:
@@ -23,6 +25,16 @@ env.Depends('lp_tile_soa.c', [
'#src/gallium/auxiliary/util/u_format_pack.py',
])
+
+# Only enable SSSE3 for lp_tile_soa.c
+ssse3_env = env.Clone()
+if env['gcc'] \
+ and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \
+ and env['machine'] in ('x86', 'x86_64') :
+ ssse3_env.Append(CCFLAGS = ['-mssse3'])
+lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c')
+
+
llvmpipe = env.ConvenienceLibrary(
target = 'llvmpipe',
source = [
@@ -38,6 +50,7 @@ llvmpipe = env.ConvenienceLibrary(
'lp_fence.c',
'lp_flush.c',
'lp_jit.c',
+ 'lp_memory.c',
'lp_perf.c',
'lp_query.c',
'lp_rast.c',
@@ -65,7 +78,7 @@ llvmpipe = env.ConvenienceLibrary(
'lp_tex_sample.c',
'lp_texture.c',
'lp_tile_image.c',
- 'lp_tile_soa.c',
+ lp_tile_soa_os,
])
@@ -82,6 +95,9 @@ if env['platform'] != 'embedded':
'sincos',
]
+ if not msvc:
+ tests.append('round')
+
for test in tests:
target = env.Program(
target = 'lp_test_' + test,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 70d08e71f6..09e9833057 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -190,30 +190,27 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
enum lp_build_blend_swizzle rgb_swizzle,
unsigned alpha_swizzle)
{
- if(rgb == alpha) {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
- return rgb;
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
- return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
+ LLVMValueRef swizzled_rgb;
+
+ switch (rgb_swizzle) {
+ case LP_BUILD_BLEND_SWIZZLE_RGBA:
+ swizzled_rgb = rgb;
+ break;
+ case LP_BUILD_BLEND_SWIZZLE_AAAA:
+ swizzled_rgb = lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
+ break;
+ default:
+ assert(0);
+ swizzled_rgb = bld->base.undef;
}
- else {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
- boolean cond[4] = {0, 0, 0, 0};
- cond[alpha_swizzle] = 1;
- return lp_build_select_aos(&bld->base, alpha, rgb, cond);
- }
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
- unsigned char swizzle[4];
- swizzle[0] = alpha_swizzle;
- swizzle[1] = alpha_swizzle;
- swizzle[2] = alpha_swizzle;
- swizzle[3] = alpha_swizzle;
- swizzle[alpha_swizzle] += 4;
- return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
- }
+
+ if (rgb != alpha) {
+ boolean cond[4] = {0, 0, 0, 0};
+ cond[alpha_swizzle] = 1;
+ swizzled_rgb = lp_build_select_aos(&bld->base, alpha, swizzled_rgb, cond);
}
- assert(0);
- return bld->base.undef;
+
+ return swizzled_rgb;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 90d2b26f9f..78744da500 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -261,7 +261,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
const unsigned interp = bld->interp[attrib];
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
if(mask & (1 << chan)) {
- LLVMValueRef a = coeff_bld->undef;
+ LLVMValueRef a;
if (interp == LP_INTERP_CONSTANT ||
interp == LP_INTERP_FACING) {
a = bld->a[attrib][chan];
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 986e604ce7..b2643ab33c 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -83,6 +83,7 @@ struct llvmpipe_context {
int so_count[PIPE_MAX_SO_BUFFERS];
int num_buffers;
} so_target;
+ struct pipe_resource *mapped_vs_tex[PIPE_MAX_VERTEX_SAMPLERS];
unsigned num_samplers;
unsigned num_fragment_sampler_views;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 98780d7631..625d0c8a8c 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -43,18 +43,23 @@
/**
- * Draw vertex arrays, with optional indexing.
+ * Draw vertex arrays, with optional indexing, optional instancing.
+ * All the other drawing functions are implemented in terms of this function.
* Basically, map the vertex buffers (and drawing surfaces), then hand off
* the drawing to the 'draw' module.
*/
static void
-llvmpipe_draw_range_elements(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned min_index,
- unsigned max_index,
- unsigned mode, unsigned start, unsigned count)
+llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
{
struct llvmpipe_context *lp = llvmpipe_context(pipe);
struct draw_context *draw = lp->draw;
@@ -74,9 +79,11 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
/* Map index buffer, if present */
if (indexBuffer) {
void *mapped_indexes = llvmpipe_resource_data(indexBuffer);
- draw_set_mapped_element_buffer_range(draw, indexSize, indexBias,
- min_index,
- max_index,
+ draw_set_mapped_element_buffer_range(draw,
+ indexSize,
+ indexBias,
+ minIndex,
+ maxIndex,
mapped_indexes);
}
else {
@@ -84,9 +91,13 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_set_mapped_element_buffer_range(draw, 0, 0, start,
start + count - 1, NULL);
}
+ llvmpipe_prepare_vertex_sampling(lp,
+ lp->num_vertex_sampler_views,
+ lp->vertex_sampler_views);
/* draw! */
- draw_arrays(draw, mode, start, count);
+ draw_arrays_instanced(draw, mode, start, count,
+ startInstance, instanceCount);
/*
* unmap vertex/index buffers
@@ -97,6 +108,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
if (indexBuffer) {
draw_set_mapped_element_buffer(draw, 0, 0, NULL);
}
+ llvmpipe_cleanup_vertex_sampling(lp);
/*
* TODO: Flush only when a user vertex/index buffer is present
@@ -108,24 +120,102 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
static void
+llvmpipe_draw_arrays_instanced(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{ /* Non-indexed instanced draw: forwards every argument to llvmpipe_draw_range_elements_instanced() with a NULL index buffer. */
+ llvmpipe_draw_range_elements_instanced(pipe,
+ NULL, /* no indexBuffer */
+ 0, 0, /* indexSize, indexBias */
+ 0, ~0, /* minIndex, maxIndex: ~0 converts to the largest unsigned value */
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+
+static void
+llvmpipe_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{ /* Indexed instanced draw: forwards to llvmpipe_draw_range_elements_instanced() with the caller's index buffer and the widest index range. */
+ llvmpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize, indexBias,
+ 0, ~0, /* minIndex, maxIndex: ~0 converts to the largest unsigned value */
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+
+static void
llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_resource *indexBuffer,
unsigned indexSize,
int indexBias,
- unsigned mode, unsigned start, unsigned count)
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ llvmpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize, indexBias,
+ 0, 0xffffffff, /* min, maxIndex */
+ mode, start, count,
+ 0, /* startInstance */
+ 1); /* instanceCount */
+}
+
+
+static void
+llvmpipe_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
- llvmpipe_draw_range_elements( pipe, indexBuffer,
- indexSize, indexBias,
- 0, 0xffffffff,
- mode, start, count );
+ llvmpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize, indexBias,
+ min_index, max_index,
+ mode, start, count,
+ 0, /* startInstance */
+ 1); /* instanceCount */
}
static void
-llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count)
+llvmpipe_draw_arrays(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
- llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count);
+ llvmpipe_draw_range_elements_instanced(pipe,
+ NULL, /* indexBuffer */
+ 0, /* indexSize */
+ 0, /* indexBias */
+ 0, ~0, /* min, maxIndex */
+ mode, start, count,
+ 0, /* startInstance */
+ 1); /* instanceCount */
}
@@ -135,4 +225,6 @@ llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe)
llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements;
+ llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced;
+ llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index 75d8d2b825..f9805e5d68 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -28,7 +28,6 @@
#include "pipe/p_screen.h"
#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "lp_debug.h"
#include "lp_fence.h"
@@ -59,7 +58,7 @@ lp_fence_create(unsigned rank)
/** Destroy a fence. Called when refcount hits zero. */
-static void
+void
lp_fence_destroy(struct lp_fence *fence)
{
pipe_mutex_destroy(fence->mutex);
@@ -77,12 +76,10 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
- struct lp_fence *old = (struct lp_fence *) *ptr;
+ struct lp_fence **old = (struct lp_fence **) ptr;
struct lp_fence *f = (struct lp_fence *) fence;
- if (pipe_reference(&old->reference, &f->reference)) {
- lp_fence_destroy(old);
- }
+ lp_fence_reference(old, f);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
index d9270f5784..13358fb99f 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -32,6 +32,7 @@
#include "os/os_thread.h"
#include "pipe/p_state.h"
+#include "util/u_inlines.h"
struct pipe_screen;
@@ -61,4 +62,21 @@ void
llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
+void
+lp_fence_destroy(struct lp_fence *fence);
+
+static INLINE void
+lp_fence_reference(struct lp_fence **ptr,
+ struct lp_fence *f)
+{ /* Make *ptr refer to f; the fence previously held in *ptr is destroyed when pipe_reference() returns true. */
+ struct lp_fence *old = *ptr;
+
+ if (pipe_reference(&old->reference, &f->reference)) { /* NOTE(review): member addresses are formed even when old or f is NULL -- presumably pipe_reference() tolerates that; confirm */
+ lp_fence_destroy(old);
+ }
+
+ *ptr = f; /* stored unconditionally, including f == NULL */
+}
+
+
#endif /* LP_FENCE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 0cd288bb73..845292f4ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -40,27 +40,19 @@
/**
* \param flags bitmask of PIPE_FLUSH_x flags
- * \param fence if non-null, returns pointer to a fench which can be waited on
+ * \param fence if non-null, returns pointer to a fence which can be waited on
*/
void
llvmpipe_flush( struct pipe_context *pipe,
- unsigned flags,
+ unsigned flags,
struct pipe_fence_handle **fence )
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
draw_flush(llvmpipe->draw);
- if (fence) {
- /* if we're going to flush the setup/rasterization modules, emit
- * a fence.
- * XXX this (and the code below) may need fine tuning...
- */
- *fence = lp_setup_fence( llvmpipe->setup );
- }
-
/* ask the setup module to flush */
- lp_setup_flush(llvmpipe->setup, flags);
+ lp_setup_flush(llvmpipe->setup, flags, fence);
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 23aa34ddec..8e6dfb293d 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -103,10 +103,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType();
elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type();
elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type();
- elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType();
elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0);
elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
PIPE_MAX_SAMPLERS);
@@ -125,18 +121,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
screen->target, context_type,
LP_JIT_CTX_STENCIL_REF_BACK);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_XMIN);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_YMIN);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_XMAX);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_YMAX);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
screen->target, context_type,
LP_JIT_CTX_BLEND_COLOR);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 8d06e65725..c94189413a 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -89,9 +89,6 @@ struct lp_jit_context
uint32_t stencil_ref_front, stencil_ref_back;
- /** floats, not ints */
- float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax;
-
/* FIXME: store (also?) in floats */
uint8_t *blend_color;
@@ -108,10 +105,6 @@ enum {
LP_JIT_CTX_ALPHA_REF,
LP_JIT_CTX_STENCIL_REF_FRONT,
LP_JIT_CTX_STENCIL_REF_BACK,
- LP_JIT_CTX_SCISSOR_XMIN,
- LP_JIT_CTX_SCISSOR_YMIN,
- LP_JIT_CTX_SCISSOR_XMAX,
- LP_JIT_CTX_SCISSOR_YMAX,
LP_JIT_CTX_BLEND_COLOR,
LP_JIT_CTX_TEXTURES,
LP_JIT_CTX_COUNT
@@ -130,18 +123,6 @@ enum {
#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
-#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin")
-
-#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin")
-
-#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax")
-
-#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax")
-
#define lp_jit_context_blend_color(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
@@ -160,12 +141,7 @@ typedef void
const void *dady,
uint8_t **color,
void *depth,
- const int32_t c1,
- const int32_t c2,
- const int32_t c3,
- const int32_t *step1,
- const int32_t *step2,
- const int32_t *step3,
+ uint32_t mask,
uint32_t *counter);
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c
new file mode 100644
index 0000000000..0f55d4a80a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_memory.c
@@ -0,0 +1,45 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+#include "lp_limits.h"
+#include "lp_memory.h"
+
+/**
+ * 32bpp RGBA swizzled tiles. One for each thread and each
+ * possible colorbuf. Adds up to quite a bit 8*8*64*64*4 == 1MB.
+ * Several schemes exist to reduce this, such as scaling back the
+ * number of threads or using a smaller tilesize when multiple
+ * colorbuffers are bound.
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+
+
+/* A single dummy tile used in a couple of out-of-memory situations.
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h
new file mode 100644
index 0000000000..f7418f5e08
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_memory.h
@@ -0,0 +1,40 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_MEMORY_H
+#define LP_MEMORY_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "lp_limits.h"
+
+extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+
+extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+
+#endif /* LP_MEMORY_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c
index a316597675..083e7e30a5 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.c
+++ b/src/gallium/drivers/llvmpipe/lp_perf.c
@@ -46,10 +46,10 @@ lp_print_counters(void)
{
if (LP_DEBUG & DEBUG_COUNTERS) {
unsigned total_64, total_16, total_4;
- float p1, p2, p3;
+ float p1, p2, p3, p4;
- debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
- debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
+ debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
+ debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
total_64 = (lp_count.nr_empty_64 +
lp_count.nr_fully_covered_64 +
@@ -58,10 +58,13 @@ lp_print_counters(void)
p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
+ p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
- debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
- debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
- debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+ debug_printf("llvmpipe: nr_64x64: %9u\n", total_64);
+ debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
+ debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64);
+ debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+ debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
total_16 = (lp_count.nr_empty_16 +
lp_count.nr_fully_covered_16 +
@@ -71,25 +74,27 @@ lp_print_counters(void)
p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16;
p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16;
- debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
- debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
- debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+ debug_printf("llvmpipe: nr_16x16: %9u\n", total_16);
+ debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
+ debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+ debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4);
p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
- debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
- debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
+ debug_printf("llvmpipe: nr_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
+ debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
- debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
- debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
- debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
+ debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
+ debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
+ debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
- debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
- debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
- debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
+ debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index a9629dae3c..4774f64550 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -44,6 +44,7 @@ struct lp_counters
unsigned nr_empty_64;
unsigned nr_fully_covered_64;
unsigned nr_partially_covered_64;
+ unsigned nr_shade_opaque_64;
unsigned nr_empty_16;
unsigned nr_fully_covered_16;
unsigned nr_partially_covered_16;
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index c902c04684..02eeaf6487 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -48,7 +48,7 @@ static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p )
static struct pipe_query *
llvmpipe_create_query(struct pipe_context *pipe,
- unsigned type)
+ unsigned type)
{
struct llvmpipe_query *pq;
@@ -67,6 +67,16 @@ static void
llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct llvmpipe_query *pq = llvmpipe_query(q);
+ /* query might still be in process if we never waited for the result */
+ if (!pq->done) {
+ struct pipe_fence_handle *fence = NULL;
+ llvmpipe_flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+ }
+
pipe_mutex_destroy(pq->mutex);
FREE(pq);
}
@@ -74,16 +84,26 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
static boolean
llvmpipe_get_query_result(struct pipe_context *pipe,
- struct pipe_query *q,
- boolean wait,
- void *vresult)
+ struct pipe_query *q,
+ boolean wait,
+ void *vresult)
{
- struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
struct llvmpipe_query *pq = llvmpipe_query(q);
uint64_t *result = (uint64_t *)vresult;
if (!pq->done) {
- lp_setup_flush(llvmpipe->setup, 0);
+ if (wait) {
+ struct pipe_fence_handle *fence = NULL;
+ llvmpipe_flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+ }
+ /* this is a bit inconsistent but should be ok */
+ else {
+ llvmpipe_flush(pipe, 0, NULL);
+ }
}
if (pq->done) {
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 50e44dcb2b..654f4ea48e 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -28,6 +28,7 @@
#include <limits.h>
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_rect.h"
#include "util/u_surface.h"
#include "lp_scene_queue.h"
@@ -66,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast,
cbuf->level,
cbuf->zslice,
LP_TEX_USAGE_READ_WRITE,
- LP_TEX_LAYOUT_NONE);
+ LP_TEX_LAYOUT_LINEAR);
}
if (fb->zsbuf) {
@@ -81,7 +82,6 @@ lp_rast_begin( struct lp_rasterizer *rast,
zsbuf->zslice,
LP_TEX_USAGE_READ_WRITE,
LP_TEX_LAYOUT_NONE);
- assert(rast->zsbuf.map);
}
lp_scene_bin_iter_begin( scene );
@@ -137,7 +137,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
enum lp_texture_usage usage;
- unsigned buf;
LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
@@ -147,24 +146,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
task->x = x;
task->y = y;
- if (scene->has_color_clear)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
- /* get pointers to color tile(s) */
- for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
- struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
- struct llvmpipe_resource *lpt;
- assert(cbuf);
- lpt = llvmpipe_resource(cbuf->texture);
- task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
- cbuf->face + cbuf->zslice,
- cbuf->level,
- usage,
- x, y);
- assert(task->color_tiles[buf]);
- }
+ /* reset pointers to color tile(s) */
+ memset(task->color_tiles, 0, sizeof(task->color_tiles));
/* get pointer to depth/stencil tile */
{
@@ -188,7 +171,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
/* Get actual pointer to the tile data. Note that depth/stencil
* data is tiled differently than color data.
*/
- task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y);
+ task->depth_tile = lp_rast_get_depth_block_pointer(task, x, y);
assert(task->depth_tile);
}
@@ -223,7 +206,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
clear_color[2] == clear_color[3]) {
/* clear to grayscale value {x, x, x, x} */
for (i = 0; i < rast->state.nr_cbufs; i++) {
- uint8_t *ptr = task->color_tiles[i];
+ uint8_t *ptr =
+ lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
}
}
@@ -235,7 +219,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
*/
const unsigned chunk = TILE_SIZE / 4;
for (i = 0; i < rast->state.nr_cbufs; i++) {
- uint8_t *c = task->color_tiles[i];
+ uint8_t *c =
+ lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
unsigned j;
for (j = 0; j < 4 * TILE_SIZE; j++) {
@@ -286,8 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
dst = task->depth_tile;
- assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y));
-
switch (block_size) {
case 1:
memset(dst, (uint8_t) clear_value, height * width);
@@ -376,8 +359,8 @@ lp_rast_load_color(struct lp_rasterizer_task *task,
* This is a bin command which is stored in all bins.
*/
void
-lp_rast_store_color( struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+lp_rast_store_linear_color( struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
{
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
@@ -387,30 +370,20 @@ lp_rast_store_color( struct lp_rasterizer_task *task,
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
const unsigned face = cbuf->face, level = cbuf->level;
struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
- /* this will convert the tiled data to linear if needed */
- (void) llvmpipe_get_texture_tile_linear(lpt, face, level,
- LP_TEX_USAGE_READ,
- task->x, task->y);
- }
-}
-
-/**
- * This is a bin command called during bin processing.
- */
-void
-lp_rast_set_state(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- const struct lp_rast_state *state = arg.set_state;
+ if (!task->color_tiles[buf])
+ continue;
- LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
-
- /* just set the current state pointer for this rasterizer */
- task->current_state = state;
+ llvmpipe_unswizzle_cbuf_tile(lpt,
+ face,
+ level,
+ task->x, task->y,
+ task->color_tiles[buf]);
+ }
}
+
/**
* Run the shader on all blocks in a tile. This is used when a tile is
* completely contained inside a triangle.
@@ -421,8 +394,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
struct lp_rasterizer *rast = task->rast;
- const struct lp_rast_state *state = task->current_state;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
const unsigned tile_x = task->x, tile_y = task->y;
unsigned x, y;
@@ -442,36 +415,60 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
tile_x + x, tile_y + y);
/* depth buffer */
- depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y);
+ depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
- tile_x + x, tile_y + y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- INT_MIN, INT_MIN, INT_MIN,
- NULL, NULL, NULL, &task->vis_counter);
+ tile_x + x, tile_y + y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ 0xffff,
+ &task->vis_counter);
}
}
}
/**
- * Compute shading for a 4x4 block of pixels.
+ * Run the shader on all blocks in a tile. This is used when a tile is
+ * completely contained inside a triangle, and the shader is opaque.
+ * This is a bin command called during bin processing.
+ */
+void
+lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ unsigned i;
+
+ LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+ /* this will prevent converting the layout from tiled to linear */
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
+ }
+
+ lp_rast_shade_tile(task, arg);
+}
+
+
+/**
+ * Compute shading for a 4x4 block of pixels inside a triangle.
* This is a bin command called during bin processing.
* \param x X position of quad in window coords
* \param y Y position of quad in window coords
*/
-void lp_rast_shade_quads( struct lp_rasterizer_task *task,
- const struct lp_rast_shader_inputs *inputs,
- unsigned x, unsigned y,
- int32_t c1, int32_t c2, int32_t c3)
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ unsigned mask)
{
- const struct lp_rast_state *state = task->current_state;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
struct lp_rasterizer *rast = task->rast;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
@@ -494,32 +491,26 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task,
}
/* depth buffer */
- depth = lp_rast_get_depth_block_pointer(rast, x, y);
+ depth = lp_rast_get_depth_block_pointer(task, x, y);
assert(lp_check_alignment(state->jit_context.blend_color, 16));
- assert(lp_check_alignment(inputs->step[0], 16));
- assert(lp_check_alignment(inputs->step[1], 16));
- assert(lp_check_alignment(inputs->step[2], 16));
-
/* run shader on 4x4 block */
- variant->jit_function[RAST_EDGE_TEST]( &state->jit_context,
- x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- c1, c2, c3,
- inputs->step[0],
- inputs->step[1],
- inputs->step[2],
- &task->vis_counter);
+ variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ mask,
+ &task->vis_counter);
}
+
/**
* Set top row and left column of the tile's pixels to white. For debugging.
*/
@@ -598,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
(void) outline_subtiles;
#endif
+ {
+ union lp_rast_cmd_arg dummy = {0};
+ lp_rast_store_linear_color(task, dummy);
+ }
+
/* debug */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
task->depth_tile = NULL;
@@ -627,7 +623,7 @@ void
lp_rast_begin_query(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
- /* Reset the the per-task counter */
+ /* Reset the per-task counter */
task->vis_counter = 0;
}
@@ -715,10 +711,16 @@ static struct {
{
RAST(clear_color),
RAST(clear_zstencil),
- RAST(triangle),
+ RAST(triangle_1),
+ RAST(triangle_2),
+ RAST(triangle_3),
+ RAST(triangle_4),
+ RAST(triangle_5),
+ RAST(triangle_6),
+ RAST(triangle_7),
RAST(shade_tile),
- RAST(set_state),
- RAST(store_color),
+ RAST(shade_tile_opaque),
+ RAST(store_linear_color),
RAST(fence),
RAST(begin_query),
RAST(end_query),
@@ -754,30 +756,8 @@ debug_bin( const struct cmd_bin *bin )
static boolean
is_empty_bin( const struct cmd_bin *bin )
{
- const struct cmd_block *head = bin->commands.head;
- int i;
-
- if (0)
- debug_bin(bin);
-
- /* We emit at most two load-tile commands at the start of the first
- * command block. In addition we seem to emit a couple of
- * set-state commands even in empty bins.
- *
- * As a heuristic, if a bin has more than 4 commands, consider it
- * non-empty.
- */
- if (head->next != NULL ||
- head->count > 4) {
- return FALSE;
- }
-
- for (i = 0; i < head->count; i++)
- if (head->cmd[i] != lp_rast_set_state) {
- return FALSE;
- }
-
- return TRUE;
+ if (0) debug_bin(bin);
+ return bin->commands.head->count == 0;
}
@@ -813,6 +793,10 @@ rasterize_scene(struct lp_rasterizer_task *task,
}
}
#endif
+
+ if (scene->fence) {
+ lp_rast_fence(task, lp_rast_arg_fence(scene->fence));
+ }
}
@@ -983,6 +967,10 @@ lp_rast_create( unsigned num_threads )
/* for synchronizing rasterization threads */
pipe_barrier_init( &rast->barrier, rast->num_threads );
+ memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf);
+
+ memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
+
return rast;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 80ca68f5a2..eaf2a6f334 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -84,8 +84,7 @@ struct lp_rast_shader_inputs {
float (*dadx)[4];
float (*dady)[4];
- /* edge/step info for 3 edges and 4x4 block of pixels */
- PIPE_ALIGN_VAR(16) int step[3][16];
+ const struct lp_rast_state *state;
};
struct lp_rast_clearzs {
@@ -93,6 +92,22 @@ struct lp_rast_clearzs {
unsigned clearzs_mask;
};
+struct lp_rast_plane {
+ /* one-pixel sized trivial accept offsets for each plane */
+ int ei;
+
+ /* one-pixel sized trivial reject offsets for each plane */
+ int eo;
+
+ /* edge function values at minx,miny ?? */
+ int c;
+
+ int dcdx;
+ int dcdy;
+
+ /* edge/step info for 3 edges and 4x4 block of pixels */
+ const int *step;
+};
/**
* Rasterization information for a triangle known to be in this bin,
@@ -101,35 +116,16 @@ struct lp_rast_clearzs {
* Objects of this type are put into the lp_setup_context::data buffer.
*/
struct lp_rast_triangle {
+ /* inputs for the shader */
+ struct lp_rast_shader_inputs inputs;
+
+ int step[3][16];
+
#ifdef DEBUG
float v[3][2];
#endif
- /* one-pixel sized trivial accept offsets for each plane */
- int ei1;
- int ei2;
- int ei3;
-
- /* one-pixel sized trivial reject offsets for each plane */
- int eo1;
- int eo2;
- int eo3;
-
- /* y deltas for vertex pairs (in fixed pt) */
- int dy12;
- int dy23;
- int dy31;
-
- /* x deltas for vertex pairs (in fixed pt) */
- int dx12;
- int dx23;
- int dx31;
-
- /* edge function values at minx,miny ?? */
- int c1, c2, c3;
-
- /* inputs for the shader */
- PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs;
+ struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */
};
@@ -153,7 +149,10 @@ lp_rast_finish( struct lp_rasterizer *rast );
union lp_rast_cmd_arg {
const struct lp_rast_shader_inputs *shade_tile;
- const struct lp_rast_triangle *triangle;
+ struct {
+ const struct lp_rast_triangle *tri;
+ unsigned plane_mask;
+ } triangle;
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
const struct lp_rast_clearzs *clear_zstencil;
@@ -173,10 +172,12 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
}
static INLINE union lp_rast_cmd_arg
-lp_rast_arg_triangle( const struct lp_rast_triangle *triangle )
+lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
+ unsigned plane_mask)
{
union lp_rast_cmd_arg arg;
- arg.triangle = triangle;
+ arg.triangle.tri = triangle;
+ arg.triangle.plane_mask = plane_mask;
return arg;
}
@@ -226,19 +227,31 @@ void lp_rast_clear_color( struct lp_rasterizer_task *,
void lp_rast_clear_zstencil( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
-void lp_rast_set_state( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_triangle( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
+void lp_rast_triangle_1( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_2( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_3( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_4( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_5( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_6( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_7( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
void lp_rast_shade_tile( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
void lp_rast_fence( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
-void lp_rast_store_color( struct lp_rasterizer_task *,
+void lp_rast_store_linear_color( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index d33dd49f3a..b4a48cfd02 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -31,6 +31,7 @@
#include "os/os_thread.h"
#include "util/u_format.h"
#include "gallivm/lp_bld_debug.h"
+#include "lp_memory.h"
#include "lp_rast.h"
#include "lp_scene.h"
#include "lp_state.h"
@@ -52,8 +53,6 @@ struct lp_rasterizer_task
uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
uint8_t *depth_tile;
- const struct lp_rast_state *current_state;
-
/** "back" pointer */
struct lp_rasterizer *rast;
@@ -118,10 +117,12 @@ struct lp_rasterizer
};
-void lp_rast_shade_quads( struct lp_rasterizer_task *task,
- const struct lp_rast_shader_inputs *inputs,
- unsigned x, unsigned y,
- int32_t c1, int32_t c2, int32_t c3);
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ unsigned mask);
+
/**
@@ -132,18 +133,23 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task,
* \param x, y location of 4x4 block in window coords
*/
static INLINE void *
-lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast,
+lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
unsigned x, unsigned y)
{
+ const struct lp_rasterizer *rast = task->rast;
void *depth;
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
- assert(rast->zsbuf.map || !rast->curr_scene->fb.zsbuf);
-
- if (!rast->zsbuf.map)
- return NULL;
+ if (!rast->zsbuf.map) {
+ /* Either out of memory or no zsbuf. Can't tell without access
+ * to the state. Just use dummy tile memory, but don't print
+ * the oom warning as this is most likely because there is no
+ * zsbuf.
+ */
+ return lp_dummy_tile;
+ }
depth = (rast->zsbuf.map +
rast->zsbuf.stride * y +
@@ -155,6 +161,39 @@ lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast,
/**
+ * Get pointer to the swizzled color tile
+ */
+static INLINE uint8_t *
+lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
+ unsigned buf, enum lp_texture_usage usage)
+{
+ struct lp_rasterizer *rast = task->rast;
+
+ assert(task->x % TILE_SIZE == 0);
+ assert(task->y % TILE_SIZE == 0);
+ assert(buf < rast->state.nr_cbufs);
+
+ if (!task->color_tiles[buf]) {
+ struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
+ struct llvmpipe_resource *lpt;
+ assert(cbuf);
+ lpt = llvmpipe_resource(cbuf->texture);
+ task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf];
+
+ if (usage != LP_TEX_USAGE_WRITE_ALL) {
+ llvmpipe_swizzle_cbuf_tile(lpt,
+ cbuf->face + cbuf->zslice,
+ cbuf->level,
+ task->x, task->y,
+ task->color_tiles[buf]);
+ }
+ }
+
+ return task->color_tiles[buf];
+}
+
+
+/**
* Get the pointer to a 4x4 color block (within a 64x64 tile).
* We'll map the color buffer on demand here.
* Note that this may be called even when there's no color buffers - return
@@ -171,7 +210,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
- color = task->color_tiles[buf];
+ color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
assert(color);
px = x % TILE_SIZE;
@@ -196,8 +235,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y )
{
- struct lp_rasterizer *rast = task->rast;
- const struct lp_rast_state *state = task->current_state;
+ const struct lp_rasterizer *rast = task->rast;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
void *depth;
@@ -207,19 +246,19 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
for (i = 0; i < rast->state.nr_cbufs; i++)
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
- depth = lp_rast_get_depth_block_pointer(rast, x, y);
+ depth = lp_rast_get_depth_block_pointer(task, x, y);
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
- x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- INT_MIN, INT_MIN, INT_MIN,
- NULL, NULL, NULL, &task->vis_counter );
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ 0xffff,
+ &task->vis_counter );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index a5f0d14c95..ebe9a8e92b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -113,168 +113,31 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
+#define TAG(x) x##_1
+#define NR_PLANES 1
+#include "lp_rast_tri_tmp.h"
-/**
- * Pass the 4x4 pixel block to the shader function.
- * Determination of which of the 16 pixels lies inside the triangle
- * will be done as part of the fragment shader.
- */
-static void
-do_block_4(struct lp_rasterizer_task *task,
- const struct lp_rast_triangle *tri,
- int x, int y,
- int c1, int c2, int c3)
-{
- assert(x >= 0);
- assert(y >= 0);
-
- lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3);
-}
-
-
-/**
- * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
- * of the triangle's bounds.
- */
-static void
-do_block_16(struct lp_rasterizer_task *task,
- const struct lp_rast_triangle *tri,
- int x, int y,
- int c0, int c1, int c2)
-{
- unsigned mask = 0;
- int eo[3];
- int c[3];
- int i, j;
-
- assert(x >= 0);
- assert(y >= 0);
- assert(x % 16 == 0);
- assert(y % 16 == 0);
-
- eo[0] = tri->eo1 * 4;
- eo[1] = tri->eo2 * 4;
- eo[2] = tri->eo3 * 4;
-
- c[0] = c0;
- c[1] = c1;
- c[2] = c2;
-
- for (j = 0; j < 3; j++) {
- const int *step = tri->inputs.step[j];
- const int cx = c[j] + eo[j];
-
- /* Mask has bits set whenever we are outside any of the edges.
- */
- for (i = 0; i < 16; i++) {
- int out = cx + step[i] * 4;
- mask |= (out >> 31) & (1 << i);
- }
- }
+#define TAG(x) x##_2
+#define NR_PLANES 2
+#include "lp_rast_tri_tmp.h"
- mask = ~mask & 0xffff;
- while (mask) {
- int i = ffs(mask) - 1;
- int px = x + pos_table4[i][0];
- int py = y + pos_table4[i][1];
- int cx1 = c0 + tri->inputs.step[0][i] * 4;
- int cx2 = c1 + tri->inputs.step[1][i] * 4;
- int cx3 = c2 + tri->inputs.step[2][i] * 4;
+#define TAG(x) x##_3
+#define NR_PLANES 3
+#include "lp_rast_tri_tmp.h"
- mask &= ~(1 << i);
+#define TAG(x) x##_4
+#define NR_PLANES 4
+#include "lp_rast_tri_tmp.h"
- /* Don't bother testing if the 4x4 block is entirely in/out of
- * the triangle. It's a little faster to do it in the jit code.
- */
- LP_COUNT(nr_non_empty_4);
- do_block_4(task, tri, px, py, cx1, cx2, cx3);
- }
-}
-
-
-/**
- * Scan the tile in chunks and figure out which pixels to rasterize
- * for this triangle.
- */
-void
-lp_rast_triangle(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- const struct lp_rast_triangle *tri = arg.triangle;
- const int x = task->x, y = task->y;
- int ei[3], eo[3], c[3];
- unsigned outmask, inmask, partial_mask;
- unsigned i, j;
-
- c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x;
- c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x;
- c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x;
-
- eo[0] = tri->eo1 * 16;
- eo[1] = tri->eo2 * 16;
- eo[2] = tri->eo3 * 16;
-
- ei[0] = tri->ei1 * 16;
- ei[1] = tri->ei2 * 16;
- ei[2] = tri->ei3 * 16;
-
- outmask = 0;
- inmask = 0xffff;
+#define TAG(x) x##_5
+#define NR_PLANES 5
+#include "lp_rast_tri_tmp.h"
- for (j = 0; j < 3; j++) {
- const int *step = tri->inputs.step[j];
- const int cox = c[j] + eo[j];
- const int cio = ei[j]- eo[j];
+#define TAG(x) x##_6
+#define NR_PLANES 6
+#include "lp_rast_tri_tmp.h"
- /* Outmask has bits set whenever we are outside any of the
- * edges.
- */
- /* Inmask has bits set whenever we are inside all of the edges.
- */
- for (i = 0; i < 16; i++) {
- int out = cox + step[i] * 16;
- int in = out + cio;
- outmask |= (out >> 31) & (1 << i);
- inmask &= ~((in >> 31) & (1 << i));
- }
- }
+#define TAG(x) x##_7
+#define NR_PLANES 7
+#include "lp_rast_tri_tmp.h"
- assert((outmask & inmask) == 0);
-
- if (outmask == 0xffff)
- return;
-
- /* Invert mask, so that bits are set whenever we are at least
- * partially inside all of the edges:
- */
- partial_mask = ~inmask & ~outmask & 0xffff;
-
- /* Iterate over partials:
- */
- while (partial_mask) {
- int i = ffs(partial_mask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
- int cx1 = c[0] + tri->inputs.step[0][i] * 16;
- int cx2 = c[1] + tri->inputs.step[1][i] * 16;
- int cx3 = c[2] + tri->inputs.step[2][i] * 16;
-
- partial_mask &= ~(1 << i);
-
- LP_COUNT(nr_partially_covered_16);
- do_block_16(task, tri, px, py, cx1, cx2, cx3);
- }
-
- /* Iterate over fulls:
- */
- while (inmask) {
- int i = ffs(inmask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
-
- inmask &= ~(1 << i);
-
- LP_COUNT(nr_fully_covered_16);
- block_full_16(task, tri, px, py);
- }
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
new file mode 100644
index 0000000000..a410c611a3
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -0,0 +1,238 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Rasterization for binned triangles within a tile
+ */
+
+
+
+/**
+ * Prototype for a 7 plane rasterizer function. Will codegenerate
+ * several of these.
+ *
+ * XXX: Variants for more/fewer planes.
+ * XXX: Need ways of dropping planes as we descend.
+ * XXX: SIMD
+ */
+static void
+TAG(do_block_4)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int *c)
+{
+ unsigned mask = 0;
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ int any_negative = 0;
+ int j;
+
+ for (j = 0; j < NR_PLANES; j++)
+ any_negative |= (c[j] - 1 + plane[j].step[i]);
+
+ any_negative >>= 31;
+
+ mask |= (~any_negative) & (1 << i);
+ }
+
+ /* Now pass to the shader:
+ */
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+}
+
+/**
+ * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
+ * of the triangle's bounds.
+ */
+static void
+TAG(do_block_16)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int *c)
+{
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned i, j;
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int *step = plane[j].step;
+ const int eo = plane[j].eo * 4;
+ const int ei = plane[j].ei * 4;
+ const int cox = c[j] + eo;
+ const int cio = ei - 1 - eo;
+
+ for (i = 0; i < 16; i++) {
+ int out = cox + step[i] * 4;
+ int part = out + cio;
+ outmask |= (out >> 31) & (1 << i);
+ partmask |= (part >> 31) & (1 << i);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+ int cx[NR_PLANES];
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = c[j] + plane[j].step[i] * 4;
+
+ partial_mask &= ~(1 << i);
+
+ TAG(do_block_4)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+
+ inmask &= ~(1 << i);
+
+ block_full_4(task, tri, px, py);
+ }
+}
+
+
+/**
+ * Scan the tile in chunks and figure out which pixels to rasterize
+ * for this triangle.
+ */
+void
+TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ unsigned plane_mask = arg.triangle.plane_mask;
+ const int x = task->x, y = task->y;
+ struct lp_rast_plane plane[NR_PLANES];
+ int c[NR_PLANES];
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned i, j, nr_planes = 0;
+
+ while (plane_mask) {
+ int i = ffs(plane_mask) - 1;
+ plane[nr_planes] = tri->plane[i];
+ plane_mask &= ~(1 << i);
+ nr_planes++;
+ };
+
+ assert(nr_planes == NR_PLANES);
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int *step = plane[j].step;
+ const int eo = plane[j].eo * 16;
+ const int ei = plane[j].ei * 16;
+ int cox, cio;
+
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+ cox = c[j] + eo;
+ cio = ei - 1 - eo;
+
+ for (i = 0; i < 16; i++) {
+ int out = cox + step[i] * 16;
+ int part = out + cio;
+ outmask |= (out >> 31) & (1 << i);
+ partmask |= (part >> 31) & (1 << i);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+ int cx[NR_PLANES];
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = c[j] + plane[j].step[i] * 16;
+
+ partial_mask &= ~(1 << i);
+
+ LP_COUNT(nr_partially_covered_16);
+ TAG(do_block_16)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+
+ inmask &= ~(1 << i);
+
+ LP_COUNT(nr_fully_covered_16);
+ block_full_16(task, tri, px, py);
+ }
+}
+
+#undef TAG
+#undef NR_PLANES
+
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 845c175cf2..f88a759fe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -32,6 +32,7 @@
#include "util/u_simple_list.h"
#include "lp_scene.h"
#include "lp_scene_queue.h"
+#include "lp_fence.h"
/** List of texture references */
@@ -162,8 +163,8 @@ lp_scene_reset(struct lp_scene *scene )
/* Free all but last binner command lists:
*/
- for (i = 0; i < scene->tiles_x; i++) {
- for (j = 0; j < scene->tiles_y; j++) {
+ for (i = 0; i < TILES_X; i++) {
+ for (j = 0; j < TILES_Y; j++) {
lp_scene_bin_reset(scene, i, j);
}
}
@@ -198,6 +199,8 @@ lp_scene_reset(struct lp_scene *scene )
make_empty_list(ref_list);
}
+ lp_fence_reference(&scene->fence, NULL);
+
scene->scene_size = 0;
scene->has_color_clear = FALSE;
@@ -303,60 +306,6 @@ lp_scene_is_resource_referenced(const struct lp_scene *scene,
}
-/**
- * Return last command in the bin
- */
-static lp_rast_cmd
-lp_get_last_command( const struct cmd_bin *bin )
-{
- const struct cmd_block *tail = bin->commands.tail;
- const unsigned i = tail->count;
- if (i > 0)
- return tail->cmd[i - 1];
- else
- return NULL;
-}
-
-
-/**
- * Replace the arg of the last command in the bin.
- */
-static void
-lp_replace_last_command_arg( struct cmd_bin *bin,
- const union lp_rast_cmd_arg arg )
-{
- struct cmd_block *tail = bin->commands.tail;
- const unsigned i = tail->count;
- assert(i > 0);
- tail->arg[i - 1] = arg;
-}
-
-
-
-/**
- * Put a state-change command into all bins.
- * If we find that the last command in a bin was also a state-change
- * command, we can simply replace that one with the new one.
- */
-void
-lp_scene_bin_state_command( struct lp_scene *scene,
- lp_rast_cmd cmd,
- const union lp_rast_cmd_arg arg )
-{
- unsigned i, j;
- for (i = 0; i < scene->tiles_x; i++) {
- for (j = 0; j < scene->tiles_y; j++) {
- struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- lp_rast_cmd last_cmd = lp_get_last_command(bin);
- if (last_cmd == cmd) {
- lp_replace_last_command_arg(bin, arg);
- }
- else {
- lp_scene_bin_command( scene, i, j, cmd, arg );
- }
- }
- }
-}
/** advance curr_x,y to the next bin */
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index 4e55d43174..fa1b311fa1 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -112,6 +112,7 @@ struct resource_ref {
*/
struct lp_scene {
struct pipe_context *pipe;
+ struct lp_fence *fence;
/** the framebuffer to render the scene into */
struct pipe_framebuffer_state fb;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 6432cea862..167cb2ee2e 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -43,6 +43,7 @@
#include "lp_debug.h"
#include "lp_public.h"
#include "lp_limits.h"
+#include "lp_rast.h"
#include "state_tracker/sw_winsys.h"
@@ -86,7 +87,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
+ return PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
@@ -166,6 +167,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return LP_MAX_TGSI_PREDS;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 1;
+ case PIPE_CAP_GEOMETRY_SHADER4:
+ return 1;
+ case PIPE_CAP_DEPTH_CLAMP:
+ return 0;
default:
assert(0);
return 0;
@@ -294,11 +299,16 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
+ if (screen->rast)
+ lp_rast_destroy(screen->rast);
+
lp_jit_screen_cleanup(screen);
if(winsys->destroy)
winsys->destroy(winsys);
+ pipe_mutex_destroy(screen->rast_mutex);
+
FREE(screen);
}
@@ -347,11 +357,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
lp_jit_screen_init(screen);
-#ifdef PIPE_OS_WINDOWS
- /* Multithreading not supported on windows until conditions and barriers are
- * properly implemented. */
- screen->num_threads = 0;
-#else
#ifdef PIPE_OS_EMBEDDED
screen->num_threads = 0;
#else
@@ -359,7 +364,14 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
#endif
screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);
-#endif
+
+ screen->rast = lp_rast_create(screen->num_threads);
+ if (!screen->rast) {
+ lp_jit_screen_cleanup(screen);
+ FREE(screen);
+ return NULL;
+ }
+ pipe_mutex_init(screen->rast_mutex);
util_format_s3tc_init();
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
index eb40f6823f..731526dfab 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -37,6 +37,7 @@
#include "gallivm/lp_bld.h"
#include <llvm-c/ExecutionEngine.h>
+#include "os/os_thread.h"
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
@@ -63,6 +64,9 @@ struct llvmpipe_screen
/* Increments whenever textures are modified. Contexts can track this.
*/
unsigned timestamp;
+
+ struct lp_rasterizer *rast;
+ pipe_mutex rast_mutex;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index e8aafee33f..556e571585 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -40,6 +40,7 @@
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "lp_context.h"
+#include "lp_memory.h"
#include "lp_scene.h"
#include "lp_scene_queue.h"
#include "lp_texture.h"
@@ -63,15 +64,7 @@ struct lp_scene *
lp_setup_get_current_scene(struct lp_setup_context *setup)
{
if (!setup->scene) {
-
- /* wait for a free/empty scene
- */
- setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE);
-
- assert(lp_scene_is_empty(setup->scene));
-
- lp_scene_begin_binning(setup->scene,
- &setup->fb );
+ set_scene_state( setup, SETUP_EMPTY );
}
return setup->scene;
}
@@ -159,8 +152,11 @@ static void
lp_setup_rasterize_scene( struct lp_setup_context *setup )
{
struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen);
- lp_scene_rasterize(scene, setup->rast);
+ pipe_mutex_lock(screen->rast_mutex);
+ lp_scene_rasterize(scene, screen->rast);
+ pipe_mutex_unlock(screen->rast_mutex);
reset_context( setup );
@@ -233,22 +229,36 @@ set_scene_state( struct lp_setup_context *setup,
LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state);
switch (new_state) {
- case SETUP_ACTIVE:
- begin_binning( setup );
+ case SETUP_EMPTY:
+ assert(old_state == SETUP_FLUSHED);
+ assert(setup->scene == NULL);
+
+ /* wait for a free/empty scene
+ */
+ setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE);
+ assert(lp_scene_is_empty(setup->scene));
+ lp_scene_begin_binning(setup->scene,
+ &setup->fb );
break;
case SETUP_CLEARED:
- if (old_state == SETUP_ACTIVE) {
- assert(0);
- return;
- }
+ assert(old_state == SETUP_EMPTY);
+ assert(setup->scene != NULL);
break;
-
+
+ case SETUP_ACTIVE:
+ assert(old_state == SETUP_EMPTY ||
+ old_state == SETUP_CLEARED);
+ assert(setup->scene != NULL);
+ begin_binning( setup );
+ break;
+
case SETUP_FLUSHED:
if (old_state == SETUP_CLEARED)
execute_clears( setup );
else
lp_setup_rasterize_scene( setup );
+ assert(setup->scene == NULL);
break;
default:
@@ -264,23 +274,19 @@ set_scene_state( struct lp_setup_context *setup,
*/
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags )
+ unsigned flags,
+ struct pipe_fence_handle **fence)
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
if (setup->scene) {
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
- union lp_rast_cmd_arg dummy = {0};
-
- if (flags & (PIPE_FLUSH_SWAPBUFFERS |
- PIPE_FLUSH_FRAME)) {
- /* Store colors in the linear color buffer(s).
- * If we don't do this here, we'll end up converting the tiled
- * data to linear in the texture_unmap() function, which will
- * not be a parallel/threaded operation as here.
+ if (fence) {
+ /* if we're going to flush the setup/rasterization modules, emit
+ * a fence.
*/
- lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy);
+ *fence = lp_setup_fence( setup );
}
+
}
set_scene_state( setup, SETUP_FLUSHED );
@@ -297,6 +303,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
*/
set_scene_state( setup, SETUP_FLUSHED );
+ /*
+ * Ensure the old scene is not reused.
+ */
+ assert(!setup->scene);
+
/* Set new state. This will be picked up later when we next need a
* scene.
*/
@@ -421,24 +432,27 @@ lp_setup_clear( struct lp_setup_context *setup,
struct pipe_fence_handle *
lp_setup_fence( struct lp_setup_context *setup )
{
- if (setup->num_threads == 0) {
+ if (setup->scene == NULL)
return NULL;
- }
- else {
+ else if (setup->num_threads == 0)
+ return NULL;
+ else
+ {
struct lp_scene *scene = lp_setup_get_current_scene(setup);
- const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */
- struct lp_fence *fence = lp_fence_create(rank);
-
- LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
+ const unsigned rank = setup->num_threads;
set_scene_state( setup, SETUP_ACTIVE );
+
+ assert(scene->fence == NULL);
+
+ /* The caller gets a reference, we keep a copy too, so need to
+ * bump the refcount:
+ */
+ lp_fence_reference(&scene->fence, lp_fence_create(rank));
- /* insert the fence into all command bins */
- lp_scene_bin_everywhere( scene,
- lp_rast_fence,
- lp_rast_arg_fence(fence) );
+ LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
- return (struct pipe_fence_handle *) fence;
+ return (struct pipe_fence_handle *) scene->fence;
}
}
@@ -611,6 +625,17 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
LP_TEX_LAYOUT_LINEAR);
jit_tex->row_stride[j] = lp_tex->row_stride[j];
jit_tex->img_stride[j] = lp_tex->img_stride[j];
+
+ if (!jit_tex->data[j]) {
+ /* out of memory - use dummy tile memory */
+ jit_tex->data[j] = lp_dummy_tile;
+ jit_tex->width = TILE_SIZE;
+ jit_tex->height = TILE_SIZE;
+ jit_tex->depth = 1;
+ jit_tex->last_level = 0;
+ jit_tex->row_stride[j] = 0;
+ jit_tex->img_stride[j] = 0;
+ }
}
}
else {
@@ -618,7 +643,6 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
/*
* XXX: Where should this be unmapped?
*/
-
struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
struct sw_winsys *winsys = screen->winsys;
jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt,
@@ -717,28 +741,6 @@ lp_setup_update_state( struct lp_setup_context *setup )
setup->dirty |= LP_SETUP_NEW_FS;
}
- if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
- float *stored;
-
- stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16);
-
- if (stored) {
- stored[0] = (float) setup->scissor.current.minx;
- stored[1] = (float) setup->scissor.current.miny;
- stored[2] = (float) setup->scissor.current.maxx;
- stored[3] = (float) setup->scissor.current.maxy;
-
- setup->scissor.stored = stored;
-
- setup->fs.current.jit_context.scissor_xmin = stored[0];
- setup->fs.current.jit_context.scissor_ymin = stored[1];
- setup->fs.current.jit_context.scissor_xmax = stored[2];
- setup->fs.current.jit_context.scissor_ymax = stored[3];
- }
-
- setup->dirty |= LP_SETUP_NEW_FS;
- }
-
if(setup->dirty & LP_SETUP_NEW_CONSTANTS) {
struct pipe_resource *buffer = setup->constants.current;
@@ -792,11 +794,6 @@ lp_setup_update_state( struct lp_setup_context *setup )
&setup->fs.current,
sizeof setup->fs.current);
setup->fs.stored = stored;
-
- /* put the state-set command into all bins */
- lp_scene_bin_state_command( scene,
- lp_rast_set_state,
- lp_rast_arg_state(setup->fs.stored) );
}
/* The scene now references the textures in the rasterization
@@ -843,8 +840,6 @@ lp_setup_destroy( struct lp_setup_context *setup )
lp_scene_queue_destroy(setup->empty_scenes);
- lp_rast_destroy( setup->rast );
-
FREE( setup );
}
@@ -871,13 +866,7 @@ lp_setup_create( struct pipe_context *pipe,
if (!setup->empty_scenes)
goto fail;
- /* XXX: move this to the screen and share between contexts:
- */
setup->num_threads = screen->num_threads;
- setup->rast = lp_rast_create(screen->num_threads);
- if (!setup->rast)
- goto fail;
-
setup->vbuf = draw_vbuf_stage(draw, &setup->base);
if (!setup->vbuf)
goto fail;
@@ -901,9 +890,6 @@ lp_setup_create( struct pipe_context *pipe,
return setup;
fail:
- if (setup->rast)
- lp_rast_destroy( setup->rast );
-
if (setup->vbuf)
;
@@ -933,6 +919,8 @@ lp_setup_begin_query(struct lp_setup_context *setup,
memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */
+ set_scene_state( setup, SETUP_ACTIVE );
+
cmd_arg.query_obj = pq;
lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg);
pq->binned = TRUE;
@@ -948,6 +936,8 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
struct lp_scene * scene = lp_setup_get_current_scene(setup);
union lp_rast_cmd_arg cmd_arg;
+ set_scene_state( setup, SETUP_ACTIVE );
+
cmd_arg.query_obj = pq;
lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 6a0dc55129..73b1c85325 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -84,7 +84,8 @@ lp_setup_fence( struct lp_setup_context *setup );
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags );
+ unsigned flags,
+ struct pipe_fence_handle **fence);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index c8b8a2480b..a0606f5034 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -81,7 +81,6 @@ struct lp_setup_context
*/
struct draw_stage *vbuf;
unsigned num_threads;
- struct lp_rasterizer *rast;
struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */
struct lp_scene *scene; /**< current scene being built */
struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */
@@ -101,9 +100,10 @@ struct lp_setup_context
} clear;
enum setup_state {
- SETUP_FLUSHED,
- SETUP_CLEARED,
- SETUP_ACTIVE
+ SETUP_FLUSHED, /**< scene is null */
+ SETUP_EMPTY, /**< scene exists but has only state changes */
+ SETUP_CLEARED, /**< scene exists but has only clears */
+ SETUP_ACTIVE /**< scene exists and has at least one draw/query */
} state;
struct {
@@ -129,7 +129,6 @@ struct lp_setup_context
struct {
struct pipe_scissor_state current;
- const void *stored;
} scissor;
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 0557d35f8b..7e432503c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -38,12 +38,78 @@
#define NUM_CHANNELS 4
+struct tri_info {
+
+ float pixel_offset;
+
+ /* fixed point vertex coordinates */
+ int x[3];
+ int y[3];
+
+ /* float x,y deltas - all from the original coordinates
+ */
+ float dy01, dy20;
+ float dx01, dx20;
+ float oneoverarea;
+
+ const float (*v0)[4];
+ const float (*v1)[4];
+ const float (*v2)[4];
+
+ boolean frontfacing;
+};
+
+
+
+static const int step_scissor_minx[16] = {
+ 0, 1, 0, 1,
+ 2, 3, 2, 3,
+ 0, 1, 0, 1,
+ 2, 3, 2, 3
+};
+
+static const int step_scissor_maxx[16] = {
+ 0, -1, 0, -1,
+ -2, -3, -2, -3,
+ 0, -1, 0, -1,
+ -2, -3, -2, -3
+};
+
+static const int step_scissor_miny[16] = {
+ 0, 0, 1, 1,
+ 0, 0, 1, 1,
+ 2, 2, 3, 3,
+ 2, 2, 3, 3
+};
+
+static const int step_scissor_maxy[16] = {
+ 0, 0, -1, -1,
+ 0, 0, -1, -1,
+ -2, -2, -3, -3,
+ -2, -2, -3, -3
+};
+
+
+
+
+static INLINE int
+subpixel_snap(float a)
+{
+ return util_iround(FIXED_ONE * a);
+}
+
+static INLINE float
+fixed_to_float(int a)
+{
+ return a * (1.0 / FIXED_ONE);
+}
+
+
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
-static void constant_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+static void constant_coef( struct lp_rast_triangle *tri,
unsigned slot,
const float value,
unsigned i )
@@ -54,28 +120,21 @@ static void constant_coef( struct lp_setup_context *setup,
}
-/**
- * Compute a0, dadx and dady for a linearly interpolated coefficient,
- * for a triangle.
- */
-static void linear_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+
+static void linear_coef( struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
- float a1 = v1[vert_attr][i];
- float a2 = v2[vert_attr][i];
- float a3 = v3[vert_attr][i];
+ float a0 = info->v0[vert_attr][i];
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
+ float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
@@ -92,9 +151,9 @@ static void linear_coef( struct lp_setup_context *setup,
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (v1[0][0] - setup->pixel_offset) +
- dady * (v1[0][1] - setup->pixel_offset)));
+ tri->inputs.a0[slot][i] = (a0 -
+ (dadx * (info->v0[0][0] - info->pixel_offset) +
+ dady * (info->v0[0][1] - info->pixel_offset)));
}
@@ -106,31 +165,27 @@ static void linear_coef( struct lp_setup_context *setup,
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void perspective_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+static void perspective_coef( struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
/* premultiply by 1/w (v[0][3] is always 1/w):
*/
- float a1 = v1[vert_attr][i] * v1[0][3];
- float a2 = v2[vert_attr][i] * v2[0][3];
- float a3 = v3[vert_attr][i] * v3[0][3];
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+ float a0 = info->v0[vert_attr][i] * info->v0[0][3];
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
+ float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (v1[0][0] - setup->pixel_offset) +
- dady * (v1[0][1] - setup->pixel_offset)));
+ tri->inputs.a0[slot][i] = (a0 -
+ (dadx * (info->v0[0][0] - info->pixel_offset) +
+ dady * (info->v0[0][1] - info->pixel_offset)));
}
@@ -141,13 +196,9 @@ static void perspective_coef( struct lp_setup_context *setup,
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
*/
static void
-setup_fragcoord_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+setup_fragcoord_coef(struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned usage_mask)
{
/*X*/
@@ -166,12 +217,12 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2);
+ linear_coef(tri, info, slot, 0, 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3);
+ linear_coef(tri, info, slot, 0, 3);
}
}
@@ -180,24 +231,23 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
* Setup the fragment input attribute with the front-facing value.
* \param frontface is the triangle front facing?
*/
-static void setup_facing_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+static void setup_facing_coef( struct lp_rast_triangle *tri,
unsigned slot,
boolean frontface,
unsigned usage_mask)
{
/* convert TRUE to 1.0 and FALSE to -1.0 */
if (usage_mask & TGSI_WRITEMASK_X)
- constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 );
+ constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 );
if (usage_mask & TGSI_WRITEMASK_Y)
- constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_Z)
- constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_W)
- constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
}
@@ -206,11 +256,7 @@ static void setup_facing_coef( struct lp_setup_context *setup,
*/
static void setup_tri_coefficients( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
- float oneoverarea,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
- boolean frontface)
+ const struct tri_info *info)
{
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
@@ -227,25 +273,25 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
if (setup->flatshade_first) {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
+ constant_coef(tri, slot+1, info->v0[vert_attr][i], i);
}
else {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, v3[vert_attr][i], i);
+ constant_coef(tri, slot+1, info->v2[vert_attr][i], i);
}
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ linear_coef(tri, info, slot+1, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ perspective_coef(tri, info, slot+1, vert_attr, i);
fragcoord_usage_mask |= TGSI_WRITEMASK_W;
break;
@@ -259,7 +305,7 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
break;
case LP_INTERP_FACING:
- setup_facing_coef(setup, tri, slot+1, frontface, usage_mask);
+ setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask);
break;
default:
@@ -269,16 +315,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3,
- fragcoord_usage_mask);
+ setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
}
-static INLINE int subpixel_snap( float a )
-{
- return util_iround(FIXED_ONE * a - (FIXED_ONE / 2));
-}
@@ -291,21 +332,23 @@ static INLINE int subpixel_snap( float a )
* \return pointer to triangle space
*/
static INLINE struct lp_rast_triangle *
-alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
+alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
struct lp_rast_triangle *tri;
- unsigned bytes;
+ unsigned tri_bytes, bytes;
char *inputs;
- assert(sizeof(*tri) % 16 == 0);
-
- bytes = sizeof(*tri) + (3 * input_array_sz);
+ tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16);
+ bytes = tri_bytes + (3 * input_array_sz);
tri = lp_scene_alloc_aligned( scene, bytes, 16 );
if (tri) {
- inputs = (char *) (tri + 1);
+ inputs = ((char *)tri) + tri_bytes;
tri->inputs.a0 = (float (*)[4]) inputs;
tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
@@ -329,52 +372,71 @@ print_triangle(struct lp_setup_context *setup,
uint i;
debug_printf("llvmpipe triangle\n");
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v1[%d]: %f %f %f %f\n", i,
v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
}
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v2[%d]: %f %f %f %f\n", i,
v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
}
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v3[%d]: %f %f %f %f\n", i,
v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
}
}
+lp_rast_cmd lp_rast_tri_tab[8] = {
+ NULL, /* should be impossible */
+ lp_rast_triangle_1,
+ lp_rast_triangle_2,
+ lp_rast_triangle_3,
+ lp_rast_triangle_4,
+ lp_rast_triangle_5,
+ lp_rast_triangle_6,
+ lp_rast_triangle_7
+};
+
/**
* Do basic setup for triangle rasterization and determine which
* framebuffer tiles are touched. Put the triangle in the scene's
* bins for the tiles which we overlap.
*/
-static void
+static void
do_triangle_ccw(struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontfacing )
{
- /* x/y positions in fixed point */
- const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset);
- const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset);
- const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset);
- const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset);
- const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset);
- const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset);
struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
struct lp_rast_triangle *tri;
+ struct tri_info info;
int area;
- float oneoverarea;
int minx, maxx, miny, maxy;
+ int ix0, ix1, iy0, iy1;
unsigned tri_bytes;
-
+ int i;
+ int nr_planes = 3;
+
if (0)
print_triangle(setup, v1, v2, v3);
- tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes);
+ if (setup->scissor_test) {
+ nr_planes = 7;
+ }
+ else {
+ nr_planes = 3;
+ }
+
+
+ tri = alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &tri_bytes);
if (!tri)
return;
@@ -387,15 +449,24 @@ do_triangle_ccw(struct lp_setup_context *setup,
tri->v[2][1] = v3[0][1];
#endif
- tri->dx12 = x1 - x2;
- tri->dx23 = x2 - x3;
- tri->dx31 = x3 - x1;
+ /* x/y positions in fixed point */
+ info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
+ info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset);
+ info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
+ info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
+ info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset);
+
+ tri->plane[0].dcdy = info.x[0] - info.x[1];
+ tri->plane[1].dcdy = info.x[1] - info.x[2];
+ tri->plane[2].dcdy = info.x[2] - info.x[0];
- tri->dy12 = y1 - y2;
- tri->dy23 = y2 - y3;
- tri->dy31 = y3 - y1;
+ tri->plane[0].dcdx = info.y[0] - info.y[1];
+ tri->plane[1].dcdx = info.y[1] - info.y[2];
+ tri->plane[2].dcdx = info.y[2] - info.y[0];
- area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12);
+ area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
+ tri->plane[2].dcdy * tri->plane[0].dcdx);
LP_COUNT(nr_tris);
@@ -410,20 +481,35 @@ do_triangle_ccw(struct lp_setup_context *setup,
}
/* Bounding rectangle (in pixels) */
- minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
-
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ }
+
if (setup->scissor_test) {
minx = MAX2(minx, setup->scissor.current.minx);
maxx = MIN2(maxx, setup->scissor.current.maxx);
miny = MAX2(miny, setup->scissor.current.miny);
maxy = MIN2(maxy, setup->scissor.current.maxy);
}
+ else {
+ minx = MAX2(minx, 0);
+ miny = MAX2(miny, 0);
+ maxx = MIN2(maxx, scene->fb.width);
+ maxy = MIN2(maxy, scene->fb.height);
+ }
+
- if (miny == maxy ||
- minx == maxx) {
+ if (miny >= maxy || minx >= maxx) {
lp_scene_putback_data( scene, tri_bytes );
LP_COUNT(nr_culled_tris);
return;
@@ -431,75 +517,88 @@ do_triangle_ccw(struct lp_setup_context *setup,
/*
*/
- oneoverarea = ((float)FIXED_ONE) / (float)area;
+ info.pixel_offset = setup->pixel_offset;
+ info.v0 = v1;
+ info.v1 = v2;
+ info.v2 = v3;
+ info.dx01 = info.v0[0][0] - info.v1[0][0];
+ info.dx20 = info.v2[0][0] - info.v0[0][0];
+ info.dy01 = info.v0[0][1] - info.v1[0][1];
+ info.dy20 = info.v2[0][1] - info.v0[0][1];
+ info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
+ info.frontfacing = frontfacing;
/* Setup parameter interpolants:
*/
- setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
+ setup_tri_coefficients( setup, tri, &info );
tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
+ tri->inputs.state = setup->fs.stored;
- /* half-edge constants, will be interated over the whole render target.
- */
- tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
- tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
- tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
- /* correct for top-left fill convention:
- */
- if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
- if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
- if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
-
- tri->dy12 *= FIXED_ONE;
- tri->dy23 *= FIXED_ONE;
- tri->dy31 *= FIXED_ONE;
-
- tri->dx12 *= FIXED_ONE;
- tri->dx23 *= FIXED_ONE;
- tri->dx31 *= FIXED_ONE;
-
- /* find trivial reject offsets for each edge for a single-pixel
- * sized block. These will be scaled up at each recursive level to
- * match the active blocksize. Scaling in this way works best if
- * the blocks are square.
- */
- tri->eo1 = 0;
- if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
- if (tri->dx12 > 0) tri->eo1 += tri->dx12;
+
+ for (i = 0; i < 3; i++) {
+ struct lp_rast_plane *plane = &tri->plane[i];
- tri->eo2 = 0;
- if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
- if (tri->dx23 > 0) tri->eo2 += tri->dx23;
+ /* half-edge constants, will be iterated over the whole render
+ * target.
+ */
+ plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i];
+
+ /* correct for top-left vs. bottom-left fill convention.
+ *
+ * note that we're overloading gl_rasterization_rules to mean
+ * both (0.5,0.5) pixel centers *and* bottom-left filling
+ * convention.
+ *
+ * GL actually has a top-left filling convention, but GL's
+ * notion of "top" differs from gallium's...
+ *
+ * Also, sometimes (in FBO cases) GL will render upside down
+ * to its usual method, in which case it will probably want
+ * to use the opposite, top-left convention.
+ */
+ if (plane->dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane->c++;
+ }
+ else if (plane->dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane->dcdy > 0) plane->c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane->dcdy < 0) plane->c++;
+ }
+ }
- tri->eo3 = 0;
- if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
- if (tri->dx31 > 0) tri->eo3 += tri->dx31;
+ plane->dcdx *= FIXED_ONE;
+ plane->dcdy *= FIXED_ONE;
- /* Calculate trivial accept offsets from the above.
- */
- tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
- tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
- tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane->eo = 0;
+ if (plane->dcdx < 0) plane->eo -= plane->dcdx;
+ if (plane->dcdy > 0) plane->eo += plane->dcdy;
- /* Fill in the inputs.step[][] arrays.
- * We've manually unrolled some loops here.
- */
- {
- const int xstep1 = -tri->dy12;
- const int xstep2 = -tri->dy23;
- const int xstep3 = -tri->dy31;
- const int ystep1 = tri->dx12;
- const int ystep2 = tri->dx23;
- const int ystep3 = tri->dx31;
-
-#define SETUP_STEP(i, x, y) \
- do { \
- tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \
- tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \
- tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \
- } while (0)
+ /* Calculate trivial accept offsets from the above.
+ */
+ plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ plane->step = tri->step[i];
+
+ /* Fill in the tri->step[][] arrays.
+ * We've manually unrolled some loops here.
+ */
+#define SETUP_STEP(j, x, y) \
+ tri->step[i][j] = y * plane->dcdy - x * plane->dcdx
+
SETUP_STEP(0, 0, 0);
SETUP_STEP(1, 1, 0);
SETUP_STEP(2, 0, 1);
@@ -522,63 +621,106 @@ do_triangle_ccw(struct lp_setup_context *setup,
#undef STEP
}
+
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangle's edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ */
+ if (nr_planes == 7) {
+ tri->plane[3].step = step_scissor_minx;
+ tri->plane[3].dcdx = -1;
+ tri->plane[3].dcdy = 0;
+ tri->plane[3].c = 1-minx;
+ tri->plane[3].ei = 0;
+ tri->plane[3].eo = 1;
+
+ tri->plane[4].step = step_scissor_maxx;
+ tri->plane[4].dcdx = 1;
+ tri->plane[4].dcdy = 0;
+ tri->plane[4].c = maxx;
+ tri->plane[4].ei = -1;
+ tri->plane[4].eo = 0;
+
+ tri->plane[5].step = step_scissor_miny;
+ tri->plane[5].dcdx = 0;
+ tri->plane[5].dcdy = 1;
+ tri->plane[5].c = 1-miny;
+ tri->plane[5].ei = 0;
+ tri->plane[5].eo = 1;
+
+ tri->plane[6].step = step_scissor_maxy;
+ tri->plane[6].dcdx = 0;
+ tri->plane[6].dcdy = -1;
+ tri->plane[6].c = maxy;
+ tri->plane[6].ei = -1;
+ tri->plane[6].eo = 0;
+ }
+
+
/*
* All fields of 'tri' are now set. The remaining code here is
* concerned with binning.
*/
- /* Convert to tile coordinates:
+ /* Convert to tile coordinates, and inclusive ranges:
*/
- minx = minx / TILE_SIZE;
- miny = miny / TILE_SIZE;
- maxx = maxx / TILE_SIZE;
- maxy = maxy / TILE_SIZE;
+ ix0 = minx / TILE_SIZE;
+ iy0 = miny / TILE_SIZE;
+ ix1 = (maxx-1) / TILE_SIZE;
+ iy1 = (maxy-1) / TILE_SIZE;
/*
* Clamp to framebuffer size
*/
- minx = MAX2(minx, 0);
- miny = MAX2(miny, 0);
- maxx = MIN2(maxx, scene->tiles_x - 1);
- maxy = MIN2(maxy, scene->tiles_y - 1);
+ assert(ix0 == MAX2(ix0, 0));
+ assert(iy0 == MAX2(iy0, 0));
+ assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
+ assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
/* Determine which tile(s) intersect the triangle's bounding box
*/
- if (miny == maxy && minx == maxx)
+ if (iy0 == iy1 && ix0 == ix1)
{
/* Triangle is contained in a single tile:
*/
- lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
+ lp_scene_bin_command( scene, ix0, iy0,
+ lp_rast_tri_tab[nr_planes],
+ lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
}
- else
+ else
{
- int c1 = (tri->c1 +
- tri->dx12 * miny * TILE_SIZE -
- tri->dy12 * minx * TILE_SIZE);
- int c2 = (tri->c2 +
- tri->dx23 * miny * TILE_SIZE -
- tri->dy23 * minx * TILE_SIZE);
- int c3 = (tri->c3 +
- tri->dx31 * miny * TILE_SIZE -
- tri->dy31 * minx * TILE_SIZE);
-
- int ei1 = tri->ei1 << TILE_ORDER;
- int ei2 = tri->ei2 << TILE_ORDER;
- int ei3 = tri->ei3 << TILE_ORDER;
-
- int eo1 = tri->eo1 << TILE_ORDER;
- int eo2 = tri->eo2 << TILE_ORDER;
- int eo3 = tri->eo3 << TILE_ORDER;
-
- int xstep1 = -(tri->dy12 << TILE_ORDER);
- int xstep2 = -(tri->dy23 << TILE_ORDER);
- int xstep3 = -(tri->dy31 << TILE_ORDER);
-
- int ystep1 = tri->dx12 << TILE_ORDER;
- int ystep2 = tri->dx23 << TILE_ORDER;
- int ystep3 = tri->dx31 << TILE_ORDER;
+ int c[7];
+ int ei[7];
+ int eo[7];
+ int xstep[7];
+ int ystep[7];
int x, y;
+
+ for (i = 0; i < nr_planes; i++) {
+ c[i] = (tri->plane[i].c +
+ tri->plane[i].dcdy * iy0 * TILE_SIZE -
+ tri->plane[i].dcdx * ix0 * TILE_SIZE);
+
+ ei[i] = tri->plane[i].ei << TILE_ORDER;
+ eo[i] = tri->plane[i].eo << TILE_ORDER;
+ xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER);
+ ystep[i] = tri->plane[i].dcdy << TILE_ORDER;
+ }
+
/* Test tile-sized blocks against the triangle.
@@ -586,63 +728,67 @@ do_triangle_ccw(struct lp_setup_context *setup,
* contained inside the tri, bin an lp_rast_shade_tile command.
* Else, bin a lp_rast_triangle command.
*/
- for (y = miny; y <= maxy; y++)
+ for (y = iy0; y <= iy1; y++)
{
- int cx1 = c1;
- int cx2 = c2;
- int cx3 = c3;
boolean in = FALSE; /* are we inside the triangle? */
+ int cx[7];
+
+ for (i = 0; i < nr_planes; i++)
+ cx[i] = c[i];
- for (x = minx; x <= maxx; x++)
+ for (x = ix0; x <= ix1; x++)
{
- if (cx1 + eo1 < 0 ||
- cx2 + eo2 < 0 ||
- cx3 + eo3 < 0)
- {
- /* do nothing */
+ int out = 0;
+ int partial = 0;
+
+ for (i = 0; i < nr_planes; i++) {
+ int planeout = cx[i] + eo[i];
+ int planepartial = cx[i] + ei[i] - 1;
+ out |= (planeout >> 31);
+ partial |= (planepartial >> 31) & (1<<i);
+ }
+
+ if (out) {
+ /* do nothing */
+ if (in)
+ break; /* exiting triangle, all done with this row */
LP_COUNT(nr_empty_64);
- if (in)
- break; /* exiting triangle, all done with this row */
- }
- else if (cx1 + ei1 > 0 &&
- cx2 + ei2 > 0 &&
- cx3 + ei3 > 0)
- {
+ }
+ else if (partial) {
+ /* Not trivially accepted by at least one plane -
+ * rasterize/shade partial tile
+ */
+ int count = util_bitcount(partial);
+ in = TRUE;
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_tri_tab[count],
+ lp_rast_arg_triangle(tri, partial) );
+
+ LP_COUNT(nr_partially_covered_64);
+ }
+ else {
/* triangle covers the whole tile- shade whole tile */
LP_COUNT(nr_fully_covered_64);
- in = TRUE;
- if (setup->fs.current.variant->opaque) {
+ in = TRUE;
+ if (variant->opaque &&
+ !setup->fb.zsbuf) {
lp_scene_bin_reset( scene, x, y );
- lp_scene_bin_command( scene, x, y,
- lp_rast_set_state,
- lp_rast_arg_state(setup->fs.stored) );
}
lp_scene_bin_command( scene, x, y,
lp_rast_shade_tile,
lp_rast_arg_inputs(&tri->inputs) );
- }
- else
- {
- /* rasterizer/shade partial tile */
- LP_COUNT(nr_partially_covered_64);
- in = TRUE;
- lp_scene_bin_command( scene, x, y,
- lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
- }
+ }
/* Iterate cx values across the region:
*/
- cx1 += xstep1;
- cx2 += xstep2;
- cx3 += xstep3;
+ for (i = 0; i < nr_planes; i++)
+ cx[i] += xstep[i];
}
/* Iterate c values down the region:
*/
- c1 += ystep1;
- c2 += ystep2;
- c3 += ystep3;
+ for (i = 0; i < nr_planes; i++)
+ c[i] += ystep[i];
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index f6a424f25a..51948f5bf2 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -61,7 +61,9 @@ lp_setup_get_vertex_info(struct vbuf_render *vbr)
{
struct lp_setup_context *setup = lp_setup_context(vbr);
- /* vertex size/info depends on the latest state */
+ /* Vertex size/info depends on the latest state.
+ * The draw module may have issued additional state-change commands.
+ */
lp_setup_update_state(setup);
return setup->vertex_info;
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 05d1b93794..86313e1c48 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -130,6 +130,12 @@ llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe);
void
llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe);
+void
+llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *ctx,
+ unsigned num,
+ struct pipe_sampler_view **views);
+void
+llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index d20a5218d4..77bec4640b 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -189,7 +189,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]);
if (llvmpipe->dirty & LP_NEW_SAMPLER_VIEW)
- lp_setup_set_fragment_sampler_views(llvmpipe->setup,
+ lp_setup_set_fragment_sampler_views(llvmpipe->setup,
llvmpipe->num_fragment_sampler_views,
llvmpipe->fragment_sampler_views);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 65115052cd..5953d690a4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -31,9 +31,6 @@
* Code generate the whole fragment pipeline.
*
* The fragment pipeline consists of the following stages:
- * - triangle edge in/out testing
- * - scissor test
- * - stipple (TBI)
* - early depth test
* - fragment shader
* - alpha test
@@ -97,6 +94,7 @@
#include "lp_state.h"
#include "lp_tex_sample.h"
#include "lp_flush.h"
+#include "lp_state_fs.h"
#include <llvm-c/Analysis.h>
@@ -170,177 +168,63 @@ generate_depth_stencil(LLVMBuilderRef builder,
/**
- * Generate the code to do inside/outside triangle testing for the
+ * Expand the relevant bits of mask_input to a 4-dword mask for the
* four pixels in a 2x2 quad. This will set the four elements of the
* quad mask vector to 0 or ~0.
- * \param i which quad of the quad group to test, in [0,3]
+ *
+ * \param quad which quad of the quad group to test, in [0,3]
+ * \param mask_input bitwise mask for the whole 4x4 stamp
*/
-static void
-generate_tri_edge_mask(LLVMBuilderRef builder,
- unsigned i,
- LLVMValueRef *mask, /* ivec4, out */
- LLVMValueRef c0, /* int32 */
- LLVMValueRef c1, /* int32 */
- LLVMValueRef c2, /* int32 */
- LLVMValueRef step0_ptr, /* ivec4 */
- LLVMValueRef step1_ptr, /* ivec4 */
- LLVMValueRef step2_ptr) /* ivec4 */
+static LLVMValueRef
+generate_quad_mask(LLVMBuilderRef builder,
+ struct lp_type fs_type,
+ unsigned quad,
+ LLVMValueRef mask_input) /* int32 */
{
-#define OPTIMIZE_IN_OUT_TEST 0
-#if OPTIMIZE_IN_OUT_TEST
- struct lp_build_if_state ifctx;
- LLVMValueRef not_draw_all;
-#endif
- struct lp_build_flow_context *flow;
- struct lp_type i32_type;
- LLVMTypeRef i32vec4_type;
- LLVMValueRef c0_vec, c1_vec, c2_vec;
- LLVMValueRef in_out_mask;
-
- assert(i < 4);
-
- /* int32 vector type */
- memset(&i32_type, 0, sizeof i32_type);
- i32_type.floating = FALSE; /* values are integers */
- i32_type.sign = TRUE; /* values are signed */
- i32_type.norm = FALSE; /* values are not normalized */
- i32_type.width = 32; /* 32-bit int values */
- i32_type.length = 4; /* 4 elements per vector */
-
- i32vec4_type = lp_build_int32_vec4_type();
+ struct lp_type mask_type;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef bits[4];
+ LLVMValueRef mask;
/*
- * Use a conditional here to do detailed pixel in/out testing.
- * We only have to do this if c0 != INT_MIN.
+ * XXX: We'll need a different path for 16 x u8
*/
- flow = lp_build_flow_create(builder);
- lp_build_flow_scope_begin(flow);
-
- {
-#if OPTIMIZE_IN_OUT_TEST
- /* not_draw_all = (c0 != INT_MIN) */
- not_draw_all = LLVMBuildICmp(builder,
- LLVMIntNE,
- c0,
- LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
- "");
-
- in_out_mask = lp_build_const_int_vec(i32_type, ~0);
-
-
- lp_build_flow_scope_declare(flow, &in_out_mask);
-
- /* if (not_draw_all) {... */
- lp_build_if(&ifctx, flow, builder, not_draw_all);
-#endif
- {
- LLVMValueRef step0_vec, step1_vec, step2_vec;
- LLVMValueRef m0_vec, m1_vec, m2_vec;
- LLVMValueRef index, m;
-
- /* c0_vec = {c0, c0, c0, c0}
- * Note that we emit this code four times but LLVM optimizes away
- * three instances of it.
- */
- c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
- c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
- c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
- lp_build_name(c0_vec, "edgeconst0vec");
- lp_build_name(c1_vec, "edgeconst1vec");
- lp_build_name(c2_vec, "edgeconst2vec");
-
- /* load step0vec, step1, step2 vec from memory */
- index = LLVMConstInt(LLVMInt32Type(), i, 0);
- step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
- step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
- step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
- lp_build_name(step0_vec, "step0vec");
- lp_build_name(step1_vec, "step1vec");
- lp_build_name(step2_vec, "step2vec");
-
- /* m0_vec = step0_ptr[i] > c0_vec */
- m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
- m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
- m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
-
- /* in_out_mask = m0_vec & m1_vec & m2_vec */
- m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
- in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
- lp_build_name(in_out_mask, "inoutmaskvec");
- }
-#if OPTIMIZE_IN_OUT_TEST
- lp_build_endif(&ifctx);
-#endif
-
- }
- lp_build_flow_scope_end(flow);
- lp_build_flow_destroy(flow);
+ assert(fs_type.width == 32);
+ assert(fs_type.length == 4);
+ mask_type = lp_int_type(fs_type);
- /* This is the initial alive/dead pixel mask for a quad of four pixels.
- * It's an int[4] vector with each word set to 0 or ~0.
- * Words will get cleared when pixels faile the Z test, etc.
+ /*
+ * mask_input >>= (quad * 4)
*/
- *mask = in_out_mask;
-}
-
-
-static LLVMValueRef
-generate_scissor_test(LLVMBuilderRef builder,
- LLVMValueRef context_ptr,
- const struct lp_build_interp_soa_context *interp,
- struct lp_type type)
-{
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
- LLVMValueRef xmin, ymin, xmax, ymax;
- LLVMValueRef m0, m1, m2, m3, m;
-
- /* xpos, ypos contain the window coords for the four pixels in the quad */
- assert(xpos);
- assert(ypos);
-
- /* get the current scissor bounds, convert to vectors */
- xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
- xmin = lp_build_broadcast(builder, vec_type, xmin);
-
- ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
- ymin = lp_build_broadcast(builder, vec_type, ymin);
- xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
- xmax = lp_build_broadcast(builder, vec_type, xmax);
+ mask_input = LLVMBuildLShr(builder,
+ mask_input,
+ LLVMConstInt(i32t, quad * 4, 0),
+ "");
- ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
- ymax = lp_build_broadcast(builder, vec_type, ymax);
+ /*
+ * mask = { mask_input & (1 << i), for i in [0,3] }
+ */
- /* compare the fragment's position coordinates against the scissor bounds */
- m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
- m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
- m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
- m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);
+ mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input);
- /* AND all the masks together */
- m = LLVMBuildAnd(builder, m0, m1, "");
- m = LLVMBuildAnd(builder, m, m2, "");
- m = LLVMBuildAnd(builder, m, m3, "");
+ bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
+ bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
+ bits[2] = LLVMConstInt(i32t, 1 << 2, 0);
+ bits[3] = LLVMConstInt(i32t, 1 << 3, 0);
- lp_build_name(m, "scissormask");
+ mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");
- return m;
-}
+ /*
+ * mask = mask != 0 ? ~0 : 0
+ */
+ mask = lp_build_compare(builder,
+ mask_type, PIPE_FUNC_NOTEQUAL,
+ mask,
+ lp_build_const_int_vec(mask_type, 0));
-static LLVMValueRef
-build_int32_vec_const(int value)
-{
- struct lp_type i32_type;
-
- memset(&i32_type, 0, sizeof i32_type);
- i32_type.floating = FALSE; /* values are integers */
- i32_type.sign = TRUE; /* values are signed */
- i32_type.norm = FALSE; /* values are not normalized */
- i32_type.width = 32; /* 32-bit int values */
- i32_type.length = 4; /* 4 elements per vector */
- return lp_build_const_int_vec(i32_type, value);
+ return mask;
}
@@ -348,7 +232,7 @@ build_int32_vec_const(int value)
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
* \param i which quad in the tile, in range [0,3]
- * \param do_tri_test if 1, do triangle edge in/out testing
+ * \param partial_mask if 1, do mask_input testing
*/
static void
generate_fs(struct llvmpipe_context *lp,
@@ -364,13 +248,8 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef (*color)[4],
LLVMValueRef depth_ptr,
LLVMValueRef facing,
- unsigned do_tri_test,
- LLVMValueRef c0,
- LLVMValueRef c1,
- LLVMValueRef c2,
- LLVMValueRef step0_ptr,
- LLVMValueRef step1_ptr,
- LLVMValueRef step2_ptr,
+ unsigned partial_mask,
+ LLVMValueRef mask_input,
LLVMValueRef counter)
{
const struct tgsi_token *tokens = shader->base.tokens;
@@ -411,23 +290,17 @@ generate_fs(struct llvmpipe_context *lp,
lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
- if (do_tri_test) {
- generate_tri_edge_mask(builder, i, pmask,
- c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ if (partial_mask) {
+ *pmask = generate_quad_mask(builder, type,
+ i, mask_input);
}
else {
- *pmask = build_int32_vec_const(~0);
+ *pmask = lp_build_const_int_vec(type, ~0);
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
- if (key->scissor) {
- LLVMValueRef smask =
- generate_scissor_test(builder, context_ptr, interp, type);
- lp_build_mask_update(&mask, smask);
- }
-
early_depth_stencil_test =
(key->depth.enabled || key->stencil[0].enabled) &&
!key->alpha.enabled &&
@@ -579,7 +452,7 @@ static void
generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant *variant,
- unsigned do_tri_test)
+ unsigned partial_mask)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
@@ -589,9 +462,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMTypeRef fs_elem_type;
LLVMTypeRef fs_int_vec_type;
LLVMTypeRef blend_vec_type;
- LLVMTypeRef arg_types[16];
+ LLVMTypeRef arg_types[11];
LLVMTypeRef func_type;
- LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
LLVMValueRef context_ptr;
LLVMValueRef x;
LLVMValueRef y;
@@ -600,7 +472,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef dady_ptr;
LLVMValueRef color_ptr_ptr;
LLVMValueRef depth_ptr;
- LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL;
+ LLVMValueRef mask_input;
+ LLVMValueRef counter = NULL;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
struct lp_build_sampler_soa *sampler;
@@ -645,7 +518,7 @@ generate_fragment(struct llvmpipe_context *lp,
blend_vec_type = lp_build_vec_type(blend_type);
util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
- shader->no, variant->no, do_tri_test ? "edge" : "whole");
+ shader->no, variant->no, partial_mask ? "partial" : "whole");
arg_types[0] = screen->context_ptr_type; /* context */
arg_types[1] = LLVMInt32Type(); /* x */
@@ -656,23 +529,15 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
- arg_types[9] = LLVMInt32Type(); /* c0 */
- arg_types[10] = LLVMInt32Type(); /* c1 */
- arg_types[11] = LLVMInt32Type(); /* c2 */
- /* Note: the step arrays are built as int32[16] but we interpret
- * them here as int32_vec4[4].
- */
- arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
- arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
- arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
- arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
+ arg_types[9] = LLVMInt32Type(); /* mask_input */
+ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
function = LLVMAddFunction(screen->module, func_name, func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
- variant->function[do_tri_test] = function;
+ variant->function[partial_mask] = function;
/* XXX: need to propagate noalias down into color param now we are
@@ -691,12 +556,7 @@ generate_fragment(struct llvmpipe_context *lp,
dady_ptr = LLVMGetParam(function, 6);
color_ptr_ptr = LLVMGetParam(function, 7);
depth_ptr = LLVMGetParam(function, 8);
- c0 = LLVMGetParam(function, 9);
- c1 = LLVMGetParam(function, 10);
- c2 = LLVMGetParam(function, 11);
- step0_ptr = LLVMGetParam(function, 12);
- step1_ptr = LLVMGetParam(function, 13);
- step2_ptr = LLVMGetParam(function, 14);
+ mask_input = LLVMGetParam(function, 9);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
@@ -706,15 +566,10 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(dady_ptr, "dady");
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
lp_build_name(depth_ptr, "depth");
- lp_build_name(c0, "c0");
- lp_build_name(c1, "c1");
- lp_build_name(c2, "c2");
- lp_build_name(step0_ptr, "step0");
- lp_build_name(step1_ptr, "step1");
- lp_build_name(step2_ptr, "step2");
+ lp_build_name(mask_input, "mask_input");
if (key->occlusion_count) {
- counter = LLVMGetParam(function, 15);
+ counter = LLVMGetParam(function, 10);
lp_build_name(counter, "counter");
}
@@ -763,9 +618,9 @@ generate_fragment(struct llvmpipe_context *lp,
out_color,
depth_ptr_i,
facing,
- do_tri_test,
- c0, c1, c2,
- step0_ptr, step1_ptr, step2_ptr, counter);
+ partial_mask,
+ mask_input,
+ counter);
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
for(chan = 0; chan < NUM_CHANNELS; ++chan)
@@ -792,9 +647,13 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
}
- lp_build_conv_mask(builder, fs_type, blend_type,
- fs_mask, num_fs,
- &blend_mask, 1);
+ if (partial_mask || !variant->opaque) {
+ lp_build_conv_mask(builder, fs_type, blend_type,
+ fs_mask, num_fs,
+ &blend_mask, 1);
+ } else {
+ blend_mask = lp_build_const_int_vec(blend_type, ~0);
+ }
color_ptr = LLVMBuildLoad(builder,
LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
@@ -832,8 +691,7 @@ generate_fragment(struct llvmpipe_context *lp,
#endif
/* Apply optimizations to LLVM IR */
- if (1)
- LLVMRunFunctionPassManager(screen->pass, function);
+ LLVMRunFunctionPassManager(screen->pass, function);
if (gallivm_debug & GALLIVM_DEBUG_IR) {
/* Print the LLVM IR to stderr */
@@ -847,7 +705,7 @@ generate_fragment(struct llvmpipe_context *lp,
{
void *f = LLVMGetPointerToGlobal(screen->engine, function);
- variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f);
+ variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
lp_disassemble(f);
@@ -963,7 +821,6 @@ generate_variant(struct llvmpipe_context *lp,
!key->stencil[0].enabled &&
!key->alpha.enabled &&
!key->depth.enabled &&
- !key->scissor &&
!shader->info.uses_kill
? TRUE : FALSE;
@@ -1182,7 +1039,6 @@ make_variant_key(struct llvmpipe_context *lp,
/* alpha.ref_value is passed in jit_context */
key->flatshade = lp->rasterizer->flatshade;
- key->scissor = lp->rasterizer->scissor;
if (lp->active_query_count) {
key->occlusion_count = TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 593cd4de6b..37900fc544 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -54,7 +54,6 @@ struct lp_fragment_shader_variant_key
enum pipe_format zsbuf_format;
unsigned nr_cbufs:8;
unsigned flatshade:1;
- unsigned scissor:1;
unsigned occlusion_count:1;
struct {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index e94065fb6a..715ce2f02e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -35,10 +35,9 @@
#include "draw/draw_context.h"
#include "lp_context.h"
-#include "lp_context.h"
+#include "lp_screen.h"
#include "lp_state.h"
-#include "draw/draw_context.h"
-
+#include "state_tracker/sw_winsys.h"
static void *
@@ -100,6 +99,10 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
llvmpipe->num_vertex_samplers = num_samplers;
+ draw_set_samplers(llvmpipe->draw,
+ llvmpipe->vertex_samplers,
+ llvmpipe->num_vertex_samplers);
+
llvmpipe->dirty |= LP_NEW_SAMPLER;
}
@@ -166,6 +169,10 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe,
llvmpipe->num_vertex_sampler_views = num;
+ draw_set_sampler_views(llvmpipe->draw,
+ llvmpipe->vertex_sampler_views,
+ llvmpipe->num_vertex_sampler_views);
+
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
}
@@ -214,6 +221,77 @@ llvmpipe_delete_sampler_state(struct pipe_context *pipe,
}
+/**
+ * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ */
+void
+llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
+ const void *data[DRAW_MAX_TEXTURE_LEVELS];
+
+ assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+ if (!num)
+ return;
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+
+ if (view) {
+ struct pipe_resource *tex = view->texture;
+ struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex);
+
+ /* We're referencing the texture's internal data, so save a
+ * reference to it.
+ */
+ pipe_resource_reference(&lp->mapped_vs_tex[i], tex);
+
+ if (!lp_tex->dt) {
+ /* regular texture - setup array of mipmap level pointers */
+ int j;
+ for (j = 0; j <= tex->last_level; j++) {
+ data[j] =
+ llvmpipe_get_texture_image_all(lp_tex, j, LP_TEX_USAGE_READ,
+ LP_TEX_LAYOUT_LINEAR);
+ row_stride[j] = lp_tex->row_stride[j];
+ img_stride[j] = lp_tex->img_stride[j];
+ }
+ }
+ else {
+ /* display target texture/surface */
+ /*
+ * XXX: Where should this be unmapped?
+ */
+ struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
+ struct sw_winsys *winsys = screen->winsys;
+ data[0] = winsys->displaytarget_map(winsys, lp_tex->dt,
+ PIPE_TRANSFER_READ);
+ row_stride[0] = lp_tex->row_stride[0];
+ img_stride[0] = lp_tex->img_stride[0];
+ assert(data[0]);
+ }
+ draw_set_mapped_texture(lp->draw,
+ i,
+ tex->width0, tex->height0, tex->depth0,
+ tex->last_level,
+ row_stride, img_stride, data);
+ }
+ }
+}
+
+void
+llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx)
+{
+ unsigned i;
+ for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) {
+ pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL);
+ }
+}
+
void
llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c b/src/gallium/drivers/llvmpipe/lp_state_so.c
index 4c64a5b142..30b17c9881 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_so.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_so.c
@@ -29,7 +29,6 @@
#include "lp_state.h"
#include "lp_texture.h"
-#include "util/u_format.h"
#include "util/u_memory.h"
#include "draw/draw_context.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index 76b3fce1fa..f761e82850 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -67,14 +67,14 @@ lp_resource_copy(struct pipe_context *pipe,
dst, subdst.face, subdst.level,
0, /* flush_flags */
FALSE, /* read_only */
- FALSE, /* cpu_access */
+ TRUE, /* cpu_access */
FALSE); /* do_not_block */
llvmpipe_flush_resource(pipe,
src, subsrc.face, subsrc.level,
0, /* flush_flags */
TRUE, /* read_only */
- FALSE, /* cpu_access */
+ TRUE, /* cpu_access */
FALSE); /* do_not_block */
/*
@@ -106,19 +106,27 @@ lp_resource_copy(struct pipe_context *pipe,
unsigned x, y;
enum lp_texture_usage usage;
- /* XXX for the tiles which are completely contained by the
- * dest rectangle, we could set the usage mode to WRITE_ALL.
- * Just test for the case of replacing the whole dest region for now.
- */
- if (width == dst_tex->base.width0 && height == dst_tex->base.height0)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
adjust_to_tile_bounds(dstx, dsty, width, height, &tx, &ty, &tw, &th);
for (y = 0; y < th; y += TILE_SIZE) {
+ boolean contained_y = ty + y >= dsty &&
+ ty + y + TILE_SIZE <= dsty + height ?
+ TRUE : FALSE;
+
for (x = 0; x < tw; x += TILE_SIZE) {
+ boolean contained_x = tx + x >= dstx &&
+ tx + x + TILE_SIZE <= dstx + width ?
+ TRUE : FALSE;
+
+ /*
+ * Set the usage mode to WRITE_ALL for the tiles which are
+ * completely contained by the dest rectangle.
+ */
+ if (contained_y && contained_x)
+ usage = LP_TEX_USAGE_WRITE_ALL;
+ else
+ usage = LP_TEX_USAGE_READ_WRITE;
+
(void) llvmpipe_get_texture_tile_linear(dst_tex,
subdst.face, subdst.level,
usage,
@@ -138,13 +146,15 @@ lp_resource_copy(struct pipe_context *pipe,
subdst.level,
LP_TEX_LAYOUT_LINEAR);
- util_copy_rect(dst_linear_ptr, format,
- llvmpipe_resource_stride(&dst_tex->base, subdst.level),
- dstx, dsty,
- width, height,
- src_linear_ptr,
- llvmpipe_resource_stride(&src_tex->base, subsrc.level),
- srcx, srcy);
+ if (dst_linear_ptr && src_linear_ptr) {
+ util_copy_rect(dst_linear_ptr, format,
+ llvmpipe_resource_stride(&dst_tex->base, subdst.level),
+ dstx, dsty,
+ width, height,
+ src_linear_ptr,
+ llvmpipe_resource_stride(&src_tex->base, subsrc.level),
+ srcx, srcy);
+ }
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 9b02f436c5..cf41b40581 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -167,19 +167,26 @@ test_one(unsigned verbose,
unsigned i, j;
void *code;
+ if (src_type.width * src_type.length != dst_type.width * dst_type.length &&
+ src_type.length != dst_type.length) {
+ return TRUE;
+ }
+
if(verbose >= 1)
dump_conv_types(stdout, src_type, dst_type);
- if(src_type.length > dst_type.length) {
+ if (src_type.length > dst_type.length) {
num_srcs = 1;
num_dsts = src_type.length/dst_type.length;
}
- else {
+ else if (src_type.length < dst_type.length) {
num_dsts = 1;
num_srcs = dst_type.length/src_type.length;
}
-
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+ else {
+ num_dsts = 1;
+ num_srcs = 1;
+ }
/* We must not loose or gain channels. Only precision */
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
@@ -381,6 +388,11 @@ const struct lp_type conv_types[] = {
{ FALSE, FALSE, TRUE, FALSE, 8, 16 },
{ FALSE, FALSE, FALSE, TRUE, 8, 16 },
{ FALSE, FALSE, FALSE, FALSE, 8, 16 },
+
+ { FALSE, FALSE, TRUE, TRUE, 8, 4 },
+ { FALSE, FALSE, TRUE, FALSE, 8, 4 },
+ { FALSE, FALSE, FALSE, TRUE, 8, 4 },
+ { FALSE, FALSE, FALSE, FALSE, 8, 4 },
};
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 8b6dc1c7f5..2855d7cea4 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -31,6 +31,7 @@
#include <float.h>
#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_init.h"
#include <llvm-c/Analysis.h>
#include <llvm-c/Target.h>
@@ -38,6 +39,7 @@
#include "util/u_memory.h"
#include "util/u_pointer.h"
+#include "util/u_string.h"
#include "util/u_format.h"
#include "util/u_format_tests.h"
#include "util/u_format_s3tc.h"
@@ -71,17 +73,20 @@ write_tsv_row(FILE *fp,
typedef void
-(*fetch_ptr_t)(float *, const void *packed,
+(*fetch_ptr_t)(void *unpacked, const void *packed,
unsigned i, unsigned j);
static LLVMValueRef
-add_fetch_rgba_test(LLVMModuleRef lp_build_module,
- const struct util_format_description *desc)
+add_fetch_rgba_test(unsigned verbose,
+ const struct util_format_description *desc,
+ struct lp_type type)
{
+ char name[256];
LLVMTypeRef args[4];
LLVMValueRef func;
LLVMValueRef packed_ptr;
+ LLVMValueRef offset = LLVMConstNull(LLVMInt32Type());
LLVMValueRef rgba_ptr;
LLVMValueRef i;
LLVMValueRef j;
@@ -89,11 +94,15 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module,
LLVMBuilderRef builder;
LLVMValueRef rgba;
- args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+ util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
+ type.floating ? "float" : "unorm8");
+
+ args[0] = LLVMPointerType(lp_build_vec_type(type), 0);
args[1] = LLVMPointerType(LLVMInt8Type(), 0);
args[3] = args[2] = LLVMInt32Type();
- func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
+ func = LLVMAddFunction(lp_build_module, name,
+ LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
rgba_ptr = LLVMGetParam(func, 0);
packed_ptr = LLVMGetParam(func, 1);
@@ -104,91 +113,104 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module,
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j);
+ rgba = lp_build_fetch_rgba_aos(builder, desc, type,
+ packed_ptr, offset, i, j);
LLVMBuildStore(builder, rgba, rgba_ptr);
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
+
+ if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) {
+ LLVMDumpValue(func);
+ abort();
+ }
+
+ LLVMRunFunctionPassManager(lp_build_pass, func);
+
+ if (verbose >= 1) {
+ LLVMDumpValue(func);
+ }
+
return func;
}
PIPE_ALIGN_STACK
static boolean
-test_format(unsigned verbose, FILE *fp,
- const struct util_format_description *desc,
- const struct util_format_test_case *test)
+test_format_float(unsigned verbose, FILE *fp,
+ const struct util_format_description *desc)
{
LLVMValueRef fetch = NULL;
- LLVMPassManagerRef pass = NULL;
fetch_ptr_t fetch_ptr;
PIPE_ALIGN_VAR(16) float unpacked[4];
- boolean success;
- unsigned i, j, k;
+ boolean first = TRUE;
+ boolean success = TRUE;
+ unsigned i, j, k, l;
+ void *f;
- fetch = add_fetch_rgba_test(lp_build_module, desc);
+ fetch = add_fetch_rgba_test(verbose, desc, lp_float32_vec4_type());
- if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) {
- LLVMDumpValue(fetch);
- abort();
+ f = LLVMGetPointerToGlobal(lp_build_engine, fetch);
+ fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
+
+ if (verbose >= 2) {
+ lp_disassemble(f);
}
-#if 0
- pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- LLVMAddConstantPropagationPass(pass);
- LLVMAddInstructionCombiningPass(pass);
- LLVMAddPromoteMemoryToRegisterPass(pass);
- LLVMAddGVNPass(pass);
- LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, lp_build_module);
-#else
- (void)pass;
-#endif
-
- fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch));
-
- for (i = 0; i < desc->block.height; ++i) {
- for (j = 0; j < desc->block.width; ++j) {
-
- memset(unpacked, 0, sizeof unpacked);
-
- fetch_ptr(unpacked, test->packed, j, i);
-
- success = TRUE;
- for(k = 0; k < 4; ++k)
- if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
- success = FALSE;
-
- if (!success) {
- printf("FAILED\n");
- printf(" Packed: %02x %02x %02x %02x\n",
- test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
- printf(" Unpacked (%u,%u): %f %f %f %f obtained\n",
- j, i,
- unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
- printf(" %f %f %f %f expected\n",
- test->unpacked[i][j][0],
- test->unpacked[i][j][1],
- test->unpacked[i][j][2],
- test->unpacked[i][j][3]);
+ for (l = 0; l < util_format_nr_test_cases; ++l) {
+ const struct util_format_test_case *test = &util_format_test_cases[l];
+
+ if (test->format == desc->format) {
+
+ if (first) {
+ printf("Testing %s (float) ...\n",
+ desc->name);
+ first = FALSE;
+ }
+
+ for (i = 0; i < desc->block.height; ++i) {
+ for (j = 0; j < desc->block.width; ++j) {
+ boolean match;
+
+ memset(unpacked, 0, sizeof unpacked);
+
+ fetch_ptr(unpacked, test->packed, j, i);
+
+ match = TRUE;
+ for(k = 0; k < 4; ++k)
+ if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
+ match = FALSE;
+
+ if (!match) {
+ printf("FAILED\n");
+ printf(" Packed: %02x %02x %02x %02x\n",
+ test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+ printf(" Unpacked (%u,%u): %f %f %f %f obtained\n",
+ j, i,
+ unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+ printf(" %f %f %f %f expected\n",
+ test->unpacked[i][j][0],
+ test->unpacked[i][j][1],
+ test->unpacked[i][j][2],
+ test->unpacked[i][j][3]);
+ success = FALSE;
+ }
+ }
}
}
}
- if (!success)
- LLVMDumpValue(fetch);
+ if (!success) {
+ if (verbose < 1) {
+ LLVMDumpValue(fetch);
+ }
+ }
LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
LLVMDeleteFunction(fetch);
- if(pass)
- LLVMDisposePassManager(pass);
-
if(fp)
write_tsv_row(fp, desc, success);
@@ -196,32 +218,104 @@ test_format(unsigned verbose, FILE *fp,
}
-
+PIPE_ALIGN_STACK
static boolean
-test_one(unsigned verbose, FILE *fp,
- const struct util_format_description *format_desc)
+test_format_unorm8(unsigned verbose, FILE *fp,
+ const struct util_format_description *desc)
{
- unsigned i;
+ LLVMValueRef fetch = NULL;
+ fetch_ptr_t fetch_ptr;
+ uint8_t unpacked[4];
boolean first = TRUE;
boolean success = TRUE;
+ unsigned i, j, k, l;
+ void *f;
- for (i = 0; i < util_format_nr_test_cases; ++i) {
- const struct util_format_test_case *test = &util_format_test_cases[i];
+ fetch = add_fetch_rgba_test(verbose, desc, lp_unorm8_vec4_type());
- if (test->format == format_desc->format) {
+ f = LLVMGetPointerToGlobal(lp_build_engine, fetch);
+ fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
+
+ if (verbose >= 2) {
+ lp_disassemble(f);
+ }
+
+ for (l = 0; l < util_format_nr_test_cases; ++l) {
+ const struct util_format_test_case *test = &util_format_test_cases[l];
+
+ if (test->format == desc->format) {
if (first) {
- printf("Testing %s ...\n",
- format_desc->name);
+ printf("Testing %s (unorm8) ...\n",
+ desc->name);
first = FALSE;
}
- if (!test_format(verbose, fp, format_desc, test)) {
- success = FALSE;
+ for (i = 0; i < desc->block.height; ++i) {
+ for (j = 0; j < desc->block.width; ++j) {
+ boolean match;
+
+ memset(unpacked, 0, sizeof unpacked);
+
+ fetch_ptr(unpacked, test->packed, j, i);
+
+ match = TRUE;
+ for(k = 0; k < 4; ++k) {
+ int error = float_to_ubyte(test->unpacked[i][j][k]) - unpacked[k];
+ if (error < 0)
+ error = -error;
+ if (error > 1)
+ match = FALSE;
+ }
+
+ if (!match) {
+ printf("FAILED\n");
+ printf(" Packed: %02x %02x %02x %02x\n",
+ test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+ printf(" Unpacked (%u,%u): %02x %02x %02x %02x obtained\n",
+ j, i,
+ unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+ printf(" %02x %02x %02x %02x expected\n",
+ float_to_ubyte(test->unpacked[i][j][0]),
+ float_to_ubyte(test->unpacked[i][j][1]),
+ float_to_ubyte(test->unpacked[i][j][2]),
+ float_to_ubyte(test->unpacked[i][j][3]));
+ success = FALSE;
+ }
+ }
}
}
}
+ if (!success)
+ LLVMDumpValue(fetch);
+
+ LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+ LLVMDeleteFunction(fetch);
+
+ if(fp)
+ write_tsv_row(fp, desc, success);
+
+ return success;
+}
+
+
+
+
+/**
+ * Run test_format_float() and test_format_unorm8() for one format.
+ *
+ * Both tests are always executed, even when the first one fails.
+ *
+ * \return TRUE only if both tests returned TRUE.
+ */
+static boolean
+test_one(unsigned verbose, FILE *fp,
+ const struct util_format_description *format_desc)
+{
+ boolean success = TRUE;
+
+ if (!test_format_float(verbose, fp, format_desc)) {
+ success = FALSE;
+ }
+
+ if (!test_format_unorm8(verbose, fp, format_desc)) {
+ success = FALSE;
+ }
+
 return success;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c
new file mode 100644
index 0000000000..f571a81a4a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_round.c
@@ -0,0 +1,277 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "util/u_pointer.h"
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_arit.h"
+
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "lp_test.h"
+
+
+/**
+ * Write the column-title row of the TSV report and flush the stream.
+ */
+void
+write_tsv_header(FILE *fp)
+{
+   fputs("result\tformat\n", fp);
+   fflush(fp);
+}
+
+
+#ifdef PIPE_ARCH_SSE
+
+#define USE_SSE2
+#include "sse_mathfun.h"
+
+typedef __m128 (*test_round_t)(__m128);
+
+typedef LLVMValueRef (*lp_func_t)(struct lp_build_context *, LLVMValueRef);
+
+
+/**
+ * Add a function to the module that takes and returns a 4 x float vector.
+ * The function body is the IR emitted by lp_func for the single argument.
+ *
+ * \param module   module the new function is added to
+ * \param name     symbol name of the new function
+ * \param lp_func  callback that emits the IR computing the return value
+ * \return the newly created function
+ */
+static LLVMValueRef
+add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func)
+{
+   LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4);
+   LLVMTypeRef args[1] = { v4sf };
+   LLVMValueRef func = LLVMAddFunction(module, name, LLVMFunctionType(v4sf, args, 1, 0));
+   LLVMValueRef arg1 = LLVMGetParam(func, 0);
+   LLVMBuilderRef builder = LLVMCreateBuilder();
+   LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+   LLVMValueRef ret;
+   struct lp_build_context bld;
+
+   /* Zero the whole context first: only a few fields are assigned below and
+    * the callback must not read indeterminate stack contents from the rest.
+    * NOTE(review): the remaining type flags are now deterministically zero;
+    * confirm whether the rounding helpers expect a signed float type here.
+    */
+   memset(&bld, 0, sizeof bld);
+
+   bld.builder = builder;
+   bld.type.floating = 1;
+   bld.type.width = 32;
+   bld.type.length = 4;
+
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   ret = lp_func(&bld, arg1);
+
+   LLVMBuildRet(builder, ret);
+   LLVMDisposeBuilder(builder);
+   return func;
+}
+
+/**
+ * Print a label followed by the four float lanes of a vector.
+ */
+static void
+printv(char* string, v4sf value)
+{
+   const float *lanes = (const float *) &value;
+
+   printf("%s: %10f %10f %10f %10f\n",
+          string, lanes[0], lanes[1], lanes[2], lanes[3]);
+}
+
+/**
+ * Compare two vectors lane by lane using exact float equality.
+ * Prints a marker line when any of the four lanes differ.
+ *
+ * \return TRUE if all four lanes are equal, FALSE otherwise.
+ */
+static boolean
+compare(v4sf x, v4sf y)
+{
+   float *xp = (float *) &x;
+   float *yp = (float *) &y;
+   if (xp[0] != yp[0] ||
+       xp[1] != yp[1] ||
+       xp[2] != yp[2] ||
+       xp[3] != yp[3]) {
+      printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n");
+      return FALSE;
+   }
+   return TRUE;
+}
+
+
+
+/**
+ * JIT-compile the round/trunc/floor/ceil builders and check each of them
+ * against the C library result for a few fixed input vectors.
+ *
+ * \param verbose  unused here
+ * \param fp       unused here
+ * \return TRUE if every generated result matched the C reference,
+ *         FALSE if at least one comparison failed.
+ */
+PIPE_ALIGN_STACK
+static boolean
+test_round(unsigned verbose, FILE *fp)
+{
+   LLVMModuleRef module = NULL;
+   LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil;
+   LLVMExecutionEngineRef engine = NULL;
+   LLVMModuleProviderRef provider = NULL;
+   LLVMPassManagerRef pass = NULL;
+   char *error = NULL;
+   test_round_t round_func, trunc_func, floor_func, ceil_func;
+   boolean success = TRUE;
+   int i;
+
+   module = LLVMModuleCreateWithName("test");
+
+   test_round = add_test(module, "round", lp_build_round);
+   test_trunc = add_test(module, "trunc", lp_build_trunc);
+   test_floor = add_test(module, "floor", lp_build_floor);
+   test_ceil = add_test(module, "ceil", lp_build_ceil);
+
+   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+      printf("LLVMVerifyModule: %s\n", error);
+      LLVMDumpModule(module);
+      abort();
+   }
+   LLVMDisposeMessage(error);
+
+   provider = LLVMCreateModuleProviderForExistingModule(module);
+   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+#if 0
+   pass = LLVMCreatePassManager();
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(pass);
+   LLVMAddInstructionCombiningPass(pass);
+   LLVMAddPromoteMemoryToRegisterPass(pass);
+   LLVMAddGVNPass(pass);
+   LLVMAddCFGSimplificationPass(pass);
+   LLVMRunPassManager(pass, module);
+#else
+   (void)pass;
+#endif
+
+   round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round));
+   trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc));
+   floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor));
+   ceil_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_ceil));
+
+   if (0)
+      LLVMDumpModule(module);
+
+   for (i = 0; i < 3; i++) {
+      v4sf xvals[3] = {
+         {-10.0, -1, 0, 12.0},
+         {-1.5, -0.25, 1.25, 2.5},
+         {-0.99, -0.01, 0.01, 0.99}
+      };
+      v4sf x = xvals[i];
+      v4sf y, ref;
+      float *xp = (float *) &x;
+      float *refp = (float *) &ref;
+
+      printf("\n");
+      printv("x ", x);
+
+      /* Each mismatch is recorded so the test can actually fail */
+      refp[0] = round(xp[0]);
+      refp[1] = round(xp[1]);
+      refp[2] = round(xp[2]);
+      refp[3] = round(xp[3]);
+      y = round_func(x);
+      printv("C round(x) ", ref);
+      printv("LLVM round(x)", y);
+      if (!compare(ref, y))
+         success = FALSE;
+
+      refp[0] = trunc(xp[0]);
+      refp[1] = trunc(xp[1]);
+      refp[2] = trunc(xp[2]);
+      refp[3] = trunc(xp[3]);
+      y = trunc_func(x);
+      printv("C trunc(x) ", ref);
+      printv("LLVM trunc(x)", y);
+      if (!compare(ref, y))
+         success = FALSE;
+
+      refp[0] = floor(xp[0]);
+      refp[1] = floor(xp[1]);
+      refp[2] = floor(xp[2]);
+      refp[3] = floor(xp[3]);
+      y = floor_func(x);
+      printv("C floor(x) ", ref);
+      printv("LLVM floor(x)", y);
+      if (!compare(ref, y))
+         success = FALSE;
+
+      refp[0] = ceil(xp[0]);
+      refp[1] = ceil(xp[1]);
+      refp[2] = ceil(xp[2]);
+      refp[3] = ceil(xp[3]);
+      y = ceil_func(x);
+      printv("C ceil(x) ", ref);
+      printv("LLVM ceil(x) ", y);
+      if (!compare(ref, y))
+         success = FALSE;
+   }
+
+   LLVMFreeMachineCodeForFunction(engine, test_round);
+   LLVMFreeMachineCodeForFunction(engine, test_trunc);
+   LLVMFreeMachineCodeForFunction(engine, test_floor);
+   LLVMFreeMachineCodeForFunction(engine, test_ceil);
+
+   LLVMDisposeExecutionEngine(engine);
+   if(pass)
+      LLVMDisposePassManager(pass);
+
+   return success;
+}
+
+#else /* !PIPE_ARCH_SSE */
+
+/**
+ * Stub for builds without PIPE_ARCH_SSE: performs no checks and
+ * reports success.
+ */
+static boolean
+test_round(unsigned verbose, FILE *fp)
+{
+ return TRUE;
+}
+
+#endif /* !PIPE_ARCH_SSE */
+
+
+/**
+ * Run every test of this program (currently only test_round()).
+ *
+ * \return TRUE if all tests passed, FALSE otherwise.
+ */
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+   boolean success = TRUE;
+
+   /* Propagate the test result instead of discarding it */
+   if (!test_round(verbose, fp))
+      success = FALSE;
+
+   return success;
+}
+
+
+/**
+ * Run the tests; the count n is ignored and test_all() is run instead.
+ */
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
+}
+
+/**
+ * No single-case test is implemented here: prints a notice and
+ * returns TRUE.
+ */
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
index c7a903a025..1366ecddcb 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
@@ -108,7 +108,6 @@ test_sincos(unsigned verbose, FILE *fp)
test_sincos_t sin_func;
test_sincos_t cos_func;
float unpacked[4];
- unsigned packed;
boolean success = TRUE;
module = LLVMModuleCreateWithName("test");
@@ -149,7 +148,6 @@ test_sincos(unsigned verbose, FILE *fp)
cos_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_cos);
memset(unpacked, 0, sizeof unpacked);
- packed = 0;
// LLVMDumpModule(module);
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 0d526ead89..25112c10a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -36,6 +36,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -55,6 +56,7 @@
#ifdef DEBUG
static struct llvmpipe_resource resource_list;
#endif
+static unsigned id_counter = 0;
static INLINE boolean
@@ -183,8 +185,8 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
*/
const unsigned width = align(lpr->base.width0, TILE_SIZE);
const unsigned height = align(lpr->base.height0, TILE_SIZE);
- const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE;
- const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE;
+ const unsigned width_t = width / TILE_SIZE;
+ const unsigned height_t = height / TILE_SIZE;
lpr->tiles_per_row[0] = width_t;
lpr->tiles_per_image[0] = width_t * height_t;
@@ -209,7 +211,6 @@ static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
const struct pipe_resource *templat)
{
- static unsigned id_counter = 0;
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
if (!lpr)
@@ -389,7 +390,6 @@ llvmpipe_resource_map(struct pipe_resource *resource,
map = llvmpipe_get_texture_image(lpr, face + zslice, level,
tex_usage, layout);
- assert(map);
return map;
}
else {
@@ -446,6 +446,10 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen,
{
struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys;
struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
+ unsigned width, height, width_t, height_t;
+
+ /* XXX Seems like from_handled depth textures doesn't work that well */
+
if (!lpr)
return NULL;
@@ -453,6 +457,25 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen,
pipe_reference_init(&lpr->base.reference, 1);
lpr->base.screen = screen;
+ width = align(lpr->base.width0, TILE_SIZE);
+ height = align(lpr->base.height0, TILE_SIZE);
+ width_t = width / TILE_SIZE;
+ height_t = height / TILE_SIZE;
+
+ /*
+ * Looks like unaligned displaytargets work just fine,
+ * at least sampler/render ones.
+ */
+#if 0
+ assert(lpr->base.width0 == width);
+ assert(lpr->base.height0 == height);
+#endif
+
+ lpr->tiles_per_row[0] = width_t;
+ lpr->tiles_per_image[0] = width_t * height_t;
+ lpr->num_slices_faces[0] = 1;
+ lpr->img_stride[0] = 0;
+
lpr->dt = winsys->displaytarget_from_handle(winsys,
template,
whandle,
@@ -460,6 +483,17 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen,
if (!lpr->dt)
goto fail;
+ lpr->layout[0] = alloc_layout_array(1, lpr->base.width0, lpr->base.height0);
+
+ assert(lpr->layout[0]);
+ assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE);
+
+ lpr->id = id_counter++;
+
+#ifdef DEBUG
+ insert_at_tail(&resource_list, lpr);
+#endif
+
return &lpr->base;
fail:
@@ -899,13 +933,15 @@ static void
alloc_image_data(struct llvmpipe_resource *lpr, unsigned level,
enum lp_texture_layout layout)
{
+ uint alignment = MAX2(16, util_cpu_caps.cacheline);
+
if (lpr->dt)
assert(level == 0);
if (layout == LP_TEX_LAYOUT_TILED) {
/* tiled data is stored in regular memory */
uint buffer_size = tex_image_size(lpr, level, layout);
- lpr->tiled[level].data = align_malloc(buffer_size, 16);
+ lpr->tiled[level].data = align_malloc(buffer_size, alignment);
}
else {
assert(layout == LP_TEX_LAYOUT_LINEAR);
@@ -921,7 +957,7 @@ alloc_image_data(struct llvmpipe_resource *lpr, unsigned level,
else {
/* not a display target - allocate regular memory */
uint buffer_size = tex_image_size(lpr, level, LP_TEX_LAYOUT_LINEAR);
- lpr->linear[level].data = align_malloc(buffer_size, 16);
+ lpr->linear[level].data = align_malloc(buffer_size, alignment);
}
}
}
@@ -1035,7 +1071,7 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr,
layout_logic(cur_layout, layout, usage, &new_layout, &convert);
- if (convert) {
+ if (convert && other_data && target_data) {
if (layout == LP_TEX_LAYOUT_TILED) {
lp_linear_to_tiled(other_data, target_data,
x * TILE_SIZE, y * TILE_SIZE,
@@ -1067,8 +1103,6 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr,
width_t, height_t, layout);
}
- assert(target_data);
-
return target_data;
}
@@ -1138,7 +1172,7 @@ llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr,
layout_logic(cur_layout, LP_TEX_LAYOUT_LINEAR, usage,
&new_layout, &convert);
- if (convert) {
+ if (convert && tiled_image && linear_image) {
lp_tiled_to_linear(tiled_image, linear_image,
x, y, TILE_SIZE, TILE_SIZE, lpr->base.format,
lpr->row_stride[level],
@@ -1187,13 +1221,16 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
cur_layout = llvmpipe_get_texture_tile_layout(lpr, face_slice, level, tx, ty);
layout_logic(cur_layout, LP_TEX_LAYOUT_TILED, usage, &new_layout, &convert);
- if (convert) {
+ if (convert && linear_image && tiled_image) {
lp_linear_to_tiled(linear_image, tiled_image,
x, y, TILE_SIZE, TILE_SIZE, lpr->base.format,
lpr->row_stride[level],
lpr->tiles_per_row[level]);
}
+ if (!tiled_image)
+ return NULL;
+
if (new_layout != cur_layout)
llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, new_layout);
@@ -1206,6 +1243,94 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
/**
+ * Get pointer to tiled data for rendering.
+ * \return pointer to the tiled data at the given tile position
+ */
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile)
+{
+ struct llvmpipe_texture_image *linear_img = &lpr->linear[level];
+ const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
+ uint8_t *linear_image;
+
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+
+ if (!linear_img->data) {
+ /* allocate memory for the linear image now */
+ alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR);
+ }
+
+ /* compute address of the slice/face of the image that contains the tile */
+ linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_LINEAR);
+
+ {
+ uint ii = x, jj = y;
+ uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+ uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+
+ /* Note that lp_tiled_to_linear expects the tile parameter to
+ * point at the first tile in a whole-image sized array. In
+ * this code, we have only a single tile and have to do some
+ * pointer arithmetic to figure out where the "image" would have
+ * started.
+ */
+ lp_tiled_to_linear(tile - byte_offset, linear_image,
+ x, y, TILE_SIZE, TILE_SIZE,
+ lpr->base.format,
+ lpr->row_stride[level],
+ 1); /* tiles per row */
+ }
+
+ llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty,
+ LP_TEX_LAYOUT_LINEAR);
+}
+
+
+/**
+ * Convert one tile of the linear image into tiled layout.
+ * Reads from the linear image of the given face/slice and mipmap level
+ * at pixel position (x, y) and writes the converted data to \p tile.
+ * Does nothing when the linear image address is NULL.
+ * x and y must be multiples of TILE_SIZE.
+ */
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile)
+{
+ uint8_t *linear_image;
+
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+
+ /* compute address of the slice/face of the image that contains the tile */
+ linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_LINEAR);
+
+ if (linear_image) {
+ uint ii = x, jj = y;
+ uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+ uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+
+ /* Note that lp_linear_to_tiled expects the tile parameter to
+ * point at the first tile in a whole-image sized array. In
+ * this code, we have only a single tile and have to do some
+ * pointer arithmetic to figure out where the "image" would have
+ * started.
+ */
+ lp_linear_to_tiled(linear_image, tile - byte_offset,
+ x, y, TILE_SIZE, TILE_SIZE,
+ lpr->base.format,
+ lpr->row_stride[level],
+ 1); /* tiles per row */
+ }
+}
+
+
+/**
* Return size of resource in bytes
*/
unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index 503b6a19a8..4e4a65dcb4 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -223,6 +223,17 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
unsigned x, unsigned y);
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile);
+
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile);
extern void
llvmpipe_print_resources(void);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
index 2b63992dd7..0938f7aea7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_image.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -204,7 +204,7 @@ lp_tiled_to_linear(const void *src, void *dst,
lp_tile_unswizzle_4ub(format,
src_tile,
dst, dst_stride,
- ii, jj, tile_w, tile_h);
+ ii, jj);
}
}
}
@@ -293,7 +293,7 @@ lp_linear_to_tiled(const void *src, void *dst,
lp_tile_swizzle_4ub(format,
dst_tile,
src, src_stride,
- ii, jj, tile_w, tile_h);
+ ii, jj);
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py b/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py
new file mode 100644
index 0000000000..ea2fc0f375
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py
@@ -0,0 +1,32 @@
+
+tile = [[0,1,4,5],
+ [2,3,6,7],
+ [8,9,12,13],
+ [10,11,14,15]]
+shift = 0
+align = 1
+value = 0L
+holder = []
+
+import sys
+
+basemask = [0x
+fd = sys.stdout
+indent = " "*9
+for c in range(4):
+ fd.write(indent + "*pdst++ = \n");
+ for l,line in enumerate(tile):
+ fd.write(indent + " %s_mm_shuffle_epi8(line%d, (__m128i){"%(l and '+' or ' ',l))
+ for i,pos in enumerate(line):
+ mask = 0x00ffffffff & (~(0xffL << shift))
+ value = mask | ((pos) << shift)
+ holder.append(value)
+ if holder and (i + 1) %2 == 0:
+ fd.write("0x%8.0x"%(holder[0] + (holder[1] << 32)))
+ holder = []
+ if (i) %4 == 1:
+ fd.write( ',')
+
+ fd.write("})%s\n"%((l == 3) and ';' or ''))
+ print
+ shift += 8
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 07f71b8411..12dac1da6c 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -79,14 +79,14 @@ void
lp_tile_swizzle_4ub(enum pipe_format format,
uint8_t *dst,
const void *src, unsigned src_stride,
- unsigned x, unsigned y, unsigned w, unsigned h);
+ unsigned x, unsigned y);
void
lp_tile_unswizzle_4ub(enum pipe_format format,
const uint8_t *src,
void *dst, unsigned dst_stride,
- unsigned x, unsigned y, unsigned w, unsigned h);
+ unsigned x, unsigned y);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 5ab63cbac6..c71ec8066c 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -75,13 +75,13 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
src_native_type = native_type(format)
print 'static void'
- print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
+ print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
print '{'
print ' unsigned x, y;'
print ' const uint8_t *src_row = src + y0*src_stride;'
- print ' for (y = 0; y < h; ++y) {'
+ print ' for (y = 0; y < TILE_SIZE; ++y) {'
print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
- print ' for (x = 0; x < w; ++x) {'
+ print ' for (x = 0; x < TILE_SIZE; ++x) {'
names = ['']*4
if format.colorspace in ('rgb', 'srgb'):
@@ -202,9 +202,9 @@ def emit_unrolled_unswizzle_code(format, src_channel):
print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type)
print ' unsigned int qx, qy, i;'
print
- print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {'
+ print ' for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {'
print ' const unsigned py = y0 + qy;'
- print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {'
+ print ' for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {'
print ' const unsigned px = x0 + qx;'
print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;'
print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;'
@@ -231,9 +231,9 @@ def emit_tile_pixel_unswizzle_code(format, src_channel):
print ' unsigned x, y;'
print ' uint8_t *dst_row = dst + y0*dst_stride;'
- print ' for (y = 0; y < h; ++y) {'
+ print ' for (y = 0; y < TILE_SIZE; ++y) {'
print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
- print ' for (x = 0; x < w; ++x) {'
+ print ' for (x = 0; x < TILE_SIZE; ++x) {'
if format.layout == PLAIN:
if not format.is_array():
@@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
name = format.short_name()
print 'static void'
- print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
+ print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
print '{'
if format.layout == PLAIN \
and format.colorspace == 'rgb' \
@@ -289,6 +289,202 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
print
+def generate_ssse3():
+ print '''
+#if defined(PIPE_ARCH_SSE)
+
+
+#if defined(PIPE_ARCH_SSSE3)
+
+#include <tmmintrin.h>
+
+#else
+
+#include <emmintrin.h>
+
+/**
+ * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
+ * where -mssse3 is not supported/enabled.
+ *
+ * MSVC will never get in here as its intrinsics support do not rely on
+ * compiler command line options.
+ */
+static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8(__m128i a, __m128i mask)
+{
+ __m128i result;
+ __asm__("pshufb %1, %0"
+ : "=x" (result)
+ : "xm" (mask), "0" (a));
+ return result;
+}
+
+#endif
+
+
+static void
+lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
+ const uint8_t *src, unsigned src_stride,
+ unsigned x0, unsigned y0)
+{
+
+ unsigned x, y;
+ __m128i *pdst = (__m128i*) dst;
+ const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t);
+ unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH;
+ unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT;
+
+ const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+
+ const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+
+ const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff);
+ const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff);
+ const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff);
+ const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff);
+
+ const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e);
+ const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d);
+ const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c);
+ const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f);
+
+ for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
+ __m128i line0 = *(__m128i*)ysrc0;
+ const uint8_t *ysrc = ysrc0 + src_stride;
+ ysrc0 += tile_stridey;
+
+ for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
+ __m128i r, g, b, a, line1;
+ line1 = *(__m128i*)ysrc;
+ PIPE_READ_WRITE_BARRIER();
+ ysrc += src_stride;
+ r = _mm_shuffle_epi8(line0, shuffle00);
+ g = _mm_shuffle_epi8(line0, shuffle01);
+ b = _mm_shuffle_epi8(line0, shuffle02);
+ a = _mm_shuffle_epi8(line0, shuffle03);
+
+ line0 = *(__m128i*)ysrc;
+ PIPE_READ_WRITE_BARRIER();
+ ysrc += src_stride;
+ r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10));
+ g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11));
+ b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12));
+ a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13));
+
+ line1 = *(__m128i*)ysrc;
+ PIPE_READ_WRITE_BARRIER();
+ ysrc -= tile_stridex;
+ r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20));
+ g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21));
+ b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22));
+ a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23));
+
+ if (x + 1 < TILE_SIZE) {
+ line0 = *(__m128i*)ysrc;
+ ysrc += src_stride;
+ }
+
+ PIPE_READ_WRITE_BARRIER();
+ r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30));
+ g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31));
+ b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32));
+ a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33));
+
+ *pdst++ = r;
+ *pdst++ = g;
+ *pdst++ = b;
+ *pdst++ = a;
+ }
+ }
+
+}
+
+static void
+lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src,
+ uint8_t *dst, unsigned dst_stride,
+ unsigned x0, unsigned y0)
+{
+ unsigned int x, y;
+ const __m128i *psrc = (__m128i*) src;
+ const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t));
+ uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t);
+ __m128i c0 = *psrc++;
+ __m128i c1;
+
+ const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff);
+ const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff);
+ const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff);
+ const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff);
+
+ const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff);
+ const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff);
+ const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff);
+ const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff);
+
+ const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff);
+ const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff);
+ const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff);
+ const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff);
+
+ const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05);
+ const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07);
+ const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d);
+ const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f);
+
+ for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
+ __m128i *tile = (__m128i*) pdst;
+ pdst += dst_stride * TILE_VECTOR_HEIGHT;
+ for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
+ uint8_t *linep = (uint8_t*) (tile++);
+ __m128i line0, line1, line2, line3;
+
+ c1 = *psrc++; /* r */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_shuffle_epi8(c0, shuffle00);
+ line1 = _mm_shuffle_epi8(c0, shuffle01);
+ line2 = _mm_shuffle_epi8(c0, shuffle02);
+ line3 = _mm_shuffle_epi8(c0, shuffle03);
+
+ c0 = *psrc++; /* g */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10));
+ line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11));
+ line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12));
+ line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13));
+
+ c1 = *psrc++; /* b */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20));
+ line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21));
+ line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22));
+ line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23));
+
+ if (psrc != end)
+ c0 = *psrc++; /* a */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30));
+ line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31));
+ line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32));
+ line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33));
+
+ *(__m128i*) (linep) = line0;
+ *(__m128i*) (((char*)linep) + dst_stride) = line1;
+ *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2;
+ *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3;
+ }
+ }
+}
+
+#endif /* PIPE_ARCH_SSSE3 */
+'''
+
+
def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
'''Generate the dispatch function to read pixels from any format'''
@@ -297,9 +493,9 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
generate_format_read(format, dst_channel, dst_native_type, dst_suffix)
print 'void'
- print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
+ print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type)
print '{'
- print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
+ print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
print '#ifdef DEBUG'
print ' lp_tile_swizzle_count += 1;'
print '#endif'
@@ -307,13 +503,21 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
for format in formats:
if is_format_supported(format):
print ' case %s:' % format.name
- print ' func = &lp_tile_%s_swizzle_%s;' % (format.short_name(), dst_suffix)
+ func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix)
+ if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
+ print '#ifdef PIPE_ARCH_SSE'
+ print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print '#else'
+ print ' func = %s;' % (func_name,)
+ print '#endif'
+ else:
+ print ' func = %s;' % (func_name,)
print ' break;'
print ' default:'
print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
print ' return;'
print ' }'
- print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
+ print ' func(dst, (const uint8_t *)src, src_stride, x, y);'
print '}'
print
@@ -326,10 +530,10 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
generate_format_write(format, src_channel, src_native_type, src_suffix)
print 'void'
- print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
+ print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type)
print '{'
- print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
+ print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
print '#ifdef DEBUG'
print ' lp_tile_unswizzle_count += 1;'
print '#endif'
@@ -337,13 +541,21 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
for format in formats:
if is_format_supported(format):
print ' case %s:' % format.name
- print ' func = &lp_tile_%s_unswizzle_%s;' % (format.short_name(), src_suffix)
+ func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix)
+ if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
+ print '#ifdef PIPE_ARCH_SSE'
+ print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print '#else'
+ print ' func = %s;' % (func_name,)
+ print '#endif'
+ else:
+ print ' func = %s;' % (func_name,)
print ' break;'
print ' default:'
print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
print ' return;'
print ' }'
- print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
+ print ' func(src, (uint8_t *)dst, dst_stride, x, y);'
print '}'
print
@@ -362,6 +574,7 @@ def main():
print '#include "util/u_format.h"'
print '#include "util/u_math.h"'
print '#include "util/u_half.h"'
+ print '#include "util/u_cpu_detect.h"'
print '#include "lp_tile_soa.h"'
print
print '#ifdef DEBUG'
@@ -391,6 +604,8 @@ def main():
print '};'
print
+ generate_ssse3()
+
channel = Channel(UNSIGNED, True, 8)
native_type = 'uint8_t'
suffix = '4ub'
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 60bdd7276a..513e5e02bc 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -6,6 +6,7 @@
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
+#include "util/u_string.h"
#include <stdio.h>
#include <errno.h>
@@ -15,7 +16,7 @@
#include "nouveau_screen.h"
/* XXX this should go away */
-#include "state_tracker/drm_api.h"
+#include "state_tracker/drm_driver.h"
#include "util/u_simple_screen.h"
static const char *
@@ -24,7 +25,7 @@ nouveau_screen_get_name(struct pipe_screen *pscreen)
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
static char buffer[128];
- snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
return buffer;
}
@@ -181,7 +182,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
ret = nouveau_bo_handle_ref(dev, whandle->handle, &bo);
if (ret) {
debug_printf("%s: ref name 0x%08x failed with %d\n",
- __func__, whandle->handle, ret);
+ __FUNCTION__, whandle->handle, ret);
return NULL;
}
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 8eacdff035..8c290273fb 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -14,7 +14,7 @@ struct nouveau_screen {
unsigned index_buffer_flags;
};
-static inline struct nouveau_screen *
+static INLINE struct nouveau_screen *
nouveau_screen(struct pipe_screen *pscreen)
{
return (struct nouveau_screen *)pscreen;
@@ -67,13 +67,13 @@ void nouveau_screen_fini(struct nouveau_screen *);
-static __inline__ unsigned
+static INLINE unsigned
RING_3D(unsigned mthd, unsigned size)
{
return (7 << 13) | (size << 18) | mthd;
}
-static __inline__ unsigned
+static INLINE unsigned
RING_3D_NI(unsigned mthd, unsigned size)
{
return 0x40000000 | (7 << 13) | (size << 18) | mthd;
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
index ed6e643785..a5e8537533 100644
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -103,7 +103,7 @@ struct u_split_prim {
uint edgeflag_off:1;
};
-static inline void
+static INLINE void
u_split_prim_init(struct u_split_prim *s,
unsigned mode, unsigned start, unsigned count)
{
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index cd7da9977d..df79ca89ca 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -13,7 +13,7 @@
#include "nouveau/nouveau_resource.h"
#include "nouveau/nouveau_pushbuf.h"
-static inline uint32_t
+static INLINE uint32_t
nouveau_screen_transfer_flags(unsigned pipe)
{
uint32_t flags = 0;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 61807dd999..12c4a93a9b 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -21,7 +21,7 @@
#include "nv50_program.h"
#define NOUVEAU_ERR(fmt, args...) \
- fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
#define NOUVEAU_MSG(fmt, args...) \
fprintf(stderr, "nouveau: "fmt, ##args);
@@ -50,6 +50,7 @@
#define NV50_NEW_SAMPLER (1 << 15)
#define NV50_NEW_TEXTURE (1 << 16)
#define NV50_NEW_STENCIL_REF (1 << 17)
+#define NV50_NEW_CLIP (1 << 18)
struct nv50_blend_stateobj {
struct pipe_blend_state pipe;
@@ -140,6 +141,7 @@ struct nv50_context {
struct pipe_scissor_state scissor;
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state framebuffer;
+ struct pipe_clip_state clip;
struct nv50_program *vertprog;
struct nv50_program *fragprog;
struct nv50_program *geomprog;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 21908bcd3c..ca4b01b12b 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -186,6 +186,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_VS_TEMPS:
case PIPE_CAP_MAX_FS_TEMPS: /* no spilling atm */
return 128 / 4;
+ case PIPE_CAP_DEPTH_CLAMP:
+ return 1;
default:
NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
return 0;
@@ -525,6 +527,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RINGf (chan, 0.0f);
OUT_RINGf (chan, 1.0f);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
+ OUT_RING (chan, 1);
+
/* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
BEGIN_RING(chan, screen->tesla, NV50TCL_LINKED_TSC, 1);
OUT_RING (chan, 1);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index f8bff764f2..42c5a58318 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -658,6 +658,10 @@ static void
nv50_set_clip_state(struct pipe_context *pipe,
const struct pipe_clip_state *clip)
{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->clip.depth_clamp = clip->depth_clamp;
+ nv50->dirty |= NV50_NEW_CLIP;
}
static void
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 14c3490599..524696f35d 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -277,7 +277,7 @@ static struct nouveau_stateobj *
validate_viewport(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(5, 9, 0);
+ struct nouveau_stateobj *so = so_new(3, 7, 0);
so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
so_data (so, fui(nv50->viewport.translate[0]));
@@ -288,15 +288,6 @@ validate_viewport(struct nv50_context *nv50)
so_data (so, fui(nv50->viewport.scale[1]));
so_data (so, fui(nv50->viewport.scale[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
- so_data (so, 1);
- /* 0x0000 = remove whole primitive only (xyz)
- * 0x1018 = remove whole primitive only (xy), clamp z
- * 0x1080 = clip primitive (xyz)
- * 0x1098 = clip primitive (xy), clamp z
- */
- so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
- so_data (so, 0x1080);
/* no idea what 0f90 does */
so_method(so, tesla, 0x0f90, 1);
so_data (so, 0);
@@ -341,6 +332,26 @@ validate_vtxattr(struct nv50_context *nv50)
return so;
}
+/**
+ * Build the state object that programs VIEW_VOLUME_CLIP_CTRL from the
+ * depth_clamp flag stored in the context's clip state.
+ *
+ * Known register values:
+ *   0x0000 = remove whole primitive only (xyz)
+ *   0x1018 = remove whole primitive only (xy), clamp z
+ *   0x1080 = clip primitive (xyz)
+ *   0x1098 = clip primitive (xy), clamp z
+ *
+ * Returns the newly created state object holding the single method.
+ */
+static struct nouveau_stateobj *
+validate_clip(struct nv50_context *nv50)
+{
+	struct nouveau_stateobj *so = so_new(1, 1, 0);
+	uint32_t clip_ctrl = 0x1080;
+
+	/* With depth clamping requested, clip in xy only and clamp z. */
+	if (nv50->clip.depth_clamp)
+		clip_ctrl = 0x1098;
+
+	so_method(so, nv50->screen->tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+	so_data  (so, clip_ctrl);
+
+	return so;
+}
+
struct state_validate {
struct nouveau_stateobj *(*func)(struct nv50_context *nv50);
unsigned states;
@@ -365,6 +376,7 @@ struct state_validate {
{ nv50_vbo_validate , NV50_NEW_ARRAYS },
{ validate_vtxbuf , NV50_NEW_ARRAYS },
{ validate_vtxattr , NV50_NEW_ARRAYS },
+ { validate_clip , NV50_NEW_CLIP },
{}
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index 6772d9bd51..ee41f03b9b 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -842,7 +842,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
struct nouveau_channel* chan = nvfx->screen->base.channel;
struct nvfx_fragment_program *fp = nvfx->fragprog;
int update = 0;
- int i;
if (!fp->translated)
{
@@ -895,6 +894,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16);
char *map, *buf;
+ int i;
if(fp->fpbo)
{
@@ -910,7 +910,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
map = fpbo->bo->map;
buf = fpbo->insn;
- for(int i = 0; i < fp->progs_per_bo; ++i)
+ for(i = 0; i < fp->progs_per_bo; ++i)
{
memcpy(buf, fp->insn, fp->insn_len * 4);
nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4);
@@ -931,6 +931,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer);
uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
+ int i;
for (i = 0; i < fp->nr_consts; ++i) {
unsigned off = fp->consts[i].offset;
unsigned idx = fp->consts[i].index * 4;
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index a78d2411a0..80db28a07c 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -56,6 +56,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 0;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return 1;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
return 13;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
@@ -127,6 +129,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 2;
case PIPE_CAP_MAX_VS_PREDS:
return screen->is_nv4x ? 1 : 0;
+ case PIPE_CAP_GEOMETRY_SHADER4:
+ return 0;
default:
NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
return 0;
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index dd897f6072..728bc40a5b 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -21,10 +21,10 @@ C_SOURCES = \
r300_screen_buffer.c \
r300_state.c \
r300_state_derived.c \
- r300_state_invariant.c \
r300_vs.c \
r300_vs_draw.c \
r300_texture.c \
+ r300_texture_desc.c \
r300_tgsi_to_rc.c \
r300_transfer.c
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index ee19e9d278..bf023daaa5 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -31,10 +31,10 @@ r300 = env.ConvenienceLibrary(
'r300_screen_buffer.c',
'r300_state.c',
'r300_state_derived.c',
- 'r300_state_invariant.c',
'r300_vs.c',
'r300_vs_draw.c',
'r300_texture.c',
+ 'r300_texture_desc.c',
'r300_tgsi_to_rc.c',
'r300_transfer.c',
] + r300compiler) + r300compiler
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 2a47701291..d125196b6d 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -24,12 +24,13 @@
#include "r300_texture.h"
#include "util/u_format.h"
+#include "util/u_pack_color.h"
-enum r300_blitter_op
+enum r300_blitter_op /* bitmask */
{
- R300_CLEAR,
- R300_CLEAR_SURFACE,
- R300_COPY
+ R300_CLEAR = 1,
+ R300_CLEAR_SURFACE = 2,
+ R300_COPY = 4
};
static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op)
@@ -79,6 +80,31 @@ static void r300_blitter_end(struct r300_context *r300)
}
}
+/**
+ * Pack a clear color into the 32-bit value that r300_clear() stores in
+ * the hyperz state's zb_depthclearvalue for a CBZB clear.
+ *
+ * \param format  format of the colorbuffer being cleared
+ * \param rgba    clear color, packed with util_pack_color()
+ * \return the packed color when the format block is 32 bits wide;
+ *         otherwise the 16-bit packed value replicated into both
+ *         halves of the dword.
+ */
+static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
+                                          const float* rgba)
+{
+    union util_color uc;
+    util_pack_color(rgba, format, &uc);
+
+    if (util_format_get_blocksizebits(format) == 32)
+        return uc.ui;
+
+    /* Widen before shifting: uc.us would otherwise be promoted to a
+     * signed int, and shifting a value >= 0x8000 left by 16 overflows. */
+    return (uint32_t)uc.us | ((uint32_t)uc.us << 16);
+}
+
+/**
+ * Tell whether the pending clear may take the CBZB path.
+ *
+ * The clear must touch the color buffer only, exactly one colorbuffer
+ * must be bound, and that surface must have its cbzb_allowed flag set.
+ *
+ * \param r300           context whose framebuffer state is inspected
+ * \param clear_buffers  PIPE_CLEAR_* mask passed to the clear
+ */
+static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
+                                       unsigned clear_buffers)
+{
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
+    boolean color_only = clear_buffers == PIPE_CLEAR_COLOR;
+    boolean single_cbuf = fb->nr_cbufs == 1;
+
+    if (color_only && single_cbuf)
+        return r300_surface(fb->cbufs[0])->cbzb_allowed;
+
+    return FALSE;
+}
+
/* Clear currently bound buffers. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@@ -86,39 +112,81 @@ static void r300_clear(struct pipe_context* pipe,
double depth,
unsigned stencil)
{
- /* XXX Implement fastfill.
+ /* My notes about fastfill:
+ *
+ * 1) Only the zbuffer is cleared.
+ *
+ * 2) The zbuffer must be micro-tiled and whole microtiles must be
+ * written. If microtiling is disabled, it locks up.
*
- * If fastfill is enabled, a few facts should be considered:
+ * 3) There is Z Mask RAM which contains a compressed zbuffer and
+ * it interacts with fastfill. We should figure out how to use it
+ * to get more performance.
+ * This is what we know about the Z Mask:
*
- * 1) Zbuffer must be micro-tiled and whole microtiles must be
- * written.
+ * Each dword of the Z Mask contains compression information
+ * for 16 4x4 pixel blocks, that is 2 bits for each block.
+ * On chips with 2 Z pipes, every other dword maps to a different
+ * pipe.
*
- * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be
- * equal to 0.
+ * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must
+ * be equal to 0. (clear the Z Mask RAM with zeros)
*
- * 3) For 16-bit integer buffering, compression causes a hung with one or
+ * 5) For 16-bit zbuffer, compression causes a hang with one or
* two samples and should not be used.
*
- * 4) Fastfill must not be used if reading of compressed Z data is disabled
+ * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
+ * to avoid needless decompression.
+ *
+ * 7) Fastfill must not be used if reading of compressed Z data is disabled
* and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
* i.e. it cannot be used to compress the zbuffer.
- * (what the hell does that mean and how does it fit in clearing
- * the buffers?)
+ *
+ * 8) ZB_CB_CLEAR does not interact with fastfill in any way.
*
* - Marek
*/
struct r300_context* r300 = r300_context(pipe);
- struct pipe_framebuffer_state* fb =
+ struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_hyperz_state *hyperz =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ uint32_t width = fb->width;
+ uint32_t height = fb->height;
+
+ /* Enable CBZB clear. */
+ if (r300_cbzb_clear_allowed(r300, buffers)) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ hyperz->zb_depthclearvalue =
+ r300_depth_clear_cb_value(surf->base.format, rgba);
+
+ width = surf->cbzb_width;
+ height = surf->cbzb_height;
+
+ r300->cbzb_clear = TRUE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
+ /* Clear. */
r300_blitter_begin(r300, R300_CLEAR);
util_blitter_clear(r300->blitter,
- fb->width,
- fb->height,
+ width,
+ height,
fb->nr_cbufs,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
+
+ /* Disable CBZB clear. */
+ if (r300->cbzb_clear) {
+ r300->cbzb_clear = FALSE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
+
+ /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
+ if (r300->flush_counter == 0)
+ pipe->flush(pipe, 0, NULL);
}
/* Clear a region of a color surface to a constant value. */
@@ -185,14 +253,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
enum pipe_format old_format = dst->format;
enum pipe_format new_format = old_format;
- if (dst->format != src->format) {
- debug_printf("r300: Implementation error: Format mismatch in %s\n"
- " : src: %s dst: %s\n", __FUNCTION__,
- util_format_short_name(src->format),
- util_format_short_name(dst->format));
- debug_assert(0);
- }
-
if (!pipe->screen->is_format_supported(pipe->screen,
old_format, src->target,
src->nr_samples,
diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h
index 6987471244..9d3d4fc1b1 100644
--- a/src/gallium/drivers/r300/r300_cb.h
+++ b/src/gallium/drivers/r300/r300_cb.h
@@ -89,9 +89,6 @@
CB_DEBUG(cs_count = size;) \
} while (0)
-#define BEGIN_CS_AS_CB(r300, size) \
- BEGIN_CB(r300->rws->get_cs_pointer(r300->rws, dwords), dwords)
-
#define END_CB do { \
CB_DEBUG(if (cs_count != 0) \
debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index e6dca66d4a..21f3b9d261 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -36,6 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->num_vert_fpus = 2;
caps->num_tex_units = 16;
caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
+ caps->has_hiz = TRUE;
caps->is_r400 = FALSE;
caps->is_r500 = FALSE;
caps->high_second_pipe = FALSE;
@@ -76,6 +77,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x4E54:
case 0x4E56:
caps->family = CHIP_FAMILY_RV350;
+ caps->has_hiz = FALSE;
caps->high_second_pipe = TRUE;
break;
@@ -106,6 +108,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5B64:
case 0x5B65:
caps->family = CHIP_FAMILY_RV370;
+ caps->has_hiz = FALSE;
caps->high_second_pipe = TRUE;
break;
@@ -201,24 +204,28 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5954:
case 0x5955:
caps->family = CHIP_FAMILY_RS480;
+ caps->has_hiz = FALSE;
caps->has_tcl = FALSE;
break;
case 0x5974:
case 0x5975:
caps->family = CHIP_FAMILY_RS482;
+ caps->has_hiz = FALSE;
caps->has_tcl = FALSE;
break;
case 0x5A41:
case 0x5A42:
caps->family = CHIP_FAMILY_RS400;
+ caps->has_hiz = FALSE;
caps->has_tcl = FALSE;
break;
case 0x5A61:
case 0x5A62:
caps->family = CHIP_FAMILY_RC410;
+ caps->has_hiz = FALSE;
caps->has_tcl = FALSE;
break;
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index ab649c3857..65750f54e7 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -42,6 +42,8 @@ struct r300_capabilities {
unsigned num_tex_units;
/* Whether or not TCL is physically present */
boolean has_tcl;
+ /* Some chipsets do not have HiZ RAM. */
+ boolean has_hiz;
/* Whether or not this is RV350 or newer, including all r400 and r500
* chipsets. The differences compared to the oldest r300 chips are:
* - Blend LTE/GTE thresholds
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 16a75aa612..df90359058 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -32,23 +32,72 @@
#include "r300_emit.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
-#include "r300_state_invariant.h"
#include "r300_winsys.h"
#include <inttypes.h>
-static void r300_destroy_context(struct pipe_context* context)
+/**
+ * Adjust the screen's context count and switch the thread-safety mode of
+ * the screen's buffer pool to match.
+ *
+ * A positive diff increments the count and selects the multithreaded mode
+ * once more than one context exists; any other diff decrements the count
+ * and selects the singlethreaded mode once at most one context remains.
+ *
+ * NOTE(review): the count is re-read after the atomic increment/decrement
+ * rather than taken from the atomic operation itself - confirm that
+ * concurrent context creation/destruction cannot observe a stale value.
+ */
+static void r300_update_num_contexts(struct r300_screen *r300screen,
+ int diff)
{
- struct r300_context* r300 = r300_context(context);
+ if (diff > 0) {
+ p_atomic_inc(&r300screen->num_contexts);
+
+ if (r300screen->num_contexts > 1)
+ util_mempool_set_thread_safety(&r300screen->pool_buffers,
+ UTIL_MEMPOOL_MULTITHREADED);
+ } else {
+ p_atomic_dec(&r300screen->num_contexts);
+
+ if (r300screen->num_contexts <= 1)
+ util_mempool_set_thread_safety(&r300screen->pool_buffers,
+ UTIL_MEMPOOL_SINGLETHREADED);
+ }
+}
+
+/**
+ * Drop every reference the context still holds on other objects: the
+ * bound framebuffer, the sampler views (including the texkill dummy),
+ * the SWTCL VBO and the vertex buffers. Any queries still on the
+ * context's query list are unlinked and freed as well.
+ */
+static void r300_release_referenced_objects(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_textures_state *textures =
+ (struct r300_textures_state*)r300->textures_state.state;
struct r300_query *query, *temp;
- struct r300_atom *atom;
+ unsigned i;
+ /* Framebuffer state. */
+ util_assign_framebuffer_state(fb, NULL);
+
+ /* Textures. */
+ for (i = 0; i < textures->sampler_view_count; i++)
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&textures->sampler_views[i], NULL);
+
+ /* The special dummy texture for texkill. */
if (r300->texkill_sampler) {
pipe_sampler_view_reference(
(struct pipe_sampler_view**)&r300->texkill_sampler,
NULL);
}
+ /* The SWTCL VBO. */
+ pipe_resource_reference(&r300->vbo, NULL);
+
+ /* Vertex buffers. */
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
+ }
+
+ /* If there are any queries pending or not destroyed, remove them now. */
+ foreach_s(query, temp, &r300->query_list) {
+ remove_from_list(query);
+ FREE(query);
+ }
+}
+
+static void r300_destroy_context(struct pipe_context* context)
+{
+ struct r300_context* r300 = r300_context(context);
+ struct r300_atom *atom;
+
util_blitter_destroy(r300->blitter);
draw_destroy(r300->draw);
@@ -62,23 +111,30 @@ static void r300_destroy_context(struct pipe_context* context)
}
}
- /* If there are any queries pending or not destroyed, remove them now. */
- foreach_s(query, temp, &r300->query_list) {
- remove_from_list(query);
- FREE(query);
- }
-
u_upload_destroy(r300->upload_vb);
u_upload_destroy(r300->upload_ib);
translate_cache_destroy(r300->tran.translate_cache);
+ r300_release_referenced_objects(r300);
+
+ r300->rws->cs_destroy(r300->cs);
+
+ util_mempool_destroy(&r300->pool_transfers);
+
+ r300_update_num_contexts(r300->screen, -1);
+
+ FREE(r300->aa_state.state);
FREE(r300->blend_color_state.state);
FREE(r300->clip_state.state);
FREE(r300->fb_state.state);
+ FREE(r300->gpu_flush.state);
+ FREE(r300->hyperz_state.state);
+ FREE(r300->invariant_state.state);
FREE(r300->rs_block_state.state);
FREE(r300->scissor_state.state);
FREE(r300->textures_state.state);
+ FREE(r300->vap_invariant_state.state);
FREE(r300->viewport_state.state);
FREE(r300->ztop_state.state);
FREE(r300->fs_constants.state);
@@ -89,7 +145,7 @@ static void r300_destroy_context(struct pipe_context* context)
FREE(r300);
}
-static void r300_flush_cb(void *data)
+void r300_flush_cb(void *data)
{
struct r300_context* const cs_context_copy = data;
@@ -106,8 +162,10 @@ static void r300_flush_cb(void *data)
static void r300_setup_atoms(struct r300_context* r300)
{
+ boolean is_rv350 = r300->screen->caps.is_rv350;
boolean is_r500 = r300->screen->caps.is_r500;
boolean has_tcl = r300->screen->caps.has_tcl;
+ boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
/* Create the actual atom list.
*
@@ -115,44 +173,75 @@ static void r300_setup_atoms(struct r300_context* r300)
* can affect performance and conformance if not handled with care.
*
* Some atoms never change size, others change every emit - those have
- * the size of 0 here. */
+ * the size of 0 here.
+ *
+ * NOTE: The framebuffer state is split into these atoms:
+ * - gpu_flush (unpipelined regs)
+ * - aa_state (unpipelined regs)
+ * - fb_state (unpipelined regs)
+ * - hyperz_state (unpipelined regs followed by pipelined ones)
+ * - fb_state_pipelined (pipelined regs)
+ * The motivation behind this is to be able to emit a strict
+ * subset of the regs, and to have reasonable register ordering. */
make_empty_list(&r300->atom_list);
- R300_INIT_ATOM(invariant_state, 71);
+ /* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */
+ R300_INIT_ATOM(gpu_flush, 9);
+ R300_INIT_ATOM(aa_state, 4);
+ R300_INIT_ATOM(fb_state, 0);
+ /* ZB (unpipelined), SC. */
+ R300_INIT_ATOM(hyperz_state, 6);
R300_INIT_ATOM(ztop_state, 2);
- R300_INIT_ATOM(query_start, 4);
+ /* ZB, FG. */
+ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6);
+ /* RB3D. */
R300_INIT_ATOM(blend_state, 8);
R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2);
- R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2);
- R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6);
- R300_INIT_ATOM(fb_state, 0);
- R300_INIT_ATOM(rs_state, 0);
+ /* SC. */
R300_INIT_ATOM(scissor_state, 3);
+ /* GB, FG, GA, SU, SC, RB3D. */
+ R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0));
+ /* VAP. */
R300_INIT_ATOM(viewport_state, 9);
- R300_INIT_ATOM(rs_block_state, 0);
- R300_INIT_ATOM(vertex_stream_state, 0);
R300_INIT_ATOM(pvs_flush, 2);
+ R300_INIT_ATOM(vap_invariant_state, 9);
+ R300_INIT_ATOM(vertex_stream_state, 0);
R300_INIT_ATOM(vs_state, 0);
R300_INIT_ATOM(vs_constants, 0);
- R300_INIT_ATOM(texture_cache_inval, 2);
- R300_INIT_ATOM(textures_state, 0);
+ R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2);
+ /* VAP, RS, GA, GB, SU, SC. */
+ R300_INIT_ATOM(rs_block_state, 0);
+ R300_INIT_ATOM(rs_state, 0);
+ /* SC, US. */
+ R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0));
+ /* US. */
R300_INIT_ATOM(fs, 0);
R300_INIT_ATOM(fs_rc_constant_state, 0);
R300_INIT_ATOM(fs_constants, 0);
+ /* TX. */
+ R300_INIT_ATOM(texture_cache_inval, 2);
+ R300_INIT_ATOM(textures_state, 0);
+ /* ZB (unpipelined), SU. */
+ R300_INIT_ATOM(query_start, 4);
/* Replace emission functions for r500. */
- if (r300->screen->caps.is_r500) {
+ if (is_r500) {
r300->fs.emit = r500_emit_fs;
r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state;
r300->fs_constants.emit = r500_emit_fs_constants;
}
/* Some non-CSO atoms need explicit space to store the state locally. */
+ r300->aa_state.state = CALLOC_STRUCT(r300_aa_state);
r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state);
r300->clip_state.state = CALLOC_STRUCT(r300_clip_state);
r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state);
+ r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state);
+ r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state);
+ r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state);
r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
+ r300->vap_invariant_state.state = CALLOC_STRUCT(r300_vap_invariant_state);
r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
@@ -162,27 +251,45 @@ static void r300_setup_atoms(struct r300_context* r300)
}
/* Some non-CSO atoms don't use the state pointer. */
- r300->invariant_state.allow_null_state = TRUE;
+ r300->fb_state_pipelined.allow_null_state = TRUE;
r300->fs_rc_constant_state.allow_null_state = TRUE;
r300->pvs_flush.allow_null_state = TRUE;
r300->query_start.allow_null_state = TRUE;
r300->texture_cache_inval.allow_null_state = TRUE;
+
+ /* Some states must be marked as dirty here to properly set up
+ * hardware in the first command stream. */
+ r300->invariant_state.dirty = TRUE;
+ r300->pvs_flush.dirty = TRUE;
+ r300->vap_invariant_state.dirty = TRUE;
+ r300->texture_cache_inval.dirty = TRUE;
+ r300->textures_state.dirty = TRUE;
}
/* Not every state tracker calls every driver function before the first draw
* call and we must initialize the command buffers somehow. */
static void r300_init_states(struct pipe_context *pipe)
{
+ struct r300_context *r300 = r300_context(pipe);
struct pipe_blend_color bc = {{0}};
struct pipe_clip_state cs = {{{0}}};
struct pipe_scissor_state ss = {0};
struct r300_clip_state *clip =
- (struct r300_clip_state*)r300_context(pipe)->clip_state.state;
+ (struct r300_clip_state*)r300->clip_state.state;
+ struct r300_gpu_flush *gpuflush =
+ (struct r300_gpu_flush*)r300->gpu_flush.state;
+ struct r300_vap_invariant_state *vap_invariant =
+ (struct r300_vap_invariant_state*)r300->vap_invariant_state.state;
+ struct r300_invariant_state *invariant =
+ (struct r300_invariant_state*)r300->invariant_state.state;
+ struct r300_hyperz_state *hyperz =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
CB_LOCALS;
pipe->set_blend_color(pipe, &bc);
pipe->set_scissor_state(pipe, &ss);
+ /* Initialize the clip state. */
if (r300_context(pipe)->screen->caps.has_tcl) {
pipe->set_clip_state(pipe, &cs);
} else {
@@ -190,6 +297,66 @@ static void r300_init_states(struct pipe_context *pipe)
OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
END_CB;
}
+
+ /* Initialize the GPU flush. */
+ {
+ BEGIN_CB(gpuflush->cb_flush_clean, 6);
+
+ /* Flush and free renderbuffer caches. */
+ OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
+ /* Wait until the GPU is idle.
+ * This fixes random pixels sometimes appearing probably caused
+ * by incomplete rendering. */
+ OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ END_CB;
+ }
+
+ /* Initialize the VAP invariant state. */
+ {
+ BEGIN_CB(vap_invariant->cb, 9);
+ OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
+ OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ OUT_CB_32F(1.0);
+ OUT_CB_32F(1.0);
+ OUT_CB_32F(1.0);
+ OUT_CB_32F(1.0);
+ OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
+ END_CB;
+ }
+
+ /* Initialize the invariant state. */
+ {
+ BEGIN_CB(invariant->cb, r300->invariant_state.size);
+ OUT_CB_REG(R300_GB_SELECT, 0);
+ OUT_CB_REG(R300_FG_FOG_BLEND, 0);
+ OUT_CB_REG(R300_GA_ROUND_MODE, 1);
+ OUT_CB_REG(R300_GA_OFFSET, 0);
+ OUT_CB_REG(R300_SU_TEX_WRAP, 0);
+ OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
+ OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
+ OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
+
+ if (r300->screen->caps.is_rv350) {
+ OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
+ OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
+ }
+ END_CB;
+ }
+
+ /* Initialize the hyperz state. */
+ {
+ BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size);
+ OUT_CB_REG(R300_ZB_BW_CNTL, 0);
+ OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
+ OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
+ END_CB;
+ }
}
struct pipe_context* r300_create_context(struct pipe_screen* screen,
@@ -202,6 +369,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300)
return NULL;
+ r300_update_num_contexts(r300screen, 1);
+
r300->rws = rws;
r300->screen = r300screen;
@@ -211,6 +380,12 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.destroy = r300_destroy_context;
+ r300->cs = rws->cs_create(rws);
+
+ util_mempool_create(&r300->pool_transfers,
+ sizeof(struct pipe_transfer), 64,
+ UTIL_MEMPOOL_SINGLETHREADED);
+
if (!r300screen->caps.has_tcl) {
/* Create a Draw. This is used for SW TCL. */
r300->draw = draw_create(&r300->context);
@@ -230,16 +405,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_init_blit_functions(r300);
r300_init_flush_functions(r300);
r300_init_query_functions(r300);
- r300_init_render_functions(r300);
r300_init_state_functions(r300);
r300_init_resource_functions(r300);
- r300->invariant_state.dirty = TRUE;
+ r300->blitter = util_blitter_create(&r300->context);
- rws->set_flush_cb(r300->rws, r300_flush_cb, r300);
- r300->dirty_hw++;
+ /* Render functions must be initialized after blitter. */
+ r300_init_render_functions(r300);
- r300->blitter = util_blitter_create(&r300->context);
+ rws->cs_set_flush(r300->cs, r300_flush_cb, r300);
r300->upload_ib = u_upload_create(&r300->context,
32 * 1024, 16,
@@ -280,11 +454,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.create_sampler_view(&r300->context, tex, &vtempl);
pipe_resource_reference(&tex, NULL);
-
- /* This will make sure that the dummy texture is set up
- * from the beginning even if an application does not use
- * textures. */
- r300->textures_state.dirty = TRUE;
}
return &r300->context;
@@ -296,11 +465,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
return NULL;
}
-boolean r300_check_cs(struct r300_context *r300, unsigned size)
-{
- return size <= r300->rws->get_cs_free_dwords(r300->rws);
-}
-
void r300_finish(struct r300_context *r300)
{
struct pipe_framebuffer_state *fb;
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 8d0b4bb3d3..b4256c6278 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -61,6 +61,13 @@ struct r300_atom {
boolean allow_null_state;
};
+struct r300_aa_state {
+ struct r300_surface *dest;
+
+ uint32_t aa_config;
+ uint32_t aaresolve_ctl;
+};
+
struct r300_blend_state {
uint32_t cb[8];
uint32_t cb_no_readwrite[8];
@@ -98,40 +105,39 @@ struct r300_dsa_state {
boolean two_sided_stencil_ref;
};
+struct r300_hyperz_state {
+ /* This is actually a command buffer with named dwords. */
+ uint32_t cb_begin;
+ uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */
+ uint32_t cb_reg1;
+ uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */
+ uint32_t cb_reg2;
+ uint32_t sc_hyperz; /* R300_SC_HYPERZ */
+};
+
+struct r300_gpu_flush {
+ uint32_t cb_flush_clean[6];
+};
+
struct r300_rs_state {
/* Original rasterizer state. */
struct pipe_rasterizer_state rs;
/* Draw-specific rasterizer state. */
struct pipe_rasterizer_state rs_draw;
- uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
- uint32_t multisample_position_0;/* R300_GB_MSPOS0: 0x4010 */
- uint32_t multisample_position_1;/* R300_GB_MSPOS1: 0x4014 */
- uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */
- uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
- uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */
- uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */
- float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */
- /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */
- float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */
- /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */
- uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
- uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */
- uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
- uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
+ /* Command buffers. */
+ uint32_t cb_main[25];
+ uint32_t cb_poly_offset_zb16[5];
+ uint32_t cb_poly_offset_zb24[5];
+
+ /* The index to cb_main where the cull_mode register value resides. */
+ unsigned cull_mode_index;
+
+ /* Whether polygon offset is enabled. */
+ boolean polygon_offset_enable;
+
+ /* This is emitted in the draw function. */
uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */
- uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */
- uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */
-
- /* Specifies top of Raster pipe specific enable controls,
- * i.e. texture coordinates stuffing for points, lines, triangles */
- uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */
-
- /* Point sprites texture coordinates, 0: lower left, 1: upper right */
- float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */
- float point_texcoord_bottom; /* R300_GA_POINT_T0: 0x4204 */
- float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */
- float point_texcoord_top; /* R300_GA_POINT_T1: 0x420c */
};
struct r300_rs_block {
@@ -214,6 +220,14 @@ struct r300_vertex_stream_state {
unsigned count;
};
+struct r300_invariant_state {
+ uint32_t cb[20];
+};
+
+struct r300_vap_invariant_state {
+ uint32_t cb[9];
+};
+
struct r300_viewport_state {
float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */
float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */
@@ -233,8 +247,8 @@ struct r300_ztop_state {
struct r300_constant_buffer {
/* Buffer of constants */
- uint32_t constants[256][4];
- /* Total number of constants */
+ uint32_t *ptr;
+ /* Total number of vec4s */
unsigned count;
};
@@ -294,32 +308,48 @@ struct r300_surface {
enum r300_buffer_domain domain;
- uint32_t offset;
+ uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
- uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT. */
+ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
+
+ /* Parameters dedicated to the CBZB clear. */
+ uint32_t cbzb_width; /* Aligned width. */
+ uint32_t cbzb_height; /* Half of the height. */
+ uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
+ uint32_t cbzb_pitch; /* DEPTHPITCH. */
+ uint32_t cbzb_format; /* ZB_FORMAT. */
+
+ /* Whether the CBZB clear is allowed on the surface. */
+ boolean cbzb_allowed;
};
-struct r300_texture {
- /* Parent class */
+struct r300_texture_desc {
+ /* Parent class. */
struct u_resource b;
- enum r300_buffer_domain domain;
+ /* Buffer tiling.
+ * Macrotiling is specified per-level because small mipmaps cannot
+ * be macrotiled. */
+ enum r300_buffer_tiling microtile;
+ enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS];
/* Offsets into the buffer. */
- unsigned offset[R300_MAX_TEXTURE_LEVELS];
+ unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS];
- /* A pitch for each mip-level */
- unsigned pitch[R300_MAX_TEXTURE_LEVELS];
+ /* Strides for each mip-level. */
+ unsigned stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
+ unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS];
- /* A pitch multiplied by blockwidth as hardware wants
- * the number of pixels instead of the number of blocks. */
- unsigned hwpitch[R300_MAX_TEXTURE_LEVELS];
+ /* Size of one zslice or face or 2D image based on the texture target. */
+ unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS];
- /* Size of one zslice or face based on the texture target */
- unsigned layer_size[R300_MAX_TEXTURE_LEVELS];
+ /* Total size of this texture, in bytes,
+ * derived from the texture properties. */
+ unsigned size_in_bytes;
- /* Whether the mipmap level is macrotiled. */
- enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS];
+ /* Total size of the buffer backing this texture, in bytes.
+ * It must be >= size. */
+ unsigned buffer_size_in_bytes;
/**
* If non-zero, override the natural texture layout with
@@ -329,16 +359,24 @@ struct r300_texture {
*
* \sa r300_texture_get_stride
*/
- unsigned stride_override;
+ unsigned stride_in_bytes_override;
- /* Total size of this texture, in bytes. */
- unsigned size;
+ /* Whether this texture has non-power-of-two dimensions.
+ * It can be either a regular texture or a rectangle one. */
+ boolean is_npot;
- /* Whether this texture has non-power-of-two dimensions
- * or a user-specified pitch.
- * It can be either a regular texture or a rectangle one.
- */
- boolean uses_pitch;
+ /* This flag says that hardware must use the stride for addressing
+ * instead of the width. */
+ boolean uses_stride_addressing;
+
+ /* Whether CBZB fast color clear is allowed on the miplevel. */
+ boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS];
+};
+
+struct r300_texture {
+ struct r300_texture_desc desc;
+
+ enum r300_buffer_domain domain;
/* Pipe buffer backing this texture. */
struct r300_winsys_buffer *buffer;
@@ -349,8 +387,9 @@ struct r300_texture {
/* All bits should be filled in. */
struct r300_texture_fb_state fb_state;
- /* Buffer tiling */
- enum r300_buffer_tiling microtile, macrotile;
+ /* This is the level the tiling flags were last set for.
+ * It's used to prevent redundant tiling-flag changes from happening. */
+ unsigned surface_level;
};
struct r300_vertex_element_state {
@@ -391,6 +430,8 @@ struct r300_context {
/* The interface to the windowing system, etc. */
struct r300_winsys_screen *rws;
+ /* The command stream. */
+ struct r300_winsys_cs *cs;
/* Screen. */
struct r300_screen *screen;
/* Draw module. Used mostly for SW TCL. */
@@ -421,6 +462,8 @@ struct r300_context {
/* Various CSO state objects. */
/* Beginning of atom list. */
struct r300_atom atom_list;
+ /* Anti-aliasing (MSAA) state. */
+ struct r300_atom aa_state;
/* Blend state. */
struct r300_atom blend_state;
/* Blend color state. */
@@ -437,6 +480,10 @@ struct r300_context {
struct r300_atom fs_constants;
/* Framebuffer state. */
struct r300_atom fb_state;
+ /* Framebuffer state (pipelined regs). */
+ struct r300_atom fb_state_pipelined;
+ /* HyperZ state (various SC/ZB bits). */
+ struct r300_atom hyperz_state;
/* Occlusion query. */
struct r300_atom query_start;
/* Rasterizer state. */
@@ -459,8 +506,12 @@ struct r300_context {
struct r300_atom ztop_state;
/* PVS flush. */
struct r300_atom pvs_flush;
+ /* VAP invariant state. */
+ struct r300_atom vap_invariant_state;
/* Texture cache invalidate. */
struct r300_atom texture_cache_inval;
+ /* GPU flush. */
+ struct r300_atom gpu_flush;
/* Invariant state. This must be emitted to get the engine started. */
struct r300_atom invariant_state;
@@ -497,10 +548,13 @@ struct r300_context {
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
+ boolean cbzb_clear;
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
+ struct util_mempool pool_transfers;
+
/* Stat counter. */
uint64_t flush_counter;
};
@@ -534,8 +588,8 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
struct pipe_context* r300_create_context(struct pipe_screen* screen,
void *priv);
-boolean r300_check_cs(struct r300_context *r300, unsigned size);
void r300_finish(struct r300_context *r300);
+void r300_flush_cb(void *data);
/* Context initialization. */
struct draw_stage* r300_draw_stage(struct r300_context* r300);
@@ -563,6 +617,13 @@ void r300_translate_index_buffer(struct r300_context *r300,
void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
/* r300_state.c */
+enum r300_fb_state_change {
+ R300_CHANGED_FB_STATE = 0,
+ R300_CHANGED_CBZB_FLAG
+};
+
+void r300_mark_fb_state_dirty(struct r300_context *r300,
+ enum r300_fb_state_change change);
void r300_mark_fs_code_dirty(struct r300_context *r300);
/* r300_debug.c */
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 1db7da642b..c194d6a1b0 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -46,12 +46,12 @@
*/
#define CS_LOCALS(context) \
- struct r300_context* const cs_context_copy = (context); \
- struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \
- CS_DEBUG(int cs_count = 0; (void) cs_count;)
+ struct r300_winsys_cs *cs_copy = (context)->cs; \
+ struct r300_winsys_screen *cs_winsys = (context)->rws; \
+ int cs_count = 0; (void) cs_count; (void) cs_winsys;
#define BEGIN_CS(size) do { \
- assert(r300_check_cs(cs_context_copy, (size))); \
+ assert((size) <= (cs_copy->ndw - cs_copy->cdw)); /* must fit in the dwords left in the CS */ \
CS_DEBUG(cs_count = size;) \
} while (0)
@@ -66,49 +66,39 @@
#define END_CS
#endif
+
/**
* Writing pure DWORDs.
*/
#define OUT_CS(value) do { \
- cs_winsys->write_cs_dword(cs_winsys, (value)); \
+ cs_copy->ptr[cs_copy->cdw++] = (value); \
CS_DEBUG(cs_count--;) \
} while (0)
-#define OUT_CS_32F(value) do { \
- cs_winsys->write_cs_dword(cs_winsys, fui(value)); \
- CS_DEBUG(cs_count--;) \
-} while (0)
+#define OUT_CS_32F(value) \
+ OUT_CS(fui(value))
#define OUT_CS_REG(register, value) do { \
- assert(register); \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \
- cs_winsys->write_cs_dword(cs_winsys, value); \
- CS_DEBUG(cs_count -= 2;) \
+ OUT_CS(CP_PACKET0(register, 0)); \
+ OUT_CS(value); \
} while (0)
/* Note: This expects count to be the number of registers,
* not the actual packet0 count! */
-#define OUT_CS_REG_SEQ(register, count) do { \
- assert(register); \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \
- CS_DEBUG(cs_count--;) \
-} while (0)
+#define OUT_CS_REG_SEQ(register, count) \
+ OUT_CS(CP_PACKET0((register), ((count) - 1)))
-#define OUT_CS_TABLE(values, count) do { \
- cs_winsys->write_cs_table(cs_winsys, values, count); \
- CS_DEBUG(cs_count -= count;) \
-} while (0)
+#define OUT_CS_ONE_REG(register, count) \
+ OUT_CS(CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR)
-#define OUT_CS_ONE_REG(register, count) do { \
- assert(register); \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \
- CS_DEBUG(cs_count--;) \
-} while (0)
+#define OUT_CS_PKT3(op, count) \
+ OUT_CS(CP_PACKET3(op, count))
-#define OUT_CS_PKT3(op, count) do { \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \
- CS_DEBUG(cs_count--;) \
+#define OUT_CS_TABLE(values, count) do { /* copy (count) dwords into the CS */ \
+ memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \
+ cs_copy->cdw += (count); \
+ CS_DEBUG(cs_count -= (count);) \
} while (0)
@@ -116,26 +106,26 @@
* Writing relocations.
*/
-#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
+#define OUT_CS_RELOC(bo, offset, rd, wd) do { \
assert(bo); \
- cs_winsys->write_cs_dword(cs_winsys, offset); \
- cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
- CS_DEBUG(cs_count -= 3;) \
+ OUT_CS(offset); \
+ cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \
+ CS_DEBUG(cs_count -= 2;) \
} while (0)
-#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \
+#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \
assert(bo); \
- OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd, flags); \
+ OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd); \
} while (0)
-#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \
+#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \
assert(tex); \
- OUT_CS_RELOC(tex->buffer, offset, rd, wd, flags); \
+ OUT_CS_RELOC(tex->buffer, offset, rd, wd); \
} while (0)
-#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \
+#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \
assert(bo); \
- cs_winsys->write_cs_reloc(cs_winsys, r300_buffer(bo)->buf, rd, wd, flags); \
+ cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->buf, rd, wd); \
CS_DEBUG(cs_count -= 2;) \
} while (0)
@@ -146,7 +136,8 @@
#define WRITE_CS_TABLE(values, count) do { \
CS_DEBUG(assert(cs_count == 0);) \
- cs_winsys->write_cs_table(cs_winsys, values, count); \
+ memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \
+ cs_copy->cdw += (count); \
} while (0)
#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index a6cd86e392..053a64ea6d 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -29,17 +29,21 @@
static const struct debug_named_value debug_options[] = {
{ "fp", DBG_FP, "Fragment program handling (for debugging)" },
{ "vp", DBG_VP, "Vertex program handling (for debugging)" },
- { "draw", DBG_DRAW, "Draw and emit (for debugging)" },
+ { "draw", DBG_DRAW, "Draw calls (for debugging)" },
+ { "swtcl", DBG_SWTCL, "SWTCL-specific info (for debugging)" },
+ { "rsblock", DBG_RS_BLOCK, "Rasterizer registers (for debugging)" },
+ { "psc", DBG_PSC, "Vertex stream registers (for debugging)" },
{ "tex", DBG_TEX, "Textures (for debugging)" },
{ "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" },
{ "fall", DBG_FALL, "Fallbacks (for debugging)" },
{ "rs", DBG_RS, "Rasterizer (for debugging)" },
{ "fb", DBG_FB, "Framebuffer (for debugging)" },
+ { "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" },
+ { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" },
{ "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" },
{ "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" },
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" },
- { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" },
- { "stats", DBG_STATS, "Gather statistics (for lulz)" },
+ { "stats", DBG_STATS, "Gather statistics" },
/* must be last */
DEBUG_NAMED_VALUE_END
diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h
index d510d80a7b..896aeef395 100644
--- a/src/gallium/drivers/r300/r300_defines.h
+++ b/src/gallium/drivers/r300/r300_defines.h
@@ -36,7 +36,10 @@
enum r300_buffer_tiling {
R300_BUFFER_LINEAR = 0,
R300_BUFFER_TILED,
- R300_BUFFER_SQUARETILED
+ R300_BUFFER_SQUARETILED,
+
+ R300_BUFFER_UNKNOWN,
+ R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN
};
enum r300_buffer_domain { /* bitfield */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index e2c40d823d..36a26a7871 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -170,15 +170,18 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
{
struct r300_fragment_shader *fs = r300_fs(r300);
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
- unsigned count = fs->shader->externals_count * 4;
+ unsigned count = fs->shader->externals_count;
+ unsigned i, j;
CS_LOCALS(r300);
if (count == 0)
return;
BEGIN_CS(size);
- OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count);
- OUT_CS_TABLE(buf->constants, count);
+ OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4);
+ for (i = 0; i < count; i++)
+ for (j = 0; j < 4; j++)
+ OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j]));
END_CS;
}
@@ -190,7 +193,6 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
unsigned count = fs->shader->rc_state_count;
unsigned first = fs->shader->externals_count;
unsigned end = constants->Count;
- uint32_t cdata[4];
unsigned j;
CS_LOCALS(r300);
@@ -203,11 +205,9 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
const float *data =
get_rc_constant_state(r300, &constants->Constants[i]);
- for (j = 0; j < 4; j++)
- cdata[j] = pack_float24(data[j]);
-
OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
- OUT_CS_TABLE(cdata, 4);
+ for (j = 0; j < 4; j++)
+ OUT_CS(pack_float24(data[j]));
}
}
END_CS;
@@ -234,7 +234,7 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
BEGIN_CS(size);
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count);
- OUT_CS_TABLE(buf->constants, count);
+ OUT_CS_TABLE(buf->ptr, count);
END_CS;
}
@@ -267,13 +267,22 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
END_CS;
}
-void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
+void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
{
- struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;
- struct r300_surface* surf;
- unsigned i;
+ struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ uint32_t height = fb->height;
+ uint32_t width = fb->width;
CS_LOCALS(r300);
+ if (r300->cbzb_clear) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ height = surf->cbzb_height;
+ width = surf->cbzb_width;
+ }
+
BEGIN_CS(size);
/* Set up scissors.
@@ -281,27 +290,48 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
if (r300->screen->caps.is_r500) {
OUT_CS(0);
- OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((height - 1) << R300_SCISSORS_Y_SHIFT));
} else {
OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
(1440 << R300_SCISSORS_Y_SHIFT));
- OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |
+ ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ }
+
+ /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
+ OUT_CS_TABLE(gpuflush->cb_flush_clean, 6);
+ END_CS;
+}
+
+void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)
+{
+ struct r300_aa_state *aa = (struct r300_aa_state*)state;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config);
+
+ if (aa->dest) {
+ OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1);
+ OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain);
+
+ OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1);
+ OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain);
}
- /* Flush and free renderbuffer caches. */
- OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
- OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
- R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+ OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl);
+ END_CS;
+}
- /* Wait until the GPU is idle.
- * This fixes random pixels sometimes appearing probably caused
- * by incomplete rendering. */
- OUT_CS_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
+{
+ struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;
+ struct r300_surface* surf;
+ unsigned i;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
/* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not
* what we usually want. */
@@ -317,28 +347,123 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
surf = r300_surface(fb->cbufs[i]);
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
- OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0);
+ OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain);
OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
- OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
-
- OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), surf->format);
- }
- for (; i < 4; i++) {
- OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED);
+ OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain);
}
- /* Set up a zbuffer. */
- if (fb->zsbuf) {
- surf = r300_surface(fb->zsbuf);
+ /* Set up the ZB part of the CBZB clear. */
+ if (r300->cbzb_clear) {
+ surf = r300_surface(fb->cbufs[0]);
+
+ OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain);
+ }
+ /* Set up a zbuffer. */
+ else if (fb->zsbuf) {
+ surf = r300_surface(fb->zsbuf);
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain);
+
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
+ OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain);
+
+ /* HiZ RAM. */
+ if (r300->screen->caps.has_hiz) {
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
+ }
+
+ /* Z Mask RAM. (compressed zbuffer) */
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0);
+ }
+
+ END_CS;
+}
+
+void r300_emit_hyperz_state(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ CS_LOCALS(r300);
+ WRITE_CS_TABLE(state, size);
+}
+
+void r300_emit_hyperz_end(struct r300_context *r300)
+{
+ struct r300_hyperz_state z = /* by-value copy; the atom's own state is not modified */
+ *(struct r300_hyperz_state*)r300->hyperz_state.state;
+
+ z.zb_bw_cntl = 0; /* same three register values that r300_init_states() writes */
+ z.zb_depthclearvalue = 0;
+ z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+
+ r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); /* emits the copy as a raw dword table */
+}
+
+void r300_emit_fb_state_pipelined(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ unsigned i;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+
+ /* Colorbuffer format in the US block.
+ * (must be written after unpipelined regs) */
+ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ OUT_CS(r300_surface(fb->cbufs[i])->format);
+ }
+ for (; i < 4; i++) {
+ OUT_CS(R300_US_OUT_FMT_UNUSED);
+ }
+
+ /* Multisampling. Depends on framebuffer sample count.
+ * These are pipelined regs and as such cannot be moved
+ * to the AA state. */
+ if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
+ unsigned mspos0 = 0x66666666;
+ unsigned mspos1 = 0x6666666;
+
+ if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
+ /* Subsample placement. These may not be optimal. */
+ switch (fb->cbufs[0]->texture->nr_samples) {
+ case 2:
+ mspos0 = 0x33996633;
+ mspos1 = 0x6666663;
+ break;
+ case 3:
+ mspos0 = 0x33936933;
+ mspos1 = 0x6666663;
+ break;
+ case 4:
+ mspos0 = 0x33939933;
+ mspos1 = 0x3966663;
+ break;
+ case 6:
+ mspos0 = 0x22a2aa22;
+ mspos1 = 0x2a65672;
+ break;
+ default:
+ debug_printf("r300: Bad number of multisamples!\n");
+ }
+ }
+
+ OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
+ OUT_CS(mspos0);
+ OUT_CS(mspos1);
}
END_CS;
}
@@ -387,13 +512,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(buf, (query->num_results + 3) * 4,
- 0, query->domain, 0);
+ 0, query->domain);
case 3:
/* pipe 2 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(buf, (query->num_results + 2) * 4,
- 0, query->domain, 0);
+ 0, query->domain);
case 2:
/* pipe 1 only */
/* As mentioned above, accomodate RV380 and older. */
@@ -401,13 +526,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
1 << (caps->high_second_pipe ? 3 : 1));
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(buf, (query->num_results + 1) * 4,
- 0, query->domain, 0);
+ 0, query->domain);
case 1:
/* pipe 0 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(buf, (query->num_results + 0) * 4,
- 0, query->domain, 0);
+ 0, query->domain);
break;
default:
fprintf(stderr, "r300: Implementation error: Chipset reports %d"
@@ -429,7 +554,7 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300,
BEGIN_CS(8);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain, 0);
+ OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -443,10 +568,10 @@ static void rv530_emit_query_end_double_z(struct r300_context *r300,
BEGIN_CS(14);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -480,102 +605,27 @@ void r300_emit_query_end(struct r300_context* r300)
}
}
+void r300_emit_invariant_state(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ CS_LOCALS(r300);
+ WRITE_CS_TABLE(state, size);
+}
+
void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state)
{
struct r300_rs_state* rs = state;
- struct pipe_framebuffer_state* fb = r300->fb_state.state;
- float scale, offset;
- unsigned mspos0, mspos1, aa_config;
CS_LOCALS(r300);
BEGIN_CS(size);
- OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
-
- /* Multisampling. Depends on framebuffer sample count. */
- if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
- if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
- aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
- /* Subsample placement. These may not be optimal. */
- switch (fb->cbufs[0]->texture->nr_samples) {
- case 2:
- aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
- mspos0 = 0x33996633;
- mspos1 = 0x6666663;
- break;
- case 3:
- aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
- mspos0 = 0x33936933;
- mspos1 = 0x6666663;
- break;
- case 4:
- aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
- mspos0 = 0x33939933;
- mspos1 = 0x3966663;
- break;
- case 6:
- aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
- mspos0 = 0x22a2aa22;
- mspos1 = 0x2a65672;
- break;
- default:
- debug_printf("r300: Bad number of multisamples!\n");
- mspos0 = rs->multisample_position_0;
- mspos1 = rs->multisample_position_1;
- break;
- }
-
- OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
- OUT_CS(mspos0);
- OUT_CS(mspos1);
-
- OUT_CS_REG(R300_GB_AA_CONFIG, aa_config);
- } else {
- OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
- OUT_CS(rs->multisample_position_0);
- OUT_CS(rs->multisample_position_1);
-
- OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config);
- }
- }
-
- OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size);
- OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2);
- OUT_CS(rs->point_minmax);
- OUT_CS(rs->line_control);
-
+ OUT_CS_TABLE(rs->cb_main, 25);
if (rs->polygon_offset_enable) {
- scale = rs->depth_scale * 12;
- offset = rs->depth_offset;
-
- switch (r300->zbuffer_bpp) {
- case 16:
- offset *= 4;
- break;
- case 24:
- offset *= 2;
- break;
+ if (r300->zbuffer_bpp == 16) {
+ OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5);
+ } else {
+ OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5);
}
-
- OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
- OUT_CS_32F(scale);
- OUT_CS_32F(offset);
- OUT_CS_32F(scale);
- OUT_CS_32F(offset);
}
-
- OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
- OUT_CS(rs->polygon_offset_enable);
- OUT_CS(rs->cull_mode);
- OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
- OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value);
- OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode);
- OUT_CS_REG(R300_SC_CLIP_RULE, rs->clip_rule);
- OUT_CS_REG(R300_GB_ENABLE, rs->stuffing_enable);
- OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
- OUT_CS_32F(rs->point_texcoord_left);
- OUT_CS_32F(rs->point_texcoord_bottom);
- OUT_CS_32F(rs->point_texcoord_right);
- OUT_CS_32F(rs->point_texcoord_top);
END_CS;
}
@@ -588,11 +638,20 @@ void r300_emit_rs_block_state(struct r300_context* r300,
unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;
CS_LOCALS(r300);
- if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) {
+ if (DBG_ON(r300, DBG_RS_BLOCK)) {
r500_dump_rs_block(rs);
- }
- DBG(r300, DBG_DRAW, "r300: RS emit:\n");
+ fprintf(stderr, "r300: RS emit:\n");
+
+ for (i = 0; i < count; i++)
+ fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]);
+
+ for (i = 0; i < count; i++)
+ fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]);
+
+ fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n",
+ rs->count, rs->inst_count);
+ }
BEGIN_CS(size);
OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
@@ -608,9 +667,6 @@ void r300_emit_rs_block_state(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_RS_IP_0, count);
}
OUT_CS_TABLE(rs->ip, count);
- for (i = 0; i < count; i++) {
- DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]);
- }
OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
OUT_CS(rs->count);
@@ -622,13 +678,6 @@ void r300_emit_rs_block_state(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_RS_INST_0, count);
}
OUT_CS_TABLE(rs->inst, count);
- for (i = 0; i < count; i++) {
- DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]);
- }
-
- DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n",
- rs->count, rs->inst_count);
-
END_CS;
}
@@ -682,7 +731,7 @@ void r300_emit_textures_state(struct r300_context *r300,
OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1);
OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain,
- 0, 0);
+ 0);
}
}
END_CS;
@@ -725,7 +774,7 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed)
for (i = 0; i < aos_count; i++) {
buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer);
- OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0, 0);
+ OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0);
}
END_CS;
}
@@ -734,7 +783,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
{
CS_LOCALS(r300);
- DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
+ DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, "
"vertex size %d\n", r300->vbo,
r300->vertex_info.size);
/* Set the pointer to our vertex buffer. The emitted values are this:
@@ -750,7 +799,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
OUT_CS(r300->vertex_info.size |
(r300->vertex_info.size << 8));
OUT_CS(r300->vbo_offset);
- OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0, 0);
+ OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0);
END_CS;
}
@@ -762,21 +811,25 @@ void r300_emit_vertex_stream_state(struct r300_context* r300,
unsigned i;
CS_LOCALS(r300);
- DBG(r300, DBG_DRAW, "r300: PSC emit:\n");
+ if (DBG_ON(r300, DBG_PSC)) {
+ fprintf(stderr, "r300: PSC emit:\n");
+
+ for (i = 0; i < streams->count; i++) {
+ fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i,
+ streams->vap_prog_stream_cntl[i]);
+ }
+
+ for (i = 0; i < streams->count; i++) {
+ fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i,
+ streams->vap_prog_stream_cntl_ext[i]);
+ }
+ }
BEGIN_CS(size);
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
- for (i = 0; i < streams->count; i++) {
- DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i,
- streams->vap_prog_stream_cntl[i]);
- }
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count);
- for (i = 0; i < streams->count; i++) {
- DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i,
- streams->vap_prog_stream_cntl_ext[i]);
- }
END_CS;
}
@@ -789,6 +842,13 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state)
END_CS;
}
+void r300_emit_vap_invariant_state(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ CS_LOCALS(r300);
+ WRITE_CS_TABLE(state, size);
+}
+
void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
{
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state;
@@ -813,6 +873,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
CS_LOCALS(r300);
BEGIN_CS(size);
+
/* R300_VAP_PVS_CODE_CNTL_0
* R300_VAP_PVS_CONST_CNTL
* R300_VAP_PVS_CODE_CNTL_1
@@ -865,7 +926,7 @@ void r300_emit_vs_constants(struct r300_context* r300,
(r300->screen->caps.is_r500 ?
R500_PVS_CONST_START : R300_PVS_CONST_START));
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
- OUT_CS_TABLE(buf->constants, count * 4);
+ OUT_CS_TABLE(buf->ptr, count * 4);
END_CS;
}
@@ -924,27 +985,22 @@ void r300_emit_buffer_validate(struct r300_context *r300,
}
/* Clean out BOs. */
- r300->rws->reset_bos(r300->rws);
+ r300->rws->cs_reset_buffers(r300->cs);
validate:
/* Color buffers... */
for (i = 0; i < fb->nr_cbufs; i++) {
tex = r300_texture(fb->cbufs[i]->texture);
assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- if (!r300_add_texture(r300->rws, tex, 0, tex->domain)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0,
+ r300_surface(fb->cbufs[i])->domain);
}
/* ...depth buffer... */
if (fb->zsbuf) {
tex = r300_texture(fb->zsbuf->texture);
assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- if (!r300_add_texture(r300->rws, tex,
- 0, tex->domain)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0,
+ r300_surface(fb->zsbuf)->domain);
}
/* ...textures... */
for (i = 0; i < texstate->count; i++) {
@@ -953,48 +1009,31 @@ validate:
}
tex = r300_texture(texstate->sampler_views[i]->base.texture);
- if (!r300_add_texture(r300->rws, tex, tex->domain, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, tex->buffer, tex->domain, 0);
}
/* ...occlusion query buffer... */
- if (r300->query_current) {
- if (!r300->rws->add_buffer(r300->rws, r300->query_current->buffer,
- 0, r300->query_current->domain)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- }
+ if (r300->query_current)
+ r300->rws->cs_add_buffer(r300->cs, r300->query_current->buffer,
+ 0, r300->query_current->domain);
/* ...vertex buffer for SWTCL path... */
- if (r300->vbo) {
- if (!r300_add_buffer(r300->rws, r300->vbo,
- r300_buffer(r300->vbo)->domain, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- }
+ if (r300->vbo)
+ r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->buf,
+ r300_buffer(r300->vbo)->domain, 0);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers) {
for (i = 0; i < r300->velems->count; i++) {
pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
- if (!r300_add_buffer(r300->rws, pbuf,
- r300_buffer(pbuf)->domain, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->buf,
+ r300_buffer(pbuf)->domain, 0);
}
}
/* ...and index buffer for HWTCL path. */
- if (index_buffer) {
- if (!r300_add_buffer(r300->rws, index_buffer,
- r300_buffer(index_buffer)->domain, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- }
- if (!r300->rws->validate(r300->rws)) {
+ if (index_buffer)
+ r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->buf,
+ r300_buffer(index_buffer)->domain, 0);
+
+ if (!r300->rws->cs_validate(r300->cs)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
/* Well, hell. */
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 36a29894d0..5d05039669 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300,
void r300_emit_dsa_state(struct r300_context* r300,
unsigned size, void* state);
+void r300_emit_hyperz_state(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_hyperz_end(struct r300_context *r300);
+
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
@@ -59,6 +64,13 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state);
+void r300_emit_fb_state_pipelined(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
+
+void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
+
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
void r300_emit_query_end(struct r300_context* r300);
@@ -76,6 +88,9 @@ void r300_emit_textures_state(struct r300_context *r300,
void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed);
+void r300_emit_vap_invariant_state(struct r300_context *r300,
+ unsigned size, void *state);
+
void r300_emit_vertex_stream_state(struct r300_context* r300,
unsigned size, void* state);
@@ -94,6 +109,9 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state);
void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state);
+void r300_emit_invariant_state(struct r300_context *r300,
+ unsigned size, void *state);
+
unsigned r300_get_num_dirty_dwords(struct r300_context *r300);
/* Emit all dirty state. */
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index ba840bfff8..ae7b5759e7 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -25,6 +25,7 @@
#include "draw/draw_private.h"
#include "util/u_simple_list.h"
+#include "util/u_upload_mgr.h"
#include "r300_context.h"
#include "r300_cs.h"
@@ -39,6 +40,9 @@ static void r300_flush(struct pipe_context* pipe,
struct r300_atom *atom;
struct r300_fence **rfence = (struct r300_fence**)fence;
+ u_upload_flush(r300->upload_vb);
+ u_upload_flush(r300->upload_ib);
+
/* We probably need to flush Draw, but we may have been called from
* within Draw. This feels kludgy, but it might be the best thing.
*
@@ -48,12 +52,11 @@ static void r300_flush(struct pipe_context* pipe,
}
if (r300->dirty_hw) {
+ r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
- if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
- r300->flush_counter++;
- }
- r300->rws->flush_cs(r300->rws);
+ r300->flush_counter++;
+ r300->rws->cs_flush(r300->cs);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index e585394304..db5269912e 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -173,7 +173,7 @@ static void get_external_state(
t = (struct r300_texture*)texstate->sampler_views[i]->base.texture;
/* XXX this should probably take into account STR, not just S. */
- if (t->uses_pitch) {
+ if (t->desc.is_npot) {
switch (s->state.wrap_s) {
case PIPE_TEX_WRAP_REPEAT:
state->unit[i].wrap_mode = RC_WRAP_REPEAT;
@@ -246,13 +246,14 @@ static void r300_emit_fs_code_to_buffer(
if (r300->screen->caps.is_r500) {
struct r500_fragment_program_code *code = &generic_code->code.r500;
- shader->cb_code_size = 17 +
+ shader->cb_code_size = 19 +
((code->inst_end + 1) * 6) +
imm_count * 7;
NEW_CB(shader->cb_code, shader->cb_code_size);
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
+ OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
OUT_CB_REG(R500_US_CODE_RANGE,
R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
OUT_CB_REG(R500_US_CODE_OFFSET, 0);
@@ -288,11 +289,16 @@ static void r300_emit_fs_code_to_buffer(
struct r300_fragment_program_code *code = &generic_code->code.r300;
shader->cb_code_size = 19 +
+ (r300->screen->caps.is_r400 ? 2 : 0) +
code->alu.length * 4 +
(code->tex.length ? (1 + code->tex.length) : 0) +
imm_count * 5;
NEW_CB(shader->cb_code, shader->cb_code_size);
+
+ if (r300->screen->caps.is_r400)
+ OUT_CB_REG(R400_US_CODE_BANK, 0);
+
OUT_CB_REG(R300_US_CONFIG, code->config);
OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index e5c7658952..e952895601 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -21,13 +21,28 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_hyperz.h"
#include "r300_context.h"
+#include "r300_hyperz.h"
#include "r300_reg.h"
#include "r300_fs.h"
/*****************************************************************************/
+/* The HyperZ setup */
+/*****************************************************************************/
+
+static void r300_update_hyperz(struct r300_context* r300)
+{
+ struct r300_hyperz_state *z =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+
+ z->zb_bw_cntl = 0;
+ z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+
+ if (r300->cbzb_clear)
+ z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
+}
+
+/*****************************************************************************/
/* The ZTOP state */
/*****************************************************************************/
@@ -119,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300)
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
+ if (r300->hyperz_state.dirty) {
+ r300_update_hyperz(r300);
+ }
}
diff --git a/src/gallium/drivers/r300/r300_public.h b/src/gallium/drivers/r300/r300_public.h
new file mode 100644
index 0000000000..8e7a963c55
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_public.h
@@ -0,0 +1,9 @@
+
+#ifndef R300_PUBLIC_H
+#define R300_PUBLIC_H
+
+struct r300_winsys_screen;
+
+struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws);
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 10cb468dfc..5b0121ce9e 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -37,7 +37,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
struct r300_screen *r300screen = r300->screen;
struct r300_query *q;
- assert(query_type == PIPE_QUERY_OCCLUSION_COUNTER);
+ if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) {
+ return NULL;
+ }
q = CALLOC_STRUCT(r300_query);
if (!q)
@@ -55,7 +57,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
insert_at_tail(&r300->query_list, q);
/* Open up the occlusion query buffer. */
- q->buffer = r300->rws->buffer_create(r300->rws, 4096, 0, q->domain, q->buffer_size);
+ q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
+ PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM,
+ q->domain);
return (struct pipe_query*)q;
}
@@ -132,7 +136,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0);
- map = r300->rws->buffer_map(r300->rws, q->buffer, flags);
+ map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags);
if (!map)
return FALSE;
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index c783998c78..2acc1a903e 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2617,7 +2617,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_WR_COMP_DISABLE (0 << 4)
# define R300_WR_COMP_ENABLE (1 << 4)
# define R300_ZB_CB_CLEAR_RMW (0 << 5)
-# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5)
+# define R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY (1 << 5)
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6)
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6)
@@ -2673,6 +2673,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Z Buffer Clear Value */
#define R300_ZB_DEPTHCLEARVALUE 0x4f28
+/* Z Mask RAM is a Z compression buffer.
+ * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks,
+ * that is 2 bits for each block.
+ * On chips with 2 Z pipes, every other dword maps to a different pipe.
+ */
+
+/* The dword offset into Z mask RAM (bits 18:4) */
+#define R300_ZB_ZMASK_OFFSET 0x4f30
+
+/* Z Mask Pitch. */
+#define R300_ZB_ZMASK_PITCH 0x4f34
+
+/* Access to Z Mask RAM in a manner similar to HiZ RAM.
+ * The indices are autoincrementing. */
+#define R300_ZB_ZMASK_WRINDEX 0x4f38
+#define R300_ZB_ZMASK_DWORD 0x4f3c
+#define R300_ZB_ZMASK_RDINDEX 0x4f40
+
/* Hierarchical Z Memory Offset */
#define R300_ZB_HIZ_OFFSET 0x4f44
@@ -3264,8 +3282,8 @@ enum {
# define R500_FC_B_OP0_NONE (0 << 24)
# define R500_FC_B_OP0_DECR (1 << 24)
# define R500_FC_B_OP0_INCR (2 << 24)
-# define R500_FC_B_OP1_DECR (0 << 26)
-# define R500_FC_B_OP1_NONE (1 << 26)
+# define R500_FC_B_OP1_NONE (0 << 26)
+# define R500_FC_B_OP1_DECR (1 << 26)
# define R500_FC_B_OP1_INCR (2 << 26)
# define R500_FC_IGNORE_UNCOVERED (1 << 28)
#define R500_US_FC_INT_CONST_0 0x4c00
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 4afd124c0e..bae02135da 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -35,7 +35,6 @@
#include "util/u_prim.h"
#include "r300_cs.h"
-#include "r300_cb.h"
#include "r300_context.h"
#include "r300_screen_buffer.h"
#include "r300_emit.h"
@@ -224,11 +223,12 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
/* Emitted in flush. */
end_dwords += 26; /* emit_query_end */
+ end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */
cs_dwords += end_dwords;
/* Reserve requested CS space. */
- if (!r300_check_cs(r300, cs_dwords)) {
+ if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) {
r300->context.flush(&r300->context, 0, NULL);
flushed = TRUE;
}
@@ -278,7 +278,6 @@ static boolean immd_is_good_idea(struct r300_context *r300,
/* We shouldn't map buffers referenced by CS, busy buffers,
* and ones placed in VRAM. */
- /* XXX Check for VRAM buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
vbi = velem->vertex_buffer_index;
@@ -286,6 +285,10 @@ static boolean immd_is_good_idea(struct r300_context *r300,
if (!checked[vbi]) {
vbuf = &r300->vertex_buffer[vbi];
+ if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) {
+ return FALSE;
+ }
+
if (r300_buffer_is_referenced(&r300->context,
vbuf->buffer,
R300_REF_CS | R300_REF_HW)) {
@@ -299,8 +302,7 @@ static boolean immd_is_good_idea(struct r300_context *r300,
}
/*****************************************************************************
- * The emission of draw packets for r500. Older GPUs may use these functions *
- * after resolving fallback issues (e.g. stencil ref two-sided). *
+ * The HWTCL draw functions. *
****************************************************************************/
static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
@@ -316,74 +318,70 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
/* Size of the vertex, in dwords. */
unsigned vertex_size = r300->velems->vertex_size_dwords;
- /* Offsets of the attribute, in dwords, from the start of the vertex. */
- unsigned offset[PIPE_MAX_ATTRIBS];
-
/* Size of the vertex element, in dwords. */
unsigned size[PIPE_MAX_ATTRIBS];
/* Stride to the same attrib in the next vertex in the vertex buffer,
* in dwords. */
- unsigned stride[PIPE_MAX_ATTRIBS] = {0};
+ unsigned stride[PIPE_MAX_ATTRIBS];
/* Mapped vertex buffers. */
- uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
- struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL};
+ uint32_t* map[PIPE_MAX_ATTRIBS];
+ uint32_t* mapelem[PIPE_MAX_ATTRIBS];
+ struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0};
- CB_LOCALS;
+ CS_LOCALS(r300);
/* Calculate the vertex size, offsets, strides etc. and map the buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
- offset[i] = velem->src_offset / 4;
size[i] = r300->velems->hw_format_size[i] / 4;
vbi = velem->vertex_buffer_index;
+ vbuf = &r300->vertex_buffer[vbi];
+ stride[i] = vbuf->stride / 4;
/* Map the buffer. */
- if (!map[vbi]) {
- vbuf = &r300->vertex_buffer[vbi];
+ if (!transfer[vbi]) {
map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
vbuf->buffer,
PIPE_TRANSFER_READ,
&transfer[vbi]);
- stride[vbi] = vbuf->stride / 4;
- map[vbi] += vbuf->buffer_offset / 4 + stride[vbi] * start;
+ map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start;
}
+ mapelem[i] = map[vbi] + (velem->src_offset / 4);
}
dwords = 9 + count * vertex_size;
r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
- BEGIN_CS_AS_CB(r300, dwords);
- OUT_CB_REG(R300_GA_COLOR_CONTROL,
+ BEGIN_CS(dwords);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
- OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size);
- OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
- OUT_CB(count - 1);
- OUT_CB(0);
- OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
- OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
+ OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(count - 1);
+ OUT_CS(0);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
r300_translate_primitive(mode));
/* Emit vertices. */
for (v = 0; v < count; v++) {
for (i = 0; i < vertex_element_count; i++) {
- vbi = r300->velems->velem[i].vertex_buffer_index;
-
- OUT_CB_TABLE(&map[vbi][offset[i] + stride[vbi] * v], size[i]);
+ OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
}
}
- END_CB;
+ END_CS;
/* Unmap buffers. */
for (i = 0; i < vertex_element_count; i++) {
vbi = r300->velems->velem[i].vertex_buffer_index;
- if (map[vbi]) {
+ if (transfer[vbi]) {
vbuf = &r300->vertex_buffer[vbi];
pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
- map[vbi] = NULL;
+ transfer[vbi] = NULL;
}
}
}
@@ -475,7 +473,7 @@ static void r300_emit_draw_elements(struct r300_context *r300,
(0 << R300_INDX_BUFFER_SKIP_SHIFT));
OUT_CS(offset_dwords << 2);
OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
- r300_buffer(indexBuffer)->domain, 0, 0);
+ r300_buffer(indexBuffer)->domain, 0);
END_CS;
}
@@ -499,6 +497,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
unsigned short_count;
int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
boolean translate = FALSE;
+ unsigned new_offset;
if (r300->skip_rendering) {
return;
@@ -508,6 +507,12 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
return;
}
+ /* Index buffer range checking. */
+ if ((start + count) * indexSize > indexBuffer->width0) {
+ fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n");
+ return;
+ }
+
/* Set up fallback for incompatible vertex layout if needed. */
if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
r300_begin_vertex_translate(r300);
@@ -522,18 +527,17 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
&start, count);
r300_update_derived_state(r300);
- r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count);
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset);
+ start = new_offset;
/* 15 dwords for emit_draw_elements */
r300_prepare_for_rendering(r300,
PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
indexBuffer, 15, buffer_offset, indexBias, NULL);
- u_upload_flush(r300->upload_vb);
- u_upload_flush(r300->upload_ib);
if (alt_num_verts || count <= 65535) {
r300_emit_draw_elements(r300, indexBuffer, indexSize,
- minIndex, maxIndex, mode, start, count);
+ minIndex, maxIndex, mode, start, count);
} else {
do {
short_count = MIN2(count, 65534);
@@ -865,13 +869,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
unsigned dwords = 6;
CS_LOCALS(r300);
-
(void) i; (void) ptr;
r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
NULL, dwords, 0, 0, NULL);
- DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
+ DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count);
/* Uncomment to dump all VBOs rendered through this interface.
* Slow and noisy!
@@ -914,6 +917,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
unsigned free_dwords;
CS_LOCALS(r300);
+ DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count);
/* Reserve at least 256 dwords.
*
@@ -924,7 +928,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
NULL, 256, 0, 0, &end_cs_dwords);
while (count) {
- free_dwords = r300->rws->get_cs_free_dwords(r300->rws);
+ free_dwords = r300->cs->ndw - r300->cs->cdw;
short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2);
@@ -1015,6 +1019,88 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300)
* End of SW TCL functions *
***************************************************************************/
+/* If we used a quad to draw a rectangle, the pixels on the main diagonal
+ * would be computed and stored twice, which makes the clear/copy codepaths
+ * somewhat inefficient. Instead we use a rectangular point sprite. */
+static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4])
+{
+ struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
+ unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
+ unsigned width = x2 - x1;
+ unsigned height = y2 - y1;
+ unsigned vertex_size =
+ type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
+ unsigned dwords = 13 + vertex_size +
+ (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
+ const float zeros[4] = {0, 0, 0, 0};
+ CS_LOCALS(r300);
+
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
+ r300->sprite_coord_enable = 1;
+
+ r300_update_derived_state(r300);
+
+ /* Mark some states we don't care about as non-dirty. */
+ r300->clip_state.dirty = FALSE;
+ r300->viewport_state.dirty = FALSE;
+
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
+
+ DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");
+
+ BEGIN_CS(dwords);
+ /* Set up GA. */
+ OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));
+
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
+ /* Set up the GA to generate texcoords. */
+ OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
+ (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
+ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
+ OUT_CS_32F(attrib[0]);
+ OUT_CS_32F(attrib[3]);
+ OUT_CS_32F(attrib[2]);
+ OUT_CS_32F(attrib[1]);
+ }
+
+ /* Set up VAP controls. */
+ OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+ OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+ OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(1);
+ OUT_CS(0);
+
+ /* Draw. */
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
+ R300_VAP_VF_CNTL__PRIM_POINTS);
+
+ OUT_CS_32F(x1 + width * 0.5f);
+ OUT_CS_32F(y1 + height * 0.5f);
+ OUT_CS_32F(depth);
+ OUT_CS_32F(1);
+
+ if (vertex_size == 8) {
+ if (!attrib)
+ attrib = zeros;
+ OUT_CS_TABLE(attrib, 4);
+ }
+ END_CS;
+
+ /* Restore the state. */
+ r300->clip_state.dirty = TRUE;
+ r300->rs_state.dirty = TRUE;
+ r300->viewport_state.dirty = TRUE;
+
+ r300->sprite_coord_enable = last_sprite_coord_enable;
+}
+
static void r300_resource_resolve(struct pipe_context* pipe,
struct pipe_resource* dest,
struct pipe_subresource subdest,
@@ -1022,33 +1108,35 @@ static void r300_resource_resolve(struct pipe_context* pipe,
struct pipe_subresource subsrc)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_surface* destsurf = r300_surface(
- dest->screen->get_tex_surface(dest->screen,
- dest, subdest.face, subdest.level, 0, 0));
+ struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen,
src, subsrc.face, subsrc.level, 0, 0);
float color[] = {0, 0, 0, 0};
- CS_LOCALS(r300);
DBG(r300, DBG_DRAW, "r300: Resolving resource...\n");
- OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1);
- OUT_CS_RELOC(destsurf->buffer, destsurf->offset, 0, destsurf->domain, 0);
-
- OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1);
- OUT_CS_RELOC(destsurf->buffer, destsurf->pitch, 0, destsurf->domain, 0);
+ /* Enable AA resolve. */
+ aa->dest = r300_surface(
+ dest->screen->get_tex_surface(dest->screen, dest, subdest.face,
+ subdest.level, 0, 0));
- OUT_CS_REG(R300_RB3D_AARESOLVE_CTL,
+ aa->aaresolve_ctl =
R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
- R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE);
+ R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
+ r300->aa_state.size = 12;
+ r300->aa_state.dirty = TRUE;
+ /* Resolve the surface. */
r300->context.clear_render_target(pipe,
srcsurf, color, 0, 0, src->width0, src->height0);
- OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x0);
+ /* Disable AA resolve. */
+ aa->aaresolve_ctl = 0;
+ r300->aa_state.size = 4;
+ r300->aa_state.dirty = TRUE;
pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
- pipe_surface_reference((struct pipe_surface**)&destsurf, NULL);
+ pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL);
}
void r300_init_render_functions(struct r300_context *r300)
@@ -1066,6 +1154,7 @@ void r300_init_render_functions(struct r300_context *r300)
}
r300->context.resource_resolve = r300_resource_resolve;
+ r300->blitter->draw_rectangle = r300_blitter_draw_rectangle;
/* Plug in the two-sided stencil reference value fallback if needed. */
if (!r300->screen->caps.is_r500)
diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c
index d509ded3ec..9a6b4e12ff 100644
--- a/src/gallium/drivers/r300/r300_render_stencilref.c
+++ b/src/gallium/drivers/r300/r300_render_stencilref.c
@@ -64,12 +64,12 @@ static void r300_stencilref_begin(struct r300_context *r300)
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
/* Save state. */
- sr->rs_cull_mode = rs->cull_mode;
+ sr->rs_cull_mode = rs->cb_main[rs->cull_mode_index];
sr->zb_stencilrefmask = dsa->stencil_ref_mask;
sr->ref_value_front = r300->stencil_ref.ref_value[0];
/* We *cull* pixels, therefore no need to mask out the bits. */
- rs->cull_mode |= R300_CULL_BACK;
+ rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK;
r300->rs_state.dirty = TRUE;
}
@@ -81,7 +81,7 @@ static void r300_stencilref_switch_side(struct r300_context *r300)
struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
- rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT;
+ rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode | R300_CULL_FRONT;
dsa->stencil_ref_mask = dsa->stencil_ref_bf;
r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1];
@@ -97,7 +97,7 @@ static void r300_stencilref_end(struct r300_context *r300)
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
/* Restore state. */
- rs->cull_mode = sr->rs_cull_mode;
+ rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode;
dsa->stencil_ref_mask = sr->zb_stencilrefmask;
r300->stencil_ref.ref_value[0] = sr->ref_value_front;
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 8f7c96b829..676430f5fe 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -30,6 +30,7 @@
#include "r300_screen_buffer.h"
#include "r300_state_inlines.h"
#include "r300_winsys.h"
+#include "r300_public.h"
/* Return the identifier behind whom the brave coders responsible for this
* amalgamation of code, sweat, and duct tape, routinely obscure their names.
@@ -114,6 +115,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_DEPTH_CLAMP:
return 1;
/* Unsupported features (boolean caps). */
@@ -206,6 +208,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 1; /* XXX guessed */
case PIPE_CAP_MAX_VS_PREDS:
return is_r500 ? 4 : 0; /* XXX guessed. */
+ case PIPE_CAP_GEOMETRY_SHADER4:
+ return 0;
default:
fprintf(stderr, "r300: Implementation error: Bad param %d\n",
@@ -253,9 +257,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
uint32_t retval = 0;
boolean is_r500 = r300_screen(screen)->caps.is_r500;
boolean is_r400 = r300_screen(screen)->caps.is_r400;
- boolean is_rv350 = r300_screen(screen)->caps.is_rv350;
- boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM ||
- format == PIPE_FORMAT_S8_USCALED_Z24_UNORM;
boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
format == PIPE_FORMAT_R10G10B10X2_SNORM ||
format == PIPE_FORMAT_B10G10R10A2_UNORM ||
@@ -269,12 +270,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
format == PIPE_FORMAT_R16G16B16_FLOAT ||
format == PIPE_FORMAT_R16G16B16A16_FLOAT;
- if (target >= PIPE_MAX_TEXTURE_TYPES) {
- fprintf(stderr, "r300: Implementation error: Received bogus texture "
- "target %d in %s\n", target, __FUNCTION__);
- return FALSE;
- }
-
+ /* Check multisampling support. */
switch (sample_count) {
case 0:
case 1:
@@ -295,8 +291,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
/* Check sampler format support. */
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- /* Z24 cannot be sampled from on non-r5xx. */
- (is_r500 || !is_z24) &&
/* ATI1N is r5xx-only. */
(is_r500 || !is_ati1n) &&
/* ATI2N is supported on r4xx-r5xx. */
@@ -329,7 +323,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
/* Check vertex buffer format support. */
if (usage & PIPE_BIND_VERTEX_BUFFER &&
/* Half float is supported on >= RV350. */
- (is_rv350 || !is_half_float) &&
+ (is_r400 || is_r500 || !is_half_float) &&
r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
@@ -348,6 +342,8 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
struct r300_screen* r300screen = r300_screen(pscreen);
struct r300_winsys_screen *rws = r300_winsys_screen(pscreen);
+ util_mempool_destroy(&r300screen->pool_buffers);
+
if (rws)
rws->destroy(rws);
@@ -387,7 +383,7 @@ static int r300_fence_finish(struct pipe_screen *screen,
return 0; /* 0 == success */
}
-struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
+struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
{
struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen);
@@ -403,6 +399,10 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
r300_init_debug(r300screen);
r300_parse_chipset(&r300screen->caps);
+ util_mempool_create(&r300screen->pool_buffers,
+ sizeof(struct r300_buffer), 64,
+ UTIL_MEMPOOL_SINGLETHREADED);
+
r300screen->rws = rws;
r300screen->screen.winsys = (struct pipe_winsys*)rws;
r300screen->screen.destroy = r300_destroy_screen;
@@ -423,9 +423,3 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
return &r300screen->screen;
}
-
-struct r300_winsys_screen *
-r300_winsys_screen(struct pipe_screen *screen)
-{
- return r300_screen(screen)->rws;
-}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 29cd5dbe26..18745b83a0 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -28,8 +28,12 @@
#include "r300_chipset.h"
+#include "util/u_mempool.h"
+
#include <stdio.h>
+struct r300_winsys_screen;
+
struct r300_screen {
/* Parent class */
struct pipe_screen screen;
@@ -39,16 +43,28 @@ struct r300_screen {
/* Chipset capabilities */
struct r300_capabilities caps;
+ /* Memory pools. */
+ struct util_mempool pool_buffers;
+
/** Combination of DBG_xxx flags */
unsigned debug;
+
+ /* The number of created contexts to know whether we have multiple
+ * contexts or not. */
+ int num_contexts;
};
-/* Convenience cast wrapper. */
+/* Convenience cast wrappers. */
static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
return (struct r300_screen*)screen;
}
+static INLINE struct r300_winsys_screen *
+r300_winsys_screen(struct pipe_screen *screen) {
+ return r300_screen(screen)->rws;
+}
+
/* Debug functionality. */
/**
@@ -61,17 +77,20 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
* those changes.
*/
/*@{*/
-#define DBG_HELP (1 << 0)
+
/* Logging. */
+#define DBG_PSC (1 << 0)
#define DBG_FP (1 << 1)
#define DBG_VP (1 << 2)
-/* The bit (1 << 3) is unused. */
+#define DBG_SWTCL (1 << 3)
#define DBG_DRAW (1 << 4)
#define DBG_TEX (1 << 5)
#define DBG_TEXALLOC (1 << 6)
#define DBG_RS (1 << 7)
#define DBG_FALL (1 << 8)
#define DBG_FB (1 << 9)
+#define DBG_RS_BLOCK (1 << 10)
+#define DBG_CBZB (1 << 11)
/* Features. */
#define DBG_ANISOHQ (1 << 16)
#define DBG_NO_TILING (1 << 17)
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 7959e6a2f9..37a080ba48 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -43,7 +43,7 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context,
if (r300_buffer_is_user_buffer(buf))
return PIPE_UNREFERENCED;
- if (r300->rws->is_buffer_referenced(r300->rws, rbuf->buf, domain))
+ if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, domain))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
return PIPE_UNREFERENCED;
@@ -62,7 +62,8 @@ int r300_upload_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned index_size,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned *out_offset)
{
struct pipe_resource *upload_buffer = NULL;
unsigned index_offset = start * index_size;
@@ -79,7 +80,10 @@ int r300_upload_index_buffer(struct r300_context *r300,
goto done;
}
*index_buffer = upload_buffer;
- }
+ *out_offset = index_offset / index_size;
+ } else
+ *out_offset = start;
+
done:
// if (upload_buffer)
// pipe_resource_reference(&upload_buffer, NULL);
@@ -119,31 +123,59 @@ int r300_upload_user_buffers(struct r300_context *r300)
return ret;
}
-static void r300_winsys_buffer_destroy(struct r300_screen *r300screen,
- struct r300_buffer *rbuf)
+static void r300_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_resource *buf)
{
+ struct r300_screen *r300screen = r300_screen(screen);
+ struct r300_buffer *rbuf = r300_buffer(buf);
struct r300_winsys_screen *rws = r300screen->rws;
- if (rbuf->buf) {
- rws->buffer_reference(rws, &rbuf->buf, NULL);
- rbuf->buf = NULL;
- }
+ if (rbuf->constant_buffer)
+ FREE(rbuf->constant_buffer);
+
+ if (rbuf->buf)
+ rws->buffer_reference(rws, &rbuf->buf, NULL);
+
+ util_mempool_free(&r300screen->pool_buffers, rbuf);
}
-static void r300_buffer_destroy(struct pipe_screen *screen,
- struct pipe_resource *buf)
+static struct pipe_transfer*
+r300_default_get_transfer(struct pipe_context *context,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box)
{
- struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf = r300_buffer(buf);
+ struct r300_context *r300 = r300_context(context);
+ struct pipe_transfer *transfer =
+ util_mempool_malloc(&r300->pool_transfers);
+
+ transfer->resource = resource;
+ transfer->sr = sr;
+ transfer->usage = usage;
+ transfer->box = *box;
+ transfer->stride = 0;
+ transfer->slice_stride = 0;
+ transfer->data = NULL;
+
+ /* Note strides are zero, this is ok for buffers, but not for
+ * textures 2d & higher at least.
+ */
+ return transfer;
+}
- r300_winsys_buffer_destroy(r300screen, rbuf);
- FREE(rbuf);
+static void r300_default_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ util_mempool_free(&r300->pool_transfers, transfer);
}
static void *
r300_buffer_transfer_map( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
+ struct r300_context *r300 = r300_context(pipe);
struct r300_screen *r300screen = r300_screen(pipe->screen);
struct r300_winsys_screen *rws = r300screen->rws;
struct r300_buffer *rbuf = r300_buffer(transfer->resource);
@@ -153,10 +185,8 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
if (rbuf->user_buffer)
return (uint8_t *) rbuf->user_buffer + transfer->box.x;
-
- if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) {
- goto just_map;
- }
+ if (rbuf->constant_buffer)
+ return (uint8_t *) rbuf->constant_buffer + transfer->box.x;
/* check if the mapping is to a range we already flushed */
if (transfer->usage & PIPE_TRANSFER_DISCARD) {
@@ -170,16 +200,18 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
rws->buffer_reference(rws, &rbuf->buf, NULL);
rbuf->num_ranges = 0;
- rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, 16,
- rbuf->b.b.bind,
- rbuf->domain,
- rbuf->b.b.width0);
+ rbuf->buf =
+ r300screen->rws->buffer_create(r300screen->rws,
+ rbuf->b.b.width0, 16,
+ rbuf->b.b.bind,
+ rbuf->b.b.usage,
+ rbuf->domain);
break;
}
}
}
-just_map:
- map = rws->buffer_map(rws, rbuf->buf, transfer->usage);
+
+ map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage);
if (map == NULL)
return NULL;
@@ -204,9 +236,8 @@ static void r300_buffer_transfer_flush_region( struct pipe_context *pipe,
if (rbuf->user_buffer)
return;
-
- if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER)
- return;
+ if (rbuf->constant_buffer)
+ return;
/* mark the range as used */
for(i = 0; i < rbuf->num_ranges; ++i) {
@@ -237,14 +268,14 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
struct u_resource_vtbl r300_buffer_vtbl =
{
u_default_resource_get_handle, /* get_handle */
- r300_buffer_destroy, /* resource_destroy */
- r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
- r300_buffer_transfer_map, /* transfer_map */
+ r300_buffer_destroy, /* resource_destroy */
+ r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
+ r300_default_get_transfer, /* get_transfer */
+ r300_default_transfer_destroy, /* transfer_destroy */
+ r300_buffer_transfer_map, /* transfer_map */
r300_buffer_transfer_flush_region, /* transfer_flush_region */
- r300_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ r300_buffer_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
};
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
@@ -254,9 +285,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
struct r300_buffer *rbuf;
unsigned alignment = 16;
- rbuf = CALLOC_STRUCT(r300_buffer);
- if (!rbuf)
- goto error1;
+ rbuf = util_mempool_malloc(&r300screen->pool_buffers);
rbuf->magic = R300_BUFFER_MAGIC;
@@ -265,21 +294,29 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
pipe_reference_init(&rbuf->b.b.reference, 1);
rbuf->b.b.screen = screen;
rbuf->domain = R300_DOMAIN_GTT;
+ rbuf->num_ranges = 0;
+ rbuf->buf = NULL;
+ rbuf->constant_buffer = NULL;
+ rbuf->user_buffer = NULL;
+
+ /* Alloc constant buffers in RAM. */
+ if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) {
+ rbuf->constant_buffer = MALLOC(templ->width0);
+ return &rbuf->b.b;
+ }
- rbuf->buf = r300screen->rws->buffer_create(r300screen->rws,
- alignment,
- rbuf->b.b.bind,
- rbuf->domain,
- rbuf->b.b.width0);
+ rbuf->buf =
+ r300screen->rws->buffer_create(r300screen->rws,
+ rbuf->b.b.width0, alignment,
+ rbuf->b.b.bind, rbuf->b.b.usage,
+ rbuf->domain);
- if (!rbuf->buf)
- goto error2;
+ if (!rbuf->buf) {
+ util_mempool_free(&r300screen->pool_buffers, rbuf);
+ return NULL;
+ }
return &rbuf->b.b;
-error2:
- FREE(rbuf);
-error1:
- return NULL;
}
struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
@@ -287,28 +324,28 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
unsigned bytes,
unsigned bind)
{
+ struct r300_screen *r300screen = r300_screen(screen);
struct r300_buffer *rbuf;
- rbuf = CALLOC_STRUCT(r300_buffer);
- if (!rbuf)
- goto no_rbuf;
+ rbuf = util_mempool_malloc(&r300screen->pool_buffers);
rbuf->magic = R300_BUFFER_MAGIC;
pipe_reference_init(&rbuf->b.b.reference, 1);
rbuf->b.vtbl = &r300_buffer_vtbl;
rbuf->b.b.screen = screen;
+ rbuf->b.b.target = PIPE_BUFFER;
rbuf->b.b.format = PIPE_FORMAT_R8_UNORM;
rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE;
rbuf->b.b.bind = bind;
rbuf->b.b.width0 = bytes;
rbuf->b.b.height0 = 1;
rbuf->b.b.depth0 = 1;
+ rbuf->b.b.flags = 0;
rbuf->domain = R300_DOMAIN_GTT;
-
+ rbuf->num_ranges = 0;
+ rbuf->buf = NULL;
+ rbuf->constant_buffer = NULL;
rbuf->user_buffer = ptr;
return &rbuf->b.b;
-
-no_rbuf:
- return NULL;
}
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
index ff35585870..cafa9f96f2 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.h
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -55,6 +55,7 @@ struct r300_buffer
enum r300_buffer_domain domain;
void *user_buffer;
+ void *constant_buffer;
struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES];
unsigned num_ranges;
};
@@ -67,7 +68,7 @@ int r300_upload_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned index_size,
unsigned start,
- unsigned count);
+ unsigned count, unsigned *out_offset);
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
@@ -97,23 +98,4 @@ static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer)
return r300_buffer(buffer)->user_buffer ? true : false;
}
-static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws,
- struct pipe_resource *buffer,
- int rd, int wr)
-{
- struct r300_buffer *buf = r300_buffer(buffer);
-
- if (!buf->buf)
- return true;
-
- return rws->add_buffer(rws, buf->buf, rd, wr);
-}
-
-static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws,
- struct r300_texture *tex,
- int rd, int wr)
-{
- return rws->add_buffer(rws, tex->buffer, rd, wr);
-}
-
#endif
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index bc2b62ba54..3e221f2e02 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -23,6 +23,7 @@
#include "draw/draw_context.h"
+#include "util/u_blitter.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
@@ -428,14 +429,19 @@ static void r300_set_clip_state(struct pipe_context* pipe,
clip->clip = *state;
if (r300->screen->caps.has_tcl) {
- BEGIN_CB(clip->cb, 29);
- OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
- (r300->screen->caps.is_r500 ?
- R500_PVS_UCP_START : R300_PVS_UCP_START));
- OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
- OUT_CB_TABLE(state->ucp, 6 * 4);
+ r300->clip_state.size = 2 + !!state->nr * 3 + state->nr * 4;
+
+ BEGIN_CB(clip->cb, r300->clip_state.size);
+ if (state->nr) {
+ OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300->screen->caps.is_r500 ?
+ R500_PVS_UCP_START : R300_PVS_UCP_START));
+ OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, state->nr * 4);
+ OUT_CB_TABLE(state->ucp, state->nr * 4);
+ }
OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) |
- R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
+ R300_PS_UCP_MODE_CLIP_AS_TRIFAN |
+ (state->depth_clamp ? R300_CLIP_DISABLE : 0));
END_CB;
r300->clip_state.dirty = TRUE;
@@ -608,32 +614,43 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
r300->dsa_state.dirty = TRUE;
}
+static void r300_tex_set_tiling_flags(struct r300_context *r300,
+ struct r300_texture *tex, unsigned level)
+{
+ /* Check if the macrotile flag needs to be changed.
+ * Skip changing the flags otherwise. */
+ if (tex->desc.macrotile[tex->surface_level] !=
+ tex->desc.macrotile[level]) {
+ /* Tiling determines how DRM treats the buffer data.
+ * We must flush CS when changing it if the buffer is referenced. */
+ if (r300->rws->cs_is_buffer_referenced(r300->cs,
+ tex->buffer, R300_REF_CS))
+ r300->context.flush(&r300->context, 0, NULL);
+
+ r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
+ tex->desc.microtile, tex->desc.macrotile[level],
+ tex->desc.stride_in_bytes[0]);
+
+ tex->surface_level = level;
+ }
+}
+
/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
static void r300_fb_set_tiling_flags(struct r300_context *r300,
- const struct pipe_framebuffer_state *old_state,
- const struct pipe_framebuffer_state *new_state)
+ const struct pipe_framebuffer_state *state)
{
- struct r300_texture *tex;
- unsigned i, level;
+ unsigned i;
/* Set tiling flags for new surfaces. */
- for (i = 0; i < new_state->nr_cbufs; i++) {
- tex = r300_texture(new_state->cbufs[i]->texture);
- level = new_state->cbufs[i]->level;
-
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
- tex->microtile,
- tex->mip_macrotile[level]);
+ for (i = 0; i < state->nr_cbufs; i++) {
+ r300_tex_set_tiling_flags(r300,
+ r300_texture(state->cbufs[i]->texture),
+ state->cbufs[i]->level);
}
- if (new_state->zsbuf) {
- tex = r300_texture(new_state->zsbuf->texture);
- level = new_state->zsbuf->level;
-
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
- tex->microtile,
- tex->mip_macrotile[level]);
+ if (state->zsbuf) {
+ r300_tex_set_tiling_flags(r300,
+ r300_texture(state->zsbuf->texture),
+ state->zsbuf->level);
}
}
@@ -654,26 +671,49 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
surf->zslice, surf->face, surf->level,
util_format_short_name(surf->format),
- rtex->macrotile ? "YES" : " NO", rtex->microtile ? "YES" : " NO",
- rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0,
+ rtex->desc.macrotile[0] ? "YES" : " NO",
+ rtex->desc.microtile ? "YES" : " NO",
+ rtex->desc.stride_in_pixels[0],
+ tex->width0, tex->height0, tex->depth0,
tex->last_level, util_format_short_name(tex->format));
}
+void r300_mark_fb_state_dirty(struct r300_context *r300,
+ enum r300_fb_state_change change)
+{
+ struct pipe_framebuffer_state *state = r300->fb_state.state;
+
+ /* What is marked as dirty depends on the enum r300_fb_state_change. */
+ r300->gpu_flush.dirty = TRUE;
+ r300->fb_state.dirty = TRUE;
+ r300->hyperz_state.dirty = TRUE;
+
+ if (change == R300_CHANGED_FB_STATE) {
+ r300->aa_state.dirty = TRUE;
+ r300->fb_state_pipelined.dirty = TRUE;
+ }
+
+ /* Now compute the fb_state atom size. */
+ r300->fb_state.size = 2 + (8 * state->nr_cbufs);
+
+ if (r300->cbzb_clear)
+ r300->fb_state.size += 10;
+ else if (state->zsbuf)
+ r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14;
+
+ /* The size of the rest of atoms stays the same. */
+}
+
static void
r300_set_framebuffer_state(struct pipe_context* pipe,
const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
struct pipe_framebuffer_state *old_state = r300->fb_state.state;
unsigned max_width, max_height, i;
uint32_t zbuffer_bpp = 0;
- if (state->nr_cbufs > 4) {
- fprintf(stderr, "r300: Implementation error: Too many MRTs in %s, "
- "refusing to bind framebuffer state!\n", __FUNCTION__);
- return;
- }
-
if (r300->screen->caps.is_r500) {
max_width = max_height = 4096;
} else if (r300->screen->caps.is_r400) {
@@ -692,8 +732,6 @@ static void
draw_flush(r300->draw);
}
- r300->fb_state.dirty = TRUE;
-
/* If nr_cbufs is changed from zero to non-zero or vice versa... */
if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
r300->blend_state.dirty = TRUE;
@@ -704,12 +742,11 @@ static void
}
/* The tiling flags are dependent on the surface miplevel, unfortunately. */
- r300_fb_set_tiling_flags(r300, r300->fb_state.state, state);
+ r300_fb_set_tiling_flags(r300, state);
- memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state));
+ util_assign_framebuffer_state(r300->fb_state.state, state);
- r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) +
- (state->zsbuf ? 10 : 0) + 11;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
/* Polygon offset depends on the zbuffer bit depth. */
if (state->zsbuf && r300->polygon_offset_enabled) {
@@ -728,6 +765,30 @@ static void
}
}
+ /* Set up AA config. */
+ if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
+ if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
+ aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
+
+ switch (state->cbufs[0]->texture->nr_samples) {
+ case 2:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
+ break;
+ case 3:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
+ break;
+ case 4:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
+ break;
+ case 6:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
+ break;
+ }
+ } else {
+ aa->aa_config = 0;
+ }
+ }
+
if (DBG_ON(r300, DBG_FB)) {
fprintf(stderr, "r300: set_framebuffer_state:\n");
for (i = 0; i < state->nr_cbufs; i++) {
@@ -826,6 +887,27 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
int i;
float psiz;
+ uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
+ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
+ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */
+ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */
+ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
+ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */
+ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
+ uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
+ uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */
+ uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */
+
+ /* Specifies top of Raster pipe specific enable controls,
+ * i.e. texture coordinates stuffing for points, lines, triangles */
+ uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */
+
+ /* Point sprites texture coordinates, 0: lower left, 1: upper right */
+ float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */
+ float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
+ float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */
+ float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */
+ CB_LOCALS;
/* Copy rasterizer state. */
rs->rs = *state;
@@ -835,18 +917,18 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
- rs->vap_control_status = R300_VC_NO_SWAP;
+ vap_control_status = R300_VC_NO_SWAP;
#else
- rs->vap_control_status = R300_VC_32BIT_SWAP;
+ vap_control_status = R300_VC_32BIT_SWAP;
#endif
/* If no TCL engine is present, turn off the HW TCL. */
if (!r300_screen(pipe->screen)->caps.has_tcl) {
- rs->vap_control_status |= R300_VAP_TCL_BYPASS;
+ vap_control_status |= R300_VAP_TCL_BYPASS;
}
/* Point size width and height. */
- rs->point_size =
+ point_size =
pack_float_16_6x(state->point_size) |
(pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
@@ -856,68 +938,70 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
* Clamp to [0, max FB size] */
psiz = pipe->screen->get_paramf(pipe->screen,
PIPE_CAP_MAX_POINT_WIDTH);
- rs->point_minmax =
+ point_minmax =
pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT;
} else {
/* We cannot disable the point-size vertex output,
* so clamp it. */
psiz = state->point_size;
- rs->point_minmax =
+ point_minmax =
(pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
(pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
}
/* Line control. */
- rs->line_control = pack_float_16_6x(state->line_width) |
+ line_control = pack_float_16_6x(state->line_width) |
R300_GA_LINE_CNTL_END_TYPE_COMP;
/* Enable polygon mode */
+ polygon_mode = 0;
if (state->fill_front != PIPE_POLYGON_MODE_FILL ||
state->fill_back != PIPE_POLYGON_MODE_FILL) {
- rs->polygon_mode = R300_GA_POLY_MODE_DUAL;
+ polygon_mode = R300_GA_POLY_MODE_DUAL;
}
/* Front face */
if (state->front_ccw)
- rs->cull_mode = R300_FRONT_FACE_CCW;
+ cull_mode = R300_FRONT_FACE_CCW;
else
- rs->cull_mode = R300_FRONT_FACE_CW;
+ cull_mode = R300_FRONT_FACE_CW;
/* Polygon offset */
+ polygon_offset_enable = 0;
if (util_get_offset(state, state->fill_front)) {
- rs->polygon_offset_enable |= R300_FRONT_ENABLE;
+ polygon_offset_enable |= R300_FRONT_ENABLE;
}
if (util_get_offset(state, state->fill_back)) {
- rs->polygon_offset_enable |= R300_BACK_ENABLE;
+ polygon_offset_enable |= R300_BACK_ENABLE;
}
+ rs->polygon_offset_enable = polygon_offset_enable != 0;
+
/* Polygon mode */
- if (rs->polygon_mode) {
- rs->polygon_mode |=
+ if (polygon_mode) {
+ polygon_mode |=
r300_translate_polygon_mode_front(state->fill_front);
- rs->polygon_mode |=
+ polygon_mode |=
r300_translate_polygon_mode_back(state->fill_back);
}
if (state->cull_face & PIPE_FACE_FRONT) {
- rs->cull_mode |= R300_CULL_FRONT;
+ cull_mode |= R300_CULL_FRONT;
}
if (state->cull_face & PIPE_FACE_BACK) {
- rs->cull_mode |= R300_CULL_BACK;
- }
-
- if (rs->polygon_offset_enable) {
- rs->depth_offset = state->offset_units;
- rs->depth_scale = state->offset_scale;
+ cull_mode |= R300_CULL_BACK;
}
if (state->line_stipple_enable) {
- rs->line_stipple_config =
+ line_stipple_config =
R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
(fui((float)state->line_stipple_factor) &
R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
/* XXX this might need to be scaled up */
- rs->line_stipple_value = state->line_stipple_pattern;
+ line_stipple_value = state->line_stipple_pattern;
+ } else {
+ line_stipple_config = 0;
+ line_stipple_value = 0;
}
if (state->flatshade) {
@@ -926,35 +1010,78 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
rs->color_control = R300_SHADE_MODEL_SMOOTH;
}
- rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
+ clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
/* Point sprites */
+ stuffing_enable = 0;
if (state->sprite_coord_enable) {
- rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE;
+ stuffing_enable = R300_GB_POINT_STUFF_ENABLE;
for (i = 0; i < 8; i++) {
if (state->sprite_coord_enable & (1 << i))
- rs->stuffing_enable |=
+ stuffing_enable |=
R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2));
}
- rs->point_texcoord_left = 0.0f;
- rs->point_texcoord_right = 1.0f;
+ point_texcoord_left = 0.0f;
+ point_texcoord_right = 1.0f;
switch (state->sprite_coord_mode) {
case PIPE_SPRITE_COORD_UPPER_LEFT:
- rs->point_texcoord_top = 0.0f;
- rs->point_texcoord_bottom = 1.0f;
+ point_texcoord_top = 0.0f;
+ point_texcoord_bottom = 1.0f;
break;
case PIPE_SPRITE_COORD_LOWER_LEFT:
- rs->point_texcoord_top = 1.0f;
- rs->point_texcoord_bottom = 0.0f;
+ point_texcoord_top = 1.0f;
+ point_texcoord_bottom = 0.0f;
break;
}
}
- if (state->gl_rasterization_rules) {
- rs->multisample_position_0 = 0x66666666;
- rs->multisample_position_1 = 0x6666666;
+ /* Build the main command buffer. */
+ BEGIN_CB(rs->cb_main, 25);
+ OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status);
+ OUT_CB_REG(R300_GA_POINT_SIZE, point_size);
+ OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2);
+ OUT_CB(point_minmax);
+ OUT_CB(line_control);
+ OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
+ OUT_CB(polygon_offset_enable);
+ rs->cull_mode_index = 9;
+ OUT_CB(cull_mode);
+ OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config);
+ OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value);
+ OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode);
+ OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule);
+ OUT_CB_REG(R300_GB_ENABLE, stuffing_enable);
+ OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
+ OUT_CB_32F(point_texcoord_left);
+ OUT_CB_32F(point_texcoord_bottom);
+ OUT_CB_32F(point_texcoord_right);
+ OUT_CB_32F(point_texcoord_top);
+ END_CB;
+
+ /* Build the two command buffers for polygon offset setup. */
+ if (polygon_offset_enable) {
+ float scale = state->offset_scale * 12;
+ float offset = state->offset_units * 4;
+
+ BEGIN_CB(rs->cb_poly_offset_zb16, 5);
+ OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ END_CB;
+
+ offset = state->offset_units * 2;
+
+ BEGIN_CB(rs->cb_poly_offset_zb24, 5);
+ OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ END_CB;
}
return (void*)rs;
@@ -986,8 +1113,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
}
UPDATE_STATE(state, r300->rs_state);
- r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0) +
- (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0) ? 5 : 0);
+ r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0);
if (last_sprite_coord_enable != r300->sprite_coord_enable ||
last_two_sided_color != r300->two_sided_color) {
@@ -1056,7 +1182,7 @@ static void*
lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);
- sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT;
+ sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
/* This is very high quality anisotropic filtering for R5xx.
* It's good for benchmarking the performance of texturing but
@@ -1170,7 +1296,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
/* Set the texrect factor in the fragment shader.
* Needed for RECT and NPOT fallback. */
texture = r300_texture(views[i]->texture);
- if (texture->uses_pitch) {
+ if (texture->desc.is_npot) {
r300->fs_rc_constant_state.dirty = TRUE;
}
@@ -1204,6 +1330,7 @@ r300_create_sampler_view(struct pipe_context *pipe,
{
struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
struct r300_texture *tex = r300_texture(texture);
+ boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
if (view) {
view->base = *templ;
@@ -1219,8 +1346,9 @@ r300_create_sampler_view(struct pipe_context *pipe,
view->format = tex->tx_format;
view->format.format1 |= r300_translate_texformat(templ->format,
- view->swizzle);
- if (r300_screen(pipe->screen)->caps.is_r500) {
+ view->swizzle,
+ is_r500);
+ if (is_r500) {
view->format.format2 |= r500_tx_format_msb_bit(templ->format);
}
}
@@ -1544,7 +1672,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
const struct pipe_shader_state* shader)
{
struct r300_context* r300 = r300_context(pipe);
-
struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
/* Copy state directly into shader. */
@@ -1621,8 +1748,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_constant_buffer *cbuf;
- struct pipe_transfer *tr;
- float *mapped;
+ uint32_t *mapped = r300_buffer(buf)->user_buffer;
int max_size = 0, max_size_bytes = 0, clamped_size = 0;
switch (shader) {
@@ -1645,8 +1771,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
max_size_bytes = max_size * 4 * sizeof(float);
if (buf == NULL || buf->width0 == 0 ||
- (mapped = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &tr)) == NULL)
- {
+ (mapped = r300_buffer(buf)->constant_buffer) == NULL) {
cbuf->count = 0;
return;
}
@@ -1664,17 +1789,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
clamped_size = MIN2(buf->width0, max_size_bytes);
cbuf->count = clamped_size / (4 * sizeof(float));
-
- if (shader == PIPE_SHADER_FRAGMENT && !r300->screen->caps.is_r500) {
- unsigned i,j;
-
- /* Convert constants to float24. */
- for (i = 0; i < cbuf->count; i++)
- for (j = 0; j < 4; j++)
- cbuf->constants[i][j] = pack_float24(mapped[i*4+j]);
- } else {
- memcpy(cbuf->constants, mapped, clamped_size);
- }
+ cbuf->ptr = mapped;
}
if (shader == PIPE_SHADER_VERTEX) {
@@ -1690,8 +1805,6 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
} else if (shader == PIPE_SHADER_FRAGMENT) {
r300->fs_constants.dirty = TRUE;
}
-
- pipe_buffer_unmap(pipe, buf, tr);
}
void r300_init_state_functions(struct r300_context* r300)
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 3aa8deb63c..a85db27064 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -102,7 +102,8 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
* they won't be rasterized. */
gen_count = 0;
for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) {
- if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED &&
+ !(r300->sprite_coord_enable & (1 << i))) {
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
vs_outputs->generic[i]);
gen_count++;
@@ -118,7 +119,7 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
/* WPOS. */
if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) {
- DBG(r300, DBG_DRAW, "draw_emit_attrib: WPOS, index: %i\n",
+ DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n",
vs_outputs->wpos);
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
vs_outputs->wpos);
@@ -140,18 +141,19 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300)
/* For each Draw attribute, route it to the fragment shader according
* to the vs_output_tab. */
attrib_count = vinfo->num_attribs;
- DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+ DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count);
for (i = 0; i < attrib_count; i++) {
- DBG(r300, DBG_DRAW, "r300: attrib: index %d, interp %d, emit %d,"
- " vs_output_tab %d\n", vinfo->attrib[i].src_index,
- vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
- vs_output_tab[i]);
-
- /* Make sure we have a proper destination for our attribute. */
- assert(vs_output_tab[i] != -1);
+ if (vs_output_tab[i] == -1) {
+ assert(0);
+ abort();
+ }
format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+ DBG(r300, DBG_SWTCL,
+ "r300: swtcl_vertex_psc [%i] <- %s\n",
+ vs_output_tab[i], util_format_short_name(format));
+
/* Obtain the type of data in this attribute. */
type = r300_translate_vertex_data_type(format);
if (type == R300_INVALID_FORMAT) {
@@ -526,15 +528,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
struct r300_sampler_state *sampler;
struct r300_sampler_view *view;
struct r300_texture *tex;
- unsigned min_level, max_level, i, size;
+ unsigned min_level, max_level, i, j, size;
unsigned count = MIN2(state->sampler_view_count,
state->sampler_state_count);
- unsigned char depth_swizzle[4] = {
- UTIL_FORMAT_SWIZZLE_X,
- UTIL_FORMAT_SWIZZLE_X,
- UTIL_FORMAT_SWIZZLE_X,
- UTIL_FORMAT_SWIZZLE_X
- };
/* The KIL opcode fix, see below. */
if (!count && !r300->screen->caps.is_r500)
@@ -561,14 +557,29 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
/* Assign a texture cache region. */
texstate->format.format1 |= view->texcache_region;
- /* If compare mode is disabled, the sampler view swizzles
- * are stored in the format.
- * Otherwise, swizzles must be applied after the compare mode
- * in the fragment shader. */
- if (util_format_is_depth_or_stencil(tex->b.b.format)) {
+ /* Depth textures are kinda special. */
+ if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
+ unsigned char depth_swizzle[4];
+
+ if (!r300->screen->caps.is_r500 &&
+ util_format_get_blocksizebits(tex->desc.b.b.format) == 32) {
+ /* X24x8 is sampled as Y16X16 on r3xx-r4xx.
+ * The depth here is at the Y component. */
+ for (j = 0; j < 4; j++)
+ depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y;
+ } else {
+ for (j = 0; j < 4; j++)
+ depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X;
+ }
+
+ /* If compare mode is disabled, sampler view swizzles
+ * are stored in the format.
+ * Otherwise, the swizzles must be applied after the compare
+ * mode in the fragment shader. */
if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) {
texstate->format.format1 |=
- r300_get_swizzle_combined(depth_swizzle, view->swizzle);
+ r300_get_swizzle_combined(depth_swizzle,
+ view->swizzle);
} else {
texstate->format.format1 |=
r300_get_swizzle_combined(depth_swizzle, 0);
@@ -576,12 +587,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
/* to emulate 1D textures through 2D ones correctly */
- if (tex->b.b.target == PIPE_TEXTURE_1D) {
+ if (tex->desc.b.b.target == PIPE_TEXTURE_1D) {
texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
- if (tex->uses_pitch) {
+ if (tex->desc.is_npot) {
/* NPOT textures don't support mip filter, unfortunately.
* This prevents incorrect rendering. */
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
@@ -608,7 +619,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
/* determine min/max levels */
/* the MAX_MIP level is the largest (finest) one */
max_level = MIN3(sampler->max_lod + view->base.first_level,
- tex->b.b.last_level, view->base.last_level);
+ tex->desc.b.b.last_level, view->base.last_level);
min_level = MIN2(sampler->min_lod + view->base.first_level,
max_level);
texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level);
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
deleted file mode 100644
index e67a0ae244..0000000000
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright 2009 Joakim Sindholt <opensource@zhasha.com>
- * Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_reg.h"
-#include "r300_screen.h"
-#include "r300_state_invariant.h"
-
-/* Calculate and emit invariant state. This is data that the 3D engine
- * will probably want at the beginning of every CS, but it's not currently
- * handled by any CSO setup, and in addition it doesn't really change much.
- *
- * Note that eventually this should be empty, but it's useful for development
- * and general unduplication of code. */
-void r300_emit_invariant_state(struct r300_context* r300,
- unsigned size, void* state)
-{
- CS_LOCALS(r300);
-
- BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0));
-
- /*** Graphics Backend (GB) ***/
- /* Source of fog depth */
- OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W);
-
- /*** Fog (FG) ***/
- OUT_CS_REG(R300_FG_FOG_BLEND, 0x0);
- OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0);
- OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0);
- OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0);
-
- /*** VAP ***/
- /* Sign/normalize control */
- OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
- /* TCL-only stuff */
- if (r300->screen->caps.has_tcl) {
- /* Amount of time to wait for vertex fetches in PVS */
- OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
- }
-
- END_CS;
-
- /* XXX unsorted stuff from surface_fill */
- BEGIN_CS(38 + (r300->screen->caps.has_tcl ? 7 : 0) +
- (r300->screen->caps.is_rv350 ? 4 : 0) +
- (r300->screen->caps.is_r400 ? 2 : 0));
-
- if (r300->screen->caps.has_tcl) {
- /*Flushing PVS is required before the VAP_GB registers can be changed*/
- OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
- OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
- OUT_CS_32F(1.0);
- OUT_CS_32F(1.0);
- OUT_CS_32F(1.0);
- OUT_CS_32F(1.0);
- }
- /* XXX line tex stuffing */
- OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1);
- OUT_CS_32F(0.0);
- OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1);
- OUT_CS_32F(1.0);
- OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 |
- (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT));
- /* XXX this big chunk should be refactored into rs_state */
- OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
- OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
- OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001);
- OUT_CS_REG(R300_GA_OFFSET, 0x00000000);
- OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412);
- OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000);
- OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000);
- OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
- OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
- OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
- OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
- OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
-
- if (r300->screen->caps.is_rv350) {
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
- }
-
- OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
- OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
- OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
- if (r300->screen->caps.is_r400)
- OUT_CS_REG(R400_US_CODE_BANK, 0);
- END_CS;
-}
diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h
deleted file mode 100644
index 83d031c7fe..0000000000
--- a/src/gallium/drivers/r300/r300_state_invariant.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_STATE_INVARIANT_H
-#define R300_STATE_INVARIANT_H
-
-struct r300_context;
-
-void r300_emit_invariant_state(struct r300_context* r300,
- unsigned size, void* state);
-
-#endif /* R300_STATE_INVARIANT_H */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index ddb6600056..fcdca5605e 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -26,6 +26,7 @@
#include "r300_context.h"
#include "r300_reg.h"
+#include "r300_texture_desc.h"
#include "r300_transfer.h"
#include "r300_screen.h"
#include "r300_winsys.h"
@@ -36,12 +37,6 @@
#include "util/u_memory.h"
#include "pipe/p_screen.h"
-#include "state_tracker/drm_api.h"
-
-enum r300_dim {
- DIM_WIDTH = 0,
- DIM_HEIGHT = 1
-};
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
const unsigned char *swizzle_view)
@@ -110,7 +105,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
* The FORMAT specifies how the texture sampler will treat the texture, and
* makes available X, Y, Z, W, ZERO, and ONE for swizzling. */
uint32_t r300_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view)
+ const unsigned char *swizzle_view,
+ boolean is_r500)
{
uint32_t result = 0;
const struct util_format_description *desc;
@@ -135,7 +131,10 @@ uint32_t r300_translate_texformat(enum pipe_format format,
return R300_TX_FORMAT_X16;
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- return R500_TX_FORMAT_Y8X24;
+ if (is_r500)
+ return R500_TX_FORMAT_Y8X24;
+ else
+ return R300_TX_FORMAT_Y16X16;
default:
return ~0; /* Unsupported. */
}
@@ -538,26 +537,27 @@ boolean r300_is_zs_format_supported(enum pipe_format format)
boolean r300_is_sampler_format_supported(enum pipe_format format)
{
- return r300_translate_texformat(format, 0) != ~0;
+ return r300_translate_texformat(format, 0, TRUE) != ~0;
}
static void r300_texture_setup_immutable_state(struct r300_screen* screen,
struct r300_texture* tex)
{
struct r300_texture_format_state* f = &tex->tx_format;
- struct pipe_resource *pt = &tex->b.b;
+ struct pipe_resource *pt = &tex->desc.b.b;
boolean is_r500 = screen->caps.is_r500;
/* Set sampler state. */
f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
- if (tex->uses_pitch) {
+ if (tex->desc.uses_stride_addressing) {
/* rectangles love this */
f->format0 |= R300_TX_PITCH_EN;
- f->format2 = (tex->hwpitch[0] - 1) & 0x1fff;
+ f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff;
} else {
- /* power of two textures (3D, mipmaps, and no pitch) */
+ /* Power of two textures (3D, mipmaps, and no pitch),
+ * also NPOT textures with a width being POT. */
f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf);
}
@@ -580,8 +580,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen,
}
}
- f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) |
- R300_TXO_MICRO_TILE(tex->microtile);
+ f->tile_config = R300_TXO_MACRO_TILE(tex->desc.macrotile[0]) |
+ R300_TXO_MICRO_TILE(tex->desc.microtile);
}
static void r300_texture_setup_fb_state(struct r300_screen* screen,
@@ -590,23 +590,23 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen,
unsigned i;
/* Set framebuffer state. */
- if (util_format_is_depth_or_stencil(tex->b.b.format)) {
- for (i = 0; i <= tex->b.b.last_level; i++) {
+ if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
+ for (i = 0; i <= tex->desc.b.b.last_level; i++) {
tex->fb_state.pitch[i] =
- tex->hwpitch[i] |
- R300_DEPTHMACROTILE(tex->mip_macrotile[i]) |
- R300_DEPTHMICROTILE(tex->microtile);
+ tex->desc.stride_in_pixels[i] |
+ R300_DEPTHMACROTILE(tex->desc.macrotile[i]) |
+ R300_DEPTHMICROTILE(tex->desc.microtile);
}
- tex->fb_state.format = r300_translate_zsformat(tex->b.b.format);
+ tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format);
} else {
- for (i = 0; i <= tex->b.b.last_level; i++) {
+ for (i = 0; i <= tex->desc.b.b.last_level; i++) {
tex->fb_state.pitch[i] =
- tex->hwpitch[i] |
- r300_translate_colorformat(tex->b.b.format) |
- R300_COLOR_TILE(tex->mip_macrotile[i]) |
- R300_COLOR_MICROTILE(tex->microtile);
+ tex->desc.stride_in_pixels[i] |
+ r300_translate_colorformat(tex->desc.b.b.format) |
+ R300_COLOR_TILE(tex->desc.macrotile[i]) |
+ R300_COLOR_MICROTILE(tex->desc.microtile);
}
- tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format);
+ tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format);
}
}
@@ -626,282 +626,6 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen,
r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex));
}
-unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
- unsigned zslice, unsigned face)
-{
- unsigned offset = tex->offset[level];
-
- switch (tex->b.b.target) {
- case PIPE_TEXTURE_3D:
- assert(face == 0);
- return offset + zslice * tex->layer_size[level];
-
- case PIPE_TEXTURE_CUBE:
- assert(zslice == 0);
- return offset + face * tex->layer_size[level];
-
- default:
- assert(zslice == 0 && face == 0);
- return offset;
- }
-}
-
-/* Returns the number of pixels that the texture should be aligned to
- * in the given dimension. */
-static unsigned r300_get_pixel_alignment(struct r300_texture *tex,
- enum r300_buffer_tiling macrotile,
- enum r300_dim dim)
-{
- static const unsigned table[2][5][3][2] =
- {
- {
- /* Macro: linear linear linear
- Micro: linear tiled square-tiled */
- {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */
- {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */
- {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */
- {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */
- {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
- },
- {
- /* Macro: tiled tiled tiled
- Micro: linear tiled square-tiled */
- {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */
- {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */
- {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */
- {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */
- {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
- }
- };
- static const unsigned aa_block[2] = {4, 8};
- unsigned res = 0;
- unsigned pixsize = util_format_get_blocksize(tex->b.b.format);
-
- assert(macrotile <= R300_BUFFER_TILED);
- assert(tex->microtile <= R300_BUFFER_SQUARETILED);
- assert(pixsize <= 16);
- assert(dim <= DIM_HEIGHT);
-
- if (tex->b.b.nr_samples > 1) {
- /* Multisampled textures have their own alignment scheme. */
- if (pixsize == 4)
- res = aa_block[dim];
- } else {
- /* Standard alignment. */
- res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim];
- }
-
- assert(res);
- return res;
-}
-
-/* Return true if macrotiling should be enabled on the miplevel. */
-static boolean r300_texture_macro_switch(struct r300_texture *tex,
- unsigned level,
- boolean rv350_mode,
- enum r300_dim dim)
-{
- unsigned tile, texdim;
-
- tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim);
- if (dim == DIM_WIDTH) {
- texdim = u_minify(tex->b.b.width0, level);
- } else {
- texdim = u_minify(tex->b.b.height0, level);
- }
-
- /* See TX_FILTER1_n.MACRO_SWITCH. */
- if (rv350_mode) {
- return texdim >= tile;
- } else {
- return texdim > tile;
- }
-}
-
-/**
- * Return the stride, in bytes, of the texture images of the given texture
- * at the given level.
- */
-unsigned r300_texture_get_stride(struct r300_screen* screen,
- struct r300_texture* tex, unsigned level)
-{
- unsigned tile_width, width, stride;
-
- if (tex->stride_override)
- return tex->stride_override;
-
- /* Check the level. */
- if (level > tex->b.b.last_level) {
- SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
- __FUNCTION__, level, tex->b.b.last_level);
- return 0;
- }
-
- width = u_minify(tex->b.b.width0, level);
-
- if (util_format_is_plain(tex->b.b.format)) {
- tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
- DIM_WIDTH);
- width = align(width, tile_width);
-
- stride = util_format_get_stride(tex->b.b.format, width);
-
- /* Some IGPs need a minimum stride of 64 bytes, hmm...
- * This doesn't seem to apply to tiled textures, according to r300c. */
- if (!tex->microtile && !tex->mip_macrotile[level] &&
- (screen->caps.family == CHIP_FAMILY_RS600 ||
- screen->caps.family == CHIP_FAMILY_RS690 ||
- screen->caps.family == CHIP_FAMILY_RS740)) {
- return stride < 64 ? 64 : stride;
- }
-
- /* The alignment to 32 bytes is sort of implied by the layout... */
- return stride;
- } else {
- return align(util_format_get_stride(tex->b.b.format, width), 32);
- }
-}
-
-static unsigned r300_texture_get_nblocksy(struct r300_texture* tex,
- unsigned level)
-{
- unsigned height, tile_height;
-
- height = u_minify(tex->b.b.height0, level);
-
- if (util_format_is_plain(tex->b.b.format)) {
- tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
- DIM_HEIGHT);
- height = align(height, tile_height);
-
- /* This is needed for the kernel checker, unfortunately. */
- height = util_next_power_of_two(height);
- }
-
- return util_format_get_nblocksy(tex->b.b.format, height);
-}
-
-static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
- struct r300_texture *tex)
-{
- /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
- * incorrectly. This is a workaround to prevent CS from being rejected. */
-
- unsigned i, size;
-
- if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
- tex->b.b.target == PIPE_TEXTURE_3D &&
- tex->b.b.last_level > 0) {
- size = 0;
-
- for (i = 0; i <= tex->b.b.last_level; i++) {
- size += r300_texture_get_stride(screen, tex, i) *
- r300_texture_get_nblocksy(tex, i);
- }
-
- size *= tex->b.b.depth0;
- tex->size = size;
- }
-}
-
-static void r300_setup_miptree(struct r300_screen* screen,
- struct r300_texture* tex)
-{
- struct pipe_resource* base = &tex->b.b;
- unsigned stride, size, layer_size, nblocksy, i;
- boolean rv350_mode = screen->caps.is_rv350;
-
- SCREEN_DBG(screen, DBG_TEXALLOC,
- "r300: Making miptree for texture, format %s\n",
- util_format_short_name(base->format));
-
- for (i = 0; i <= base->last_level; i++) {
- /* Let's see if this miplevel can be macrotiled. */
- tex->mip_macrotile[i] =
- (tex->macrotile == R300_BUFFER_TILED &&
- r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) &&
- r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ?
- R300_BUFFER_TILED : R300_BUFFER_LINEAR;
-
- stride = r300_texture_get_stride(screen, tex, i);
- nblocksy = r300_texture_get_nblocksy(tex, i);
- layer_size = stride * nblocksy;
-
- if (base->nr_samples) {
- layer_size *= base->nr_samples;
- }
-
- if (base->target == PIPE_TEXTURE_CUBE)
- size = layer_size * 6;
- else
- size = layer_size * u_minify(base->depth0, i);
-
- tex->offset[i] = tex->size;
- tex->size = tex->offset[i] + size;
- tex->layer_size[i] = layer_size;
- tex->pitch[i] = stride / util_format_get_blocksize(base->format);
- tex->hwpitch[i] =
- tex->pitch[i] * util_format_get_blockwidth(base->format);
-
- SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
- "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
- i, u_minify(base->width0, i), u_minify(base->height0, i),
- u_minify(base->depth0, i), stride, tex->size,
- tex->mip_macrotile[i] ? "TRUE" : "FALSE");
- }
-}
-
-static void r300_setup_flags(struct r300_texture* tex)
-{
- tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) ||
- !util_is_power_of_two(tex->b.b.height0) ||
- tex->stride_override;
-}
-
-static void r300_setup_tiling(struct pipe_screen *screen,
- struct r300_texture *tex)
-{
- struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
- enum pipe_format format = tex->b.b.format;
- boolean rv350_mode = r300_screen(screen)->caps.is_rv350;
- boolean is_zb = util_format_is_depth_or_stencil(format);
- boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING);
-
- if (!util_format_is_plain(format)) {
- return;
- }
-
- /* If height == 1, disable microtiling except for zbuffer. */
- if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) {
- return;
- }
-
- /* Set microtiling. */
- switch (util_format_get_blocksize(format)) {
- case 1:
- case 4:
- tex->microtile = R300_BUFFER_TILED;
- break;
-
- case 2:
- case 8:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
- tex->microtile = R300_BUFFER_SQUARETILED;
- }
- break;
- }
-
- if (dbg_no_tiling) {
- return;
- }
-
- /* Set macrotiling. */
- if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) &&
- r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) {
- tex->macrotile = R300_BUFFER_TILED;
- }
-}
-
static unsigned r300_texture_is_referenced(struct pipe_context *context,
struct pipe_resource *texture,
unsigned face, unsigned level)
@@ -909,7 +633,8 @@ static unsigned r300_texture_is_referenced(struct pipe_context *context,
struct r300_context *r300 = r300_context(context);
struct r300_texture *rtex = (struct r300_texture *)texture;
- if (r300->rws->is_buffer_referenced(r300->rws, rtex->buffer, R300_REF_CS))
+ if (r300->rws->cs_is_buffer_referenced(r300->cs,
+ rtex->buffer, R300_REF_CS))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
return PIPE_UNREFERENCED;
@@ -936,12 +661,11 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen,
return FALSE;
}
- whandle->stride = r300_texture_get_stride(r300_screen(screen), tex, 0);
-
- return rws->buffer_get_handle(rws, tex->buffer, whandle);
+ return rws->buffer_get_handle(rws, tex->buffer,
+ tex->desc.stride_in_bytes[0], whandle);
}
-struct u_resource_vtbl r300_texture_vtbl =
+struct u_resource_vtbl r300_texture_vtbl =
{
r300_texture_get_handle, /* get_handle */
r300_texture_destroy, /* resource_destroy */
@@ -954,17 +678,69 @@ struct u_resource_vtbl r300_texture_vtbl =
u_default_transfer_inline_write /* transfer_inline_write */
};
-/* Create a new texture. */
-struct pipe_resource* r300_texture_create(struct pipe_screen* screen,
- const struct pipe_resource* base)
+/* The common texture constructor. */
+static struct r300_texture*
+r300_texture_create_object(struct r300_screen *rscreen,
+ const struct pipe_resource *base,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ unsigned stride_in_bytes_override,
+ unsigned max_buffer_size,
+ struct r300_winsys_buffer *buffer)
{
- struct r300_texture* tex = CALLOC_STRUCT(r300_texture);
- struct r300_screen* rscreen = r300_screen(screen);
- struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
-
+ struct r300_winsys_screen *rws = rscreen->rws;
+ struct r300_texture *tex = CALLOC_STRUCT(r300_texture);
if (!tex) {
+ if (buffer)
+ rws->buffer_reference(rws, &buffer, NULL);
+ return NULL;
+ }
+
+ /* Initialize the descriptor. */
+ if (!r300_texture_desc_init(rscreen, &tex->desc, base,
+ microtile, macrotile,
+ stride_in_bytes_override,
+ max_buffer_size)) {
+ if (buffer)
+ rws->buffer_reference(rws, &buffer, NULL);
+ FREE(tex);
return NULL;
}
+ /* Initialize the hardware state. */
+ r300_texture_setup_immutable_state(rscreen, tex);
+ r300_texture_setup_fb_state(rscreen, tex);
+
+ tex->desc.b.vtbl = &r300_texture_vtbl;
+ pipe_reference_init(&tex->desc.b.b.reference, 1);
+ tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
+ R300_DOMAIN_GTT :
+ R300_DOMAIN_VRAM | R300_DOMAIN_GTT;
+ tex->buffer = buffer;
+
+ /* Create the backing buffer if needed. */
+ if (!tex->buffer) {
+ tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048,
+ base->bind, base->usage, tex->domain);
+
+ if (!tex->buffer) {
+ FREE(tex);
+ return NULL;
+ }
+ }
+
+ rws->buffer_set_tiling(rws, tex->buffer,
+ tex->desc.microtile, tex->desc.macrotile[0],
+ tex->desc.stride_in_bytes[0]);
+
+ return tex;
+}
+
+/* Create a new texture. */
+struct pipe_resource *r300_texture_create(struct pipe_screen *screen,
+ const struct pipe_resource *base)
+{
+ struct r300_screen *rscreen = r300_screen(screen);
+ enum r300_buffer_tiling microtile, macrotile;
/* Refuse to create a texture with size 0. */
if (!base->width0 ||
@@ -974,58 +750,70 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen,
fprintf(stderr, "r300: texture_create: "
"Got invalid texture dimensions: %ix%ix%i\n",
base->width0, base->height0, base->depth0);
- FREE(tex);
return NULL;
}
- tex->b.b = *base;
- tex->b.vtbl = &r300_texture_vtbl;
- pipe_reference_init(&tex->b.b.reference, 1);
- tex->b.b.screen = screen;
+ if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) ||
+ (base->bind & PIPE_BIND_SCANOUT)) {
+ microtile = R300_BUFFER_LINEAR;
+ macrotile = R300_BUFFER_LINEAR;
+ } else {
+ microtile = R300_BUFFER_SELECT_LAYOUT;
+ macrotile = R300_BUFFER_SELECT_LAYOUT;
+ }
+
+ return (struct pipe_resource*)
+ r300_texture_create_object(rscreen, base, microtile, macrotile,
+ 0, 0, NULL);
+}
+
+struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
+ const struct pipe_resource *base,
+ struct winsys_handle *whandle)
+{
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys;
+ struct r300_screen *rscreen = r300_screen(screen);
+ struct r300_winsys_buffer *buffer;
+ enum r300_buffer_tiling microtile, macrotile;
+ unsigned stride, size;
- r300_setup_flags(tex);
- if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) &&
- !(base->bind & PIPE_BIND_SCANOUT)) {
- r300_setup_tiling(screen, tex);
+ /* Support only 2D textures without mipmaps */
+ if (base->target != PIPE_TEXTURE_2D ||
+ base->depth0 != 1 ||
+ base->last_level != 0) {
+ return NULL;
}
- r300_setup_miptree(rscreen, tex);
- r300_texture_3d_fix_mipmapping(rscreen, tex);
- r300_texture_setup_immutable_state(rscreen, tex);
- r300_texture_setup_fb_state(rscreen, tex);
- SCREEN_DBG(rscreen, DBG_TEX,
- "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, "
- "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n",
- tex->macrotile ? "YES" : " NO",
- tex->microtile ? "YES" : " NO",
- tex->hwpitch[0],
- base->width0, base->height0, base->depth0, base->last_level,
- tex->size,
- util_format_short_name(base->format));
+ buffer = rws->buffer_from_handle(rws, whandle, &stride, &size);
+ if (!buffer)
+ return NULL;
- tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT :
- R300_DOMAIN_VRAM;
+ rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile);
- tex->buffer = rws->buffer_create(rws, 2048, base->bind, tex->domain,
- tex->size);
+ /* Enforce a microtiled zbuffer. */
+ if (util_format_is_depth_or_stencil(base->format) &&
+ microtile == R300_BUFFER_LINEAR) {
+ switch (util_format_get_blocksize(base->format)) {
+ case 4:
+ microtile = R300_BUFFER_TILED;
+ break;
- if (!tex->buffer) {
- FREE(tex);
- return NULL;
+ case 2:
+ if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT))
+ microtile = R300_BUFFER_SQUARETILED;
+ break;
+ }
}
- rws->buffer_set_tiling(rws, tex->buffer,
- tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
- tex->microtile,
- tex->macrotile);
-
- return (struct pipe_resource*)tex;
+ return (struct pipe_resource*)
+ r300_texture_create_object(rscreen, base, microtile, macrotile,
+ stride, size, buffer);
}
/* Not required to implement u_resource_vtbl, consider moving to another file:
*/
struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
- struct pipe_resource* texture,
+ struct pipe_resource* texture,
unsigned face,
unsigned level,
unsigned zslice,
@@ -1035,6 +823,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
if (surface) {
+ uint32_t offset, tile_height;
+
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.format = texture->format;
@@ -1046,10 +836,49 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
surface->base.level = level;
surface->buffer = tex->buffer;
+
+ /* Prefer VRAM if there are multiple domains to choose from. */
surface->domain = tex->domain;
- surface->offset = r300_texture_get_offset(tex, level, zslice, face);
+ if (surface->domain & R300_DOMAIN_VRAM)
+ surface->domain &= ~R300_DOMAIN_GTT;
+
+ surface->offset = r300_texture_get_offset(&tex->desc,
+ level, zslice, face);
surface->pitch = tex->fb_state.pitch[level];
surface->format = tex->fb_state.format;
+
+ /* Parameters for the CBZB clear. */
+ surface->cbzb_allowed = tex->desc.cbzb_allowed[level];
+ surface->cbzb_width = align(surface->base.width, 64);
+
+ /* Height must be aligned to the size of a tile. */
+ tile_height = r300_get_pixel_alignment(tex->desc.b.b.format,
+ tex->desc.b.b.nr_samples,
+ tex->desc.microtile,
+ tex->desc.macrotile[level],
+ DIM_HEIGHT);
+
+ surface->cbzb_height = align((surface->base.height + 1) / 2,
+ tile_height);
+
+ /* Offset must be aligned to 2K and must point at the beginning
+ * of a scanline. */
+ offset = surface->offset +
+ tex->desc.stride_in_bytes[level] * surface->cbzb_height;
+ surface->cbzb_midpoint_offset = offset & ~2047;
+
+ surface->cbzb_pitch = surface->pitch & 0x1ffffc;
+
+ if (util_format_get_blocksizebits(surface->base.format) == 32)
+ surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ else
+ surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
+
+ SCREEN_DBG(r300_screen(screen), DBG_CBZB,
+ "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n",
+ surface->cbzb_width, surface->cbzb_height,
+ offset & 2047,
+ tex->desc.macrotile[level] ? "YES" : " NO");
}
return &surface->base;
@@ -1062,88 +891,3 @@ void r300_tex_surface_destroy(struct pipe_surface* s)
pipe_resource_reference(&s->texture, NULL);
FREE(s);
}
-
-struct pipe_resource*
-r300_texture_from_handle(struct pipe_screen* screen,
- const struct pipe_resource* base,
- struct winsys_handle *whandle)
-{
- struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys;
- struct r300_screen* rscreen = r300_screen(screen);
- struct r300_winsys_buffer *buffer;
- struct r300_texture* tex;
- boolean override_zb_flags;
-
- /* Support only 2D textures without mipmaps */
- if (base->target != PIPE_TEXTURE_2D ||
- base->depth0 != 1 ||
- base->last_level != 0) {
- return NULL;
- }
-
- buffer = rws->buffer_from_handle(rws, whandle->handle);
- if (!buffer) {
- return NULL;
- }
-
- tex = CALLOC_STRUCT(r300_texture);
- if (!tex) {
- return NULL;
- }
-
- tex->b.b = *base;
- tex->b.vtbl = &r300_texture_vtbl;
- pipe_reference_init(&tex->b.b.reference, 1);
- tex->b.b.screen = screen;
- tex->domain = R300_DOMAIN_VRAM;
-
- tex->stride_override = whandle->stride;
-
- /* one ref already taken */
- tex->buffer = buffer;
-
- rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile);
- r300_setup_flags(tex);
- SCREEN_DBG(rscreen, DBG_TEX,
- "r300: texture_from_handle: Macro: %s, Micro: %s, "
- "Pitch: % 4i, Dim: %ix%i, Format: %s\n",
- tex->macrotile ? "YES" : " NO",
- tex->microtile ? "YES" : " NO",
- whandle->stride / util_format_get_blocksize(base->format),
- base->width0, base->height0,
- util_format_short_name(base->format));
-
- /* Enforce microtiled zbuffer. */
- override_zb_flags = util_format_is_depth_or_stencil(base->format) &&
- tex->microtile == R300_BUFFER_LINEAR;
-
- if (override_zb_flags) {
- switch (util_format_get_blocksize(base->format)) {
- case 4:
- tex->microtile = R300_BUFFER_TILED;
- break;
-
- case 2:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
- tex->microtile = R300_BUFFER_SQUARETILED;
- break;
- }
- /* Pass through. */
-
- default:
- override_zb_flags = FALSE;
- }
- }
-
- r300_setup_miptree(rscreen, tex);
- r300_texture_setup_immutable_state(rscreen, tex);
- r300_texture_setup_fb_state(rscreen, tex);
-
- if (override_zb_flags) {
- rws->buffer_set_tiling(rws, tex->buffer,
- tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
- tex->microtile,
- tex->macrotile);
- }
- return (struct pipe_resource*)tex;
-}
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 99e7694254..a4524320fd 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -35,16 +35,11 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
const unsigned char *swizzle_view);
uint32_t r300_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view);
+ const unsigned char *swizzle_view,
+ boolean is_r500);
uint32_t r500_tx_format_msb_bit(enum pipe_format format);
-unsigned r300_texture_get_stride(struct r300_screen* screen,
- struct r300_texture* tex, unsigned level);
-
-unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
- unsigned zslice, unsigned face);
-
void r300_texture_reinterpret_format(struct pipe_screen *screen,
struct pipe_resource *tex,
enum pipe_format new_format);
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
new file mode 100644
index 0000000000..343089bf2c
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_texture_desc.h"
+
+#include "r300_context.h"
+#include "r300_winsys.h"
+
+#include "util/u_format.h"
+
+/**
+ * Look up the alignment, in pixels, that a texture needs along one
+ * dimension for the given format and tiling layout.
+ *
+ * \param format       pixel format; only its block size in bytes is used
+ * \param num_samples  sample count; values above 1 select the MSAA scheme
+ * \param microtile    microtiling mode (third index of the lookup table)
+ * \param macrotile    macrotiling mode (first index of the lookup table)
+ * \param dim          DIM_WIDTH or DIM_HEIGHT
+ * \return the alignment in pixels; a zero result (a combination with no
+ *         table entry) trips the final assert
+ */
+unsigned r300_get_pixel_alignment(enum pipe_format format,
+                                  unsigned num_samples,
+                                  enum r300_buffer_tiling microtile,
+                                  enum r300_buffer_tiling macrotile,
+                                  enum r300_dim dim)
+{
+    /* Indexed as [macrotile][log2(bytes per pixel)][microtile][dim];
+     * every innermost pair is {width, height}. */
+    static const unsigned alignment_table[2][5][3][2] =
+    {
+        {
+            /* Macro: linear    linear    linear
+               Micro: linear    tiled  square-tiled */
+            {{ 32, 1}, { 8,  4}, { 0,  0}}, /*   8 bits per pixel */
+            {{ 16, 1}, { 8,  2}, { 4,  4}}, /*  16 bits per pixel */
+            {{  8, 1}, { 4,  2}, { 0,  0}}, /*  32 bits per pixel */
+            {{  4, 1}, { 0,  0}, { 2,  2}}, /*  64 bits per pixel */
+            {{  2, 1}, { 0,  0}, { 0,  0}}  /* 128 bits per pixel */
+        },
+        {
+            /* Macro: tiled     tiled     tiled
+               Micro: linear    tiled  square-tiled */
+            {{256, 8}, {64, 32}, { 0,  0}}, /*   8 bits per pixel */
+            {{128, 8}, {64, 16}, {32, 32}}, /*  16 bits per pixel */
+            {{ 64, 8}, {32, 16}, { 0,  0}}, /*  32 bits per pixel */
+            {{ 32, 8}, { 0,  0}, {16, 16}}, /*  64 bits per pixel */
+            {{ 16, 8}, { 0,  0}, { 0,  0}}  /* 128 bits per pixel */
+        }
+    };
+    /* {width, height} alignment used for multisampled textures. */
+    static const unsigned msaa_alignment[2] = {4, 8};
+    const unsigned bytes_per_pixel = util_format_get_blocksize(format);
+    unsigned result = 0;
+
+    assert(macrotile <= R300_BUFFER_TILED);
+    assert(microtile <= R300_BUFFER_SQUARETILED);
+    assert(bytes_per_pixel <= 16);
+    assert(dim <= DIM_HEIGHT);
+
+    if (num_samples <= 1) {
+        /* Standard alignment. */
+        result = alignment_table[macrotile]
+                                [util_logbase2(bytes_per_pixel)]
+                                [microtile]
+                                [dim];
+    } else if (bytes_per_pixel == 4) {
+        /* Multisampled textures have their own alignment scheme. */
+        result = msaa_alignment[dim];
+    }
+    /* XXX FP16 AA: other multisampled pixel sizes leave result at zero. */
+
+    assert(result);
+    return result;
+}
+
+/**
+ * Decide whether a miplevel is large enough, along one dimension, for
+ * macrotiling to be enabled on it.
+ *
+ * The size of the level is compared against the macrotiled alignment for
+ * the texture's format and microtiling mode.
+ * See TX_FILTER1_n.MACRO_SWITCH.
+ *
+ * \param desc        texture description (format, samples, microtile, size)
+ * \param level       miplevel to test
+ * \param rv350_mode  if TRUE, a level exactly one macrotile in size passes;
+ *                    otherwise the level must be strictly larger
+ * \param dim         DIM_WIDTH or DIM_HEIGHT
+ */
+static boolean r300_texture_macro_switch(struct r300_texture_desc *desc,
+                                         unsigned level,
+                                         boolean rv350_mode,
+                                         enum r300_dim dim)
+{
+    const unsigned macrotile_dim =
+        r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples,
+                                 desc->microtile, R300_BUFFER_TILED, dim);
+    unsigned level_dim;
+
+    if (dim == DIM_WIDTH)
+        level_dim = u_minify(desc->b.b.width0, level);
+    else
+        level_dim = u_minify(desc->b.b.height0, level);
+
+    return rv350_mode ? level_dim >= macrotile_dim
+                      : level_dim > macrotile_dim;
+}
+
+/**
+ * Compute the stride, in bytes, of one miplevel of a texture.
+ *
+ * A non-zero desc->stride_in_bytes_override wins over everything else and
+ * is returned for any level. A level beyond last_level yields 0.
+ *
+ * \param screen  screen, used for debug output and the chip family
+ * \param desc    texture description
+ * \param level   miplevel
+ * \return the stride in bytes
+ */
+static unsigned r300_texture_get_stride(struct r300_screen *screen,
+                                        struct r300_texture_desc *desc,
+                                        unsigned level)
+{
+    unsigned level_width, align_px, row_bytes;
+
+    if (desc->stride_in_bytes_override)
+        return desc->stride_in_bytes_override;
+
+    /* Check the level. */
+    if (level > desc->b.b.last_level) {
+        SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
+                   __FUNCTION__, level, desc->b.b.last_level);
+        return 0;
+    }
+
+    level_width = u_minify(desc->b.b.width0, level);
+
+    /* Non-plain formats are only padded to 32 bytes. */
+    if (!util_format_is_plain(desc->b.b.format)) {
+        return align(util_format_get_stride(desc->b.b.format, level_width),
+                     32);
+    }
+
+    /* Plain formats: pad the width to the tile alignment first.
+     * The alignment to 32 bytes is sort of implied by the layout... */
+    align_px = r300_get_pixel_alignment(desc->b.b.format,
+                                        desc->b.b.nr_samples,
+                                        desc->microtile,
+                                        desc->macrotile[level],
+                                        DIM_WIDTH);
+    row_bytes = util_format_get_stride(desc->b.b.format,
+                                       align(level_width, align_px));
+
+    /* Tiled levels never get the IGP minimum below. */
+    if (desc->microtile || desc->macrotile[level])
+        return row_bytes;
+
+    /* Some IGPs need a minimum stride of 64 bytes, hmm...
+     * This doesn't seem to apply to tiled textures, according to r300c. */
+    switch (screen->caps.family) {
+    case CHIP_FAMILY_RS600:
+    case CHIP_FAMILY_RS690:
+    case CHIP_FAMILY_RS740:
+        return row_bytes < 64 ? 64 : row_bytes;
+
+    default:
+        return row_bytes;
+    }
+}
+
+/**
+ * Compute the number of block rows of one miplevel, after padding the
+ * height for tiling (plain formats only).
+ *
+ * \param desc   texture description
+ * \param level  miplevel
+ * \param out_aligned_for_cbzb  optional. When non-NULL and the format is
+ *        plain, the height may additionally be padded to an even number
+ *        of macrotiles and the pointee receives whether the final height
+ *        is a multiple of two tile heights. It is left untouched for
+ *        non-plain formats.
+ * \return the number of blocks in the Y direction
+ */
+static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
+                                          unsigned level,
+                                          boolean *out_aligned_for_cbzb)
+{
+    unsigned rows = u_minify(desc->b.b.height0, level);
+    unsigned tile_rows;
+    boolean is_1d_or_2d, is_mipmapped;
+
+    if (!util_format_is_plain(desc->b.b.format))
+        return util_format_get_nblocksy(desc->b.b.format, rows);
+
+    tile_rows = r300_get_pixel_alignment(desc->b.b.format,
+                                         desc->b.b.nr_samples,
+                                         desc->microtile,
+                                         desc->macrotile[level],
+                                         DIM_HEIGHT);
+    rows = align(rows, tile_rows);
+
+    is_1d_or_2d = desc->b.b.target == PIPE_TEXTURE_1D ||
+                  desc->b.b.target == PIPE_TEXTURE_2D;
+    is_mipmapped = desc->b.b.last_level != 0;
+
+    /* This is needed for the kernel checker, unfortunately. */
+    if (!is_1d_or_2d || is_mipmapped)
+        rows = util_next_power_of_two(rows);
+
+    /* See if the CBZB clear can be used on the buffer,
+     * taking the texture size into account. */
+    if (out_aligned_for_cbzb) {
+        if (!desc->macrotile[level]) {
+            *out_aligned_for_cbzb = FALSE;
+        } else {
+            /* When clearing, the layer (width*height) is horizontally
+             * split into two, and the upper and lower halves are cleared
+             * by the CB and ZB units, respectively. Therefore, the number
+             * of macrotiles in the Y direction must be even. */
+
+            /* Pad to an even number of macrotiles, but only when there
+             * are 3 or more of them in the Y direction. */
+            if (level == 0 && !is_mipmapped && is_1d_or_2d &&
+                rows >= tile_rows * 3) {
+                rows = align(rows, tile_rows * 2);
+            }
+
+            *out_aligned_for_cbzb = rows % (tile_rows * 2) == 0;
+        }
+    }
+
+    return util_format_get_nblocksy(desc->b.b.format, rows);
+}
+
+/**
+ * Work around a kernel miscalculation of mipmapped 3D texture sizes.
+ *
+ * The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
+ * incorrectly. When the winsys does not report R300_VID_DRM_2_3_0 and the
+ * texture is a 3D texture with more than one level, desc->size_in_bytes is
+ * replaced by (sum over all levels of stride * nblocksy) * depth0, to
+ * prevent CS from being rejected. Otherwise nothing is changed.
+ *
+ * \param screen  screen whose winsys is queried
+ * \param desc    texture description; stride_in_bytes[] must already be set
+ */
+static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
+                                           struct r300_texture_desc *desc)
+{
+    unsigned level, size = 0;
+
+    if (screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) ||
+        desc->b.b.target != PIPE_TEXTURE_3D ||
+        desc->b.b.last_level == 0) {
+        return;
+    }
+
+    for (level = 0; level <= desc->b.b.last_level; level++) {
+        /* The third argument is an optional output pointer, so the
+         * "not wanted" value is NULL rather than the boolean FALSE. */
+        size += desc->stride_in_bytes[level] *
+                r300_texture_get_nblocksy(desc, level, NULL);
+    }
+
+    size *= desc->b.b.depth0;
+    desc->size_in_bytes = size;
+}
+
+/**
+ * Convert a stride in bytes into a width in pixels.
+ *
+ * The stride is first turned into a whole number of format blocks
+ * (rounding down), then multiplied by the block width in pixels.
+ */
+static unsigned stride_to_width(enum pipe_format format,
+                                unsigned stride_in_bytes)
+{
+    const unsigned blocks_per_row =
+        stride_in_bytes / util_format_get_blocksize(format);
+
+    return blocks_per_row * util_format_get_blockwidth(format);
+}
+
+/**
+ * Lay out every miplevel of a texture and accumulate the total size.
+ *
+ * For each level this picks the macrotiling mode, computes the stride and
+ * the number of block rows, and stores the level's offset, layer size,
+ * stride (bytes and pixels) and CBZB eligibility in the description.
+ * desc->size_in_bytes is reset first, so the function can be called again
+ * on the same description.
+ *
+ * \param screen          screen, used for chip caps and debug output
+ * \param desc            texture description to fill in
+ * \param align_for_cbzb  if TRUE, levels with cbzb_allowed set may get
+ *                        their height padded for the CBZB clear; if FALSE,
+ *                        cbzb_allowed[] ends up FALSE for every level
+ */
+static void r300_setup_miptree(struct r300_screen *screen,
+ struct r300_texture_desc *desc,
+ boolean align_for_cbzb)
+{
+ struct pipe_resource *base = &desc->b.b;
+ unsigned stride, size, layer_size, nblocksy, i;
+ boolean rv350_mode = screen->caps.is_rv350;
+ boolean aligned_for_cbzb;
+
+ desc->size_in_bytes = 0;
+
+ SCREEN_DBG(screen, DBG_TEXALLOC,
+ "r300: Making miptree for texture, format %s\n",
+ util_format_short_name(base->format));
+
+ for (i = 0; i <= base->last_level; i++) {
+ /* Let's see if this miplevel can be macrotiled. */
+ /* Note: for i == 0 this rewrites macrotile[0] itself, and later
+ * levels test the rewritten value. */
+ desc->macrotile[i] =
+ (desc->macrotile[0] == R300_BUFFER_TILED &&
+ r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ?
+ R300_BUFFER_TILED : R300_BUFFER_LINEAR;
+
+ stride = r300_texture_get_stride(screen, desc, i);
+
+ /* Compute the number of blocks in Y, see if the CBZB clear can be
+ * used on the texture. */
+ /* Preset to FALSE because the callee only writes the flag for plain
+ * formats, and not at all when it is passed NULL. */
+ aligned_for_cbzb = FALSE;
+ if (align_for_cbzb && desc->cbzb_allowed[i])
+ nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb);
+ else
+ nblocksy = r300_texture_get_nblocksy(desc, i, NULL);
+
+ layer_size = stride * nblocksy;
+
+ /* A sample count of zero leaves the layer size unscaled. */
+ if (base->nr_samples) {
+ layer_size *= base->nr_samples;
+ }
+
+ /* Cube maps store six layers per level; everything else stores one
+ * layer per (minified) depth slice. */
+ if (base->target == PIPE_TEXTURE_CUBE)
+ size = layer_size * 6;
+ else
+ size = layer_size * u_minify(base->depth0, i);
+
+ /* Levels are packed back to back: this level starts where the
+ * previous one ended. */
+ desc->offset_in_bytes[i] = desc->size_in_bytes;
+ desc->size_in_bytes = desc->offset_in_bytes[i] + size;
+ desc->layer_size_in_bytes[i] = layer_size;
+ desc->stride_in_bytes[i] = stride;
+ desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride);
+ /* The flag can only be cleared here, never set. */
+ desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb;
+
+ SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
+ "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
+ i, u_minify(base->width0, i), u_minify(base->height0, i),
+ u_minify(base->depth0, i), stride, desc->size_in_bytes,
+ desc->macrotile[i] ? "TRUE" : "FALSE");
+ }
+}
+
+/**
+ * Derive the addressing flags of a texture from its dimensions.
+ *
+ * uses_stride_addressing is set when the width or height is not a power
+ * of two, or when a stride override is present and corresponds to a width
+ * different from width0. is_npot is set when uses_stride_addressing is
+ * set or the height is not a power of two.
+ */
+static void r300_setup_flags(struct r300_texture_desc *desc)
+{
+    const boolean npot_height = !util_is_power_of_two(desc->b.b.height0);
+
+    if (!util_is_power_of_two(desc->b.b.width0) || npot_height) {
+        desc->uses_stride_addressing = TRUE;
+    } else if (desc->stride_in_bytes_override) {
+        /* Both dimensions are powers of two; the override still forces
+         * stride addressing if it does not match the width. */
+        desc->uses_stride_addressing =
+            stride_to_width(desc->b.b.format,
+                            desc->stride_in_bytes_override) !=
+            desc->b.b.width0;
+    } else {
+        desc->uses_stride_addressing = FALSE;
+    }
+
+    desc->is_npot = desc->uses_stride_addressing || npot_height;
+}
+
+/**
+ * Initialize desc->cbzb_allowed[] for every miplevel.
+ *
+ * A level is eligible only if it is macrotiled and the texture as a whole
+ * qualifies:
+ * 1) The texture must be point-sampled (nr_samples <= 1),
+ * 2) The depth must be 16 or 32 bits.
+ * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage
+ *    with certain texture sizes. Macrotiling ensures the alignment, so
+ *    level 0 must be macrotiled.
+ *
+ * \param rscreen  not used by this function
+ * \param desc     texture description; macrotile[] is read
+ */
+static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
+                                  struct r300_texture_desc *desc)
+{
+    const unsigned bits_per_block =
+        util_format_get_blocksizebits(desc->b.b.format);
+    boolean texture_ok = TRUE;
+    unsigned level;
+
+    if (desc->b.b.nr_samples > 1)
+        texture_ok = FALSE;
+    else if (bits_per_block != 16 && bits_per_block != 32)
+        texture_ok = FALSE;
+    else if (!desc->macrotile[0])
+        texture_ok = FALSE;
+
+    for (level = 0; level <= desc->b.b.last_level; level++) {
+        desc->cbzb_allowed[level] = texture_ok && desc->macrotile[level];
+    }
+}
+
+/**
+ * Choose the microtiling mode and the level-0 macrotiling mode.
+ *
+ * Nothing is changed for non-plain formats. Non-depth/stencil textures
+ * are also left alone when their height is 1 or DBG_NO_TILING is on.
+ * With DBG_NO_TILING, a depth/stencil texture still gets microtiling but
+ * never macrotiling.
+ *
+ * \param screen  screen (winsys, chip caps, debug flags)
+ * \param desc    texture description; microtile and macrotile[0] may be set
+ */
+static void r300_setup_tiling(struct r300_screen *screen,
+                              struct r300_texture_desc *desc)
+{
+    struct r300_winsys_screen *rws = screen->rws;
+    const enum pipe_format format = desc->b.b.format;
+    const boolean no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING);
+    unsigned bytes_per_pixel;
+
+    if (!util_format_is_plain(format))
+        return;
+
+    /* If height == 1, disable microtiling except for zbuffer. */
+    if (!util_format_is_depth_or_stencil(format) &&
+        (desc->b.b.height0 == 1 || no_tiling)) {
+        return;
+    }
+
+    /* Set microtiling. */
+    bytes_per_pixel = util_format_get_blocksize(format);
+    if (bytes_per_pixel == 1 || bytes_per_pixel == 4) {
+        desc->microtile = R300_BUFFER_TILED;
+    } else if ((bytes_per_pixel == 2 || bytes_per_pixel == 8) &&
+               rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
+        desc->microtile = R300_BUFFER_SQUARETILED;
+    }
+
+    if (no_tiling)
+        return;
+
+    /* Set macrotiling. */
+    if (!r300_texture_macro_switch(desc, 0, screen->caps.is_rv350,
+                                   DIM_WIDTH)) {
+        return;
+    }
+    if (r300_texture_macro_switch(desc, 0, screen->caps.is_rv350,
+                                  DIM_HEIGHT)) {
+        desc->macrotile[0] = R300_BUFFER_TILED;
+    }
+}
+
+/**
+ * Print a one-line summary of a texture description to stderr.
+ *
+ * \param rscreen  not used by this function
+ * \param desc     texture description to dump
+ * \param func     label printed after the "r300: " prefix
+ */
+static void r300_tex_print_info(struct r300_screen *rscreen,
+                                struct r300_texture_desc *desc,
+                                const char *func)
+{
+    const char *macro_str = desc->macrotile[0] ? "YES" : " NO";
+    const char *micro_str = desc->microtile ? "YES" : " NO";
+    const char *format_str = util_format_short_name(desc->b.b.format);
+
+    fprintf(stderr,
+            "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, "
+            "LastLevel: %i, Size: %i, Format: %s\n",
+            func, macro_str, micro_str,
+            desc->stride_in_pixels[0],
+            desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0,
+            desc->b.b.last_level, desc->size_in_bytes,
+            format_str);
+}
+
+/**
+ * Fill in a texture description: tiling layout, addressing flags, CBZB
+ * clear eligibility and the per-level miptree layout.
+ *
+ * \param rscreen    screen the texture belongs to
+ * \param desc       description to initialize
+ * \param base       resource template, copied into desc->b.b
+ * \param microtile  microtiling mode, or R300_BUFFER_SELECT_LAYOUT
+ * \param macrotile  macrotiling mode, or R300_BUFFER_SELECT_LAYOUT
+ * \param stride_in_bytes_override  if non-zero, used as the stride of
+ *                   every level instead of the computed one
+ * \param max_buffer_size  if non-zero, the size of the buffer the layout
+ *                   has to fit into; it also becomes buffer_size_in_bytes
+ * \return FALSE if max_buffer_size is non-zero and the computed layout is
+ *         larger than it, TRUE otherwise
+ */
+boolean r300_texture_desc_init(struct r300_screen *rscreen,
+                               struct r300_texture_desc *desc,
+                               const struct pipe_resource *base,
+                               enum r300_buffer_tiling microtile,
+                               enum r300_buffer_tiling macrotile,
+                               unsigned stride_in_bytes_override,
+                               unsigned max_buffer_size)
+{
+    desc->b.b = *base;
+    desc->b.b.screen = &rscreen->screen;
+
+    desc->stride_in_bytes_override = stride_in_bytes_override;
+
+    if (microtile == R300_BUFFER_SELECT_LAYOUT ||
+        macrotile == R300_BUFFER_SELECT_LAYOUT) {
+        /* The caller leaves the choice of the layout to us. */
+        r300_setup_tiling(rscreen, desc);
+    } else {
+        /* A fixed layout is only supported without mipmaps. */
+        desc->microtile = microtile;
+        desc->macrotile[0] = macrotile;
+        assert(desc->b.b.last_level == 0);
+    }
+
+    r300_setup_flags(desc);
+    r300_setup_cbzb_flags(rscreen, desc);
+
+    /* Setup the miptree description. */
+    r300_setup_miptree(rscreen, desc, TRUE);
+    /* If the required buffer size is larger than the given max size,
+     * try again without the alignment for the CBZB clear. */
+    if (max_buffer_size && desc->size_in_bytes > max_buffer_size) {
+        r300_setup_miptree(rscreen, desc, FALSE);
+    }
+
+    r300_texture_3d_fix_mipmapping(rscreen, desc);
+
+    if (max_buffer_size) {
+        /* Make sure the buffer we got is large enough. */
+        if (desc->size_in_bytes > max_buffer_size) {
+            fprintf(stderr, "r300: texture_from_handle: The buffer is not "
+                            "large enough. Got: %i, Need: %i, Info:\n",
+                            max_buffer_size, desc->size_in_bytes);
+            r300_tex_print_info(rscreen, desc, "texture_from_handle");
+            return FALSE;
+        }
+
+        desc->buffer_size_in_bytes = max_buffer_size;
+    } else {
+        desc->buffer_size_in_bytes = desc->size_in_bytes;
+    }
+
+    /* This dump is reached for every successfully described texture, not
+     * only for ones wrapped around an existing buffer, so label it with
+     * this function's name instead of "texture_from_handle". */
+    if (SCREEN_DBG_ON(rscreen, DBG_TEX))
+        r300_tex_print_info(rscreen, desc, __FUNCTION__);
+
+    return TRUE;
+}
+
+/**
+ * Compute the byte offset of one image inside a texture.
+ *
+ * \param desc    texture description with the per-level layout
+ * \param level   miplevel
+ * \param zslice  depth slice; must be 0 unless the target is a 3D texture
+ * \param face    cube face; must be 0 unless the target is a cube map
+ * \return the offset of the level, plus the offset of the selected layer
+ *         for 3D textures and cube maps
+ */
+unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+                                 unsigned level, unsigned zslice,
+                                 unsigned face)
+{
+    const unsigned level_start = desc->offset_in_bytes[level];
+    unsigned layer;
+
+    if (desc->b.b.target == PIPE_TEXTURE_3D) {
+        assert(face == 0);
+        layer = zslice;
+    } else if (desc->b.b.target == PIPE_TEXTURE_CUBE) {
+        assert(zslice == 0);
+        layer = face;
+    } else {
+        /* Every other target has a single layer per level. */
+        assert(zslice == 0 && face == 0);
+        return level_start;
+    }
+
+    return level_start + layer * desc->layer_size_in_bytes[level];
+}
diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h
new file mode 100644
index 0000000000..95de66f654
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture_desc.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_TEXTURE_DESC_H
+#define R300_TEXTURE_DESC_H
+
+#include "r300_defines.h"
+
+struct pipe_resource;
+struct r300_screen;
+struct r300_texture_desc;
+struct r300_texture;
+
+/* Selects the texture dimension an alignment query refers to.
+ * r300_get_pixel_alignment uses the values directly as array indices,
+ * so they must stay 0 and 1. */
+enum r300_dim {
+ DIM_WIDTH = 0,
+ DIM_HEIGHT = 1
+};
+
+/* Returns the number of pixels a texture should be aligned to in the given
+ * dimension, for the given format, sample count and micro/macro tiling. */
+unsigned r300_get_pixel_alignment(enum pipe_format format,
+ unsigned num_samples,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ enum r300_dim dim);
+
+/* Initializes a texture description from a resource template.
+ * Passing R300_BUFFER_SELECT_LAYOUT as either tiling mode lets the function
+ * choose the tiling. A non-zero stride_in_bytes_override replaces the
+ * computed stride. A non-zero max_buffer_size is the size the layout must
+ * fit into; FALSE is returned if it does not fit, TRUE otherwise. */
+boolean r300_texture_desc_init(struct r300_screen *rscreen,
+ struct r300_texture_desc *desc,
+ const struct pipe_resource *base,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ unsigned stride_in_bytes_override,
+ unsigned max_buffer_size);
+
+/* Returns the byte offset of an image inside the texture: the offset of
+ * the miplevel, plus zslice layers for 3D textures or face layers for
+ * cube maps. */
+unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+ unsigned level, unsigned zslice,
+ unsigned face);
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 5394e04f72..51b2c55550 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -71,7 +71,7 @@ static unsigned translate_opcode(unsigned opcode)
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
- /* case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; */
+ case TGSI_OPCODE_KILP: return RC_OPCODE_KILP;
/* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */
/* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */
/* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index d41f258836..e9333b35ef 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -22,7 +22,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_transfer.h"
-#include "r300_texture.h"
+#include "r300_texture_desc.h"
#include "r300_screen_buffer.h"
#include "util/u_memory.h"
@@ -35,8 +35,8 @@ struct r300_transfer {
/* Offset from start of buffer. */
unsigned offset;
- /* Detiled texture. */
- struct r300_texture *detiled_texture;
+ /* Linear texture. */
+ struct r300_texture *linear_texture;
};
/* Convenience cast wrapper. */
@@ -57,7 +57,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
subdst.face = 0;
subdst.level = 0;
- ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst,
+ ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst,
0, 0, 0,
tex, transfer->sr,
transfer->box.x, transfer->box.y, transfer->box.z,
@@ -77,9 +77,11 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
ctx->resource_copy_region(ctx, tex, transfer->sr,
transfer->box.x, transfer->box.y, transfer->box.z,
- &r300transfer->detiled_texture->b.b, subsrc,
+ &r300transfer->linear_texture->desc.b.b, subsrc,
0, 0, 0,
transfer->box.width, transfer->box.height);
+
+ ctx->flush(ctx, 0, NULL);
}
struct pipe_transfer*
@@ -89,19 +91,21 @@ r300_texture_get_transfer(struct pipe_context *ctx,
unsigned usage,
const struct pipe_box *box)
{
+ struct r300_context *r300 = r300_context(ctx);
struct r300_texture *tex = r300_texture(texture);
- struct r300_screen *r300screen = r300_screen(ctx->screen);
struct r300_transfer *trans;
struct pipe_resource base;
boolean referenced_cs, referenced_hw, blittable;
- referenced_cs = r300screen->rws->is_buffer_referenced(
- r300screen->rws, tex->buffer, R300_REF_CS);
+ referenced_cs =
+ r300->rws->cs_is_buffer_referenced(r300->cs,
+ tex->buffer, R300_REF_CS);
if (referenced_cs) {
referenced_hw = TRUE;
} else {
- referenced_hw = r300screen->rws->is_buffer_referenced(
- r300screen->rws, tex->buffer, R300_REF_HW);
+ referenced_hw =
+ r300->rws->cs_is_buffer_referenced(r300->cs,
+ tex->buffer, R300_REF_HW);
}
blittable = ctx->screen->is_format_supported(
@@ -119,7 +123,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
/* If the texture is tiled, we must create a temporary detiled texture
* for this transfer.
* Also make write transfers pipelined. */
- if (tex->microtile || tex->macrotile ||
+ if (tex->desc.microtile || tex->desc.macrotile[sr.level] ||
((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) {
base.target = PIPE_TEXTURE_2D;
base.format = texture->format;
@@ -144,23 +148,23 @@ r300_texture_get_transfer(struct pipe_context *ctx,
}
/* Create the temporary texture. */
- trans->detiled_texture = r300_texture(
+ trans->linear_texture = r300_texture(
ctx->screen->resource_create(ctx->screen,
&base));
- if (!trans->detiled_texture) {
+ if (!trans->linear_texture) {
/* Oh crap, the thing can't create the texture.
* Let's flush and try again. */
ctx->flush(ctx, 0, NULL);
- trans->detiled_texture = r300_texture(
+ trans->linear_texture = r300_texture(
ctx->screen->resource_create(ctx->screen,
&base));
- if (!trans->detiled_texture) {
+ if (!trans->linear_texture) {
/* For linear textures, it's safe to fallback to
* an unpipelined transfer. */
- if (!tex->microtile && !tex->macrotile) {
+ if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) {
goto unpipelined;
}
@@ -172,8 +176,8 @@ r300_texture_get_transfer(struct pipe_context *ctx,
}
}
- assert(!trans->detiled_texture->microtile &&
- !trans->detiled_texture->macrotile);
+ assert(!trans->linear_texture->desc.microtile &&
+ !trans->linear_texture->desc.macrotile[0]);
/* Set the stride.
*
@@ -183,7 +187,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
* right thing internally.
*/
trans->transfer.stride =
- r300_texture_get_stride(r300screen, trans->detiled_texture, 0);
+ trans->linear_texture->desc.stride_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
/* We cannot map a tiled texture directly because the data is
@@ -198,11 +202,11 @@ r300_texture_get_transfer(struct pipe_context *ctx,
unpipelined:
/* Unpipelined transfer. */
- trans->transfer.stride =
- r300_texture_get_stride(r300screen, tex, sr.level);
- trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face);
+ trans->transfer.stride = tex->desc.stride_in_bytes[sr.level];
+ trans->offset = r300_texture_get_offset(&tex->desc,
+ sr.level, box->z, sr.face);
- if (referenced_cs && (usage & PIPE_TRANSFER_READ))
+ if (referenced_cs)
ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
return &trans->transfer;
}
@@ -214,13 +218,13 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx,
{
struct r300_transfer *r300transfer = r300_transfer(trans);
- if (r300transfer->detiled_texture) {
+ if (r300transfer->linear_texture) {
if (trans->usage & PIPE_TRANSFER_WRITE) {
r300_copy_into_tiled_texture(ctx, r300transfer);
}
pipe_resource_reference(
- (struct pipe_resource**)&r300transfer->detiled_texture, NULL);
+ (struct pipe_resource**)&r300transfer->linear_texture, NULL);
}
pipe_resource_reference(&trans->resource, NULL);
FREE(trans);
@@ -229,21 +233,23 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx,
void* r300_texture_transfer_map(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
+ struct r300_context *r300 = r300_context(ctx);
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
struct r300_transfer *r300transfer = r300_transfer(transfer);
struct r300_texture *tex = r300_texture(transfer->resource);
char *map;
- enum pipe_format format = tex->b.b.format;
+ enum pipe_format format = tex->desc.b.b.format;
- if (r300transfer->detiled_texture) {
+ if (r300transfer->linear_texture) {
/* The detiled texture is of the same size as the region being mapped
* (no offset needed). */
return rws->buffer_map(rws,
- r300transfer->detiled_texture->buffer,
+ r300transfer->linear_texture->buffer,
+ r300->cs,
transfer->usage);
} else {
/* Tiling is disabled. */
- map = rws->buffer_map(rws, tex->buffer,
+ map = rws->buffer_map(rws, tex->buffer, r300->cs,
transfer->usage);
if (!map) {
@@ -263,8 +269,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx,
struct r300_transfer *r300transfer = r300_transfer(transfer);
struct r300_texture *tex = r300_texture(transfer->resource);
- if (r300transfer->detiled_texture) {
- rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer);
+ if (r300transfer->linear_texture) {
+ rws->buffer_unmap(rws, r300transfer->linear_texture->buffer);
} else {
rws->buffer_unmap(rws, tex->buffer);
}
diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c
index d64040b891..2939963c35 100644
--- a/src/gallium/drivers/r300/r300_vs_draw.c
+++ b/src/gallium/drivers/r300/r300_vs_draw.c
@@ -185,7 +185,7 @@ static void transform_decl(struct tgsi_transform_context *ctx,
if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
TGSI_INTERPOLATE_LINEAR);
- vsctx->color_used[2] = TRUE;
+ vsctx->bcolor_used[0] = TRUE;
}
/* One more case is handled in insert_trailing_bcolor. */
break;
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 77c1c13ef9..ff11546a64 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,17 +24,25 @@
#ifndef R300_WINSYS_H
#define R300_WINSYS_H
-/* The public interface header for the r300 pipe driver.
- * Any winsys hosting this pipe needs to implement r300_winsys and then
- * call r300_create_screen to start things. */
+/* The public winsys interface header for the r300 pipe driver.
+ * Any winsys hosting this pipe needs to implement r300_winsys_screen and then
+ * call r300_screen_create to start things. */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "r300_defines.h"
+struct r300_winsys_screen;
+
struct r300_winsys_buffer;
+/* Bookkeeping for one CS (presumably "command stream" -- TODO confirm):
+ * a dword buffer together with its fill level and its capacity. */
+struct r300_winsys_cs {
+ uint32_t *ptr; /* Pointer to the beginning of the CS. */
+ unsigned cdw; /* Number of used dwords. */
+ unsigned ndw; /* Size of the CS in dwords. */
+};
+
enum r300_value_id {
R300_VID_PCI_ID,
R300_VID_GB_PIPES,
@@ -48,121 +57,251 @@ enum r300_reference_domain { /* bitfield */
};
struct r300_winsys_screen {
+ /**
+ * Destroy this winsys.
+ *
+ * \param ws The winsys this function is called from.
+ */
void (*destroy)(struct r300_winsys_screen *ws);
-
+
/**
+ * Query a system value from a winsys.
+ *
+ * \param ws The winsys this function is called from.
+ * \param vid One of the R300_VID_* enums.
+ */
+ uint32_t (*get_value)(struct r300_winsys_screen *ws,
+ enum r300_value_id vid);
+
+ /**************************************************************************
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*
* Remember that gallium gets to choose the interface it needs, and the
* window systems must then implement that interface (rather than the
* other way around...).
+ *************************************************************************/
+
+ /**
+ * Create a buffer object.
*
- * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
- * usage argument is only an optimization hint, not a guarantee, therefore
- * proper behavior must be observed in all circumstances.
- *
- * alignment indicates the client's alignment requirements, eg for
- * SSE instructions.
+ * \param ws The winsys this function is called from.
+ * \param size The size to allocate.
+ * \param alignment An alignment of the buffer in memory.
+ * \param bind A bitmask of the PIPE_BIND_* flags.
+ * \param usage A bitmask of the PIPE_USAGE_* flags.
+ * \param domain A bitmask of the R300_DOMAIN_* flags.
+ * \return The created buffer object.
*/
struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws,
- unsigned alignment,
- unsigned usage,
- enum r300_buffer_domain domain,
- unsigned size);
+ unsigned size,
+ unsigned alignment,
+ unsigned bind,
+ unsigned usage,
+ enum r300_buffer_domain domain);
/**
- * Map the entire data store of a buffer object into the client's address.
- * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags.
+ * Reference a buffer object (assign with reference counting).
+ *
+ * \param ws The winsys this function is called from.
+ * \param pdst A destination pointer to set the source buffer to.
+ * \param src A source buffer object.
*/
- void *(*buffer_map)( struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
- unsigned usage);
+ void (*buffer_reference)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer **pdst,
+ struct r300_winsys_buffer *src);
- void (*buffer_unmap)( struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf );
+ /**
+ * Map the entire data store of a buffer object into the client's address
+ * space.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to map.
+ * \param cs A command stream to flush if the buffer is referenced by it.
+ * \param usage A bitmask of the PIPE_TRANSFER_* flags.
+ * \return The pointer at the beginning of the buffer.
+ */
+ void *(*buffer_map)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ struct r300_winsys_cs *cs,
+ enum pipe_transfer_usage usage);
- void (*buffer_destroy)( struct r300_winsys_buffer *buf );
+ /**
+ * Unmap a buffer object from the client's address space.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to unmap.
+ */
+ void (*buffer_unmap)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf);
+ /**
+ * Wait for a buffer object until it is not used by a GPU. This is
+ * equivalent to a fence placed after the last command using the buffer,
+ * and synchronizing to the fence.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to wait for.
+ */
+ void (*buffer_wait)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf);
- void (*buffer_reference)(struct r300_winsys_screen *rws,
- struct r300_winsys_buffer **pdst,
- struct r300_winsys_buffer *src);
+ /**
+ * Return tiling flags describing a memory layout of a buffer object.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to get the flags from.
+ * \param microtile A pointer to the return value of the microtile flag.
+ * \param macrotile A pointer to the return value of the macrotile flag.
+ *
+ * \note microtile and macrotile are not bitmasks!
+ */
+ void (*buffer_get_tiling)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_tiling *microtile,
+ enum r300_buffer_tiling *macrotile);
- void (*buffer_wait)(struct r300_winsys_screen *rws,
- struct r300_winsys_buffer *buf);
+ /**
+ * Set tiling flags describing a memory layout of a buffer object.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to set the flags for.
+ * \param macrotile A macrotile flag.
+ * \param microtile A microtile flag.
+ * \param stride A stride of the buffer in bytes, for texturing.
+ *
+ * \note microtile and macrotile are not bitmasks!
+ */
+ void (*buffer_set_tiling)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ unsigned stride);
- /* Add a pipe_resource to the list of buffer objects to validate. */
- boolean (*add_buffer)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buf,
- enum r300_buffer_domain rd,
- enum r300_buffer_domain wd);
+ /**
+ * Get a winsys buffer from a winsys handle. The internal structure
+ * of the handle is platform-specific and only a winsys should access it.
+ *
+ * \param ws The winsys this function is called from.
+ * \param whandle A winsys handle pointer as was received from a state
+ * tracker.
+ * \param stride The returned buffer stride in bytes.
+ * \param size The returned buffer size.
+ */
+ struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws,
+ struct winsys_handle *whandle,
+ unsigned *stride,
+ unsigned *size);
+ /**
+ * Get a winsys handle from a winsys buffer. The internal structure
+ * of the handle is platform-specific and only a winsys should access it.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to get the handle from.
+ * \param whandle A winsys handle pointer.
+ * \param stride A stride of the buffer in bytes, for texturing.
+ * \return TRUE on success.
+ */
+ boolean (*buffer_get_handle)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ unsigned stride,
+ struct winsys_handle *whandle);
- /* Revalidate all currently setup pipe_buffers.
- * Returns TRUE if a flush is required. */
- boolean (*validate)(struct r300_winsys_screen* winsys);
+ /**************************************************************************
+ * Command submission.
+ *
+ * Each pipe context should create its own command stream and submit
+ * commands independently of other contexts.
+ *************************************************************************/
- /* Return the number of free dwords in CS. */
- unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys);
+ /**
+ * Create a command stream.
+ *
+ * \param ws The winsys this function is called from.
+ */
+ struct r300_winsys_cs *(*cs_create)(struct r300_winsys_screen *ws);
- /* Return the pointer to the first free dword in CS and assume a pipe
- * driver wants to fill "count" dwords. */
- uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys,
- unsigned count);
+ /**
+ * Destroy a command stream.
+ *
+ * \param cs A command stream to destroy.
+ */
+ void (*cs_destroy)(struct r300_winsys_cs *cs);
- /* Write a dword to the command buffer. */
- void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword);
+ /**
+ * Add a buffer object to the list of buffers to validate.
+ *
+ * \param cs A command stream to add buffer for validation against.
+ * \param buf A winsys buffer to validate.
+ * \param rd A read domain containing a bitmask
+ * of the R300_DOMAIN_* flags.
+ * \param wd A write domain containing a bitmask
+ * of the R300_DOMAIN_* flags.
+ */
+ void (*cs_add_buffer)(struct r300_winsys_cs *cs,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd);
- /* Write a table of dwords to the command buffer. */
- void (*write_cs_table)(struct r300_winsys_screen* winsys,
- const void *dwords, unsigned count);
+ /**
+ * Revalidate all currently set up winsys buffers.
+ * Returns TRUE if a flush is required.
+ *
+ * \param cs A command stream to validate.
+ */
+ boolean (*cs_validate)(struct r300_winsys_cs *cs);
- /* Write a relocated dword to the command buffer. */
- void (*write_cs_reloc)(struct r300_winsys_screen *winsys,
+ /**
+ * Write a relocated dword to a command buffer.
+ *
+ * \param cs A command stream the relocation is written to.
+ * \param buf A winsys buffer to write the relocation for.
+ * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags.
+ * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
+ */
+ void (*cs_write_reloc)(struct r300_winsys_cs *cs,
struct r300_winsys_buffer *buf,
enum r300_buffer_domain rd,
- enum r300_buffer_domain wd,
- uint32_t flags);
-
- /* Flush the CS. */
- void (*flush_cs)(struct r300_winsys_screen* winsys);
-
- /* winsys flush - callback from winsys when flush required */
- void (*set_flush_cb)(struct r300_winsys_screen *winsys,
- void (*flush_cb)(void *), void *data);
-
- void (*reset_bos)(struct r300_winsys_screen *winsys);
-
- void (*buffer_get_tiling)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- enum r300_buffer_tiling *microtiled,
- enum r300_buffer_tiling *macrotiled);
+ enum r300_buffer_domain wd);
- void (*buffer_set_tiling)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- uint32_t pitch,
- enum r300_buffer_tiling microtiled,
- enum r300_buffer_tiling macrotiled);
-
- uint32_t (*get_value)(struct r300_winsys_screen *winsys,
- enum r300_value_id vid);
+ /**
+ * Flush a command stream.
+ *
+ * \param cs A command stream to flush.
+ */
+ void (*cs_flush)(struct r300_winsys_cs *cs);
- struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys,
- unsigned handle);
+ /**
+ * Set a flush callback which is called from winsys when flush is
+ * required.
+ *
+ * \param cs A command stream to set the callback for.
+ * \param flush A flush callback function associated with the command stream.
+ * \param user A user pointer that will be passed to the flush callback.
+ */
+ void (*cs_set_flush)(struct r300_winsys_cs *cs,
+ void (*flush)(void *),
+ void *user);
- boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- struct winsys_handle *whandle);
+ /**
+ * Reset the list of buffer objects to validate, usually called
+ * prior to adding buffer objects for validation.
+ *
+ * \param cs A command stream to reset buffers for.
+ */
+ void (*cs_reset_buffers)(struct r300_winsys_cs *cs);
- boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- enum r300_reference_domain domain);
+ /**
+ * Return TRUE if a buffer is referenced by a command stream or by hardware
+ * (i.e. is busy), based on the domain parameter.
+ *
+ * \param cs A command stream.
+ * \param buf A winsys buffer.
+ * \param domain A bitmask of the R300_REF_* enums.
+ */
+ boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs,
+ struct r300_winsys_buffer *buf,
+ enum r300_reference_domain domain);
};
-struct r300_winsys_screen *
-r300_winsys_screen(struct pipe_screen *screen);
-
-/* Creates a new r300 screen. */
-struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws);
-
#endif /* R300_WINSYS_H */
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index aae31a6a6e..8f1e1366b5 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -18,10 +18,7 @@ C_SOURCES = \
r600_state.c \
r600_texture.c \
r600_shader.c \
- r600_compiler.c \
- r600_compiler_tgsi.c \
- r600_compiler_dump.c \
- r600_compiler_r600.c \
- r600_compiler_r700.c
+ r600_asm.c \
+ r700_asm.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 26e2f1941c..99c8644e02 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -27,11 +27,8 @@ r600 = env.ConvenienceLibrary(
'r600_state.c',
'r600_texture.c',
'r600_shader.c',
- 'r600_compiler.c',
- 'r600_compiler_tgsi.c',
- 'r600_compiler_dump.c',
- 'r600_compiler_r600.c',
- 'r600_compiler_r700.c'
+ 'r600_asm.c',
+ 'r700_asm.c',
])
Export('r600')
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
new file mode 100644
index 0000000000..e678a2fdf2
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "r600_asm.h"
+#include "r600_context.h"
+#include "util/u_memory.h"
+#include "r600_sq.h"
+#include <stdio.h>
+#include <errno.h>
+
+/* ALU encoder that r600_bc_build() uses for the RV770/RV730/RV710/RV740
+ * families; it is not defined in this file (presumably r700_asm.c - confirm). */
+int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+
+/*
+ * Allocate a zero-filled CF node and initialise its own list link as well
+ * as the three per-clause instruction lists. Returns NULL when the
+ * allocation fails.
+ */
+static struct r600_bc_cf *r600_bc_cf(void)
+{
+	struct r600_bc_cf *node = CALLOC_STRUCT(r600_bc_cf);
+
+	if (node != NULL) {
+		LIST_INITHEAD(&node->list);
+		LIST_INITHEAD(&node->alu);
+		LIST_INITHEAD(&node->vtx);
+		LIST_INITHEAD(&node->tex);
+	}
+	return node;
+}
+
+/*
+ * Allocate a zero-filled ALU instruction node with an initialised list
+ * link. Returns NULL when the allocation fails.
+ */
+static struct r600_bc_alu *r600_bc_alu(void)
+{
+	struct r600_bc_alu *node = CALLOC_STRUCT(r600_bc_alu);
+
+	if (node != NULL) {
+		LIST_INITHEAD(&node->list);
+	}
+	return node;
+}
+
+/*
+ * Allocate a zero-filled vertex fetch node with an initialised list link.
+ * Returns NULL when the allocation fails.
+ */
+static struct r600_bc_vtx *r600_bc_vtx(void)
+{
+	struct r600_bc_vtx *node = CALLOC_STRUCT(r600_bc_vtx);
+
+	if (node != NULL) {
+		LIST_INITHEAD(&node->list);
+	}
+	return node;
+}
+
+/*
+ * Allocate a zero-filled texture fetch node with an initialised list link.
+ * Returns NULL when the allocation fails.
+ */
+static struct r600_bc_tex *r600_bc_tex(void)
+{
+	struct r600_bc_tex *node = CALLOC_STRUCT(r600_bc_tex);
+
+	if (node != NULL) {
+		LIST_INITHEAD(&node->list);
+	}
+	return node;
+}
+
+/*
+ * Prepare a bytecode builder for a new program targeting the given family.
+ *
+ * The CF list is emptied and the bookkeeping that describes it (last CF
+ * node, CF count, dword count) is reset with it. Other members (ngpr,
+ * nresource, bytecode) are left untouched, so the caller is still expected
+ * to hand in a zero-initialised structure.
+ *
+ * Always returns 0.
+ */
+int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
+{
+	LIST_INITHEAD(&bc->cf);
+	/* r600_bc_add_cf() derives the id of a new CF from cf_last; with an
+	 * empty list it must not follow a stale or uninitialised pointer */
+	bc->cf_last = NULL;
+	bc->ncf = 0;
+	bc->ndw = 0;
+	bc->family = family;
+	return 0;
+}
+
+/*
+ * Append a fresh CF node to the program and make it the current one.
+ *
+ * A CF instruction takes two dwords, so its id (the dword index it is
+ * written to) is two past the id of the previous CF; the first CF keeps the
+ * id of 0 it got from the zeroing allocator.
+ *
+ * Returns 0 on success or -ENOMEM when the node cannot be allocated.
+ */
+static int r600_bc_add_cf(struct r600_bc *bc)
+{
+	struct r600_bc_cf *node = r600_bc_cf();
+
+	if (!node) {
+		return -ENOMEM;
+	}
+	if (bc->cf_last != NULL) {
+		node->id = bc->cf_last->id + 2;
+	}
+	LIST_ADDTAIL(&node->list, &bc->cf);
+	bc->cf_last = node;
+	bc->ncf += 1;
+	bc->ndw += 2;
+	return 0;
+}
+
+/*
+ * Append an export to the program. An export is a CF instruction of its
+ * own: a new CF node is created, takes the export's instruction code and
+ * keeps a copy of the whole description.
+ *
+ * Returns 0 on success or the error reported by r600_bc_add_cf().
+ */
+int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
+{
+	int err = r600_bc_add_cf(bc);
+
+	if (err != 0) {
+		return err;
+	}
+	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
+	bc->cf_last->inst = output->inst;
+	return 0;
+}
+
+/*
+ * Append a copy of an ALU instruction to the program.
+ *
+ * A new ALU CF is started unless the current CF already is an ALU clause.
+ * The function also keeps bc->ngpr up to date and works out how many
+ * literal dwords (0, 2 or 4) will follow the instruction group this ALU
+ * belongs to.
+ *
+ * Returns 0 on success, -ENOMEM when the copy cannot be allocated, or the
+ * error reported by r600_bc_add_cf().
+ */
+int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+{
+	struct r600_bc_alu *nalu = r600_bc_alu();
+	struct r600_bc_alu *lalu;
+	int i, r;
+
+	if (nalu == NULL)
+		return -ENOMEM;
+	memcpy(nalu, alu, sizeof(struct r600_bc_alu));
+	nalu->nliteral = 0;
+
+	/* a cf can contain only alu, only vtx or only tex instructions */
+	if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) {
+		r = r600_bc_add_cf(bc);
+		if (r) {
+			/* nalu comes from CALLOC_STRUCT, so it has to be
+			 * released with the matching FREE, not plain free() */
+			FREE(nalu);
+			return r;
+		}
+		bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3;
+	}
+	/* number of gprs == highest gpr used by any alu, plus one; only
+	 * source selectors below 128 are counted as gprs */
+	for (i = 0; i < 3; i++) {
+		if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
+			bc->ngpr = alu->src[i].sel + 1;
+		}
+		/* compute how many literals are needed: selector 253 reads a
+		 * literal, channels 0-1 need 2 dwords and channels 2-3 need 4
+		 */
+		if (alu->src[i].sel == 253) {
+			if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
+				nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
+			}
+		}
+	}
+	/* literals are emitted after the last alu of a group, so carry the
+	 * largest count seen so far in the still open group forward */
+	if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
+		lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
+		if (!lalu->last && lalu->nliteral > nalu->nliteral) {
+			nalu->nliteral = lalu->nliteral;
+		}
+	}
+	if (alu->dst.sel >= bc->ngpr) {
+		bc->ngpr = alu->dst.sel + 1;
+	}
+	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
+	/* each alu uses 2 dwords */
+	bc->cf_last->ndw += 2;
+	bc->ndw += 2;
+	return 0;
+}
+
+/*
+ * Attach literal constants to the most recently added ALU instruction.
+ *
+ * Nothing is stored (and 0 is returned) when the current CF is a texture
+ * clause, or when the last ALU does not close its group or needs no
+ * literal. Otherwise four dwords are copied from value and the dword
+ * counters grow by the number of literals the ALU needs.
+ *
+ * Returns -EINVAL when there is no current CF or when it is not a non-empty
+ * ALU clause.
+ */
+int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
+{
+	struct r600_bc_cf *cf = bc->cf_last;
+	struct r600_bc_alu *tail;
+
+	if (cf == NULL) {
+		R600_ERR("no last CF\n");
+		return -EINVAL;
+	}
+	if (cf->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
+		return 0;
+	}
+	if (cf->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+	    LIST_IS_EMPTY(&cf->alu)) {
+		R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
+		return -EINVAL;
+	}
+	tail = LIST_ENTRY(struct r600_bc_alu, cf->alu.prev, list);
+	if (tail->last && tail->nliteral) {
+		memcpy(tail->value, value, 4 * 4);
+		cf->ndw += tail->nliteral;
+		bc->ndw += tail->nliteral;
+	}
+	return 0;
+}
+
+/*
+ * Append a copy of a vertex fetch instruction to the program.
+ *
+ * A new VTX CF is started unless the current CF already is a VTX or VTX_TC
+ * clause.
+ *
+ * Returns 0 on success, -ENOMEM when the copy cannot be allocated, or the
+ * error reported by r600_bc_add_cf().
+ */
+int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
+{
+	struct r600_bc_vtx *nvtx = r600_bc_vtx();
+	int r;
+
+	if (nvtx == NULL)
+		return -ENOMEM;
+	memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx));
+
+	/* a cf can contain only alu, only vtx or only tex instructions */
+	if (bc->cf_last == NULL ||
+	    (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
+	     bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) {
+		r = r600_bc_add_cf(bc);
+		if (r) {
+			/* nvtx comes from CALLOC_STRUCT, so it has to be
+			 * released with the matching FREE, not plain free() */
+			FREE(nvtx);
+			return r;
+		}
+		bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
+	}
+	LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
+	/* each fetch uses 4 dwords */
+	bc->cf_last->ndw += 4;
+	bc->ndw += 4;
+	return 0;
+}
+
+/*
+ * Append a copy of a texture fetch instruction to the program.
+ *
+ * A new TEX CF is started unless the current CF already is a TEX clause.
+ *
+ * Returns 0 on success, -ENOMEM when the copy cannot be allocated, or the
+ * error reported by r600_bc_add_cf().
+ */
+int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
+{
+	struct r600_bc_tex *ntex = r600_bc_tex();
+	int r;
+
+	if (ntex == NULL)
+		return -ENOMEM;
+	memcpy(ntex, tex, sizeof(struct r600_bc_tex));
+
+	/* a cf can contain only alu, only vtx or only tex instructions */
+	if (bc->cf_last == NULL ||
+	    bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
+		r = r600_bc_add_cf(bc);
+		if (r) {
+			/* ntex comes from CALLOC_STRUCT, so it has to be
+			 * released with the matching FREE, not plain free() */
+			FREE(ntex);
+			return r;
+		}
+		bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
+	}
+	LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
+	/* each texture fetch uses 4 dwords */
+	bc->cf_last->ndw += 4;
+	bc->ndw += 4;
+	return 0;
+}
+
+/*
+ * Encode one vertex fetch instruction into the four dwords of bc->bytecode
+ * starting at index id. Constant fields and mega fetch are always enabled
+ * and the fourth dword is written as 0. Always returns 0.
+ */
+static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
+{
+	u32 *dw = &bc->bytecode[id];
+
+	dw[0] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+		S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
+		S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
+		S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
+	dw[1] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
+		S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
+		S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
+		S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
+		S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) |
+		S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
+	dw[2] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
+	dw[3] = 0;
+	return 0;
+}
+
+/*
+ * Encode one texture fetch instruction into the four dwords of bc->bytecode
+ * starting at index id. The fourth dword is written as 0. Always returns 0.
+ */
+static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id)
+{
+	u32 *dw = &bc->bytecode[id];
+
+	dw[0] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
+		S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
+		S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
+		S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
+	dw[1] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
+		S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
+		S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
+		S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
+		S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
+		S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
+		S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
+		S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
+		S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
+		S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
+		S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
+	dw[2] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
+		S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
+		S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
+		S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
+		S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
+		S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
+		S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
+		S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
+	dw[3] = 0;
+	return 0;
+}
+
+/*
+ * Encode one ALU instruction into bc->bytecode starting at index id.
+ *
+ * Two dwords are always written; the OP3 and OP2 forms share the source
+ * select/channel, last, destination and bank swizzle fields and differ in
+ * the rest. When the instruction closes its group (alu->last) the
+ * alu->nliteral literal dwords follow immediately. Always returns 0.
+ */
+int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+{
+	u32 word0, word1;
+	unsigned n;
+
+	/* fields common to both encodings */
+	word0 = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+		S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+		S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+		S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+		S_SQ_ALU_WORD0_LAST(alu->last);
+	/* don't replace gpr by pv or ps for destination register */
+	word1 = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
+		S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+		S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+
+	if (alu->is_op3) {
+		word1 |= S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+			 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
+			 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
+			 S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst);
+	} else {
+		word0 |= S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
+			 S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg);
+		word1 |= S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
+			 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
+			 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+			 S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst);
+	}
+	bc->bytecode[id] = word0;
+	bc->bytecode[id + 1] = word1;
+
+	if (alu->last) {
+		for (n = 0; n < alu->nliteral; n++) {
+			bc->bytecode[id + 2 + n] = alu->value[n];
+		}
+	}
+	return 0;
+}
+
+/*
+ * Encode one CF instruction into the two dwords of bc->bytecode at index
+ * cf->id.
+ *
+ * ALU clauses count their body in pairs of dwords, fetch clauses in groups
+ * of four; exports are encoded from cf->output. Nothing is written and
+ * -EINVAL is returned for any other instruction code.
+ */
+int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+{
+	u32 word0, word1;
+
+	switch (cf->inst) {
+	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		word0 = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1);
+		word1 = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
+			S_SQ_CF_ALU_WORD1_BARRIER(1) |
+			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
+		break;
+	case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+		word0 = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
+		word1 = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+		break;
+	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		word0 = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
+			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
+			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
+			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
+		word1 = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+		break;
+	default:
+		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+		return -EINVAL;
+	}
+	bc->bytecode[cf->id] = word0;
+	bc->bytecode[cf->id + 1] = word1;
+	return 0;
+}
+
+/*
+ * Assemble the whole program into bc->bytecode.
+ *
+ * The first pass lays the clause bodies out after the block of CF
+ * instructions and computes the final size; the second pass encodes every
+ * CF instruction followed by the instructions of its clause. Any bytecode
+ * from an earlier build is released and replaced.
+ *
+ * Returns 0 on success, -EINVAL when the program is empty or contains an
+ * unsupported CF instruction or chip family, -ENOMEM when the bytecode
+ * buffer cannot be allocated, or the error of a failing encoder.
+ */
+int r600_bc_build(struct r600_bc *bc)
+{
+	struct r600_bc_cf *cf;
+	struct r600_bc_alu *alu;
+	struct r600_bc_vtx *vtx;
+	struct r600_bc_tex *tex;
+	unsigned addr;
+	int r;
+
+	/* nothing was added yet: there is no last CF to take the start
+	 * address from, so refuse instead of dereferencing NULL */
+	if (bc->cf_last == NULL) {
+		R600_ERR("no CF instruction to build\n");
+		return -EINVAL;
+	}
+
+	/* first pass: compute the addr of each CF block */
+	/* addresses start after all the CF instructions */
+	addr = bc->cf_last->id + 2;
+	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+		switch (cf->inst) {
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+			break;
+		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+			/* fetch nodes need to be 16 bytes aligned, i.e. start
+			 * on a multiple of 4 dwords */
+			addr += 3;
+			addr &= 0xFFFFFFFCUL;
+			break;
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+			break;
+		default:
+			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+			return -EINVAL;
+		}
+		cf->addr = addr;
+		addr += cf->ndw;
+		/* the program ends where the last laid out clause ends */
+		bc->ndw = cf->addr + cf->ndw;
+	}
+	free(bc->bytecode);
+	bc->bytecode = calloc(bc->ndw, sizeof(*bc->bytecode));
+	if (bc->bytecode == NULL)
+		return -ENOMEM;
+	/* second pass: encode every CF and the body of its clause */
+	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+		addr = cf->addr;
+		r = r600_bc_cf_build(bc, cf);
+		if (r)
+			return r;
+		switch (cf->inst) {
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				/* the alu encoding depends on the chip family */
+				switch (bc->family) {
+				case CHIP_R600:
+				case CHIP_RV610:
+				case CHIP_RV630:
+				case CHIP_RV670:
+				case CHIP_RV620:
+				case CHIP_RV635:
+				case CHIP_RS780:
+				case CHIP_RS880:
+					r = r600_bc_alu_build(bc, alu, addr);
+					break;
+				case CHIP_RV770:
+				case CHIP_RV730:
+				case CHIP_RV710:
+				case CHIP_RV740:
+					r = r700_bc_alu_build(bc, alu, addr);
+					break;
+				default:
+					R600_ERR("unknown family %d\n", bc->family);
+					return -EINVAL;
+				}
+				if (r)
+					return r;
+				addr += 2;
+				/* literals follow the last alu of a group */
+				if (alu->last) {
+					addr += alu->nliteral;
+				}
+			}
+			break;
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+				r = r600_bc_vtx_build(bc, vtx, addr);
+				if (r)
+					return r;
+				addr += 4;
+			}
+			break;
+		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+				r = r600_bc_tex_build(bc, tex, addr);
+				if (r)
+					return r;
+				addr += 4;
+			}
+			break;
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+			/* exports are fully described by the CF itself */
+			break;
+		default:
+			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
new file mode 100644
index 0000000000..88fb957440
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef R600_ASM_H
+#define R600_ASM_H
+
+#include "radeon.h"
+#include "util/u_double_list.h"
+
+/* One source operand of an ALU instruction. */
+struct r600_bc_alu_src {
+ /* source select; r600_bc_add_alu() counts values below 128 as GPRs and
+  * treats 253 as a literal constant */
+ unsigned sel;
+ /* channel of the source; for a literal it decides how many literal dwords
+  * are needed */
+ unsigned chan;
+ /* negate flag; r600_bc_alu_build() encodes it for src 0/1 of OP2 and
+  * src 2 of OP3 only */
+ unsigned neg;
+ /* absolute value flag; r600_bc_alu_build() encodes it for src 0/1 of OP2
+  * only */
+ unsigned abs;
+};
+
+/* Destination operand of an ALU instruction. */
+struct r600_bc_alu_dst {
+ /* destination GPR; also used by r600_bc_add_alu() to grow bc->ngpr */
+ unsigned sel;
+ /* destination channel */
+ unsigned chan;
+ /* NOTE(review): not encoded by r600_bc_alu_build() - confirm whether the
+  * r700 encoder uses it */
+ unsigned clamp;
+ /* write mask bit; r600_bc_alu_build() encodes it for OP2 only */
+ unsigned write;
+};
+
+/* One ALU instruction; nodes are linked into the alu list of their CF. */
+struct r600_bc_alu {
+ struct list_head list;
+ struct r600_bc_alu_src src[3];
+ struct r600_bc_alu_dst dst;
+ /* ALU opcode, placed in the OP2 or OP3 instruction field */
+ unsigned inst;
+ /* non-zero when this instruction closes its group; literals are only
+  * emitted after such an instruction */
+ unsigned last;
+ /* non-zero selects the three-source (OP3) encoding */
+ unsigned is_op3;
+ /* number of literal dwords (0, 2 or 4), computed by r600_bc_add_alu() */
+ unsigned nliteral;
+ /* literal dwords, filled in by r600_bc_add_literal() */
+ u32 value[4];
+};
+
+/*
+ * One texture fetch instruction; nodes are linked into the tex list of
+ * their CF. Every member after list is packed by r600_bc_tex_build() into
+ * the SQ_TEX_WORD0/1/2 field of the same name.
+ */
+struct r600_bc_tex {
+ struct list_head list;
+ unsigned inst;
+ unsigned resource_id;
+ unsigned src_gpr;
+ unsigned src_rel;
+ unsigned dst_gpr;
+ unsigned dst_rel;
+ unsigned dst_sel_x;
+ unsigned dst_sel_y;
+ unsigned dst_sel_z;
+ unsigned dst_sel_w;
+ unsigned lod_bias;
+ unsigned coord_type_x;
+ unsigned coord_type_y;
+ unsigned coord_type_z;
+ unsigned coord_type_w;
+ unsigned offset_x;
+ unsigned offset_y;
+ unsigned offset_z;
+ unsigned sampler_id;
+ unsigned src_sel_x;
+ unsigned src_sel_y;
+ unsigned src_sel_z;
+ unsigned src_sel_w;
+};
+
+/*
+ * One vertex fetch instruction; nodes are linked into the vtx list of
+ * their CF. r600_bc_vtx_build() packs the members into the SQ_VTX_WORD0/1
+ * fields of the same name, except inst and fetch_type which it does not
+ * read.
+ */
+struct r600_bc_vtx {
+ struct list_head list;
+ unsigned inst;
+ unsigned fetch_type;
+ unsigned buffer_id;
+ unsigned src_gpr;
+ unsigned src_sel_x;
+ unsigned mega_fetch_count;
+ unsigned dst_gpr;
+ unsigned dst_sel_x;
+ unsigned dst_sel_y;
+ unsigned dst_sel_z;
+ unsigned dst_sel_w;
+};
+
+/*
+ * Description of one export. r600_bc_add_output() stores a copy in a new CF
+ * and r600_bc_cf_build() packs the members into the
+ * SQ_CF_ALLOC_EXPORT_WORD0/1 fields.
+ */
+struct r600_bc_output {
+ unsigned array_base;
+ unsigned type;
+ unsigned end_of_program;
+ /* export instruction code; also becomes the inst of the CF holding it */
+ unsigned inst;
+ unsigned elem_size;
+ /* GPR that is exported (RW_GPR field) */
+ unsigned gpr;
+ unsigned swizzle_x;
+ unsigned swizzle_y;
+ unsigned swizzle_z;
+ unsigned swizzle_w;
+ unsigned barrier;
+};
+
+/*
+ * One control flow instruction together with the clause it owns. A CF holds
+ * either ALU, vertex fetch or texture fetch instructions, or is an export.
+ */
+struct r600_bc_cf {
+ struct list_head list;
+ /* CF instruction code; ALU clauses are stored as the ALU code << 3 */
+ unsigned inst;
+ /* dword index in the bytecode where the clause body starts; computed by
+  * r600_bc_build() */
+ unsigned addr;
+ /* size of the clause body in dwords */
+ unsigned ndw;
+ /* dword index in the bytecode of the CF instruction itself */
+ unsigned id;
+ struct list_head alu;
+ struct list_head tex;
+ struct list_head vtx;
+ /* export description, used when inst is an export instruction */
+ struct r600_bc_output output;
+};
+
+/* State of the bytecode builder for one program. */
+struct r600_bc {
+ /* chip family; selects the ALU encoder in r600_bc_build() */
+ enum radeon_family family;
+ /* list of struct r600_bc_cf in program order */
+ struct list_head cf;
+ /* most recently added CF, NULL-checked before instructions are added */
+ struct r600_bc_cf *cf_last;
+ /* size of the program in dwords; recomputed by r600_bc_build() */
+ unsigned ndw;
+ /* number of CF instructions added */
+ unsigned ncf;
+ /* highest GPR index used by an ALU instruction, plus one */
+ unsigned ngpr;
+ /* NOTE(review): not touched by r600_asm.c - presumably maintained by the
+  * caller, confirm */
+ unsigned nresource;
+ /* assembled program, (re)allocated by r600_bc_build() */
+ u32 *bytecode;
+};
+
+int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
+int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
+int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
+int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
+int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
+int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
+int r600_bc_build(struct r600_bc *bc);
+
+#endif
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 272f4dd673..bc6e336ba7 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -29,7 +29,7 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
-#include "state_tracker/drm_api.h"
+#include "state_tracker/drm_driver.h"
#include "r600_screen.h"
#include "r600_context.h"
diff --git a/src/gallium/drivers/r600/r600_compiler.c b/src/gallium/drivers/r600/r600_compiler.c
deleted file mode 100644
index f1be2bbdf4..0000000000
--- a/src/gallium/drivers/r600/r600_compiler.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <errno.h>
-#include "r600_compiler.h"
-
-struct c_vector *c_vector_new(void)
-{
- struct c_vector *v = calloc(1, sizeof(struct c_vector));
-
- if (v == NULL) {
- return NULL;
- }
- c_list_init(v);
- return v;
-}
-
-static unsigned c_opcode_is_alu(unsigned opcode)
-{
- switch (opcode) {
- case C_OPCODE_MOV:
- case C_OPCODE_MUL:
- case C_OPCODE_MAD:
- case C_OPCODE_ARL:
- case C_OPCODE_LIT:
- case C_OPCODE_RCP:
- case C_OPCODE_RSQ:
- case C_OPCODE_EXP:
- case C_OPCODE_LOG:
- case C_OPCODE_ADD:
- case C_OPCODE_DP3:
- case C_OPCODE_DP4:
- case C_OPCODE_DST:
- case C_OPCODE_MIN:
- case C_OPCODE_MAX:
- case C_OPCODE_SLT:
- case C_OPCODE_SGE:
- case C_OPCODE_SUB:
- case C_OPCODE_LRP:
- case C_OPCODE_CND:
- case C_OPCODE_DP2A:
- case C_OPCODE_FRC:
- case C_OPCODE_CLAMP:
- case C_OPCODE_FLR:
- case C_OPCODE_ROUND:
- case C_OPCODE_EX2:
- case C_OPCODE_LG2:
- case C_OPCODE_POW:
- case C_OPCODE_XPD:
- case C_OPCODE_ABS:
- case C_OPCODE_RCC:
- case C_OPCODE_DPH:
- case C_OPCODE_COS:
- case C_OPCODE_DDX:
- case C_OPCODE_DDY:
- case C_OPCODE_PK2H:
- case C_OPCODE_PK2US:
- case C_OPCODE_PK4B:
- case C_OPCODE_PK4UB:
- case C_OPCODE_RFL:
- case C_OPCODE_SEQ:
- case C_OPCODE_SFL:
- case C_OPCODE_SGT:
- case C_OPCODE_SIN:
- case C_OPCODE_SLE:
- case C_OPCODE_SNE:
- case C_OPCODE_STR:
- case C_OPCODE_UP2H:
- case C_OPCODE_UP2US:
- case C_OPCODE_UP4B:
- case C_OPCODE_UP4UB:
- case C_OPCODE_X2D:
- case C_OPCODE_ARA:
- case C_OPCODE_ARR:
- case C_OPCODE_BRA:
- case C_OPCODE_SSG:
- case C_OPCODE_CMP:
- case C_OPCODE_SCS:
- case C_OPCODE_NRM:
- case C_OPCODE_DIV:
- case C_OPCODE_DP2:
- case C_OPCODE_CEIL:
- case C_OPCODE_I2F:
- case C_OPCODE_NOT:
- case C_OPCODE_TRUNC:
- case C_OPCODE_SHL:
- case C_OPCODE_AND:
- case C_OPCODE_OR:
- case C_OPCODE_MOD:
- case C_OPCODE_XOR:
- case C_OPCODE_SAD:
- case C_OPCODE_NRM4:
- case C_OPCODE_F2I:
- case C_OPCODE_IDIV:
- case C_OPCODE_IMAX:
- case C_OPCODE_IMIN:
- case C_OPCODE_INEG:
- case C_OPCODE_ISGE:
- case C_OPCODE_ISHR:
- case C_OPCODE_ISLT:
- case C_OPCODE_F2U:
- case C_OPCODE_U2F:
- case C_OPCODE_UADD:
- case C_OPCODE_UDIV:
- case C_OPCODE_UMAD:
- case C_OPCODE_UMAX:
- case C_OPCODE_UMIN:
- case C_OPCODE_UMOD:
- case C_OPCODE_UMUL:
- case C_OPCODE_USEQ:
- case C_OPCODE_USGE:
- case C_OPCODE_USHR:
- case C_OPCODE_USLT:
- case C_OPCODE_USNE:
- return 1;
- case C_OPCODE_END:
- case C_OPCODE_VFETCH:
- case C_OPCODE_KILP:
- case C_OPCODE_CAL:
- case C_OPCODE_RET:
- case C_OPCODE_TXB:
- case C_OPCODE_TXL:
- case C_OPCODE_BRK:
- case C_OPCODE_IF:
- case C_OPCODE_BGNFOR:
- case C_OPCODE_REP:
- case C_OPCODE_ELSE:
- case C_OPCODE_ENDIF:
- case C_OPCODE_ENDFOR:
- case C_OPCODE_ENDREP:
- case C_OPCODE_PUSHA:
- case C_OPCODE_POPA:
- case C_OPCODE_TXF:
- case C_OPCODE_TXQ:
- case C_OPCODE_CONT:
- case C_OPCODE_EMIT:
- case C_OPCODE_ENDPRIM:
- case C_OPCODE_BGNLOOP:
- case C_OPCODE_BGNSUB:
- case C_OPCODE_ENDLOOP:
- case C_OPCODE_ENDSUB:
- case C_OPCODE_NOP:
- case C_OPCODE_CALLNZ:
- case C_OPCODE_IFC:
- case C_OPCODE_BREAKC:
- case C_OPCODE_KIL:
- case C_OPCODE_TEX:
- case C_OPCODE_TXD:
- case C_OPCODE_TXP:
- case C_OPCODE_SWITCH:
- case C_OPCODE_CASE:
- case C_OPCODE_DEFAULT:
- case C_OPCODE_ENDSWITCH:
- default:
- return 0;
- }
-}
-
-
-/* NEW */
-void c_node_init(struct c_node *node)
-{
- memset(node, 0, sizeof(struct c_node));
- c_list_init(&node->predecessors);
- c_list_init(&node->successors);
- c_list_init(&node->childs);
- c_list_init(&node->insts);
- node->parent = NULL;
-}
-
-static struct c_node_link *c_node_link_new(struct c_node *node)
-{
- struct c_node_link *link;
-
- link = calloc(1, sizeof(struct c_node_link));
- if (link == NULL)
- return NULL;
- c_list_init(link);
- link->node = node;
- return link;
-}
-
-int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor)
-{
- struct c_node_link *pedge, *sedge;
-
- pedge = c_node_link_new(successor);
- sedge = c_node_link_new(predecessor);
- if (sedge == NULL || pedge == NULL) {
- free(sedge);
- free(pedge);
- return -ENOMEM;
- }
- c_list_add_tail(pedge, &predecessor->successors);
- c_list_add_tail(sedge, &successor->predecessors);
- return 0;
-}
-
-int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction)
-{
- struct c_instruction *inst = calloc(1, sizeof(struct c_instruction));
-
- if (inst == NULL)
- return -ENOMEM;
- memcpy(inst, instruction, sizeof(struct c_instruction));
- c_list_add(inst, &node->insts);
- return 0;
-}
-
-int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction)
-{
- struct c_instruction *inst = calloc(1, sizeof(struct c_instruction));
-
- if (inst == NULL)
- return -ENOMEM;
- memcpy(inst, instruction, sizeof(struct c_instruction));
- c_list_add_tail(inst, &node->insts);
- return 0;
-}
-
-struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor)
-{
- struct c_node *node = calloc(1, sizeof(struct c_node));
-
- if (node == NULL)
- return NULL;
- c_node_init(node);
- if (c_node_cfg_link(predecessor, node)) {
- free(node);
- return NULL;
- }
- c_list_add_tail(node, &shader->nodes);
- return node;
-}
-
-int c_shader_init(struct c_shader *shader, unsigned type)
-{
- unsigned i;
- int r;
-
- shader->type = type;
- for (i = 0; i < C_FILE_COUNT; i++) {
- shader->files[i].nvectors = 0;
- c_list_init(&shader->files[i].vectors);
- }
- c_list_init(&shader->nodes);
- c_node_init(&shader->entry);
- c_node_init(&shader->end);
- shader->entry.opcode = C_OPCODE_ENTRY;
- shader->end.opcode = C_OPCODE_END;
- r = c_node_cfg_link(&shader->entry, &shader->end);
- if (r)
- return r;
- return 0;
-}
-
-struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid)
-{
- struct c_vector *v = calloc(1, sizeof(struct c_vector));
- int i;
-
- if (v == NULL) {
- return NULL;
- }
- for (i = 0; i < 4; i++) {
- v->channel[i] = calloc(1, sizeof(struct c_channel));
- if (v->channel[i] == NULL)
- goto out_err;
- v->channel[i]->vindex = i;
- v->channel[i]->vector = v;
- }
- v->file = file;
- v->name = name;
- v->sid = sid;
- shader->files[v->file].nvectors++;
- v->id = shader->nvectors++;
- c_list_add_tail(v, &shader->files[v->file].vectors);
- return v;
-out_err:
- for (i = 0; i < 4; i++) {
- free(v->channel[i]);
- }
- free(v);
- return NULL;
-}
-
-static void c_node_remove_link(struct c_node_link *head, struct c_node *node)
-{
- struct c_node_link *link, *tmp;
-
- c_list_for_each_safe(link, tmp, head) {
- if (link->node == node) {
- c_list_del(link);
- free(link);
- }
- }
-}
-
-static void c_node_destroy(struct c_node *node)
-{
- struct c_instruction *i, *ni;
- struct c_node_link *link, *tmp;
-
- c_list_for_each_safe(i, ni, &node->insts) {
- c_list_del(i);
- free(i);
- }
- if (node->parent)
- c_node_remove_link(&node->parent->childs, node);
- node->parent = NULL;
- c_list_for_each_safe(link, tmp, &node->predecessors) {
- c_node_remove_link(&link->node->successors, node);
- c_list_del(link);
- free(link);
- }
- c_list_for_each_safe(link, tmp, &node->successors) {
- c_node_remove_link(&link->node->predecessors, node);
- c_list_del(link);
- free(link);
- }
- c_list_for_each_safe(link, tmp, &node->childs) {
- link->node->parent = NULL;
- c_list_del(link);
- free(link);
- }
-}
-
-void c_shader_destroy(struct c_shader *shader)
-{
- struct c_node *n, *nn;
- struct c_vector *v, *nv;
- unsigned i;
-
- for (i = 0; i < C_FILE_COUNT; i++) {
- shader->files[i].nvectors = 0;
- c_list_for_each_safe(v, nv, &shader->files[i].vectors) {
- c_list_del(v);
- free(v->channel[0]);
- free(v->channel[1]);
- free(v->channel[2]);
- free(v->channel[3]);
- free(v);
- }
- }
- c_list_for_each_safe(n, nn, &shader->nodes) {
- c_list_del(n);
- c_node_destroy(n);
- }
- memset(shader, 0, sizeof(struct c_shader));
-}
-
-static void c_shader_dfs_without_rec(struct c_node *entry, struct c_node *node)
-{
- struct c_node_link *link;
-
- if (entry == node || entry->visited)
- return;
- entry->visited = 1;
- c_list_for_each(link, &entry->successors) {
- c_shader_dfs_without_rec(link->node, node);
- }
-}
-
-static void c_shader_dfs_without(struct c_shader *shader, struct c_node *node)
-{
- struct c_node *n;
-
- shader->entry.visited = 0;
- shader->end.visited = 0;
- c_list_for_each(n, &shader->nodes) {
- n->visited = 0;
- }
- c_shader_dfs_without_rec(&shader->entry, node);
-}
-
-static int c_shader_build_dominator_tree_rec(struct c_shader *shader, struct c_node *node)
-{
- struct c_node_link *link, *nlink;
- unsigned found = 0;
- int r;
-
- if (node->done)
- return 0;
- node->done = 1;
- c_list_for_each(link, &node->predecessors) {
- /* if we remove this predecessor can we reach the current node ? */
- c_shader_dfs_without(shader, link->node);
- if (node->visited == 0) {
- /* we were unable to visit current node thus current
- * predecessor is the immediate dominator of node, as
- * their can be only one immediate dominator we break
- */
- node->parent = link->node;
- nlink = c_node_link_new(node);
- if (nlink == NULL)
- return -ENOMEM;
- c_list_add_tail(nlink, &link->node->childs);
- found = 1;
- break;
- }
- }
- /* this shouldn't happen there should at least be 1 denominator for each node */
- if (!found && node->opcode != C_OPCODE_ENTRY) {
- fprintf(stderr, "invalid flow control graph node %p (%d) has no immediate dominator\n",
- node, node->opcode);
- return -EINVAL;
- }
- c_list_for_each(link, &node->predecessors) {
- r = c_shader_build_dominator_tree_rec(shader, link->node);
- if (r)
- return r;
- }
- return 0;
-}
-
-int c_shader_build_dominator_tree(struct c_shader *shader)
-{
- struct c_node *node;
- c_list_for_each(node, &shader->nodes) {
- node->done = 0;
- }
- return c_shader_build_dominator_tree_rec(shader, &shader->end);
-}
diff --git a/src/gallium/drivers/r600/r600_compiler.h b/src/gallium/drivers/r600/r600_compiler.h
deleted file mode 100644
index 3de19970c3..0000000000
--- a/src/gallium/drivers/r600/r600_compiler.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef R600_COMPILER_H
-#define R600_COMPILER_H
-
-struct c_vector;
-
-/* operand are the basic source/destination of each operation */
-struct c_channel {
- struct c_channel *next;
- struct c_channel *prev;
- unsigned vindex; /**< index in vector X,Y,Z,W (0,1,2,3) */
- unsigned value; /**< immediate value 32bits */
- struct c_vector *vector; /**< vector to which it belongs */
-};
-
-/* in GPU world most of the time operand are grouped into vector
- * of 4 component this structure is mostly and handler to group
- * operand into a same vector
- */
-struct c_vector {
- struct c_vector *next;
- struct c_vector *prev;
- unsigned id; /**< vector uniq id */
- unsigned name; /**< semantic name */
- unsigned file; /**< operand file C_FILE_* */
- int sid; /**< semantic id */
- struct c_channel *channel[4]; /**< operands */
-};
-
-#define c_list_init(e) do { (e)->next = e; (e)->prev = e; } while(0)
-#define c_list_add(e, h) do { (e)->next = (h)->next; (e)->prev = h; (h)->next = e; (e)->next->prev = e; } while(0)
-#define c_list_add_tail(e, h) do { (e)->next = h; (e)->prev = (h)->prev; (h)->prev = e; (e)->prev->next = e; } while(0)
-#define c_list_del(e) do { (e)->next->prev = (e)->prev; (e)->prev->next = (e)->next; c_list_init(e); } while(0)
-#define c_list_for_each(p, h) for (p = (h)->next; p != (h); p = p->next)
-#define c_list_for_each_from(p, s, h) for (p = s; p != (h); p = p->next)
-#define c_list_for_each_safe(p, n, h) for (p = (h)->next, n = p->next; p != (h); p = n, n = p->next)
-#define c_list_empty(h) ((h)->next == h)
-
-
-#define C_PROGRAM_TYPE_VS 0
-#define C_PROGRAM_TYPE_FS 1
-#define C_PROGRAM_TYPE_COUNT 2
-
-#define C_NODE_FLAG_ALU 1
-#define C_NODE_FLAG_FETCH 2
-
-#define C_SWIZZLE_X 0
-#define C_SWIZZLE_Y 1
-#define C_SWIZZLE_Z 2
-#define C_SWIZZLE_W 3
-#define C_SWIZZLE_0 4
-#define C_SWIZZLE_1 5
-#define C_SWIZZLE_D 6
-
-#define C_FILE_NULL 0
-#define C_FILE_CONSTANT 1
-#define C_FILE_INPUT 2
-#define C_FILE_OUTPUT 3
-#define C_FILE_TEMPORARY 4
-#define C_FILE_SAMPLER 5
-#define C_FILE_ADDRESS 6
-#define C_FILE_IMMEDIATE 7
-#define C_FILE_LOOP 8
-#define C_FILE_PREDICATE 9
-#define C_FILE_SYSTEM_VALUE 10
-#define C_FILE_RESOURCE 11
-#define C_FILE_COUNT 12
-
-#define C_SEMANTIC_POSITION 0
-#define C_SEMANTIC_COLOR 1
-#define C_SEMANTIC_BCOLOR 2 /**< back-face color */
-#define C_SEMANTIC_FOG 3
-#define C_SEMANTIC_PSIZE 4
-#define C_SEMANTIC_GENERIC 5
-#define C_SEMANTIC_NORMAL 6
-#define C_SEMANTIC_FACE 7
-#define C_SEMANTIC_EDGEFLAG 8
-#define C_SEMANTIC_PRIMID 9
-#define C_SEMANTIC_INSTANCEID 10
-#define C_SEMANTIC_VERTEXID 11
-#define C_SEMANTIC_COUNT 12 /**< number of semantic values */
-
-#define C_OPCODE_NOP 0
-#define C_OPCODE_MOV 1
-#define C_OPCODE_LIT 2
-#define C_OPCODE_RCP 3
-#define C_OPCODE_RSQ 4
-#define C_OPCODE_EXP 5
-#define C_OPCODE_LOG 6
-#define C_OPCODE_MUL 7
-#define C_OPCODE_ADD 8
-#define C_OPCODE_DP3 9
-#define C_OPCODE_DP4 10
-#define C_OPCODE_DST 11
-#define C_OPCODE_MIN 12
-#define C_OPCODE_MAX 13
-#define C_OPCODE_SLT 14
-#define C_OPCODE_SGE 15
-#define C_OPCODE_MAD 16
-#define C_OPCODE_SUB 17
-#define C_OPCODE_LRP 18
-#define C_OPCODE_CND 19
-/* gap */
-#define C_OPCODE_DP2A 21
-/* gap */
-#define C_OPCODE_FRC 24
-#define C_OPCODE_CLAMP 25
-#define C_OPCODE_FLR 26
-#define C_OPCODE_ROUND 27
-#define C_OPCODE_EX2 28
-#define C_OPCODE_LG2 29
-#define C_OPCODE_POW 30
-#define C_OPCODE_XPD 31
-/* gap */
-#define C_OPCODE_ABS 33
-#define C_OPCODE_RCC 34
-#define C_OPCODE_DPH 35
-#define C_OPCODE_COS 36
-#define C_OPCODE_DDX 37
-#define C_OPCODE_DDY 38
-#define C_OPCODE_KILP 39 /* predicated kill */
-#define C_OPCODE_PK2H 40
-#define C_OPCODE_PK2US 41
-#define C_OPCODE_PK4B 42
-#define C_OPCODE_PK4UB 43
-#define C_OPCODE_RFL 44
-#define C_OPCODE_SEQ 45
-#define C_OPCODE_SFL 46
-#define C_OPCODE_SGT 47
-#define C_OPCODE_SIN 48
-#define C_OPCODE_SLE 49
-#define C_OPCODE_SNE 50
-#define C_OPCODE_STR 51
-#define C_OPCODE_TEX 52
-#define C_OPCODE_TXD 53
-#define C_OPCODE_TXP 54
-#define C_OPCODE_UP2H 55
-#define C_OPCODE_UP2US 56
-#define C_OPCODE_UP4B 57
-#define C_OPCODE_UP4UB 58
-#define C_OPCODE_X2D 59
-#define C_OPCODE_ARA 60
-#define C_OPCODE_ARR 61
-#define C_OPCODE_BRA 62
-#define C_OPCODE_CAL 63
-#define C_OPCODE_RET 64
-#define C_OPCODE_SSG 65 /* SGN */
-#define C_OPCODE_CMP 66
-#define C_OPCODE_SCS 67
-#define C_OPCODE_TXB 68
-#define C_OPCODE_NRM 69
-#define C_OPCODE_DIV 70
-#define C_OPCODE_DP2 71
-#define C_OPCODE_TXL 72
-#define C_OPCODE_BRK 73
-#define C_OPCODE_IF 74
-#define C_OPCODE_BGNFOR 75
-#define C_OPCODE_REP 76
-#define C_OPCODE_ELSE 77
-#define C_OPCODE_ENDIF 78
-#define C_OPCODE_ENDFOR 79
-#define C_OPCODE_ENDREP 80
-#define C_OPCODE_PUSHA 81
-#define C_OPCODE_POPA 82
-#define C_OPCODE_CEIL 83
-#define C_OPCODE_I2F 84
-#define C_OPCODE_NOT 85
-#define C_OPCODE_TRUNC 86
-#define C_OPCODE_SHL 87
-/* gap */
-#define C_OPCODE_AND 89
-#define C_OPCODE_OR 90
-#define C_OPCODE_MOD 91
-#define C_OPCODE_XOR 92
-#define C_OPCODE_SAD 93
-#define C_OPCODE_TXF 94
-#define C_OPCODE_TXQ 95
-#define C_OPCODE_CONT 96
-#define C_OPCODE_EMIT 97
-#define C_OPCODE_ENDPRIM 98
-#define C_OPCODE_BGNLOOP 99
-#define C_OPCODE_BGNSUB 100
-#define C_OPCODE_ENDLOOP 101
-#define C_OPCODE_ENDSUB 102
-/* gap */
-#define C_OPCODE_NRM4 112
-#define C_OPCODE_CALLNZ 113
-#define C_OPCODE_IFC 114
-#define C_OPCODE_BREAKC 115
-#define C_OPCODE_KIL 116 /* conditional kill */
-#define C_OPCODE_END 117 /* aka HALT */
-/* gap */
-#define C_OPCODE_F2I 119
-#define C_OPCODE_IDIV 120
-#define C_OPCODE_IMAX 121
-#define C_OPCODE_IMIN 122
-#define C_OPCODE_INEG 123
-#define C_OPCODE_ISGE 124
-#define C_OPCODE_ISHR 125
-#define C_OPCODE_ISLT 126
-#define C_OPCODE_F2U 127
-#define C_OPCODE_U2F 128
-#define C_OPCODE_UADD 129
-#define C_OPCODE_UDIV 130
-#define C_OPCODE_UMAD 131
-#define C_OPCODE_UMAX 132
-#define C_OPCODE_UMIN 133
-#define C_OPCODE_UMOD 134
-#define C_OPCODE_UMUL 135
-#define C_OPCODE_USEQ 136
-#define C_OPCODE_USGE 137
-#define C_OPCODE_USHR 138
-#define C_OPCODE_USLT 139
-#define C_OPCODE_USNE 140
-#define C_OPCODE_SWITCH 141
-#define C_OPCODE_CASE 142
-#define C_OPCODE_DEFAULT 143
-#define C_OPCODE_ENDSWITCH 144
-#define C_OPCODE_VFETCH 145
-#define C_OPCODE_ENTRY 146
-#define C_OPCODE_ARL 147
-#define C_OPCODE_LAST 148
-
-#define C_OPERAND_FLAG_ABS (1 << 0)
-#define C_OPERAND_FLAG_NEG (1 << 1)
-
-struct c_operand {
- struct c_vector *vector;
- unsigned swizzle;
- unsigned flag;
-};
-
-struct c_op {
- unsigned ninput;
- struct c_operand input[3];
- struct c_operand output;
- unsigned opcode;
-};
-
-struct c_instruction {
- struct c_instruction *next, *prev;
- unsigned nop;
- struct c_op op[5];
-};
-
-struct c_node;
-
-struct c_node_link {
- struct c_node_link *next;
- struct c_node_link *prev;
- struct c_node *node;
-};
-
-/**
- * struct c_node
- *
- * @next: all node are in a double linked list, this point to
- * next node
- * @next: all node are in a double linked list, this point to
- * previous node
- * @predecessors: list of all predecessor nodes in the flow graph
- * @successors: list of all sucessor nodes in the flow graph
- * @parent: parent node in the depth first walk tree
- * @childs: child nodes in the depth first walk tree
- */
-struct c_node {
- struct c_node *next, *prev;
- struct c_node_link predecessors;
- struct c_node_link successors;
- struct c_node *parent;
- struct c_node_link childs;
- struct c_instruction insts;
- unsigned opcode;
- unsigned visited;
- unsigned done;
- void *backend;
-};
-
-struct c_file {
- unsigned nvectors;
- struct c_vector vectors;
-};
-
-struct c_shader {
- unsigned nvectors;
- struct c_file files[C_FILE_COUNT];
- struct c_node nodes;
- struct c_node entry;
- struct c_node end;
- unsigned type;
-};
-
-int c_shader_init(struct c_shader *shader, unsigned type);
-void c_shader_destroy(struct c_shader *shader);
-struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid);
-int c_shader_build_dominator_tree(struct c_shader *shader);
-void c_shader_dump(struct c_shader *shader);
-
-void c_node_init(struct c_node *node);
-int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction);
-int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction);
-
-/* control flow graph functions */
-int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor);
-struct c_node *c_node_cfg_new_after(struct c_node *predecessor);
-struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor);
-
-struct c_vector *c_vector_new(void);
-
-#endif
diff --git a/src/gallium/drivers/r600/r600_compiler_dump.c b/src/gallium/drivers/r600/r600_compiler_dump.c
deleted file mode 100644
index 485032088c..0000000000
--- a/src/gallium/drivers/r600/r600_compiler_dump.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <stdio.h>
-#include "r600_compiler.h"
-
-static const char *c_file_swz[] = {
- "x",
- "y",
- "z",
- "w",
- "0",
- "1",
- ".",
-};
-
-static const char *c_file_str[] = {
- "NULL",
- "CONSTANT",
- "INPUT",
- "OUTPUT",
- "TEMPORARY",
- "SAMPLER",
- "ADDRESS",
- "IMMEDIATE",
- "LOOP",
- "PREDICATE",
- "SYSTEM_VALUE",
-};
-
-static const char *c_semantic_str[] = {
- "POSITION",
- "COLOR",
- "BCOLOR",
- "FOG",
- "PSIZE",
- "GENERIC",
- "NORMAL",
- "FACE",
- "EDGEFLAG",
- "PRIMID",
- "INSTANCEID",
-};
-
-static const char *c_opcode_str[] = {
- "ARL",
- "MOV",
- "LIT",
- "RCP",
- "RSQ",
- "EXP",
- "LOG",
- "MUL",
- "ADD",
- "DP3",
- "DP4",
- "DST",
- "MIN",
- "MAX",
- "SLT",
- "SGE",
- "MAD",
- "SUB",
- "LRP",
- "CND",
- "(INVALID)",
- "DP2A",
- "(INVALID)",
- "(INVALID)",
- "FRC",
- "CLAMP",
- "FLR",
- "ROUND",
- "EX2",
- "LG2",
- "POW",
- "XPD",
- "(INVALID)",
- "ABS",
- "RCC",
- "DPH",
- "COS",
- "DDX",
- "DDY",
- "KILP",
- "PK2H",
- "PK2US",
- "PK4B",
- "PK4UB",
- "RFL",
- "SEQ",
- "SFL",
- "SGT",
- "SIN",
- "SLE",
- "SNE",
- "STR",
- "TEX",
- "TXD",
- "TXP",
- "UP2H",
- "UP2US",
- "UP4B",
- "UP4UB",
- "X2D",
- "ARA",
- "ARR",
- "BRA",
- "CAL",
- "RET",
- "SSG",
- "CMP",
- "SCS",
- "TXB",
- "NRM",
- "DIV",
- "DP2",
- "TXL",
- "BRK",
- "IF",
- "BGNFOR",
- "REP",
- "ELSE",
- "ENDIF",
- "ENDFOR",
- "ENDREP",
- "PUSHA",
- "POPA",
- "CEIL",
- "I2F",
- "NOT",
- "TRUNC",
- "SHL",
- "(INVALID)",
- "AND",
- "OR",
- "MOD",
- "XOR",
- "SAD",
- "TXF",
- "TXQ",
- "CONT",
- "EMIT",
- "ENDPRIM",
- "BGNLOOP",
- "BGNSUB",
- "ENDLOOP",
- "ENDSUB",
- "(INVALID)",
- "(INVALID)",
- "(INVALID)",
- "(INVALID)",
- "NOP",
- "(INVALID)",
- "(INVALID)",
- "(INVALID)",
- "(INVALID)",
- "NRM4",
- "CALLNZ",
- "IFC",
- "BREAKC",
- "KIL",
- "END",
- "(INVALID)",
- "F2I",
- "IDIV",
- "IMAX",
- "IMIN",
- "INEG",
- "ISGE",
- "ISHR",
- "ISLT",
- "F2U",
- "U2F",
- "UADD",
- "UDIV",
- "UMAD",
- "UMAX",
- "UMIN",
- "UMOD",
- "UMUL",
- "USEQ",
- "USGE",
- "USHR",
- "USLT",
- "USNE",
- "SWITCH",
- "CASE",
- "DEFAULT",
- "ENDSWITCH",
- "VFETCH",
- "ENTRY",
-};
-
-static inline const char *c_get_name(const char *name[], unsigned i)
-{
- return name[i];
-}
-
-static void pindent(unsigned indent)
-{
- unsigned i;
- for (i = 0; i < indent; i++)
- fprintf(stderr, " ");
-}
-
-static void c_node_dump(struct c_node *node, unsigned indent)
-{
- struct c_instruction *i;
- unsigned j, k;
-
- pindent(indent); fprintf(stderr, "# node %s\n", c_get_name(c_opcode_str, node->opcode));
- c_list_for_each(i, &node->insts) {
- for (k = 0; k < i->nop; k++) {
- pindent(indent);
- fprintf(stderr, "%s", c_get_name(c_opcode_str, i->op[k].opcode));
- fprintf(stderr, " %s[%d][%s]",
- c_get_name(c_file_str, i->op[k].output.vector->file),
- i->op[k].output.vector->id,
- c_get_name(c_file_swz, i->op[k].output.swizzle));
- for (j = 0; j < i->op[k].ninput; j++) {
- fprintf(stderr, " %s[%d][%s]",
- c_get_name(c_file_str, i->op[k].input[j].vector->file),
- i->op[k].input[j].vector->id,
- c_get_name(c_file_swz, i->op[k].input[j].swizzle));
- }
- fprintf(stderr, ";\n");
- }
- }
-}
-
-static void c_shader_dump_rec(struct c_shader *shader, struct c_node *node, unsigned indent)
-{
- struct c_node_link *link;
-
- c_node_dump(node, indent);
- c_list_for_each(link, &node->childs) {
- c_shader_dump_rec(shader, link->node, indent + 1);
- }
-}
-
-void c_shader_dump(struct c_shader *shader)
-{
- c_shader_dump_rec(shader, &shader->entry, 0);
-}
diff --git a/src/gallium/drivers/r600/r600_compiler_r600.c b/src/gallium/drivers/r600/r600_compiler_r600.c
deleted file mode 100644
index 14ea8ab6e8..0000000000
--- a/src/gallium/drivers/r600/r600_compiler_r600.c
+++ /dev/null
@@ -1,891 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <errno.h>
-#include <util/u_format.h>
-#include "r600_screen.h"
-#include "r600_context.h"
-#include "r600_sq.h"
-
-
-struct r600_alu_instruction {
- unsigned copcode;
- enum r600_instruction instruction;
-};
-
-static int r600_shader_alu_translate(struct r600_shader *rshader,
- struct r600_shader_node *node,
- struct c_instruction *instruction);
-struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST];
-struct r600_instruction_info r600_instruction_info[];
-
-int r600_shader_insert_fetch(struct c_shader *shader)
-{
- struct c_vector *vi, *vr, *v, *nv;
- struct c_instruction instruction;
- int r;
-
- if (shader->type != C_PROGRAM_TYPE_VS)
- return 0;
- vi = c_shader_vector_new(shader, C_FILE_INPUT, C_SEMANTIC_VERTEXID, -1);
- if (vi == NULL)
- return -ENOMEM;
- c_list_for_each_safe(v, nv, &shader->files[C_FILE_INPUT].vectors) {
- if (v == vi)
- continue;
- vr = c_shader_vector_new(shader, C_FILE_RESOURCE, C_SEMANTIC_GENERIC, -1);
- if (vr == NULL)
- return -ENOMEM;
- memset(&instruction, 0, sizeof(struct c_instruction));
- instruction.nop = 4;
- instruction.op[0].opcode = C_OPCODE_VFETCH;
- instruction.op[1].opcode = C_OPCODE_VFETCH;
- instruction.op[2].opcode = C_OPCODE_VFETCH;
- instruction.op[3].opcode = C_OPCODE_VFETCH;
- instruction.op[0].ninput = 2;
- instruction.op[1].ninput = 2;
- instruction.op[2].ninput = 2;
- instruction.op[3].ninput = 2;
- instruction.op[0].output.vector = v;
- instruction.op[1].output.vector = v;
- instruction.op[2].output.vector = v;
- instruction.op[3].output.vector = v;
- instruction.op[0].input[0].vector = vi;
- instruction.op[0].input[1].vector = vr;
- instruction.op[1].input[0].vector = vi;
- instruction.op[1].input[1].vector = vr;
- instruction.op[2].input[0].vector = vi;
- instruction.op[2].input[1].vector = vr;
- instruction.op[3].input[0].vector = vi;
- instruction.op[3].input[1].vector = vr;
- instruction.op[0].output.swizzle = C_SWIZZLE_X;
- instruction.op[1].output.swizzle = C_SWIZZLE_Y;
- instruction.op[2].output.swizzle = C_SWIZZLE_Z;
- instruction.op[3].output.swizzle = C_SWIZZLE_W;
- r = c_node_add_new_instruction_head(&shader->entry, &instruction);
- if (r)
- return r;
- c_list_del(v);
- shader->files[C_FILE_INPUT].nvectors--;
- c_list_add_tail(v, &shader->files[C_FILE_TEMPORARY].vectors);
- shader->files[C_FILE_TEMPORARY].nvectors++;
- v->file = C_FILE_TEMPORARY;
- }
- return 0;
-}
-
-void r600_shader_cleanup(struct r600_shader *rshader)
-{
- struct r600_shader_node *n, *nn;
- struct r600_shader_vfetch *vf, *nvf;
- struct r600_shader_alu *alu, *nalu;
- int i;
-
- if (rshader == NULL)
- return;
- if (rshader->gpr) {
- for (i = 0; i < rshader->nvector; i++) {
- free(rshader->gpr[i]);
- }
- free(rshader->gpr);
- rshader->gpr = NULL;
- }
- c_list_for_each_safe(n, nn, &rshader->nodes) {
- c_list_del(n);
- c_list_for_each_safe(vf, nvf, &n->vfetch) {
- c_list_del(vf);
- free(vf);
- }
- c_list_for_each_safe(alu, nalu, &n->alu) {
- c_list_del(alu);
- free(alu);
- }
- free(n);
- }
- free(rshader->bcode);
- return;
-}
-
-int r600_shader_vfetch_bytecode(struct r600_shader *rshader,
- struct r600_shader_node *rnode,
- struct r600_shader_vfetch *vfetch,
- unsigned *cid)
-{
- unsigned id = *cid;
-
- vfetch->cf_addr = id;
- rshader->bcode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vfetch->src[1].sel) |
- S_SQ_VTX_WORD0_SRC_GPR(vfetch->src[0].sel) |
- S_SQ_VTX_WORD0_SRC_SEL_X(vfetch->src[0].sel) |
- S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
- rshader->bcode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vfetch->dst[0].chan) |
- S_SQ_VTX_WORD1_DST_SEL_Y(vfetch->dst[1].chan) |
- S_SQ_VTX_WORD1_DST_SEL_Z(vfetch->dst[2].chan) |
- S_SQ_VTX_WORD1_DST_SEL_W(vfetch->dst[3].chan) |
- S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) |
- S_SQ_VTX_WORD1_GPR_DST_GPR(vfetch->dst[0].sel);
- rshader->bcode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
- rshader->bcode[id++] = 0;
- *cid = id;
- return 0;
-}
-
-int r600_shader_update(struct r600_shader *rshader, enum pipe_format *resource_format)
-{
- struct r600_shader_node *rnode;
- struct r600_shader_vfetch *vfetch;
- unsigned i;
-
- memcpy(rshader->resource_format, resource_format,
- rshader->nresource * sizeof(enum pipe_format));
- c_list_for_each(rnode, &rshader->nodes) {
- c_list_for_each(vfetch, &rnode->vfetch) {
- const struct util_format_description *desc;
- i = vfetch->cf_addr + 1;
- rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_X;
- rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Y;
- rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Z;
- rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_W;
- desc = util_format_description(resource_format[vfetch->src[1].sel]);
- if (desc == NULL) {
- fprintf(stderr, "%s unknown format %d\n", __func__, resource_format[vfetch->src[1].sel]);
- continue;
- }
- /* WARNING so far TGSI swizzle match R600 ones */
- rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]);
- rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]);
- rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]);
- rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]);
- }
- }
- return 0;
-}
-
-int r600_shader_register(struct r600_shader *rshader)
-{
- struct c_vector *v, *nv;
- unsigned tid, cid, rid, i;
-
- rshader->nvector = rshader->cshader.nvectors;
- rshader->gpr = calloc(rshader->nvector, sizeof(void*));
- if (rshader->gpr == NULL)
- return -ENOMEM;
- tid = 0;
- cid = 0;
- rid = 0;
- /* alloc input first */
- c_list_for_each(v, &rshader->cshader.files[C_FILE_INPUT].vectors) {
- nv = c_vector_new();
- if (nv == NULL) {
- return -ENOMEM;
- }
- memcpy(nv, v, sizeof(struct c_vector));
- nv->id = tid++;
- rshader->gpr[v->id] = nv;
- }
- for (i = 0; i < C_FILE_COUNT; i++) {
- if (i == C_FILE_INPUT || i == C_FILE_IMMEDIATE)
- continue;
- c_list_for_each(v, &rshader->cshader.files[i].vectors) {
- switch (v->file) {
- case C_FILE_OUTPUT:
- case C_FILE_TEMPORARY:
- nv = c_vector_new();
- if (nv == NULL) {
- return -ENOMEM;
- }
- memcpy(nv, v, sizeof(struct c_vector));
- nv->id = tid++;
- rshader->gpr[v->id] = nv;
- break;
- case C_FILE_CONSTANT:
- nv = c_vector_new();
- if (nv == NULL) {
- return -ENOMEM;
- }
- memcpy(nv, v, sizeof(struct c_vector));
- nv->id = (cid++) + 256;
- rshader->gpr[v->id] = nv;
- break;
- case C_FILE_RESOURCE:
- nv = c_vector_new();
- if (nv == NULL) {
- return -ENOMEM;
- }
- memcpy(nv, v, sizeof(struct c_vector));
- nv->id = (rid++);
- rshader->gpr[v->id] = nv;
- break;
- default:
- fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, v->file);
- return -EINVAL;
- }
- }
- }
- rshader->ngpr = tid;
- rshader->nconstant = cid;
- rshader->nresource = rid;
- return 0;
-}
-
-int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle,
- struct r600_shader_operand *operand)
-{
- struct c_vector *tmp;
-
- /* Values [0,127] correspond to GPR[0..127].
- * Values [256,511] correspond to cfile constants c[0..255].
- * Other special values are shown in the list below.
- * 248 SQ_ALU_SRC_0: special constant 0.0.
- * 249 SQ_ALU_SRC_1: special constant 1.0 float.
- * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
- * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
- * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
- * 253 SQ_ALU_SRC_LITERAL: literal constant.
- * 254 SQ_ALU_SRC_PV: previous vector result.
- * 255 SQ_ALU_SRC_PS: previous scalar result.
- */
- operand->vector = v;
- operand->sel = 248;
- operand->chan = 0;
- operand->neg = 0;
- operand->abs = 0;
- if (v == NULL)
- return 0;
- if (v->file == C_FILE_IMMEDIATE) {
- operand->sel = 253;
- } else {
- tmp = rshader->gpr[v->id];
- if (tmp == NULL) {
- fprintf(stderr, "%s %d unknown register\n", __FILE__, __LINE__);
- return -EINVAL;
- }
- operand->sel = tmp->id;
- }
- operand->chan = swizzle;
- switch (swizzle) {
- case C_SWIZZLE_X:
- case C_SWIZZLE_Y:
- case C_SWIZZLE_Z:
- case C_SWIZZLE_W:
- break;
- case C_SWIZZLE_0:
- operand->sel = 248;
- operand->chan = 0;
- break;
- case C_SWIZZLE_1:
- operand->sel = 249;
- operand->chan = 0;
- break;
- default:
- fprintf(stderr, "%s %d invalid swizzle %d\n", __FILE__, __LINE__, swizzle);
- return -EINVAL;
- }
- return 0;
-}
-
-static struct r600_shader_node *r600_shader_new_node(struct r600_shader *rshader, struct c_node *node)
-{
- struct r600_shader_node *rnode;
-
- rnode = CALLOC_STRUCT(r600_shader_node);
- if (rnode == NULL)
- return NULL;
- rnode->node = node;
- c_list_init(&rnode->vfetch);
- c_list_init(&rnode->alu);
- c_list_add_tail(rnode, &rshader->nodes);
- return rnode;
-}
-
-static int r600_shader_add_vfetch(struct r600_shader *rshader,
- struct r600_shader_node *node,
- struct c_instruction *instruction)
-{
- struct r600_shader_vfetch *vfetch;
- struct r600_shader_node *rnode;
- int r;
-
- if (instruction == NULL)
- return 0;
- if (instruction->op[0].opcode != C_OPCODE_VFETCH)
- return 0;
- if (!c_list_empty(&node->alu)) {
- rnode = r600_shader_new_node(rshader, node->node);
- if (rnode == NULL)
- return -ENOMEM;
- node = rnode;
- }
- vfetch = calloc(1, sizeof(struct r600_shader_vfetch));
- if (vfetch == NULL)
- return -ENOMEM;
- r = r600_shader_find_gpr(rshader, instruction->op[0].output.vector, 0, &vfetch->dst[0]);
- if (r)
- return r;
- r = r600_shader_find_gpr(rshader, instruction->op[0].input[0].vector, 0, &vfetch->src[0]);
- if (r)
- return r;
- r = r600_shader_find_gpr(rshader, instruction->op[0].input[1].vector, 0, &vfetch->src[1]);
- if (r)
- return r;
- vfetch->dst[0].chan = C_SWIZZLE_X;
- vfetch->dst[1].chan = C_SWIZZLE_Y;
- vfetch->dst[2].chan = C_SWIZZLE_Z;
- vfetch->dst[3].chan = C_SWIZZLE_W;
- c_list_add_tail(vfetch, &node->vfetch);
- node->nslot += 2;
- return 0;
-}
-
-static int r600_node_translate(struct r600_shader *rshader, struct c_node *node)
-{
- struct c_instruction *instruction;
- struct r600_shader_node *rnode;
- int r;
-
- rnode = r600_shader_new_node(rshader, node);
- if (rnode == NULL)
- return -ENOMEM;
- c_list_for_each(instruction, &node->insts) {
- switch (instruction->op[0].opcode) {
- case C_OPCODE_VFETCH:
- r = r600_shader_add_vfetch(rshader, rnode, instruction);
- if (r) {
- fprintf(stderr, "%s %d vfetch failed\n", __func__, __LINE__);
- return r;
- }
- break;
- default:
- r = r600_shader_alu_translate(rshader, rnode, instruction);
- if (r) {
- fprintf(stderr, "%s %d alu failed\n", __func__, __LINE__);
- return r;
- }
- break;
- }
- }
- return 0;
-}
-
-int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node)
-{
- struct c_node_link *link;
- int r;
-
- if (node->opcode == C_OPCODE_END)
- return 0;
- r = r600_node_translate(rshader, node);
- if (r)
- return r;
- c_list_for_each(link, &node->childs) {
- r = r600_shader_translate_rec(rshader, link->node);
- if (r)
- return r;
- }
- return 0;
-}
-
-static struct r600_shader_alu *r600_shader_insert_alu(struct r600_shader *rshader, struct r600_shader_node *node)
-{
- struct r600_shader_alu *alu;
-
- alu = CALLOC_STRUCT(r600_shader_alu);
- if (alu == NULL)
- return NULL;
- alu->alu[0].inst = INST_NOP;
- alu->alu[1].inst = INST_NOP;
- alu->alu[2].inst = INST_NOP;
- alu->alu[3].inst = INST_NOP;
- alu->alu[4].inst = INST_NOP;
- alu->alu[0].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu->alu[1].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu->alu[2].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu->alu[3].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu->alu[4].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- c_list_add_tail(alu, &node->alu);
- return alu;
-}
-
-static int r600_shader_alu_translate(struct r600_shader *rshader,
- struct r600_shader_node *node,
- struct c_instruction *instruction)
-{
- struct r600_shader_node *rnode;
- struct r600_shader_alu *alu;
- int i, j, r, comp, litteral_lastcomp = -1;
-
- if (!c_list_empty(&node->vfetch)) {
- rnode = r600_shader_new_node(rshader, node->node);
- if (rnode == NULL) {
- fprintf(stderr, "%s %d new node failed\n", __func__, __LINE__);
- return -ENOMEM;
- }
- node = rnode;
- }
-
- /* initialize alu */
- alu = r600_shader_insert_alu(rshader, node);
-
- /* check special operation like lit */
-
- /* go through operation */
- for (i = 0; i < instruction->nop; i++) {
- struct r600_alu_instruction *ainfo = &r600_alu_instruction[instruction->op[i].opcode];
- struct r600_instruction_info *iinfo = &r600_instruction_info[ainfo->instruction];
- unsigned comp;
-
- /* check that output is a valid component */
- comp = instruction->op[i].output.swizzle;
- switch (comp) {
- case C_SWIZZLE_X:
- case C_SWIZZLE_Y:
- case C_SWIZZLE_Z:
- case C_SWIZZLE_W:
- break;
- case C_SWIZZLE_0:
- case C_SWIZZLE_1:
- default:
- fprintf(stderr, "%s %d invalid output\n", __func__, __LINE__);
- return -EINVAL;
- }
- alu->alu[comp].inst = ainfo->instruction;
- alu->alu[comp].opcode = iinfo->opcode;
- alu->alu[comp].is_op3 = iinfo->is_op3;
- for (j = 0; j < instruction->op[i].ninput; j++) {
- r = r600_shader_find_gpr(rshader, instruction->op[i].input[j].vector,
- instruction->op[i].input[j].swizzle, &alu->alu[comp].src[j]);
- if (r) {
- fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__);
- return r;
- }
- if (instruction->op[i].input[j].vector->file == C_FILE_IMMEDIATE) {
- r = instruction->op[i].input[j].swizzle;
- switch (r) {
- case C_SWIZZLE_X:
- case C_SWIZZLE_Y:
- case C_SWIZZLE_Z:
- case C_SWIZZLE_W:
- break;
- case C_SWIZZLE_0:
- case C_SWIZZLE_1:
- default:
- fprintf(stderr, "%s %d invalid input\n", __func__, __LINE__);
- return -EINVAL;
- }
- alu->literal[r] = instruction->op[i].input[j].vector->channel[r]->value;
- if (r > litteral_lastcomp) {
- litteral_lastcomp = r;
- }
- }
- }
- r = r600_shader_find_gpr(rshader, instruction->op[i].output.vector,
- instruction->op[i].output.swizzle, &alu->alu[comp].dst);
- if (r) {
- fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__);
- return r;
- }
- }
- switch (litteral_lastcomp) {
- case 0:
- case 1:
- alu->nliteral = 2;
- break;
- case 2:
- case 3:
- alu->nliteral = 4;
- break;
- case -1:
- default:
- break;
- }
-printf("nliteral: %d\n", alu->nliteral);
- for (i = instruction->nop; i >= 0; i--) {
- if (alu->alu[i].inst != INST_NOP) {
- alu->alu[i].last = 1;
- alu->nalu = i + 1;
- break;
- }
- }
- return 0;
-}
-
-void r600_shader_node_place(struct r600_shader *rshader)
-{
- struct r600_shader_node *node, *nnode;
- struct r600_shader_alu *alu, *nalu;
- struct r600_shader_vfetch *vfetch, *nvfetch;
- unsigned cf_id = 0, cf_addr = 0;
-
- rshader->ncf = 0;
- rshader->nslot = 0;
- c_list_for_each_safe(node, nnode, &rshader->nodes) {
- c_list_for_each_safe(alu, nalu, &node->alu) {
- node->nslot += alu->nalu;
- node->nslot += alu->nliteral >> 1;
- }
- node->nfetch = 0;
- c_list_for_each_safe(vfetch, nvfetch, &node->vfetch) {
- node->nslot += 2;
- node->nfetch += 1;
- }
- if (!c_list_empty(&node->vfetch)) {
- /* fetch node need to be 16 bytes aligned*/
- cf_addr += 1;
- cf_addr &= 0xFFFFFFFEUL;
- }
- node->cf_id = cf_id;
- node->cf_addr = cf_addr;
- cf_id += 2;
- cf_addr += node->nslot * 2;
- rshader->ncf++;
- }
- rshader->nslot = cf_addr;
- c_list_for_each_safe(node, nnode, &rshader->nodes) {
- node->cf_addr += cf_id * 2;
- }
- rshader->ncf += rshader->cshader.files[C_FILE_OUTPUT].nvectors;
- rshader->ndw = rshader->ncf * 2 + rshader->nslot * 2;
-}
-
-int r600_shader_legalize(struct r600_shader *rshader)
-{
- return 0;
-}
-
-
-static int r600_cshader_legalize_rec(struct c_shader *shader, struct c_node *node)
-{
- struct c_node_link *link;
- struct c_instruction *i;
- struct c_operand operand;
- unsigned k;
- int r;
-
- c_list_for_each(i, &node->insts) {
- for (k = 0; k < i->nop; k++) {
- switch (i->op[k].opcode) {
- case C_OPCODE_SLT:
- i->op[k].opcode = C_OPCODE_SGT;
- memcpy(&operand, &i->op[k].input[0], sizeof(struct c_operand));
- memcpy(&i->op[k].input[0], &i->op[k].input[1], sizeof(struct c_operand));
- memcpy(&i->op[k].input[1], &operand, sizeof(struct c_operand));
- break;
- default:
- break;
- }
- }
- }
- c_list_for_each(link, &node->childs) {
- r = r600_cshader_legalize_rec(shader, link->node);
- if (r) {
- return r;
- }
- }
- return 0;
-}
-
-int r600_cshader_legalize(struct c_shader *shader)
-{
- return r600_cshader_legalize_rec(shader, &shader->entry);
-}
-
-
-struct r600_instruction_info r600_instruction_info[] = {
- {INST_ADD, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, 0, 0},
- {INST_MUL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, 0, 0},
- {INST_MUL_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE, 0, 0},
- {INST_MAX, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, 0, 0},
- {INST_MIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, 0, 0},
- {INST_MAX_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10, 0, 0},
- {INST_MIN_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10, 0, 0},
- {INST_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, 0, 0},
- {INST_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, 0, 0},
- {INST_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, 0, 0},
- {INST_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, 0, 0},
- {INST_SETE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10, 0, 0},
- {INST_SETGT_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10, 0, 0},
- {INST_SETGE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10, 0, 0},
- {INST_SETNE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10, 0, 0},
- {INST_FRACT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, 0, 0},
- {INST_TRUNC, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, 0, 0},
- {INST_CEIL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, 0, 0},
- {INST_RNDNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, 0, 0},
- {INST_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, 0, 0},
- {INST_MOVA, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA, 0, 0},
- {INST_MOVA_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR, 0, 0},
- {INST_MOVA_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, 0, 0},
- {INST_MOV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, 0, 0},
- {INST_NOP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 0, 0},
- {INST_PRED_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT, 0, 0},
- {INST_PRED_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT, 0, 0},
- {INST_PRED_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE, 0, 0},
- {INST_PRED_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT, 0, 0},
- {INST_PRED_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE, 0, 0},
- {INST_PRED_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE, 0, 0},
- {INST_PRED_SET_INV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV, 0, 0},
- {INST_PRED_SET_POP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP, 0, 0},
- {INST_PRED_SET_CLR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR, 0, 0},
- {INST_PRED_SET_RESTORE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE, 0, 0},
- {INST_PRED_SETE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH, 0, 0},
- {INST_PRED_SETGT_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH, 0, 0},
- {INST_PRED_SETGE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH, 0, 0},
- {INST_PRED_SETNE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH, 0, 0},
- {INST_KILLE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE, 0, 0},
- {INST_KILLGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, 0, 0},
- {INST_KILLGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE, 0, 0},
- {INST_KILLNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE, 0, 0},
- {INST_AND_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, 0, 0},
- {INST_OR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, 0, 0},
- {INST_XOR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, 0, 0},
- {INST_NOT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, 0, 0},
- {INST_ADD_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, 0, 0},
- {INST_SUB_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, 0, 0},
- {INST_MAX_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, 0, 0},
- {INST_MIN_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, 0, 0},
- {INST_MAX_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, 0, 0},
- {INST_MIN_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, 0, 0},
- {INST_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, 0, 0},
- {INST_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, 0, 0},
- {INST_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, 0, 0},
- {INST_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, 0, 0},
- {INST_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, 0, 0},
- {INST_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, 0, 0},
- {INST_KILLGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT, 0, 0},
- {INST_KILLGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT, 0, 0},
- {INST_PRED_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT, 0, 0},
- {INST_PRED_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT, 0, 0},
- {INST_PRED_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT, 0, 0},
- {INST_PRED_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT, 0, 0},
- {INST_KILLE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT, 0, 0},
- {INST_KILLGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT, 0, 0},
- {INST_KILLGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT, 0, 0},
- {INST_KILLNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT, 0, 0},
- {INST_PRED_SETE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT, 0, 0},
- {INST_PRED_SETGT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT, 0, 0},
- {INST_PRED_SETGE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT, 0, 0},
- {INST_PRED_SETNE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT, 0, 0},
- {INST_PRED_SETLT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT, 0, 0},
- {INST_PRED_SETLE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT, 0, 0},
- {INST_DOT4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 0, 0},
- {INST_DOT4_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE, 0, 0},
- {INST_CUBE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE, 0, 0},
- {INST_MAX4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4, 0, 0},
- {INST_MOVA_GPR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 0, 0},
- {INST_EXP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 1, 0},
- {INST_LOG_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED, 1, 0},
- {INST_LOG_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, 1, 0},
- {INST_RECIP_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, 1, 0},
- {INST_RECIP_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF, 1, 0},
- {INST_RECIP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, 1, 0},
- {INST_RECIPSQRT_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED, 1, 0},
- {INST_RECIPSQRT_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF, 1, 0},
- {INST_RECIPSQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, 1, 0},
- {INST_SQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE, 1, 0},
- {INST_FLT_TO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, 1, 0},
- {INST_INT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, 1, 0},
- {INST_UINT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, 1, 0},
- {INST_SIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, 1, 0},
- {INST_COS, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, 1, 0},
- {INST_ASHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, 1, 0},
- {INST_LSHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, 1, 0},
- {INST_LSHL_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, 1, 0},
- {INST_MULLO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, 1, 0},
- {INST_MULHI_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT, 1, 0},
- {INST_MULLO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, 1, 0},
- {INST_MULHI_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT, 1, 0},
- {INST_RECIP_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT, 1, 0},
- {INST_RECIP_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT, 1, 0},
- {INST_FLT_TO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, 1, 0},
- {INST_MUL_LIT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT, 1, 1},
- {INST_MUL_LIT_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2, 1, 1},
- {INST_MUL_LIT_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4, 1, 1},
- {INST_MUL_LIT_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2, 1, 1},
- {INST_MULADD, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, 0, 1},
- {INST_MULADD_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2, 0, 1},
- {INST_MULADD_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4, 0, 1},
- {INST_MULADD_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2, 0, 1},
- {INST_MULADD_IEEE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE, 0, 1},
- {INST_MULADD_IEEE_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2, 0, 1},
- {INST_MULADD_IEEE_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4, 0, 1},
- {INST_MULADD_IEEE_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2, 0, 1},
- {INST_CNDE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE, 0, 1},
- {INST_CNDGT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT, 0, 1},
- {INST_CNDGE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE, 0, 1},
- {INST_CNDE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT, 0, 1},
- {INST_CNDGT_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT, 0, 1},
- {INST_CNDGE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT, 0, 1},
-};
-
-struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST] = {
- {C_OPCODE_NOP, INST_NOP},
- {C_OPCODE_MOV, INST_MOV},
- {C_OPCODE_LIT, INST_NOP},
- {C_OPCODE_RCP, INST_RECIP_IEEE},
- {C_OPCODE_RSQ, INST_RECIPSQRT_IEEE},
- {C_OPCODE_EXP, INST_EXP_IEEE},
- {C_OPCODE_LOG, INST_LOG_IEEE},
- {C_OPCODE_MUL, INST_MUL},
- {C_OPCODE_ADD, INST_ADD},
- {C_OPCODE_DP3, INST_DOT4},
- {C_OPCODE_DP4, INST_DOT4},
- {C_OPCODE_DST, INST_NOP},
- {C_OPCODE_MIN, INST_MIN},
- {C_OPCODE_MAX, INST_MAX},
- {C_OPCODE_SLT, INST_NOP},
- {C_OPCODE_SGE, INST_NOP},
- {C_OPCODE_MAD, INST_MULADD},
- {C_OPCODE_SUB, INST_COUNT},
- {C_OPCODE_LRP, INST_NOP},
- {C_OPCODE_CND, INST_NOP},
- {20, INST_NOP},
- {C_OPCODE_DP2A, INST_NOP},
- {22, INST_NOP},
- {23, INST_NOP},
- {C_OPCODE_FRC, INST_NOP},
- {C_OPCODE_CLAMP, INST_NOP},
- {C_OPCODE_FLR, INST_NOP},
- {C_OPCODE_ROUND, INST_NOP},
- {C_OPCODE_EX2, INST_NOP},
- {C_OPCODE_LG2, INST_NOP},
- {C_OPCODE_POW, INST_NOP},
- {C_OPCODE_XPD, INST_NOP},
- {32, INST_NOP},
- {C_OPCODE_ABS, INST_COUNT},
- {C_OPCODE_RCC, INST_NOP},
- {C_OPCODE_DPH, INST_NOP},
- {C_OPCODE_COS, INST_COS},
- {C_OPCODE_DDX, INST_NOP},
- {C_OPCODE_DDY, INST_NOP},
- {C_OPCODE_KILP, INST_NOP},
- {C_OPCODE_PK2H, INST_NOP},
- {C_OPCODE_PK2US, INST_NOP},
- {C_OPCODE_PK4B, INST_NOP},
- {C_OPCODE_PK4UB, INST_NOP},
- {C_OPCODE_RFL, INST_NOP},
- {C_OPCODE_SEQ, INST_NOP},
- {C_OPCODE_SFL, INST_NOP},
- {C_OPCODE_SGT, INST_SETGT},
- {C_OPCODE_SIN, INST_SIN},
- {C_OPCODE_SLE, INST_NOP},
- {C_OPCODE_SNE, INST_NOP},
- {C_OPCODE_STR, INST_NOP},
- {C_OPCODE_TEX, INST_NOP},
- {C_OPCODE_TXD, INST_NOP},
- {C_OPCODE_TXP, INST_NOP},
- {C_OPCODE_UP2H, INST_NOP},
- {C_OPCODE_UP2US, INST_NOP},
- {C_OPCODE_UP4B, INST_NOP},
- {C_OPCODE_UP4UB, INST_NOP},
- {C_OPCODE_X2D, INST_NOP},
- {C_OPCODE_ARA, INST_NOP},
- {C_OPCODE_ARR, INST_NOP},
- {C_OPCODE_BRA, INST_NOP},
- {C_OPCODE_CAL, INST_NOP},
- {C_OPCODE_RET, INST_NOP},
- {C_OPCODE_SSG, INST_NOP},
- {C_OPCODE_CMP, INST_NOP},
- {C_OPCODE_SCS, INST_NOP},
- {C_OPCODE_TXB, INST_NOP},
- {C_OPCODE_NRM, INST_NOP},
- {C_OPCODE_DIV, INST_NOP},
- {C_OPCODE_DP2, INST_NOP},
- {C_OPCODE_TXL, INST_NOP},
- {C_OPCODE_BRK, INST_NOP},
- {C_OPCODE_IF, INST_NOP},
- {C_OPCODE_BGNFOR, INST_NOP},
- {C_OPCODE_REP, INST_NOP},
- {C_OPCODE_ELSE, INST_NOP},
- {C_OPCODE_ENDIF, INST_NOP},
- {C_OPCODE_ENDFOR, INST_NOP},
- {C_OPCODE_ENDREP, INST_NOP},
- {C_OPCODE_PUSHA, INST_NOP},
- {C_OPCODE_POPA, INST_NOP},
- {C_OPCODE_CEIL, INST_NOP},
- {C_OPCODE_I2F, INST_NOP},
- {C_OPCODE_NOT, INST_NOP},
- {C_OPCODE_TRUNC, INST_NOP},
- {C_OPCODE_SHL, INST_NOP},
- {88, INST_NOP},
- {C_OPCODE_AND, INST_NOP},
- {C_OPCODE_OR, INST_NOP},
- {C_OPCODE_MOD, INST_NOP},
- {C_OPCODE_XOR, INST_NOP},
- {C_OPCODE_SAD, INST_NOP},
- {C_OPCODE_TXF, INST_NOP},
- {C_OPCODE_TXQ, INST_NOP},
- {C_OPCODE_CONT, INST_NOP},
- {C_OPCODE_EMIT, INST_NOP},
- {C_OPCODE_ENDPRIM, INST_NOP},
- {C_OPCODE_BGNLOOP, INST_NOP},
- {C_OPCODE_BGNSUB, INST_NOP},
- {C_OPCODE_ENDLOOP, INST_NOP},
- {C_OPCODE_ENDSUB, INST_NOP},
- {103, INST_NOP},
- {104, INST_NOP},
- {105, INST_NOP},
- {106, INST_NOP},
- {107, INST_NOP},
- {108, INST_NOP},
- {109, INST_NOP},
- {110, INST_NOP},
- {111, INST_NOP},
- {C_OPCODE_NRM4, INST_NOP},
- {C_OPCODE_CALLNZ, INST_NOP},
- {C_OPCODE_IFC, INST_NOP},
- {C_OPCODE_BREAKC, INST_NOP},
- {C_OPCODE_KIL, INST_NOP},
- {C_OPCODE_END, INST_NOP},
- {118, INST_NOP},
- {C_OPCODE_F2I, INST_NOP},
- {C_OPCODE_IDIV, INST_NOP},
- {C_OPCODE_IMAX, INST_NOP},
- {C_OPCODE_IMIN, INST_NOP},
- {C_OPCODE_INEG, INST_NOP},
- {C_OPCODE_ISGE, INST_NOP},
- {C_OPCODE_ISHR, INST_NOP},
- {C_OPCODE_ISLT, INST_NOP},
- {C_OPCODE_F2U, INST_NOP},
- {C_OPCODE_U2F, INST_NOP},
- {C_OPCODE_UADD, INST_NOP},
- {C_OPCODE_UDIV, INST_NOP},
- {C_OPCODE_UMAD, INST_NOP},
- {C_OPCODE_UMAX, INST_NOP},
- {C_OPCODE_UMIN, INST_NOP},
- {C_OPCODE_UMOD, INST_NOP},
- {C_OPCODE_UMUL, INST_NOP},
- {C_OPCODE_USEQ, INST_NOP},
- {C_OPCODE_USGE, INST_NOP},
- {C_OPCODE_USHR, INST_NOP},
- {C_OPCODE_USLT, INST_NOP},
- {C_OPCODE_USNE, INST_NOP},
- {C_OPCODE_SWITCH, INST_NOP},
- {C_OPCODE_CASE, INST_NOP},
- {C_OPCODE_DEFAULT, INST_NOP},
- {C_OPCODE_ENDSWITCH, INST_NOP},
- {C_OPCODE_VFETCH, INST_NOP},
- {C_OPCODE_ENTRY, INST_NOP},
- {C_OPCODE_ARL, INST_NOP},
-};
diff --git a/src/gallium/drivers/r600/r600_compiler_r700.c b/src/gallium/drivers/r600/r600_compiler_r700.c
deleted file mode 100644
index 809a57ae5c..0000000000
--- a/src/gallium/drivers/r600/r600_compiler_r700.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <errno.h>
-#include "r600_context.h"
-#include "r700_sq.h"
-
-static int r700_shader_cf_node_bytecode(struct r600_shader *rshader,
- struct r600_shader_node *rnode,
- unsigned *cid)
-{
- unsigned id = *cid;
-
- if (rnode->nfetch) {
- rshader->bcode[id++] = S_SQ_CF_WORD0_ADDR(rnode->cf_addr >> 1);
- rshader->bcode[id++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(rnode->nfetch - 1);
- } else {
- rshader->bcode[id++] = S_SQ_CF_ALU_WORD0_ADDR(rnode->cf_addr >> 1);
- rshader->bcode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) |
- S_SQ_CF_ALU_WORD1_BARRIER(1) |
- S_SQ_CF_ALU_WORD1_COUNT(rnode->nslot - 1);
- }
- *cid = id;
- return 0;
-}
-
-static int r700_shader_cf_output_bytecode(struct r600_shader *rshader,
- struct c_vector *v,
- unsigned *cid,
- unsigned end)
-{
- struct r600_shader_operand out;
- unsigned id = *cid;
- int r;
-
- r = r600_shader_find_gpr(rshader, v, 0, &out);
- if (r)
- return r;
- rshader->bcode[id + 0] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(out.sel) |
- S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(3);
- rshader->bcode[id + 1] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(0) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(1) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(2) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(3) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(1) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end);
- switch (v->name) {
- case C_SEMANTIC_POSITION:
- rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(60) |
- S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- break;
- case C_SEMANTIC_COLOR:
- if (rshader->cshader.type == C_PROGRAM_TYPE_VS) {
- rshader->output[rshader->noutput].gpr = out.sel;
- rshader->output[rshader->noutput].sid = v->sid;
- rshader->output[rshader->noutput].name = v->name;
- rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) |
- S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- } else {
- rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(0) |
- S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- }
- break;
- case C_SEMANTIC_GENERIC:
- rshader->output[rshader->noutput].gpr = out.sel;
- rshader->output[rshader->noutput].sid = v->sid;
- rshader->output[rshader->noutput].name = v->name;
- rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) |
- S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- break;
- default:
- fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- *cid = id + 2;
- return 0;
-}
-
-static int r700_shader_alu_bytecode(struct r600_shader *rshader,
- struct r600_shader_node *rnode,
- struct r600_shader_inst *alu,
- unsigned *cid)
-{
- unsigned id = *cid;
-
- /* don't replace gpr by pv or ps for destination register */
- if (alu->is_op3) {
- rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
- S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
- S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
- S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
- S_SQ_ALU_WORD0_LAST(alu->last);
- rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
- S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
- S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
- S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
- S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
- S_SQ_ALU_WORD1_OP3_ALU_INST(alu->opcode) |
- S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
- } else {
- rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
- S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
- S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
- S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
- S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
- S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
- S_SQ_ALU_WORD0_LAST(alu->last);
- rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
- S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
- S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
- S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
- S_SQ_ALU_WORD1_OP2_WRITE_MASK(1) |
- S_SQ_ALU_WORD1_OP2_ALU_INST(alu->opcode) |
- S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
- }
- *cid = id;
- return 0;
-}
-
-int r700_shader_translate(struct r600_shader *rshader)
-{
- struct c_shader *shader = &rshader->cshader;
- struct r600_shader_node *rnode;
- struct r600_shader_vfetch *vfetch;
- struct r600_shader_alu *alu;
- struct c_vector *v;
- unsigned id, i, end;
- int r;
-
- r = r600_shader_register(rshader);
- if (r) {
- fprintf(stderr, "%s %d register allocation failed\n", __FILE__, __LINE__);
- return r;
- }
- r = r600_shader_translate_rec(rshader, &shader->entry);
- if (r) {
- fprintf(stderr, "%s %d translation failed\n", __FILE__, __LINE__);
- return r;
- }
- r = r600_shader_legalize(rshader);
- if (r) {
- fprintf(stderr, "%s %d legalize failed\n", __FILE__, __LINE__);
- return r;
- }
- r600_shader_node_place(rshader);
- rshader->bcode = malloc(rshader->ndw * 4);
- if (rshader->bcode == NULL)
- return -ENOMEM;
- c_list_for_each(rnode, &rshader->nodes) {
- id = rnode->cf_addr;
- c_list_for_each(vfetch, &rnode->vfetch) {
- r = r600_shader_vfetch_bytecode(rshader, rnode, vfetch, &id);
- if (r)
- return r;
- }
- c_list_for_each(alu, &rnode->alu) {
- for (i = 0; i < alu->nalu; i++) {
- r = r700_shader_alu_bytecode(rshader, rnode, &alu->alu[i], &id);
- if (r)
- return r;
- }
- for (i = 0; i < alu->nliteral; i++) {
- rshader->bcode[id++] = alu->literal[i];
- }
- }
- }
- id = 0;
- c_list_for_each(rnode, &rshader->nodes) {
- r = r700_shader_cf_node_bytecode(rshader, rnode, &id);
- if (r)
- return r;
- }
- c_list_for_each(v, &rshader->cshader.files[C_FILE_OUTPUT].vectors) {
- end = 0;
- if (v->next == &rshader->cshader.files[C_FILE_OUTPUT].vectors)
- end = 1;
- r = r700_shader_cf_output_bytecode(rshader, v, &id, end);
- if (r)
- return r;
- }
- c_list_for_each(v, &rshader->cshader.files[C_FILE_INPUT].vectors) {
- rshader->input[rshader->ninput].gpr = rshader->ninput;
- rshader->input[rshader->ninput].sid = v->sid;
- rshader->input[rshader->ninput].name = v->name;
- rshader->ninput++;
- }
- return 0;
-}
diff --git a/src/gallium/drivers/r600/r600_compiler_tgsi.c b/src/gallium/drivers/r600/r600_compiler_tgsi.c
deleted file mode 100644
index 172cf154a3..0000000000
--- a/src/gallium/drivers/r600/r600_compiler_tgsi.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <errno.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_scan.h>
-#include "r600_shader.h"
-#include "r600_context.h"
-
-struct tgsi_shader {
- struct c_vector **v[TGSI_FILE_COUNT];
- struct tgsi_shader_info info;
- struct tgsi_parse_context parser;
- const struct tgsi_token *tokens;
- struct c_shader *shader;
- struct c_node *node;
-};
-
-static unsigned tgsi_file_to_c_file(unsigned file);
-static unsigned tgsi_sname_to_c_sname(unsigned sname);
-static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode);
-
-static int tgsi_shader_init(struct tgsi_shader *ts,
- const struct tgsi_token *tokens,
- struct c_shader *shader)
-{
- int i;
-
- ts->shader = shader;
- ts->tokens = tokens;
- tgsi_scan_shader(ts->tokens, &ts->info);
- tgsi_parse_init(&ts->parser, ts->tokens);
- /* initialize to NULL in case of error */
- for (i = 0; i < C_FILE_COUNT; i++) {
- ts->v[i] = NULL;
- }
- for (i = 0; i < TGSI_FILE_COUNT; i++) {
- if (ts->info.file_count[i] > 0) {
- ts->v[i] = calloc(ts->info.file_count[i], sizeof(void*));
- if (ts->v[i] == NULL) {
- fprintf(stderr, "%s:%d unsupported %d %d\n", __func__, __LINE__, i, ts->info.file_count[i]);
- return -ENOMEM;
- }
- }
- }
- return 0;
-}
-
-static void tgsi_shader_destroy(struct tgsi_shader *ts)
-{
- int i;
-
- for (i = 0; i < TGSI_FILE_COUNT; i++) {
- free(ts->v[i]);
- }
- tgsi_parse_free(&ts->parser);
-}
-
-static int ntransform_declaration(struct tgsi_shader *ts)
-{
- struct tgsi_full_declaration *fd = &ts->parser.FullToken.FullDeclaration;
- struct c_vector *v;
- unsigned file;
- unsigned name;
- int sid;
- int i;
-
- if (fd->Declaration.Dimension) {
- fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- for (i = fd->Range.First ; i <= fd->Range.Last; i++) {
- sid = i;
- name = C_SEMANTIC_GENERIC;
- file = tgsi_file_to_c_file(fd->Declaration.File);
- if (file == TGSI_FILE_NULL) {
- fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (fd->Declaration.Semantic) {
- name = tgsi_sname_to_c_sname(fd->Semantic.Name);
- sid = fd->Semantic.Index;
- }
- v = c_shader_vector_new(ts->shader, file, name, sid);
- if (v == NULL) {
- fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__);
- return -ENOMEM;
- }
- ts->v[fd->Declaration.File][i] = v;
- }
- return 0;
-}
-
-static int ntransform_immediate(struct tgsi_shader *ts)
-{
- struct tgsi_full_immediate *fd = &ts->parser.FullToken.FullImmediate;
- struct c_vector *v;
- unsigned file;
- unsigned name;
-
- if (fd->Immediate.DataType != TGSI_IMM_FLOAT32) {
- fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- name = C_SEMANTIC_GENERIC;
- file = C_FILE_IMMEDIATE;
- v = c_shader_vector_new(ts->shader, file, name, 0);
- if (v == NULL) {
- fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__);
- return -ENOMEM;
- }
- v->channel[0]->value = fd->u[0].Uint;
- v->channel[1]->value = fd->u[1].Uint;
- v->channel[2]->value = fd->u[2].Uint;
- v->channel[3]->value = fd->u[3].Uint;
- ts->v[TGSI_FILE_IMMEDIATE][0] = v;
- return 0;
-}
-
-static int ntransform_instruction(struct tgsi_shader *ts)
-{
- struct tgsi_full_instruction *fi = &ts->parser.FullToken.FullInstruction;
- struct c_shader *shader = ts->shader;
- struct c_instruction instruction;
- unsigned opcode;
- int i, j, r;
-
- if (fi->Instruction.NumDstRegs > 1) {
- fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (fi->Instruction.Saturate) {
- fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (fi->Instruction.Predicate) {
- fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (fi->Instruction.Label) {
- fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (fi->Instruction.Texture) {
- fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- for (i = 0; i < fi->Instruction.NumSrcRegs; i++) {
- if (fi->Src[i].Register.Indirect ||
- fi->Src[i].Register.Dimension ||
- fi->Src[i].Register.Absolute) {
- fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- }
- for (i = 0; i < fi->Instruction.NumDstRegs; i++) {
- if (fi->Dst[i].Register.Indirect || fi->Dst[i].Register.Dimension) {
- fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__);
- return -EINVAL;
- }
- }
- r = tgsi_opcode_to_c_opcode(fi->Instruction.Opcode, &opcode);
- if (r) {
- fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__);
- return r;
- }
- if (opcode == C_OPCODE_END) {
- return c_node_cfg_link(ts->node, &shader->end);
- }
- /* FIXME add flow instruction handling */
- memset(&instruction, 0, sizeof(struct c_instruction));
- instruction.nop = 0;
- for (j = 0; j < 4; j++) {
- instruction.op[instruction.nop].opcode = opcode;
- instruction.op[instruction.nop].ninput = fi->Instruction.NumSrcRegs;
- for (i = 0; i < fi->Instruction.NumSrcRegs; i++) {
- instruction.op[instruction.nop].input[i].vector = ts->v[fi->Src[i].Register.File][fi->Src[i].Register.Index];
- switch (j) {
- case 0:
- instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleX;
- break;
- case 1:
- instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleY;
- break;
- case 2:
- instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleZ;
- break;
- case 3:
- instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleW;
- break;
- default:
- return -EINVAL;
- }
- }
- instruction.op[instruction.nop].output.vector = ts->v[fi->Dst[0].Register.File][fi->Dst[0].Register.Index];
- switch (j) {
- case 0:
- instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_X : C_SWIZZLE_D;
- break;
- case 1:
- instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_Y : C_SWIZZLE_D;
- break;
- case 2:
- instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_Z : C_SWIZZLE_D;
- break;
- case 3:
- instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_W : C_SWIZZLE_D;
- break;
- default:
- return -EINVAL;
- }
- instruction.nop++;
- }
- return c_node_add_new_instruction(ts->node, &instruction);
-}
-
-int c_shader_from_tgsi(struct c_shader *shader, unsigned type,
- const struct tgsi_token *tokens)
-{
- struct tgsi_shader ts;
- int r = 0;
-
- c_shader_init(shader, type);
- r = tgsi_shader_init(&ts, tokens, shader);
- if (r)
- goto out_err;
- ts.shader = shader;
- ts.node = &shader->entry;
- while (!tgsi_parse_end_of_tokens(&ts.parser)) {
- tgsi_parse_token(&ts.parser);
- switch (ts.parser.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- r = ntransform_immediate(&ts);
- if (r)
- goto out_err;
- break;
- case TGSI_TOKEN_TYPE_DECLARATION:
- r = ntransform_declaration(&ts);
- if (r)
- goto out_err;
- break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- r = ntransform_instruction(&ts);
- if (r)
- goto out_err;
- break;
- default:
- r = -EINVAL;
- goto out_err;
- }
- }
- tgsi_shader_destroy(&ts);
- return 0;
-out_err:
- c_shader_destroy(shader);
- tgsi_shader_destroy(&ts);
- return r;
-}
-
-static unsigned tgsi_file_to_c_file(unsigned file)
-{
- switch (file) {
- case TGSI_FILE_CONSTANT:
- return C_FILE_CONSTANT;
- case TGSI_FILE_INPUT:
- return C_FILE_INPUT;
- case TGSI_FILE_OUTPUT:
- return C_FILE_OUTPUT;
- case TGSI_FILE_TEMPORARY:
- return C_FILE_TEMPORARY;
- case TGSI_FILE_SAMPLER:
- return C_FILE_SAMPLER;
- case TGSI_FILE_ADDRESS:
- return C_FILE_ADDRESS;
- case TGSI_FILE_IMMEDIATE:
- return C_FILE_IMMEDIATE;
- case TGSI_FILE_PREDICATE:
- return C_FILE_PREDICATE;
- case TGSI_FILE_SYSTEM_VALUE:
- return C_FILE_SYSTEM_VALUE;
- case TGSI_FILE_NULL:
- return C_FILE_NULL;
- default:
- fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, file);
- return C_FILE_NULL;
- }
-}
-
-static unsigned tgsi_sname_to_c_sname(unsigned sname)
-{
- switch (sname) {
- case TGSI_SEMANTIC_POSITION:
- return C_SEMANTIC_POSITION;
- case TGSI_SEMANTIC_COLOR:
- return C_SEMANTIC_COLOR;
- case TGSI_SEMANTIC_BCOLOR:
- return C_SEMANTIC_BCOLOR;
- case TGSI_SEMANTIC_FOG:
- return C_SEMANTIC_FOG;
- case TGSI_SEMANTIC_PSIZE:
- return C_SEMANTIC_PSIZE;
- case TGSI_SEMANTIC_GENERIC:
- return C_SEMANTIC_GENERIC;
- case TGSI_SEMANTIC_NORMAL:
- return C_SEMANTIC_NORMAL;
- case TGSI_SEMANTIC_FACE:
- return C_SEMANTIC_FACE;
- case TGSI_SEMANTIC_EDGEFLAG:
- return C_SEMANTIC_EDGEFLAG;
- case TGSI_SEMANTIC_PRIMID:
- return C_SEMANTIC_PRIMID;
- case TGSI_SEMANTIC_INSTANCEID:
- return C_SEMANTIC_INSTANCEID;
- default:
- return C_SEMANTIC_GENERIC;
- }
-}
-
-static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode)
-{
- switch (opcode) {
- case TGSI_OPCODE_MOV:
- *copcode = C_OPCODE_MOV;
- return 0;
- case TGSI_OPCODE_MUL:
- *copcode = C_OPCODE_MUL;
- return 0;
- case TGSI_OPCODE_MAD:
- *copcode = C_OPCODE_MAD;
- return 0;
- case TGSI_OPCODE_END:
- *copcode = C_OPCODE_END;
- return 0;
- case TGSI_OPCODE_ARL:
- *copcode = C_OPCODE_ARL;
- return 0;
- case TGSI_OPCODE_LIT:
- *copcode = C_OPCODE_LIT;
- return 0;
- case TGSI_OPCODE_RCP:
- *copcode = C_OPCODE_RCP;
- return 0;
- case TGSI_OPCODE_RSQ:
- *copcode = C_OPCODE_RSQ;
- return 0;
- case TGSI_OPCODE_EXP:
- *copcode = C_OPCODE_EXP;
- return 0;
- case TGSI_OPCODE_LOG:
- *copcode = C_OPCODE_LOG;
- return 0;
- case TGSI_OPCODE_ADD:
- *copcode = C_OPCODE_ADD;
- return 0;
- case TGSI_OPCODE_DP3:
- *copcode = C_OPCODE_DP3;
- return 0;
- case TGSI_OPCODE_DP4:
- *copcode = C_OPCODE_DP4;
- return 0;
- case TGSI_OPCODE_DST:
- *copcode = C_OPCODE_DST;
- return 0;
- case TGSI_OPCODE_MIN:
- *copcode = C_OPCODE_MIN;
- return 0;
- case TGSI_OPCODE_MAX:
- *copcode = C_OPCODE_MAX;
- return 0;
- case TGSI_OPCODE_SLT:
- *copcode = C_OPCODE_SLT;
- return 0;
- case TGSI_OPCODE_SGE:
- *copcode = C_OPCODE_SGE;
- return 0;
- case TGSI_OPCODE_SUB:
- *copcode = C_OPCODE_SUB;
- return 0;
- case TGSI_OPCODE_LRP:
- *copcode = C_OPCODE_LRP;
- return 0;
- case TGSI_OPCODE_CND:
- *copcode = C_OPCODE_CND;
- return 0;
- case TGSI_OPCODE_DP2A:
- *copcode = C_OPCODE_DP2A;
- return 0;
- case TGSI_OPCODE_FRC:
- *copcode = C_OPCODE_FRC;
- return 0;
- case TGSI_OPCODE_CLAMP:
- *copcode = C_OPCODE_CLAMP;
- return 0;
- case TGSI_OPCODE_FLR:
- *copcode = C_OPCODE_FLR;
- return 0;
- case TGSI_OPCODE_ROUND:
- *copcode = C_OPCODE_ROUND;
- return 0;
- case TGSI_OPCODE_EX2:
- *copcode = C_OPCODE_EX2;
- return 0;
- case TGSI_OPCODE_LG2:
- *copcode = C_OPCODE_LG2;
- return 0;
- case TGSI_OPCODE_POW:
- *copcode = C_OPCODE_POW;
- return 0;
- case TGSI_OPCODE_XPD:
- *copcode = C_OPCODE_XPD;
- return 0;
- case TGSI_OPCODE_ABS:
- *copcode = C_OPCODE_ABS;
- return 0;
- case TGSI_OPCODE_RCC:
- *copcode = C_OPCODE_RCC;
- return 0;
- case TGSI_OPCODE_DPH:
- *copcode = C_OPCODE_DPH;
- return 0;
- case TGSI_OPCODE_COS:
- *copcode = C_OPCODE_COS;
- return 0;
- case TGSI_OPCODE_DDX:
- *copcode = C_OPCODE_DDX;
- return 0;
- case TGSI_OPCODE_DDY:
- *copcode = C_OPCODE_DDY;
- return 0;
- case TGSI_OPCODE_KILP:
- *copcode = C_OPCODE_KILP;
- return 0;
- case TGSI_OPCODE_PK2H:
- *copcode = C_OPCODE_PK2H;
- return 0;
- case TGSI_OPCODE_PK2US:
- *copcode = C_OPCODE_PK2US;
- return 0;
- case TGSI_OPCODE_PK4B:
- *copcode = C_OPCODE_PK4B;
- return 0;
- case TGSI_OPCODE_PK4UB:
- *copcode = C_OPCODE_PK4UB;
- return 0;
- case TGSI_OPCODE_RFL:
- *copcode = C_OPCODE_RFL;
- return 0;
- case TGSI_OPCODE_SEQ:
- *copcode = C_OPCODE_SEQ;
- return 0;
- case TGSI_OPCODE_SFL:
- *copcode = C_OPCODE_SFL;
- return 0;
- case TGSI_OPCODE_SGT:
- *copcode = C_OPCODE_SGT;
- return 0;
- case TGSI_OPCODE_SIN:
- *copcode = C_OPCODE_SIN;
- return 0;
- case TGSI_OPCODE_SLE:
- *copcode = C_OPCODE_SLE;
- return 0;
- case TGSI_OPCODE_SNE:
- *copcode = C_OPCODE_SNE;
- return 0;
- case TGSI_OPCODE_STR:
- *copcode = C_OPCODE_STR;
- return 0;
- case TGSI_OPCODE_TEX:
- *copcode = C_OPCODE_TEX;
- return 0;
- case TGSI_OPCODE_TXD:
- *copcode = C_OPCODE_TXD;
- return 0;
- case TGSI_OPCODE_TXP:
- *copcode = C_OPCODE_TXP;
- return 0;
- case TGSI_OPCODE_UP2H:
- *copcode = C_OPCODE_UP2H;
- return 0;
- case TGSI_OPCODE_UP2US:
- *copcode = C_OPCODE_UP2US;
- return 0;
- case TGSI_OPCODE_UP4B:
- *copcode = C_OPCODE_UP4B;
- return 0;
- case TGSI_OPCODE_UP4UB:
- *copcode = C_OPCODE_UP4UB;
- return 0;
- case TGSI_OPCODE_X2D:
- *copcode = C_OPCODE_X2D;
- return 0;
- case TGSI_OPCODE_ARA:
- *copcode = C_OPCODE_ARA;
- return 0;
- case TGSI_OPCODE_ARR:
- *copcode = C_OPCODE_ARR;
- return 0;
- case TGSI_OPCODE_BRA:
- *copcode = C_OPCODE_BRA;
- return 0;
- case TGSI_OPCODE_CAL:
- *copcode = C_OPCODE_CAL;
- return 0;
- case TGSI_OPCODE_RET:
- *copcode = C_OPCODE_RET;
- return 0;
- case TGSI_OPCODE_SSG:
- *copcode = C_OPCODE_SSG;
- return 0;
- case TGSI_OPCODE_CMP:
- *copcode = C_OPCODE_CMP;
- return 0;
- case TGSI_OPCODE_SCS:
- *copcode = C_OPCODE_SCS;
- return 0;
- case TGSI_OPCODE_TXB:
- *copcode = C_OPCODE_TXB;
- return 0;
- case TGSI_OPCODE_NRM:
- *copcode = C_OPCODE_NRM;
- return 0;
- case TGSI_OPCODE_DIV:
- *copcode = C_OPCODE_DIV;
- return 0;
- case TGSI_OPCODE_DP2:
- *copcode = C_OPCODE_DP2;
- return 0;
- case TGSI_OPCODE_TXL:
- *copcode = C_OPCODE_TXL;
- return 0;
- case TGSI_OPCODE_BRK:
- *copcode = C_OPCODE_BRK;
- return 0;
- case TGSI_OPCODE_IF:
- *copcode = C_OPCODE_IF;
- return 0;
- case TGSI_OPCODE_ELSE:
- *copcode = C_OPCODE_ELSE;
- return 0;
- case TGSI_OPCODE_ENDIF:
- *copcode = C_OPCODE_ENDIF;
- return 0;
- case TGSI_OPCODE_PUSHA:
- *copcode = C_OPCODE_PUSHA;
- return 0;
- case TGSI_OPCODE_POPA:
- *copcode = C_OPCODE_POPA;
- return 0;
- case TGSI_OPCODE_CEIL:
- *copcode = C_OPCODE_CEIL;
- return 0;
- case TGSI_OPCODE_I2F:
- *copcode = C_OPCODE_I2F;
- return 0;
- case TGSI_OPCODE_NOT:
- *copcode = C_OPCODE_NOT;
- return 0;
- case TGSI_OPCODE_TRUNC:
- *copcode = C_OPCODE_TRUNC;
- return 0;
- case TGSI_OPCODE_SHL:
- *copcode = C_OPCODE_SHL;
- return 0;
- case TGSI_OPCODE_AND:
- *copcode = C_OPCODE_AND;
- return 0;
- case TGSI_OPCODE_OR:
- *copcode = C_OPCODE_OR;
- return 0;
- case TGSI_OPCODE_MOD:
- *copcode = C_OPCODE_MOD;
- return 0;
- case TGSI_OPCODE_XOR:
- *copcode = C_OPCODE_XOR;
- return 0;
- case TGSI_OPCODE_SAD:
- *copcode = C_OPCODE_SAD;
- return 0;
- case TGSI_OPCODE_TXF:
- *copcode = C_OPCODE_TXF;
- return 0;
- case TGSI_OPCODE_TXQ:
- *copcode = C_OPCODE_TXQ;
- return 0;
- case TGSI_OPCODE_CONT:
- *copcode = C_OPCODE_CONT;
- return 0;
- case TGSI_OPCODE_EMIT:
- *copcode = C_OPCODE_EMIT;
- return 0;
- case TGSI_OPCODE_ENDPRIM:
- *copcode = C_OPCODE_ENDPRIM;
- return 0;
- case TGSI_OPCODE_BGNLOOP:
- *copcode = C_OPCODE_BGNLOOP;
- return 0;
- case TGSI_OPCODE_BGNSUB:
- *copcode = C_OPCODE_BGNSUB;
- return 0;
- case TGSI_OPCODE_ENDLOOP:
- *copcode = C_OPCODE_ENDLOOP;
- return 0;
- case TGSI_OPCODE_ENDSUB:
- *copcode = C_OPCODE_ENDSUB;
- return 0;
- case TGSI_OPCODE_NOP:
- *copcode = C_OPCODE_NOP;
- return 0;
- case TGSI_OPCODE_NRM4:
- *copcode = C_OPCODE_NRM4;
- return 0;
- case TGSI_OPCODE_CALLNZ:
- *copcode = C_OPCODE_CALLNZ;
- return 0;
- case TGSI_OPCODE_IFC:
- *copcode = C_OPCODE_IFC;
- return 0;
- case TGSI_OPCODE_BREAKC:
- *copcode = C_OPCODE_BREAKC;
- return 0;
- case TGSI_OPCODE_KIL:
- *copcode = C_OPCODE_KIL;
- return 0;
- case TGSI_OPCODE_F2I:
- *copcode = C_OPCODE_F2I;
- return 0;
- case TGSI_OPCODE_IDIV:
- *copcode = C_OPCODE_IDIV;
- return 0;
- case TGSI_OPCODE_IMAX:
- *copcode = C_OPCODE_IMAX;
- return 0;
- case TGSI_OPCODE_IMIN:
- *copcode = C_OPCODE_IMIN;
- return 0;
- case TGSI_OPCODE_INEG:
- *copcode = C_OPCODE_INEG;
- return 0;
- case TGSI_OPCODE_ISGE:
- *copcode = C_OPCODE_ISGE;
- return 0;
- case TGSI_OPCODE_ISHR:
- *copcode = C_OPCODE_ISHR;
- return 0;
- case TGSI_OPCODE_ISLT:
- *copcode = C_OPCODE_ISLT;
- return 0;
- case TGSI_OPCODE_F2U:
- *copcode = C_OPCODE_F2U;
- return 0;
- case TGSI_OPCODE_U2F:
- *copcode = C_OPCODE_U2F;
- return 0;
- case TGSI_OPCODE_UADD:
- *copcode = C_OPCODE_UADD;
- return 0;
- case TGSI_OPCODE_UDIV:
- *copcode = C_OPCODE_UDIV;
- return 0;
- case TGSI_OPCODE_UMAD:
- *copcode = C_OPCODE_UMAD;
- return 0;
- case TGSI_OPCODE_UMAX:
- *copcode = C_OPCODE_UMAX;
- return 0;
- case TGSI_OPCODE_UMIN:
- *copcode = C_OPCODE_UMIN;
- return 0;
- case TGSI_OPCODE_UMOD:
- *copcode = C_OPCODE_UMOD;
- return 0;
- case TGSI_OPCODE_UMUL:
- *copcode = C_OPCODE_UMUL;
- return 0;
- case TGSI_OPCODE_USEQ:
- *copcode = C_OPCODE_USEQ;
- return 0;
- case TGSI_OPCODE_USGE:
- *copcode = C_OPCODE_USGE;
- return 0;
- case TGSI_OPCODE_USHR:
- *copcode = C_OPCODE_USHR;
- return 0;
- case TGSI_OPCODE_USLT:
- *copcode = C_OPCODE_USLT;
- return 0;
- case TGSI_OPCODE_USNE:
- *copcode = C_OPCODE_USNE;
- return 0;
- case TGSI_OPCODE_SWITCH:
- *copcode = C_OPCODE_SWITCH;
- return 0;
- case TGSI_OPCODE_CASE:
- *copcode = C_OPCODE_CASE;
- return 0;
- case TGSI_OPCODE_DEFAULT:
- *copcode = C_OPCODE_DEFAULT;
- return 0;
- case TGSI_OPCODE_ENDSWITCH:
- *copcode = C_OPCODE_ENDSWITCH;
- return 0;
- default:
- fprintf(stderr, "%s:%d unsupported opcode %d\n", __func__, __LINE__, opcode);
- return -EINVAL;
- }
-}
diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c
index 0a7efe3bfb..05575b5767 100644
--- a/src/gallium/drivers/r600/r600_context.c
+++ b/src/gallium/drivers/r600/r600_context.c
@@ -32,6 +32,7 @@
#include "r600_resource.h"
#include "r600_screen.h"
#include "r600_context.h"
+#include "r600d.h"
static void r600_destroy_context(struct pipe_context *context)
{
@@ -62,6 +63,245 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
dc++;
}
+/*
+ * Build the R600_CONFIG state for this context.
+ *
+ * Picks per-family values for the ps/vs/gs/es priority, GPR count, thread
+ * count and stack-entry count, prints them to stdout, then packs them into
+ * the SQ_* entries of rctx->config. The remaining entries are set to fixed
+ * constants and the state is handed to radeon_state_pm4().
+ *
+ * NOTE(review): the pointer returned by radeon_state() is dereferenced
+ * without a NULL check.
+ */
+static void r600_init_config(struct r600_context *rctx)
+{
+ int ps_prio;
+ int vs_prio;
+ int gs_prio;
+ int es_prio;
+ int num_ps_gprs;
+ int num_vs_gprs;
+ int num_gs_gprs;
+ int num_es_gprs;
+ int num_temp_gprs;
+ int num_ps_threads;
+ int num_vs_threads;
+ int num_gs_threads;
+ int num_es_threads;
+ int num_ps_stack_entries;
+ int num_vs_stack_entries;
+ int num_gs_stack_entries;
+ int num_es_stack_entries;
+ enum radeon_family family;
+
+ family = radeon_get_family(rctx->rw);
+ ps_prio = 0;
+ vs_prio = 1;
+ gs_prio = 2;
+ es_prio = 3;
+ /* per-family resource split; every case assigns all thirteen counts */
+ switch (family) {
+ case CHIP_R600:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 40;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ /* families not listed anywhere in this switch share these values */
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ default:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV670:
+ num_ps_gprs = 144;
+ num_vs_gprs = 40;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV770:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 256;
+ num_vs_stack_entries = 256;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV730:
+ case CHIP_RV740:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV710:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 48;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ }
+ /* dump the chosen values to stdout */
+ printf("ps_prio : %d\n", ps_prio);
+ printf("vs_prio : %d\n", vs_prio);
+ printf("gs_prio : %d\n", gs_prio);
+ printf("es_prio : %d\n", es_prio);
+ printf("num_ps_gprs : %d\n", num_ps_gprs);
+ printf("num_vs_gprs : %d\n", num_vs_gprs);
+ printf("num_gs_gprs : %d\n", num_gs_gprs);
+ printf("num_es_gprs : %d\n", num_es_gprs);
+ printf("num_temp_gprs : %d\n", num_temp_gprs);
+ printf("num_ps_threads : %d\n", num_ps_threads);
+ printf("num_vs_threads : %d\n", num_vs_threads);
+ printf("num_gs_threads : %d\n", num_gs_threads);
+ printf("num_es_threads : %d\n", num_es_threads);
+ printf("num_ps_stack_entries : %d\n", num_ps_stack_entries);
+ printf("num_vs_stack_entries : %d\n", num_vs_stack_entries);
+ printf("num_gs_stack_entries : %d\n", num_gs_stack_entries);
+ printf("num_es_stack_entries : %d\n", num_es_stack_entries);
+
+ rctx->config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG);
+
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
+ /* VC_ENABLE is set for every family except the five listed here */
+ switch (family) {
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ case CHIP_RV710:
+ break;
+ default:
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1);
+ break;
+ }
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1);
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1);
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio);
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio);
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio);
+ rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio);
+
+ rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0;
+ rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
+ rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+ rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
+
+ rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0;
+ rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
+ /* the ES count gets its own field setter, mirroring the GS/ES pairs below
+  * (assumes S_008C08_NUM_ES_GPRS is provided by r600d.h -- TODO confirm) */
+ rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_ES_GPRS(num_es_gprs);
+
+ rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0;
+ rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
+ rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
+ rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
+ rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads);
+
+ rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0;
+ rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
+ rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
+
+ rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0;
+ rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
+ rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
+
+ /* remaining entries are family-independent constants */
+ rctx->config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000;
+ rctx->config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002;
+ rctx->config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__DB_DEBUG] = 0x00000000;
+ rctx->config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204;
+ rctx->config->states[R600_CONFIG__SX_MISC] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001;
+ rctx->config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003;
+ rctx->config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000;
+ rctx->config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000;
+ rctx->config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001;
+ rctx->config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
+ rctx->config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000;
+ radeon_state_pm4(rctx->config);
+}
+
struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
{
struct r600_context *rctx = CALLOC_STRUCT(r600_context);
@@ -107,49 +347,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF;
radeon_state_pm4(rctx->cb_cntl);
- rctx->config = radeon_state(rscreen->rw, R600_CONFIG_TYPE, R600_CONFIG);
- rctx->config->states[R600_CONFIG__SQ_CONFIG] = 0xE400000C;
- rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0x403800C0;
- rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0x00003090;
- rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0x00800080;
- rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000;
- rctx->config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002;
- rctx->config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000;
- rctx->config->states[R600_CONFIG__DB_DEBUG] = 0x00000000;
- rctx->config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204;
- rctx->config->states[R600_CONFIG__SX_MISC] = 0x00000000;
- rctx->config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001;
- rctx->config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003;
- rctx->config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000;
- rctx->config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000;
- rctx->config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001;
- rctx->config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
- rctx->config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000;
- radeon_state_pm4(rctx->config);
+ r600_init_config(rctx);
rctx->ctx = radeon_ctx(rscreen->rw);
rctx->draw = radeon_draw(rscreen->rw);
diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h
index f27ff58ed4..669aaec0b2 100644
--- a/src/gallium/drivers/r600/r600_context.h
+++ b/src/gallium/drivers/r600/r600_context.h
@@ -40,7 +40,6 @@ struct r600_vertex_elements_state
};
struct r600_pipe_shader {
- unsigned type;
struct r600_shader shader;
struct radeon_bo *bo;
struct radeon_state *state;
@@ -92,8 +91,10 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader);
struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx,
- unsigned type,
const struct tgsi_token *tokens);
int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader);
+#define R600_ERR(fmt, args...) \
+ fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
+
#endif
diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c
index e3175b627a..7241ab1c17 100644
--- a/src/gallium/drivers/r600/r600_helper.c
+++ b/src/gallium/drivers/r600/r600_helper.c
@@ -27,6 +27,7 @@
#include <errno.h>
#include <util/u_inlines.h>
#include "r600_screen.h"
+#include "r600_context.h"
#include "r600d.h"
int r600_conv_pipe_format(unsigned pformat, unsigned *format)
@@ -49,6 +50,12 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format)
case PIPE_FORMAT_R8G8B8A8_SSCALED:
*format = V_0280A0_COLOR_8_8_8_8;
return 0;
+ case PIPE_FORMAT_R32_FLOAT:
+ *format = V_0280A0_COLOR_32_FLOAT;
+ return 0;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ *format = V_0280A0_COLOR_32_32_FLOAT;
+ return 0;
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
@@ -60,8 +67,6 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format)
case PIPE_FORMAT_R64G64_FLOAT:
case PIPE_FORMAT_R64G64B64_FLOAT:
case PIPE_FORMAT_R64G64B64A64_FLOAT:
- case PIPE_FORMAT_R32_FLOAT:
- case PIPE_FORMAT_R32G32_FLOAT:
case PIPE_FORMAT_R32_UNORM:
case PIPE_FORMAT_R32G32_UNORM:
case PIPE_FORMAT_R32G32B32_UNORM:
@@ -111,7 +116,7 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format)
case PIPE_FORMAT_R32G32B32_FIXED:
case PIPE_FORMAT_R32G32B32A32_FIXED:
default:
- fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pformat);
+ R600_ERR("unsupported %d\n", pformat);
return -EINVAL;
}
}
diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h
new file mode 100644
index 0000000000..1d89c9f9f6
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_public.h
@@ -0,0 +1,9 @@
+
+#ifndef R600_PUBLIC_H
+#define R600_PUBLIC_H
+
+struct radeon;
+
+struct pipe_screen* r600_screen_create(struct radeon *rw);
+
+#endif
diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c
index 1d83383fd9..dec6fa8d27 100644
--- a/src/gallium/drivers/r600/r600_screen.c
+++ b/src/gallium/drivers/r600/r600_screen.c
@@ -31,6 +31,7 @@
#include "r600_screen.h"
#include "r600_texture.h"
#include "r600_context.h"
+#include "r600_public.h"
#include <stdio.h>
static const char* r600_get_vendor(struct pipe_screen* pscreen)
@@ -40,7 +41,13 @@ static const char* r600_get_vendor(struct pipe_screen* pscreen)
static const char* r600_get_name(struct pipe_screen* pscreen)
{
- return "R600/R700 (HD2XXX,HD3XXX,HD4XXX)";
+ struct r600_screen *screen = r600_screen(pscreen);
+ enum radeon_family family = radeon_get_family(screen->rw);
+
+ if (family >= CHIP_R600 && family < CHIP_RV770)
+ return "R600 (HD2XXX,HD3XXX)";
+ else
+ return "R700 (HD4XXX)";
}
static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
@@ -240,7 +247,7 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
FREE(rscreen);
}
-struct pipe_screen *radeon_create_screen(struct radeon *rw)
+struct pipe_screen *r600_screen_create(struct radeon *rw)
{
struct r600_screen* rscreen;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 6b29d33379..e5e6786fd0 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -19,40 +19,113 @@
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
*/
-#include <stdio.h>
-#include <errno.h>
-#include <util/u_inlines.h>
-#include <util/u_format.h>
-#include <util/u_memory.h>
-#include <tgsi/tgsi_dump.h>
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_format.h"
#include "r600_screen.h"
#include "r600_context.h"
+#include "r600_shader.h"
+#include "r600_asm.h"
+#include "r600_sq.h"
#include "r600d.h"
+#include <stdio.h>
+#include <errno.h>
+
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+
+/*
+ * Refresh the vertex fetch instructions of a vertex shader against the
+ * currently bound vertex elements, then rebuild the bytecode.
+ *
+ * For every fetch found in a VTX / VTX_TC control-flow clause, the destination
+ * swizzle is taken from the format description of the vertex element selected
+ * by vtx->buffer_id.
+ *
+ * Returns 0 for shaders that are not vertex shaders (nothing to do), -EINVAL
+ * when a fetch refers to a vertex element that is not bound or whose format
+ * is unknown, otherwise the result of r600_bc_build().
+ */
+static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
+{
+	struct r600_context *rctx = r600_context(ctx);
+	const struct util_format_description *desc;
+	enum pipe_format format;
+	struct r600_bc *bc = &shader->bc;
+	struct r600_bc_cf *cf;
+	struct r600_bc_vtx *vtx;
+
+	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
+		return 0;
+	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+		switch (cf->inst) {
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+				/* the format is read straight from the bound elements, so
+				 * reject fetches that point past what is actually bound */
+				if (vtx->buffer_id >= rctx->vertex_elements->count) {
+					R600_ERR("fetch uses resource %d, only %d provided\n",
+						 (int)vtx->buffer_id,
+						 (int)rctx->vertex_elements->count);
+					return -EINVAL;
+				}
+				format = rctx->vertex_elements->elements[vtx->buffer_id].src_format;
+				desc = util_format_description(format);
+				if (desc == NULL) {
+					R600_ERR("unknown format %d\n", format);
+					return -EINVAL;
+				}
+				vtx->dst_sel_x = desc->swizzle[0];
+				vtx->dst_sel_y = desc->swizzle[1];
+				vtx->dst_sel_z = desc->swizzle[2];
+				vtx->dst_sel_w = desc->swizzle[3];
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	return r600_bc_build(&shader->bc);
+}
+
+/*
+ * Create a pipe shader from a TGSI token stream.
+ *
+ * The tokens are dumped to stderr (debug output), translated with
+ * r600_shader_from_tgsi() and the resulting bytecode is built with
+ * r600_bc_build().
+ *
+ * Returns the new shader, or NULL when allocation, translation or bytecode
+ * building fails.
+ */
+struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx,
+						const struct tgsi_token *tokens)
+{
+	struct r600_screen *rscreen = r600_screen(ctx->screen);
+	struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader);
+	int r;
+
+	fprintf(stderr, "--------------------------------------------------------------\n");
+	tgsi_dump(tokens, 0);
+	if (rpshader == NULL)
+		return NULL;
+	rpshader->shader.family = radeon_get_family(rscreen->rw);
+	r = r600_shader_from_tgsi(tokens, &rpshader->shader);
+	if (r) {
+		R600_ERR("translation from TGSI failed !\n");
+		goto out_err;
+	}
+	r = r600_bc_build(&rpshader->shader.bc);
+	if (r) {
+		R600_ERR("building bytecode failed !\n");
+		goto out_err;
+	}
+	fprintf(stderr, "______________________________________________________________\n");
+	return rpshader;
+out_err:
+	/* memory obtained with CALLOC_STRUCT() must be released with FREE(),
+	 * not with the libc free() */
+	FREE(rpshader);
+	return NULL;
+}
static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
{
struct r600_screen *rscreen = r600_screen(ctx->screen);
struct r600_shader *rshader = &rpshader->shader;
struct radeon_state *state;
- unsigned i, tmp;
+ unsigned i, j, tmp;
rpshader->state = radeon_state_decref(rpshader->state);
state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
if (state == NULL)
return -ENOMEM;
- for (i = 0; i < rshader->noutput; i += 4) {
- tmp = rshader->output[i].sid;
- tmp |= rshader->output[i + 1].sid << 8;
- tmp |= rshader->output[i + 2].sid << 16;
- tmp |= rshader->output[i + 3].sid << 24;
- state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] = tmp;
- }
- state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1);
- state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->ngpr);
+ for (i = 0; i < 10; i++) {
+ state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
+ }
+ for (i = 0, j = 0; i < rshader->noutput; i++) {
+ if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) {
+ tmp = rshader->output[i].sid << ((j & 3) * 8);
+ state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp;
+ j++;
+ }
+ }
+ state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
+ state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
rpshader->state = state;
rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
@@ -81,7 +154,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
S_0286CC_PERSP_GRADIENT_ENA(1);
state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
- state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->ngpr);
+ state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
rpshader->state = state;
rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
@@ -100,21 +173,21 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r
/* copy new shader */
radeon_bo_decref(rscreen->rw, rpshader->bo);
rpshader->bo = NULL;
- rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->ndw * 4,
+ rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
4096, NULL);
if (rpshader->bo == NULL) {
return -ENOMEM;
}
radeon_bo_map(rscreen->rw, rpshader->bo);
- memcpy(rpshader->bo->data, rshader->bcode, rshader->ndw * 4);
+ memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
radeon_bo_unmap(rscreen->rw, rpshader->bo);
/* build state */
rshader->flat_shade = rctx->flat_shade;
- switch (rpshader->type) {
- case C_PROGRAM_TYPE_VS:
+ switch (rshader->processor_type) {
+ case TGSI_PROCESSOR_VERTEX:
r = r600_pipe_shader_vs(ctx, rpshader);
break;
- case C_PROGRAM_TYPE_FS:
+ case TGSI_PROCESSOR_FRAGMENT:
r = r600_pipe_shader_ps(ctx, rpshader);
break;
default:
@@ -124,100 +197,813 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r
return r;
}
-struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, unsigned type, const struct tgsi_token *tokens)
+int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
{
- struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader);
- struct r600_shader *rshader = &rpshader->shader;
+ struct r600_context *rctx = r600_context(ctx);
int r;
if (rpshader == NULL)
- return NULL;
- rpshader->type = type;
- c_list_init(&rshader->nodes);
- fprintf(stderr, "<<\n");
- tgsi_dump(tokens, 0);
- fprintf(stderr, "--------------------------------------------------------------\n");
- r = c_shader_from_tgsi(&rshader->cshader, type, tokens);
- if (r) {
- r600_pipe_shader_destroy(ctx, rpshader);
- fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__);
- return NULL;
+ return -EINVAL;
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, rpshader->shader.bc.nresource);
+ return -EINVAL;
}
- r = r600_shader_insert_fetch(&rshader->cshader);
- if (r) {
- r600_pipe_shader_destroy(ctx, rpshader);
- fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__);
- return NULL;
+ r = r600_shader_update(ctx, &rpshader->shader);
+ if (r)
+ return r;
+ return r600_pipe_shader(ctx, rpshader);
+}
+
+struct r600_shader_tgsi_instruction;
+
+struct r600_shader_ctx { /* state shared by the tgsi_* helpers while one TGSI shader is translated */
+ struct tgsi_shader_info info; /* filled by tgsi_scan_shader(); its file_count[] drives register allocation */
+ struct tgsi_parse_context parse; /* token iterator; the current token is read from parse.FullToken */
+ const struct tgsi_token *tokens; /* token stream being translated */
+ unsigned type; /* processor type read from the parsed header (TGSI_PROCESSOR_*) */
+ unsigned file_offset[TGSI_FILE_COUNT]; /* per TGSI register file: value added to a register index to form the selector */
+ unsigned temp_reg; /* first register past the declared temporaries; scratch destination for the op3/dp/lit helpers */
+ struct r600_shader_tgsi_instruction *inst_info; /* opcode table row of the instruction being processed */
+ struct r600_bc *bc; /* bytecode being emitted (set to &shader->bc) */
+ struct r600_shader *shader; /* shader whose input/output tables are filled from declarations */
+};
+
+struct r600_shader_tgsi_instruction { /* one row of the translation table, looked up by TGSI opcode */
+ unsigned tgsi_opcode; /* TGSI_OPCODE_* this row describes */
+ unsigned is_op3; /* NOTE(review): presumably flags three-operand encodings; not read by the code visible here */
+ unsigned r600_opcode; /* hardware instruction value the handlers copy into alu.inst / tex.inst */
+ int (*process)(struct r600_shader_ctx *ctx); /* handler emitting bytecode for the current instruction; 0 on success, negative errno on failure */
+};
+
+static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
+
+static int tgsi_is_supported(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
+ int j;
+
+ if (i->Instruction.NumDstRegs > 1) {
+ R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
+ return -EINVAL;
}
- r = c_shader_build_dominator_tree(&rshader->cshader);
- if (r) {
- r600_pipe_shader_destroy(ctx, rpshader);
- fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__);
- return NULL;
+ if (i->Instruction.Saturate) {
+ R600_ERR("staturate unsupported\n");
+ return -EINVAL;
}
- c_shader_dump(&rshader->cshader);
- r = r600_cshader_legalize(&rshader->cshader);
- if (r) {
- r600_pipe_shader_destroy(ctx, rpshader);
- fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__);
- return NULL;
+ if (i->Instruction.Predicate) {
+ R600_ERR("predicate unsupported\n");
+ return -EINVAL;
}
- r = r700_shader_translate(rshader);
- if (r) {
- r600_pipe_shader_destroy(ctx, rpshader);
- fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__);
- return NULL;
+ if (i->Instruction.Label) {
+ R600_ERR("label unsupported\n");
+ return -EINVAL;
}
-#if 1
-#if 0
- fprintf(stderr, "--------------------------------------------------------------\n");
- for (int i = 0; i < rshader->ndw; i++) {
- fprintf(stderr, "0x%08X\n", rshader->bcode[i]);
+ for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
+ if (i->Src[j].Register.Indirect ||
+ i->Src[j].Register.Dimension ||
+ i->Src[j].Register.Absolute) {
+ R600_ERR("unsupported src (indirect|dimension|absolute)\n");
+ return -EINVAL;
+ }
}
-#endif
- fprintf(stderr, ">>\n\n");
-#endif
- return rpshader;
+ for (j = 0; j < i->Instruction.NumDstRegs; j++) {
+ if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
+ R600_ERR("unsupported dst (indirect|dimension)\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
}
-void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
+static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
- struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
+ struct r600_bc_vtx vtx;
+ unsigned i;
+ int r;
- if (rpshader == NULL)
- return;
- radeon_bo_decref(rscreen->rw, rpshader->bo);
- rpshader->bo = NULL;
- r600_shader_cleanup(&rpshader->shader);
- FREE(rpshader);
+ switch (d->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ i = ctx->shader->ninput++;
+ ctx->shader->input[i].name = d->Semantic.Name;
+ ctx->shader->input[i].sid = d->Semantic.Index;
+ ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
+ if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+ /* turn input into fetch */
+ memset(&vtx, 0, sizeof(struct r600_bc_vtx));
+ vtx.inst = 0;
+ vtx.fetch_type = 0;
+ vtx.buffer_id = i;
+ /* register containing the index into the buffer */
+ vtx.src_gpr = 0;
+ vtx.src_sel_x = 0;
+ vtx.mega_fetch_count = 0x1F;
+ vtx.dst_gpr = ctx->shader->input[i].gpr;
+ vtx.dst_sel_x = 0;
+ vtx.dst_sel_y = 1;
+ vtx.dst_sel_z = 2;
+ vtx.dst_sel_w = 3;
+ r = r600_bc_add_vtx(ctx->bc, &vtx);
+ if (r)
+ return r;
+ }
+ break;
+ case TGSI_FILE_OUTPUT:
+ i = ctx->shader->noutput++;
+ ctx->shader->output[i].name = d->Semantic.Name;
+ ctx->shader->output[i].sid = d->Semantic.Index;
+ ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
+ break;
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_SAMPLER:
+ break;
+ default:
+ R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
+ return -EINVAL;
+ }
+ return 0;
}
-int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader)
+/*
+ * Translate a TGSI token stream into r600 bytecode and fill in the shader's
+ * input/output tables.
+ *
+ * The processor type is read from the token header and stored in
+ * shader->processor_type. Declarations and instructions are handled token by
+ * token; afterwards one export is emitted per declared output.
+ *
+ * Returns 0 on success or a negative errno value. Once the parse context has
+ * been initialised it is released on every exit path.
+ */
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
 {
-	struct r600_context *rctx = r600_context(ctx);
-	struct r600_shader *rshader;
-	enum pipe_format resource_format[160];
-	unsigned i, nresources = 0;
-	int r;
+	struct tgsi_full_immediate *immediate;
+	struct r600_shader_ctx ctx;
+	struct r600_bc_output output;
+	unsigned opcode;
+	int i, r = 0, pos0;
+	/* last immediate seen; zeroed so the literal added after an instruction
+	 * is never read uninitialised when no IMMEDIATE token came first */
+	u32 value[4] = {0, 0, 0, 0};
-	if (rpshader == NULL)
-		return -EINVAL;
-	rshader = &rpshader->shader;
-	switch (rpshader->type) {
-	case C_PROGRAM_TYPE_VS:
-		for (i = 0; i < rctx->vertex_elements->count; i++) {
-			resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
+	ctx.bc = &shader->bc;
+	ctx.shader = shader;
+	r = r600_bc_init(ctx.bc, shader->family);
+	if (r)
+		return r;
+	ctx.tokens = tokens;
+	tgsi_scan_shader(tokens, &ctx.info);
+	tgsi_parse_init(&ctx.parse, tokens);
+	ctx.type = ctx.parse.FullHeader.Processor.Processor;
+	shader->processor_type = ctx.type;
+
+	/* register allocations */
+	/* Values [0,127] correspond to GPR[0..127].
+	 * Values [256,511] correspond to cfile constants c[0..255].
+	 * Other special values are shown in the list below.
+	 * 248	SQ_ALU_SRC_0: special constant 0.0.
+	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
+	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
+	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
+	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
+	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
+	 * 254	SQ_ALU_SRC_PV: previous vector result.
+	 * 255	SQ_ALU_SRC_PS: previous scalar result.
+	 */
+	for (i = 0; i < TGSI_FILE_COUNT; i++) {
+		ctx.file_offset[i] = 0;
+	}
+	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+		ctx.file_offset[TGSI_FILE_INPUT] = 1;
+	}
+	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
+						ctx.info.file_count[TGSI_FILE_INPUT];
+	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
+						ctx.info.file_count[TGSI_FILE_OUTPUT];
+	ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
+	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
+	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+			ctx.info.file_count[TGSI_FILE_TEMPORARY];
+
+	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
+		tgsi_parse_token(&ctx.parse);
+		switch (ctx.parse.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+			immediate = &ctx.parse.FullToken.FullImmediate;
+			value[0] = immediate->u[0].Uint;
+			value[1] = immediate->u[1].Uint;
+			value[2] = immediate->u[2].Uint;
+			value[3] = immediate->u[3].Uint;
+			break;
+		case TGSI_TOKEN_TYPE_DECLARATION:
+			r = tgsi_declaration(&ctx);
+			if (r)
+				goto out_err;
+			break;
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+			r = tgsi_is_supported(&ctx);
+			if (r)
+				goto out_err;
+			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+			ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
+			r = ctx.inst_info->process(&ctx);
+			if (r)
+				goto out_err;
+			r = r600_bc_add_literal(ctx.bc, value);
+			if (r)
+				goto out_err;
+			break;
+		default:
+			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
+			r = -EINVAL;
+			goto out_err;
+		}
+	}
+	/* export output */
+	for (i = 0, pos0 = 0; i < shader->noutput; i++) {
+		memset(&output, 0, sizeof(struct r600_bc_output));
+		output.gpr = shader->output[i].gpr;
+		output.elem_size = 3;
+		output.swizzle_x = 0;
+		output.swizzle_y = 1;
+		output.swizzle_z = 2;
+		output.swizzle_w = 3;
+		output.barrier = 1;
+		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+		output.array_base = i - pos0;
+		output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
+		/* switch on the processor type itself; switching on the result of a
+		 * comparison sent every non-vertex processor to the fragment case
+		 * and made the default branch unreachable */
+		switch (ctx.type) {
+		case TGSI_PROCESSOR_VERTEX:
+			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
+				output.array_base = 60;
+				output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+				/* position doesn't count in array_base */
+				pos0 = 1;
+			}
+			break;
+		case TGSI_PROCESSOR_FRAGMENT:
+			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+				output.array_base = 0;
+				output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+			} else {
+				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
+				r = -EINVAL;
+				goto out_err;
+			}
+			break;
+		default:
+			R600_ERR("unsupported processor type %d\n", ctx.type);
+			r = -EINVAL;
+			goto out_err;
 		}
+		if (i == (shader->noutput - 1)) {
+			output.end_of_program = 1;
+		}
+		r = r600_bc_add_output(ctx.bc, &output);
+		if (r)
+			goto out_err;
+	}
+	tgsi_parse_free(&ctx.parse);
+	return 0;
+out_err:
+	tgsi_parse_free(&ctx.parse);
+	return r;
+}
+
+/* Handler for table rows without a translation: report the opcode, fail with -EINVAL. */
+static int tgsi_unsupported(struct r600_shader_ctx *ctx)
+{
+	const unsigned opcode = ctx->inst_info->tgsi_opcode;
+
+	R600_ERR("%d tgsi opcode unsupported\n", opcode);
+	return -EINVAL;
+}
+
+/* Handler that emits no bytecode and always reports success. */
+static int tgsi_end(struct r600_shader_ctx *ctx)
+{
+	(void)ctx;	/* nothing to read from the context */
+	return 0;
+}
+
+static int tgsi_src(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_src_register *tgsi_src,
+ unsigned swizzle,
+ struct r600_bc_alu_src *r600_src)
+{
+ r600_src->sel = tgsi_src->Register.Index;
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ r600_src->sel = 0;
+ }
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ switch (swizzle) {
+ case 0:
+ r600_src->chan = tgsi_src->Register.SwizzleX;
break;
- default:
+ case 1:
+ r600_src->chan = tgsi_src->Register.SwizzleY;
break;
- }
- /* there should be enough input */
- if (nresources < rshader->nresource)
+ case 2:
+ r600_src->chan = tgsi_src->Register.SwizzleZ;
+ break;
+ case 3:
+ r600_src->chan = tgsi_src->Register.SwizzleW;
+ break;
+ default:
return -EINVAL;
- /* FIXME compare resources */
- r = r600_shader_update(rshader, resource_format);
- if (r)
+ }
+ return 0;
+}
+
+/*
+ * Fill an ALU destination from a TGSI destination register.
+ *
+ * The selector is the register index plus the offset of its register file,
+ * the channel is the caller-supplied one and the write bit is always set.
+ * Always returns 0.
+ */
+static int tgsi_dst(struct r600_shader_ctx *ctx,
+			const struct tgsi_full_dst_register *tgsi_dst,
+			unsigned swizzle,
+			struct r600_bc_alu_dst *r600_dst)
+{
+	const unsigned file_base = ctx->file_offset[tgsi_dst->Register.File];
+
+	r600_dst->sel = file_base + tgsi_dst->Register.Index;
+	r600_dst->chan = swizzle;
+	r600_dst->write = 1;
+	return 0;
+}
+
+/*
+ * Emit a four-slot ALU group for a per-channel TGSI instruction.
+ *
+ * Channels present in the destination writemask get the table opcode with the
+ * TGSI sources and destination of that channel; the others get a NOP. For
+ * TGSI_OPCODE_SUB the second source is negated. The fourth slot closes the
+ * group. Returns 0 or the first error reported by a helper.
+ */
+static int tgsi_op2(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const unsigned writemask = inst->Dst[0].Register.WriteMask;
+	struct r600_bc_alu alu;
+	int chan, s, r;
+
+	for (chan = 0; chan < 4; chan++) {
+		memset(&alu, 0, sizeof(alu));
+		if (writemask & (1 << chan)) {
+			alu.inst = ctx->inst_info->r600_opcode;
+			for (s = 0; s < inst->Instruction.NumSrcRegs; s++) {
+				r = tgsi_src(ctx, &inst->Src[s], chan, &alu.src[s]);
+				if (r)
+					return r;
+			}
+			r = tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
+			if (r)
+				return r;
+		} else {
+			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
+		}
+		/* handle some special cases */
+		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_SUB)
+			alu.src[1].neg = 1;
+		/* the fourth slot terminates the group */
+		if (chan == 3)
+			alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+/*
+ * Emit a four-slot ALU group with the two TGSI sources swapped.
+ *
+ * For every channel in the destination writemask the table opcode is emitted
+ * with Src[0] placed in alu.src[1] and Src[1] in alu.src[0]; other channels
+ * get a NOP. The fourth slot closes the group. Returns 0 or the first error
+ * reported by a helper.
+ */
+static int tgsi_slt(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const unsigned writemask = inst->Dst[0].Register.WriteMask;
+	struct r600_bc_alu alu;
+	int chan, r;
+
+	for (chan = 0; chan < 4; chan++) {
+		memset(&alu, 0, sizeof(alu));
+		if (writemask & (1 << chan)) {
+			alu.inst = ctx->inst_info->r600_opcode;
+			/* operands are deliberately exchanged */
+			r = tgsi_src(ctx, &inst->Src[0], chan, &alu.src[1]);
+			if (r)
+				return r;
+			r = tgsi_src(ctx, &inst->Src[1], chan, &alu.src[0]);
+			if (r)
+				return r;
+			r = tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
+			if (r)
+				return r;
+		} else {
+			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
+		}
+		if (chan == 3)
+			alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+/*
+ * Emit the instruction sequence for TGSI LIT.
+ *
+ * Only channels present in the destination writemask are produced, in the
+ * order x, y, w, z:
+ *   dst.x = 1.0
+ *   dst.y = max(src.x, 0.0)
+ *   dst.w = 1.0
+ *   dst.z = EXP_IEEE(MUL_LIT(src.w, LOG_CLAMPED(src.y), src.x))
+ * The x/y/w moves share one ALU group, so each of them sets `last` only when
+ * no later channel of that group is written. The z sequence uses
+ * ctx->temp_reg.x as scratch.
+ *
+ * Returns 0 or the first error reported by a helper.
+ */
+static int tgsi_lit(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bc_alu alu;
+	int r;
+
+	if (inst->Dst[0].Register.WriteMask & (1 << 0))
+	{
+		/* dst.x, <- 1.0  */
+		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+		alu.src[0].sel = 249; /*1.0*/
+		alu.src[0].chan = 0;
+		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+		if (r)
+			return r;
+		/* last of the group when none of y, z, w follows */
+		if ((inst->Dst[0].Register.WriteMask & 0xe) == 0)
+			alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	if (inst->Dst[0].Register.WriteMask & (1 << 1))
+	{
+		/* dst.y = max(src.x, 0.0) */
+		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
+		r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]);
+		if (r)
+			return r;
+		alu.src[1].sel = 248; /*0.0*/
+		alu.src[1].chan = 0;
+		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+		if (r)
+			return r;
+		/* last of the group when neither z nor w follows; the mask must
+		 * not contain the y bit itself, otherwise this is never true */
+		if ((inst->Dst[0].Register.WriteMask & 0xc) == 0)
+			alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	if (inst->Dst[0].Register.WriteMask & (1 << 3))
+	{
+		/* dst.w, <- 1.0  */
+		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+		alu.src[0].sel = 249;
+		alu.src[0].chan = 0;
+		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
+		if (r)
+			return r;
+		/* last of the group when z does not follow */
+		if ((inst->Dst[0].Register.WriteMask & 0x4) == 0)
+			alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	if (inst->Dst[0].Register.WriteMask & (1 << 2))
+	{
+		/* dst.z = log(src.y) */
+		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
+		r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]);
+		if (r)
+			return r;
+		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
+		if (r)
+			return r;
+		alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+
+		/* remember where the log result was written */
+		int chan = alu.dst.chan;
+		int sel = alu.dst.sel;
+
+		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
+		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
+		r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]);
+		if (r)
 			return r;
-	return r600_pipe_shader(ctx, rpshader);
+		alu.src[1].sel  = sel;
+		alu.src[1].chan = chan;
+		r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]);
+		if (r)
+			return r;
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.chan = 0;
+		alu.dst.write = 1;
+		alu.is_op3 = 1;
+		alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+
+		/* dst.z = exp(tmp.x) */
+		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
+		alu.src[0].sel = ctx->temp_reg;
+		alu.src[0].chan = 0;
+		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
+		if (r)
+			return r;
+		alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+/*
+ * Emit the table opcode once for every channel in the destination writemask.
+ *
+ * Unlike the vector helpers, no NOP is emitted for unwritten channels and
+ * every instruction closes its own group (last = 1).
+ * NOTE(review): presumably because these opcodes run on the transcendental
+ * unit - confirm against the ISA documentation.
+ * Returns 0 or the first error reported by a helper.
+ */
+static int tgsi_trans(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const unsigned writemask = inst->Dst[0].Register.WriteMask;
+	struct r600_bc_alu alu;
+	int chan, s, r;
+
+	for (chan = 0; chan < 4; chan++) {
+		/* unwritten channels produce nothing at all */
+		if (!(writemask & (1 << chan)))
+			continue;
+		memset(&alu, 0, sizeof(alu));
+		alu.inst = ctx->inst_info->r600_opcode;
+		for (s = 0; s < inst->Instruction.NumSrcRegs; s++) {
+			r = tgsi_src(ctx, &inst->Src[s], chan, &alu.src[s]);
+			if (r)
+				return r;
+		}
+		r = tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
+		if (r)
+			return r;
+		alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
 }
+
+/*
+ * Copy the scratch register ctx->temp_reg into Dst[0] of the given
+ * instruction, honouring its writemask.
+ *
+ * Written channels get a MOV from the same channel of the scratch register,
+ * the others a NOP; the fourth slot closes the group. Returns 0 or the first
+ * error reported by a helper.
+ */
+static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
+{
+	const unsigned writemask = inst->Dst[0].Register.WriteMask;
+	struct r600_bc_alu alu;
+	int chan, r;
+
+	for (chan = 0; chan < 4; chan++) {
+		memset(&alu, 0, sizeof(alu));
+		if (writemask & (1 << chan)) {
+			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+			r = tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
+			if (r)
+				return r;
+			alu.src[0].sel = ctx->temp_reg;
+			alu.src[0].chan = chan;
+		} else {
+			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
+		}
+		if (chan == 3)
+			alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+/*
+ * Emit a three-operand ALU instruction.
+ *
+ * Done in two steps because op3 has no writemask: all four channels are
+ * first computed into the scratch register ctx->temp_reg, then
+ * tgsi_helper_copy() moves the written channels to the real destination.
+ * Returns 0 or the first error reported by a helper.
+ */
+static int tgsi_op3(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bc_alu alu;
+	int chan, s, r;
+
+	for (chan = 0; chan < 4; chan++) {
+		memset(&alu, 0, sizeof(alu));
+		alu.inst = ctx->inst_info->r600_opcode;
+		alu.is_op3 = 1;
+		/* every channel lands in the scratch register */
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.chan = chan;
+		alu.dst.write = 1;
+		if (chan == 3)
+			alu.last = 1;
+		for (s = 0; s < inst->Instruction.NumSrcRegs; s++) {
+			r = tgsi_src(ctx, &inst->Src[s], chan, &alu.src[s]);
+			if (r)
+				return r;
+		}
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return tgsi_helper_copy(ctx, inst);
+}
+
+/*
+ * Emit a dot-product instruction.
+ *
+ * The table opcode is emitted in all four slots, writing the scratch register
+ * ctx->temp_reg. For TGSI_OPCODE_DP2 / TGSI_OPCODE_DP3 the slots past the
+ * second / third component get both operands replaced by the special constant
+ * 0.0 (selector 248). tgsi_helper_copy() then moves the written channels to
+ * the real destination. Returns 0 or the first error reported by a helper.
+ */
+static int tgsi_dp(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bc_alu alu;
+	int chan, s, r;
+	int ncomp;
+
+	/* number of leading components that contribute to the result */
+	switch (ctx->inst_info->tgsi_opcode) {
+	case TGSI_OPCODE_DP2:
+		ncomp = 2;
+		break;
+	case TGSI_OPCODE_DP3:
+		ncomp = 3;
+		break;
+	default:
+		ncomp = 4;
+		break;
+	}
+
+	for (chan = 0; chan < 4; chan++) {
+		memset(&alu, 0, sizeof(alu));
+		alu.inst = ctx->inst_info->r600_opcode;
+		for (s = 0; s < inst->Instruction.NumSrcRegs; s++) {
+			r = tgsi_src(ctx, &inst->Src[s], chan, &alu.src[s]);
+			if (r)
+				return r;
+		}
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.chan = chan;
+		alu.dst.write = 1;
+		/* handle some special cases */
+		if (chan >= ncomp) {
+			alu.src[0].sel = alu.src[1].sel = 248;
+			alu.src[0].chan = alu.src[1].chan = 0;
+		}
+		if (chan == 3)
+			alu.last = 1;
+		r = r600_bc_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return tgsi_helper_copy(ctx, inst);
+}
+
+/*
+ * Translate a TGSI texture instruction into one r600 texture fetch.
+ *
+ * Src[0] supplies the coordinate register and Src[1] the resource; the same
+ * id is used for the sampler.  The result is written to Dst[0].  Source and
+ * destination components are passed through unswizzled (x, y, z, w).
+ *
+ * Returns the value of r600_bc_add_tex().
+ */
+static int tgsi_tex(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bc_tex tex;
+
+	memset(&tex, 0, sizeof(struct r600_bc_tex));
+	tex.inst = ctx->inst_info->r600_opcode;
+	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
+	tex.sampler_id = tex.resource_id;
+	tex.src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
+	/*
+	 * The destination GPR must be derived from the destination register's
+	 * own index; using the source index here would send the fetched texel
+	 * to the wrong register whenever the two indices differ.
+	 */
+	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+	tex.dst_sel_x = 0;
+	tex.dst_sel_y = 1;
+	tex.dst_sel_z = 2;
+	tex.dst_sel_w = 3;
+	tex.src_sel_x = 0;
+	tex.src_sel_y = 1;
+	tex.src_sel_z = 2;
+	tex.src_sel_w = 3;
+	return r600_bc_add_tex(ctx->bc, &tex);
+}
+
+static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { /* one entry per TGSI opcode: { TGSI opcode, flag (1 only on the OP3 entry), r600 opcode, translation handler }; entries follow opcode order with numeric placeholders for unassigned values -- presumably so the table can be indexed directly by opcode, confirm at the lookup site */
+ {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
+ {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
+ {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans},
+ {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
+ {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
+ {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
+ {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, /* NOTE(review): "less than" via SETGT -- presumably tgsi_slt swaps the operands; confirm */
+ {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, /* only entry using an OP3 opcode and a non-zero flag */
+ {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, /* NOTE(review): SUB emitted as ADD -- presumably tgsi_op2 negates the second operand; confirm */
+ {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
+ {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, /* NOTE(review): raw opcode 0x10 is handed to the texture path -- presumably the hardware SAMPLE instruction; confirm and consider a named constant */
+ {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
+ {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */
+ {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
+ /* gap */
+ {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* final entry, keyed on TGSI_OPCODE_LAST */
+};
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 7d30ca79d1..23b6a83b9a 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -23,241 +23,23 @@
#ifndef R600_SHADER_H
#define R600_SHADER_H
-#include "r600_compiler.h"
-#include "radeon.h"
-
-struct r600_shader_operand {
- struct c_vector *vector;
- unsigned sel;
- unsigned chan;
- unsigned neg;
- unsigned abs;
-};
-
-struct r600_shader_vfetch {
- struct r600_shader_vfetch *next;
- struct r600_shader_vfetch *prev;
- unsigned cf_addr;
- struct r600_shader_operand src[2];
- struct r600_shader_operand dst[4];
-};
-
-struct r600_shader_inst {
- unsigned is_op3;
- unsigned opcode;
- unsigned inst;
- struct r600_shader_operand src[3];
- struct r600_shader_operand dst;
- unsigned last;
-};
-
-struct r600_shader_alu {
- struct r600_shader_alu *next;
- struct r600_shader_alu *prev;
- unsigned nalu;
- unsigned nliteral;
- unsigned nconstant;
- struct r600_shader_inst alu[5];
- u32 literal[4];
-};
-
-struct r600_shader_node {
- struct r600_shader_node *next;
- struct r600_shader_node *prev;
- unsigned cf_id; /**< cf index (in dw) in byte code */
- unsigned cf_addr; /**< instructions index (in dw) in byte code */
- unsigned nslot; /**< number of slot (2 dw) needed by this node */
- unsigned nfetch;
- struct c_node *node; /**< compiler node from which this node originate */
- struct r600_shader_vfetch vfetch; /**< list of vfetch instructions */
- struct r600_shader_alu alu; /**< list of alu instructions */
-};
+#include "r600_asm.h"
struct r600_shader_io {
- unsigned name;
- unsigned gpr;
- int sid;
+ unsigned name;
+ unsigned gpr;
+ int sid;
};
struct r600_shader {
- unsigned stack_size; /**< stack size needed by this shader */
- unsigned ngpr; /**< number of GPR needed by this shader */
- unsigned nconstant; /**< number of constants used by this shader */
- unsigned nresource; /**< number of resources used by this shader */
- unsigned noutput;
- unsigned ninput;
- unsigned nvector;
- unsigned ncf; /**< total number of cf clauses */
- unsigned nslot; /**< total number of slots (2 dw) */
- unsigned flat_shade; /**< are we flat shading */
- struct r600_shader_node nodes; /**< list of node */
- struct r600_shader_io input[32];
- struct r600_shader_io output[32];
- /* TODO replace GPR by some better register allocator */
- struct c_vector **gpr;
- unsigned ndw; /**< bytes code size in dw */
- u32 *bcode; /**< bytes code */
- enum pipe_format resource_format[160]; /**< format of resource */
- struct c_shader cshader;
+ unsigned processor_type;
+ struct r600_bc bc;
+ boolean flat_shade;
+ unsigned ninput;
+ unsigned noutput;
+ struct r600_shader_io input[32];
+ struct r600_shader_io output[32];
+ enum radeon_family family;
};
-void r600_shader_cleanup(struct r600_shader *rshader);
-int r600_shader_register(struct r600_shader *rshader);
-int r600_shader_node(struct r600_shader *shader);
-void r600_shader_node_place(struct r600_shader *rshader);
-int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle,
- struct r600_shader_operand *operand);
-int r600_shader_vfetch_bytecode(struct r600_shader *rshader,
- struct r600_shader_node *rnode,
- struct r600_shader_vfetch *vfetch,
- unsigned *cid);
-int r600_shader_update(struct r600_shader *rshader,
- enum pipe_format *resource_format);
-int r600_shader_legalize(struct r600_shader *rshader);
-int r600_cshader_legalize(struct c_shader *shader);
-
-int r700_shader_translate(struct r600_shader *rshader);
-
-int c_shader_from_tgsi(struct c_shader *shader, unsigned type,
- const struct tgsi_token *tokens);
-int r600_shader_register(struct r600_shader *rshader);
-int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node);
-int r700_shader_translate(struct r600_shader *rshader);
-int r600_shader_insert_fetch(struct c_shader *shader);
-
-enum r600_instruction {
- INST_ADD = 0,
- INST_MUL = 1,
- INST_MUL_IEEE = 2,
- INST_MAX = 3,
- INST_MIN = 4,
- INST_MAX_DX10 = 5,
- INST_MIN_DX10 = 6,
- INST_SETE = 7,
- INST_SETGT = 8,
- INST_SETGE = 9,
- INST_SETNE = 10,
- INST_SETE_DX10 = 11,
- INST_SETGT_DX10 = 12,
- INST_SETGE_DX10 = 13,
- INST_SETNE_DX10 = 14,
- INST_FRACT = 15,
- INST_TRUNC = 16,
- INST_CEIL = 17,
- INST_RNDNE = 18,
- INST_FLOOR = 19,
- INST_MOVA = 20,
- INST_MOVA_FLOOR = 21,
- INST_MOVA_INT = 22,
- INST_MOV = 23,
- INST_NOP = 24,
- INST_PRED_SETGT_UINT = 25,
- INST_PRED_SETGE_UINT = 26,
- INST_PRED_SETE = 27,
- INST_PRED_SETGT = 28,
- INST_PRED_SETGE = 29,
- INST_PRED_SETNE = 30,
- INST_PRED_SET_INV = 31,
- INST_PRED_SET_POP = 32,
- INST_PRED_SET_CLR = 33,
- INST_PRED_SET_RESTORE = 34,
- INST_PRED_SETE_PUSH = 35,
- INST_PRED_SETGT_PUSH = 36,
- INST_PRED_SETGE_PUSH = 37,
- INST_PRED_SETNE_PUSH = 38,
- INST_KILLE = 39,
- INST_KILLGT = 40,
- INST_KILLGE = 41,
- INST_KILLNE = 42,
- INST_AND_INT = 43,
- INST_OR_INT = 44,
- INST_XOR_INT = 45,
- INST_NOT_INT = 46,
- INST_ADD_INT = 47,
- INST_SUB_INT = 48,
- INST_MAX_INT = 49,
- INST_MIN_INT = 50,
- INST_MAX_UINT = 51,
- INST_MIN_UINT = 52,
- INST_SETE_INT = 53,
- INST_SETGT_INT = 54,
- INST_SETGE_INT = 55,
- INST_SETNE_INT = 56,
- INST_SETGT_UINT = 57,
- INST_SETGE_UINT = 58,
- INST_KILLGT_UINT = 59,
- INST_KILLGE_UINT = 60,
- INST_PRED_SETE_INT = 61,
- INST_PRED_SETGT_INT = 62,
- INST_PRED_SETGE_INT = 63,
- INST_PRED_SETNE_INT = 64,
- INST_KILLE_INT = 65,
- INST_KILLGT_INT = 66,
- INST_KILLGE_INT = 67,
- INST_KILLNE_INT = 68,
- INST_PRED_SETE_PUSH_INT = 69,
- INST_PRED_SETGT_PUSH_INT = 70,
- INST_PRED_SETGE_PUSH_INT = 71,
- INST_PRED_SETNE_PUSH_INT = 72,
- INST_PRED_SETLT_PUSH_INT = 73,
- INST_PRED_SETLE_PUSH_INT = 74,
- INST_DOT4 = 75,
- INST_DOT4_IEEE = 76,
- INST_CUBE = 77,
- INST_MAX4 = 78,
- INST_MOVA_GPR_INT = 79,
- INST_EXP_IEEE = 80,
- INST_LOG_CLAMPED = 81,
- INST_LOG_IEEE = 82,
- INST_RECIP_CLAMPED = 83,
- INST_RECIP_FF = 84,
- INST_RECIP_IEEE = 85,
- INST_RECIPSQRT_CLAMPED = 86,
- INST_RECIPSQRT_FF = 87,
- INST_RECIPSQRT_IEEE = 88,
- INST_SQRT_IEEE = 89,
- INST_FLT_TO_INT = 90,
- INST_INT_TO_FLT = 91,
- INST_UINT_TO_FLT = 92,
- INST_SIN = 93,
- INST_COS = 94,
- INST_ASHR_INT = 95,
- INST_LSHR_INT = 96,
- INST_LSHL_INT = 97,
- INST_MULLO_INT = 98,
- INST_MULHI_INT = 99,
- INST_MULLO_UINT = 100,
- INST_MULHI_UINT = 101,
- INST_RECIP_INT = 102,
- INST_RECIP_UINT = 103,
- INST_FLT_TO_UINT = 104,
- INST_MUL_LIT = 105,
- INST_MUL_LIT_M2 = 106,
- INST_MUL_LIT_M4 = 107,
- INST_MUL_LIT_D2 = 108,
- INST_MULADD = 109,
- INST_MULADD_M2 = 110,
- INST_MULADD_M4 = 111,
- INST_MULADD_D2 = 112,
- INST_MULADD_IEEE = 113,
- INST_MULADD_IEEE_M2 = 114,
- INST_MULADD_IEEE_M4 = 115,
- INST_MULADD_IEEE_D2 = 116,
- INST_CNDE = 117,
- INST_CNDGT = 118,
- INST_CNDGE = 119,
- INST_CNDE_INT = 120,
- INST_CNDGT_INT = 121,
- INST_CNDGE_INT = 122,
- INST_COUNT
-};
-
-struct r600_instruction_info {
- enum r600_instruction instruction;
- unsigned opcode;
- unsigned is_trans;
- unsigned is_op3;
-};
-
-
#endif
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 71aa09719e..002660c654 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -87,9 +87,9 @@
#define G_SQ_CF_WORD1_BARRIER(x) (((x) >> 31) & 0x1)
#define C_SQ_CF_WORD1_BARRIER 0x7FFFFFFF
#define P_SQ_CF_ALU_WORD0
-#define S_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0)
-#define G_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF)
-#define C_SQ_CF_ALU_WORD0_ALU_ADDR 0xFFC00000
+#define S_SQ_CF_ALU_WORD0_ADDR(x) (((x) & 0x3FFFFF) << 0)
+#define G_SQ_CF_ALU_WORD0_ADDR(x) (((x) >> 0) & 0x3FFFFF)
+#define C_SQ_CF_ALU_WORD0_ADDR 0xFFC00000
#define S_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) & 0xF) << 22)
#define G_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) >> 22) & 0xF)
#define C_SQ_CF_ALU_WORD0_KCACHE_BANK0 0xFC3FFFFF
@@ -109,15 +109,15 @@
#define S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) & 0xFF) << 10)
#define G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF)
#define C_SQ_CF_ALU_WORD1_KCACHE_ADDR1 0xFFFC03FF
-#define S_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) & 0x7F) << 18)
-#define G_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) >> 18) & 0x7F)
-#define C_SQ_CF_ALU_WORD1_ALU_COUNT 0xFE03FFFF
+#define S_SQ_CF_ALU_WORD1_COUNT(x) (((x) & 0x7F) << 18)
+#define G_SQ_CF_ALU_WORD1_COUNT(x) (((x) >> 18) & 0x7F)
+#define C_SQ_CF_ALU_WORD1_COUNT 0xFE03FFFF
#define S_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) & 0x1) << 25)
#define G_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) >> 25) & 0x1)
#define C_SQ_CF_ALU_WORD1_USES_WATERFALL 0xFDFFFFFF
-#define S_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) & 0xF) << 26)
-#define G_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) >> 26) & 0xF)
-#define C_SQ_CF_ALU_WORD1_CF_ALU_INST 0xC3FFFFFF
+#define S_SQ_CF_ALU_WORD1_CF_INST(x) (((x) & 0xF) << 26)
+#define G_SQ_CF_ALU_WORD1_CF_INST(x) (((x) >> 26) & 0xF)
+#define C_SQ_CF_ALU_WORD1_CF_INST 0xC3FFFFFF
#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008
#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009
#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A
@@ -546,6 +546,8 @@
#define S_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) & 0x1) << 28)
#define G_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) >> 28) & 0x1)
#define C_SQ_TEX_WORD1_COORD_TYPE_X 0xEFFFFFFF
+#define V_SQ_TEX_WORD1_COORD_UNNORMALIZED 0x00000000
+#define V_SQ_TEX_WORD1_COORD_NORMALIZED 0x00000001
#define S_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) & 0x1) << 29)
#define G_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) >> 29) & 0x1)
#define C_SQ_TEX_WORD1_COORD_TYPE_Y 0xDFFFFFFF
@@ -580,27 +582,5 @@
#define S_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) & 0x7) << 29)
#define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7)
#define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF
-#define P_SQ_ALU_WORD1_OP2_V2
-#define S_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) & 0x1) << 0)
-#define G_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) >> 0) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_SRC0_ABS 0xFFFFFFFE
-#define S_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) & 0x1) << 1)
-#define G_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) >> 1) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_SRC1_ABS 0xFFFFFFFD
-#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2)
-#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK 0xFFFFFFFB
-#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) & 0x1) << 3)
-#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) >> 3) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED 0xFFFFFFF7
-#define S_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) & 0x1) << 4)
-#define G_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) >> 4) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_WRITE_MASK 0xFFFFFFEF
-#define S_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) & 0x3) << 5)
-#define G_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) >> 5) & 0x3)
-#define C_SQ_ALU_WORD1_OP2_V2_OMOD 0xFFFFFF9F
-#define S_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) & 0x7FF) << 7)
-#define G_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) >> 7) & 0x7FF)
-#define C_SQ_ALU_WORD1_OP2_V2_ALU_INST 0xFFFC007F
#endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 4150f88785..84a13e4ef7 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -151,7 +151,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
static void *r600_create_fs_state(struct pipe_context *ctx,
const struct pipe_shader_state *shader)
{
- return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_FS, shader->tokens);
+ return r600_pipe_shader_create(ctx, shader->tokens);
}
static void r600_bind_fs_state(struct pipe_context *ctx, void *state)
@@ -164,7 +164,7 @@ static void r600_bind_fs_state(struct pipe_context *ctx, void *state)
static void *r600_create_vs_state(struct pipe_context *ctx,
const struct pipe_shader_state *shader)
{
- return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_VS, shader->tokens);
+ return r600_pipe_shader_create(ctx, shader->tokens);
}
static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 7d94bbe510..903cfad80a 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -29,7 +29,7 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
-#include "state_tracker/drm_api.h"
+#include "state_tracker/drm_driver.h"
#include "r600_screen.h"
#include "r600_texture.h"
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index d2c7248ff2..44834984c6 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -81,6 +81,81 @@
#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
/* Registers */
+#define R_008C00_SQ_CONFIG 0x00008C00 /* R_ = register offset; for each field below S_ masks a value and shifts it into place, G_ extracts the field from a register value, C_ is the AND-mask that clears the field (the C_ macros take an argument but ignore it) */
+#define S_008C00_VC_ENABLE(x) (((x) & 0x1) << 0)
+#define G_008C00_VC_ENABLE(x) (((x) >> 0) & 0x1)
+#define C_008C00_VC_ENABLE(x) 0xFFFFFFFE
+#define S_008C00_EXPORT_SRC_C(x) (((x) & 0x1) << 1)
+#define G_008C00_EXPORT_SRC_C(x) (((x) >> 1) & 0x1)
+#define C_008C00_EXPORT_SRC_C(x) 0xFFFFFFFD
+#define S_008C00_DX9_CONSTS(x) (((x) & 0x1) << 2)
+#define G_008C00_DX9_CONSTS(x) (((x) >> 2) & 0x1)
+#define C_008C00_DX9_CONSTS(x) 0xFFFFFFFB
+#define S_008C00_ALU_INST_PREFER_VECTOR(x) (((x) & 0x1) << 3)
+#define G_008C00_ALU_INST_PREFER_VECTOR(x) (((x) >> 3) & 0x1)
+#define C_008C00_ALU_INST_PREFER_VECTOR(x) 0xFFFFFFF7
+#define S_008C00_DX10_CLAMP(x) (((x) & 0x1) << 4)
+#define G_008C00_DX10_CLAMP(x) (((x) >> 4) & 0x1)
+#define C_008C00_DX10_CLAMP(x) 0xFFFFFFEF
+#define S_008C00_CLAUSE_SEQ_PRIO(x) (((x) & 0x3) << 8)
+#define G_008C00_CLAUSE_SEQ_PRIO(x) (((x) >> 8) & 0x3)
+#define C_008C00_CLAUSE_SEQ_PRIO(x) 0xFFFFFCFF
+#define S_008C00_PS_PRIO(x) (((x) & 0x3) << 24)
+#define G_008C00_PS_PRIO(x) (((x) >> 24) & 0x3)
+#define C_008C00_PS_PRIO(x) 0xFCFFFFFF
+#define S_008C00_VS_PRIO(x) (((x) & 0x3) << 26)
+#define G_008C00_VS_PRIO(x) (((x) >> 26) & 0x3)
+#define C_008C00_VS_PRIO(x) 0xF3FFFFFF
+#define S_008C00_GS_PRIO(x) (((x) & 0x3) << 28)
+#define G_008C00_GS_PRIO(x) (((x) >> 28) & 0x3)
+#define C_008C00_GS_PRIO(x) 0xCFFFFFFF
+#define S_008C00_ES_PRIO(x) (((x) & 0x3) << 30)
+#define G_008C00_ES_PRIO(x) (((x) >> 30) & 0x3)
+#define C_008C00_ES_PRIO(x) 0x3FFFFFFF
+#define R_008C04_SQ_GPR_RESOURCE_MGMT_1 0x00008C04 /* 8-bit PS and VS GPR counts, 4-bit clause-temp GPR count */
+#define S_008C04_NUM_PS_GPRS(x) (((x) & 0xFF) << 0)
+#define G_008C04_NUM_PS_GPRS(x) (((x) >> 0) & 0xFF)
+#define C_008C04_NUM_PS_GPRS(x) 0xFFFFFF00
+#define S_008C04_NUM_VS_GPRS(x) (((x) & 0xFF) << 16)
+#define G_008C04_NUM_VS_GPRS(x) (((x) >> 16) & 0xFF)
+#define C_008C04_NUM_VS_GPRS(x) 0xFF00FFFF
+#define S_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) & 0xF) << 28)
+#define G_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) >> 28) & 0xF)
+#define C_008C04_NUM_CLAUSE_TEMP_GPRS(x) 0x0FFFFFFF
+#define R_008C08_SQ_GPR_RESOURCE_MGMT_2 0x00008C08 /* 8-bit GS and ES GPR counts */
+#define S_008C08_NUM_GS_GPRS(x) (((x) & 0xFF) << 0)
+#define G_008C08_NUM_GS_GPRS(x) (((x) >> 0) & 0xFF)
+#define C_008C08_NUM_GS_GPRS(x) 0xFFFFFF00
+#define S_008C08_NUM_ES_GPRS(x) (((x) & 0xFF) << 16)
+#define G_008C08_NUM_ES_GPRS(x) (((x) >> 16) & 0xFF)
+#define C_008C08_NUM_ES_GPRS(x) 0xFF00FFFF
+#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x00008C0C /* four 8-bit thread counts: PS, VS, GS, ES */
+#define S_008C0C_NUM_PS_THREADS(x) (((x) & 0xFF) << 0)
+#define G_008C0C_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF)
+#define C_008C0C_NUM_PS_THREADS(x) 0xFFFFFF00
+#define S_008C0C_NUM_VS_THREADS(x) (((x) & 0xFF) << 8)
+#define G_008C0C_NUM_VS_THREADS(x) (((x) >> 8) & 0xFF)
+#define C_008C0C_NUM_VS_THREADS(x) 0xFFFF00FF
+#define S_008C0C_NUM_GS_THREADS(x) (((x) & 0xFF) << 16)
+#define G_008C0C_NUM_GS_THREADS(x) (((x) >> 16) & 0xFF)
+#define C_008C0C_NUM_GS_THREADS(x) 0xFF00FFFF
+#define S_008C0C_NUM_ES_THREADS(x) (((x) & 0xFF) << 24)
+#define G_008C0C_NUM_ES_THREADS(x) (((x) >> 24) & 0xFF)
+#define C_008C0C_NUM_ES_THREADS(x) 0x00FFFFFF
+#define R_008C10_SQ_STACK_RESOURCE_MGMT_1 0x00008C10 /* 12-bit PS and VS stack entry counts */
+#define S_008C10_NUM_PS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0)
+#define G_008C10_NUM_PS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF)
+#define C_008C10_NUM_PS_STACK_ENTRIES(x) 0xFFFFF000
+#define S_008C10_NUM_VS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16)
+#define G_008C10_NUM_VS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF)
+#define C_008C10_NUM_VS_STACK_ENTRIES(x) 0xF000FFFF
+#define R_008C14_SQ_STACK_RESOURCE_MGMT_2 0x00008C14 /* 12-bit GS and ES stack entry counts */
+#define S_008C14_NUM_GS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0)
+#define G_008C14_NUM_GS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF)
+#define C_008C14_NUM_GS_STACK_ENTRIES(x) 0xFFFFF000
+#define S_008C14_NUM_ES_STACK_ENTRIES(x) (((x) & 0xFFF) << 16)
+#define G_008C14_NUM_ES_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF)
+#define C_008C14_NUM_ES_STACK_ENTRIES(x) 0xF000FFFF
#define R_0280A0_CB_COLOR0_INFO 0x0280A0
#define S_0280A0_ENDIAN(x) (((x) & 0x3) << 0)
#define G_0280A0_ENDIAN(x) (((x) >> 0) & 0x3)
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
new file mode 100644
index 0000000000..3532ba5b0c
--- /dev/null
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "r600_asm.h"
+#include "r600_context.h"
+#include "util/u_memory.h"
+#include "r700_sq.h"
+#include <stdio.h>
+#include <errno.h>
+
+/*
+ * Encode one ALU instruction into bc->bytecode, starting at dword index id.
+ *
+ * Two instruction dwords are always written.  When alu->last is set they are
+ * followed by alu->nliteral literal dwords taken from alu->value[].
+ *
+ * bc:  bytecode container whose bytecode[] array receives the encoding
+ * alu: instruction to encode; alu->is_op3 selects the three-operand form
+ * id:  index of the first dword to write
+ *
+ * Always returns 0.
+ */
+int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+{
+	unsigned i;
+
+	/*
+	 * WORD0 carries the first two sources for both the two- and the
+	 * three-operand form, so it is built once.  The negate bits have to be
+	 * emitted for three-operand instructions as well, otherwise negation of
+	 * src0/src1 is silently lost (e.g. MULADD with a negated operand).
+	 */
+	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+				S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+				S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
+				S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+				S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+				S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
+				S_SQ_ALU_WORD0_LAST(alu->last);
+
+	/* don't replace gpr by pv or ps for destination register */
+	if (alu->is_op3) {
+		/* three-operand form: third source and its negate bit live in WORD1 */
+		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
+					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
+					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
+					S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) |
+					S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+	} else {
+		/* two-operand form: WORD1 holds the abs modifiers and the write mask */
+		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
+					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
+					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
+					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+					S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
+					S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+	}
+
+	/* literal constants are appended only after the last slot of a group */
+	if (alu->last) {
+		for (i = 0; i < alu->nliteral; i++) {
+			bc->bytecode[id++] = alu->value[i];
+		}
+	}
+	return 0;
+}
diff --git a/src/gallium/drivers/r600/r700_sq.h b/src/gallium/drivers/r600/r700_sq.h
index 8266af6d1f..9a117aeb1d 100644
--- a/src/gallium/drivers/r600/r700_sq.h
+++ b/src/gallium/drivers/r600/r700_sq.h
@@ -583,27 +583,5 @@
#define S_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) & 0x7) << 29)
#define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7)
#define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF
-#define P_SQ_ALU_WORD1_OP2_V2
-#define S_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) & 0x1) << 0)
-#define G_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) >> 0) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_SRC0_ABS 0xFFFFFFFE
-#define S_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) & 0x1) << 1)
-#define G_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) >> 1) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_SRC1_ABS 0xFFFFFFFD
-#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2)
-#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK 0xFFFFFFFB
-#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) & 0x1) << 3)
-#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) >> 3) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED 0xFFFFFFF7
-#define S_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) & 0x1) << 4)
-#define G_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) >> 4) & 0x1)
-#define C_SQ_ALU_WORD1_OP2_V2_WRITE_MASK 0xFFFFFFEF
-#define S_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) & 0x3) << 5)
-#define G_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) >> 5) & 0x3)
-#define C_SQ_ALU_WORD1_OP2_V2_OMOD 0xFFFFFF9F
-#define S_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) & 0x7FF) << 7)
-#define G_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) >> 7) & 0x7FF)
-#define C_SQ_ALU_WORD1_OP2_V2_ALU_INST 0xFFFC007F
#endif
diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h
index ec94b112d6..3a8405f9b4 100644
--- a/src/gallium/drivers/r600/radeon.h
+++ b/src/gallium/drivers/r600/radeon.h
@@ -28,8 +28,6 @@ typedef uint8_t u8;
struct radeon;
-struct pipe_screen *radeon_create_screen(struct radeon *rw);
-
enum radeon_family {
CHIP_UNKNOWN,
CHIP_R100,
@@ -79,6 +77,8 @@ enum radeon_family {
CHIP_LAST,
};
+enum radeon_family radeon_get_family(struct radeon *rw);
+
/*
* radeon object functions
*/
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 00b167e256..e0dd5cf8c2 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -97,15 +97,7 @@ rbug_draw_block_locked(struct rbug_context *rb_pipe, int flag)
/* wait for rbug to clear the blocked flag */
while (rb_pipe->draw_blocked & flag) {
rb_pipe->draw_blocked |= flag;
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_wait(rb_pipe->draw_cond, rb_pipe->draw_mutex);
-#else
- pipe_mutex_unlock(rb_pipe->draw_mutex);
-#ifdef PIPE_SUBSYSTEM_WINDOWS_USER
- Sleep(1);
-#endif
- pipe_mutex_lock(rb_pipe->draw_mutex);
-#endif
}
}
diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c
index f1aab3869b..9dc663b079 100644
--- a/src/gallium/drivers/rbug/rbug_core.c
+++ b/src/gallium/drivers/rbug/rbug_core.c
@@ -407,9 +407,7 @@ rbug_context_draw_step(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui
}
pipe_mutex_unlock(rb_context->draw_mutex);
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_broadcast(rb_context->draw_cond);
-#endif
pipe_mutex_unlock(rb_screen->list_mutex);
@@ -442,9 +440,7 @@ rbug_context_draw_unblock(struct rbug_rbug *tr_rbug, struct rbug_header *header,
rb_context->draw_blocker &= ~unblock->unblock;
pipe_mutex_unlock(rb_context->draw_mutex);
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_broadcast(rb_context->draw_cond);
-#endif
pipe_mutex_unlock(rb_screen->list_mutex);
@@ -476,9 +472,7 @@ rbug_context_draw_rule(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui
rb_context->draw_blocker |= RBUG_BLOCK_RULE;
pipe_mutex_unlock(rb_context->draw_mutex);
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_broadcast(rb_context->draw_cond);
-#endif
pipe_mutex_unlock(rb_screen->list_mutex);
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 79daa68f3b..9e727c9381 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -47,43 +47,6 @@
-
-/**
- * Draw vertex arrays, with optional indexing.
- * Basically, map the vertex buffers (and drawing surfaces), then hand off
- * the drawing to the 'draw' module.
- */
-static void
-softpipe_draw_range_elements_instanced(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count,
- unsigned startInstance,
- unsigned instanceCount);
-
-
-void
-softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count)
-{
- softpipe_draw_range_elements_instanced(pipe,
- NULL,
- 0,
- 0,
- 0,
- 0xffffffff,
- mode,
- start,
- count,
- 0,
- 1);
-}
-
void
softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode)
{
@@ -136,6 +99,93 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode)
}
+/**
+ * This function handles drawing indexed and non-indexed prims,
+ * instanced and non-instanced drawing, with or without min/max element
+ * indexes.
+ * All the other drawing functions are expressed in terms of this
+ * function.
+ *
+ * For non-indexed prims, indexBuffer should be NULL.
+ * For non-instanced drawing, instanceCount should be 1.
+ * When the min/max element indexes aren't known, minIndex should be 0
+ * and maxIndex should be ~0.
+ *
+ * indexSize, indexBias, minIndex and maxIndex are only used when
+ * indexBuffer is non-NULL; otherwise the element range handed to the draw
+ * module is [start, start + count - 1].
+ *
+ * Nothing is drawn when softpipe_check_render_cond() returns false.
+ * Otherwise the draw module is flushed before returning and
+ * sp->dirty_render_cache is set.
+ */
+static void
+softpipe_draw_range_elements_instanced(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ struct softpipe_context *sp = softpipe_context(pipe);
+ struct draw_context *draw = sp->draw;
+ unsigned i;
+
+ /* bail out before touching any state when the render condition fails */
+ if (!softpipe_check_render_cond(sp))
+ return;
+
+ sp->reduced_api_prim = u_reduced_prim(mode);
+
+ /* softpipe_update_derived() is only called when sp->dirty is non-zero */
+ if (sp->dirty) {
+ softpipe_update_derived(sp);
+ }
+
+ softpipe_map_transfers(sp);
+
+ /* Map vertex buffers */
+ for (i = 0; i < sp->num_vertex_buffers; i++) {
+ /* the mapping is simply the resource's ->data pointer */
+ void *buf = softpipe_resource(sp->vertex_buffer[i].buffer)->data;
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes = softpipe_resource(indexBuffer)->data;
+ draw_set_mapped_element_buffer_range(draw,
+ indexSize,
+ indexBias,
+ minIndex,
+ maxIndex,
+ mapped_indexes);
+ } else {
+ /* no index/element buffer */
+ /*
+ * NOTE(review): start + count - 1 is unsigned arithmetic and wraps
+ * when count is 0 - confirm callers never pass count == 0 or that
+ * the draw module tolerates the wrapped range.
+ */
+ draw_set_mapped_element_buffer_range(draw,
+ 0, 0,
+ start,
+ start + count - 1,
+ NULL);
+ }
+
+ /* draw! */
+ draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount);
+
+ /* unmap vertex/index buffers - will cause draw module to flush */
+ for (i = 0; i < sp->num_vertex_buffers; i++) {
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ /* the element buffer mapping is only reset when one was set from indexBuffer */
+ if (indexBuffer) {
+ draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+ }
+
+ /*
+ * TODO: Flush only when a user vertex/index buffer is present
+ * (or even better, modify draw module to do this
+ * internally when this condition is seen?)
+ */
+ draw_flush(draw);
+
+ /* Note: leave drawing surfaces mapped */
+ sp->dirty_render_cache = TRUE;
+}
+
+
void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_resource *indexBuffer,
@@ -223,76 +273,20 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe,
instanceCount);
}
-static void
-softpipe_draw_range_elements_instanced(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count,
- unsigned startInstance,
- unsigned instanceCount)
+/**
+ * Draw non-indexed, non-instanced primitives.
+ *
+ * Forwards to softpipe_draw_range_elements_instanced() with no index
+ * buffer, index size and bias 0, index range 0..0xffffffff,
+ * startInstance 0 and instanceCount 1.
+ */
+void
+softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
{
- struct softpipe_context *sp = softpipe_context(pipe);
- struct draw_context *draw = sp->draw;
- unsigned i;
-
- if (!softpipe_check_render_cond(sp))
- return;
-
- sp->reduced_api_prim = u_reduced_prim(mode);
-
- if (sp->dirty) {
- softpipe_update_derived(sp);
- }
-
- softpipe_map_transfers(sp);
-
- /* Map vertex buffers */
- for (i = 0; i < sp->num_vertex_buffers; i++) {
- void *buf = softpipe_resource(sp->vertex_buffer[i].buffer)->data;
- draw_set_mapped_vertex_buffer(draw, i, buf);
- }
-
- /* Map index buffer, if present */
- if (indexBuffer) {
- void *mapped_indexes = softpipe_resource(indexBuffer)->data;
- draw_set_mapped_element_buffer_range(draw,
- indexSize,
- indexBias,
- minIndex,
- maxIndex,
- mapped_indexes);
- } else {
- /* no index/element buffer */
- draw_set_mapped_element_buffer_range(draw,
- 0, 0,
- start,
- start + count - 1,
- NULL);
- }
-
- /* draw! */
- draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount);
-
- /* unmap vertex/index buffers - will cause draw module to flush */
- for (i = 0; i < sp->num_vertex_buffers; i++) {
- draw_set_mapped_vertex_buffer(draw, i, NULL);
- }
- if (indexBuffer) {
- draw_set_mapped_element_buffer(draw, 0, 0, NULL);
- }
-
- /*
- * TODO: Flush only when a user vertex/index buffer is present
- * (or even better, modify draw module to do this
- * internally when this condition is seen?)
- */
- draw_flush(draw);
-
- /* Note: leave drawing surfaces mapped */
- sp->dirty_render_cache = TRUE;
+ softpipe_draw_range_elements_instanced(pipe,
+ NULL,
+ 0,
+ 0,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ 0,
+ 1);
}
+
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 00187febf0..6af1b2d061 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -208,7 +208,7 @@ logicop_quad(struct quad_stage *qs,
res4[j] = ~0;
break;
default:
- assert(0);
+ assert(0 && "invalid logicop mode");
}
for (j = 0; j < 4; j++) {
@@ -221,11 +221,18 @@ logicop_quad(struct quad_stage *qs,
+/**
+ * Do blending for a 2x2 quad for one color buffer.
+ * \param quadColor the incoming quad colors
+ * \param dest the destination/framebuffer quad colors
+ * \param blend_index which set of blending terms to use
+ * \param has_dst_alpha does the dest color buffer have an alpha channel?
+ */
static void
blend_quad(struct quad_stage *qs,
float (*quadColor)[4],
float (*dest)[4],
- unsigned cbuf,
+ unsigned blend_index,
boolean has_dst_alpha)
{
static const float zero[4] = { 0, 0, 0, 0 };
@@ -236,7 +243,7 @@ blend_quad(struct quad_stage *qs,
/*
* Compute src/first term RGB
*/
- switch (softpipe->blend->rt[cbuf].rgb_src_factor) {
+ switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
VEC4_COPY(source[0], quadColor[0]); /* R */
VEC4_COPY(source[1], quadColor[1]); /* G */
@@ -395,13 +402,13 @@ blend_quad(struct quad_stage *qs,
assert(0); /* to do */
break;
default:
- assert(0);
+ assert(0 && "invalid rgb src factor");
}
/*
* Compute src/first term A
*/
- switch (softpipe->blend->rt[cbuf].alpha_src_factor) {
+ switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
case PIPE_BLENDFACTOR_ONE:
VEC4_COPY(source[3], quadColor[3]); /* A */
break;
@@ -469,14 +476,14 @@ blend_quad(struct quad_stage *qs,
}
break;
default:
- assert(0);
+ assert(0 && "invalid alpha src factor");
}
/*
* Compute dest/second term RGB
*/
- switch (softpipe->blend->rt[cbuf].rgb_dst_factor) {
+ switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
/* dest = dest * 1 NO-OP, leave dest as-is */
break;
@@ -625,13 +632,13 @@ blend_quad(struct quad_stage *qs,
assert(0);
break;
default:
- assert(0);
+ assert(0 && "invalid rgb dst factor");
}
/*
* Compute dest/second term A
*/
- switch (softpipe->blend->rt[cbuf].alpha_dst_factor) {
+ switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
/* dest = dest * 1 NO-OP, leave dest as-is */
break;
@@ -696,13 +703,13 @@ blend_quad(struct quad_stage *qs,
}
break;
default:
- assert(0);
+ assert(0 && "invalid alpha dst factor");
}
/*
* Combine RGB terms
*/
- switch (softpipe->blend->rt[cbuf].rgb_func) {
+ switch (softpipe->blend->rt[blend_index].rgb_func) {
case PIPE_BLEND_ADD:
VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
@@ -729,13 +736,13 @@ blend_quad(struct quad_stage *qs,
VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
break;
default:
- assert(0);
+ assert(0 && "invalid rgb blend func");
}
/*
* Combine A terms
*/
- switch (softpipe->blend->rt[cbuf].alpha_func) {
+ switch (softpipe->blend->rt[blend_index].alpha_func) {
case PIPE_BLEND_ADD:
VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
break;
@@ -752,7 +759,7 @@ blend_quad(struct quad_stage *qs,
VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
break;
default:
- assert(0);
+ assert(0 && "invalid alpha blend func");
}
}
@@ -822,7 +829,7 @@ blend_fallback(struct quad_stage *qs,
logicop_quad( qs, quadColor, dest );
}
else if (blend->rt[blend_buf].blend_enable) {
- blend_quad( qs, quadColor, dest, cbuf, has_dst_alpha );
+ blend_quad( qs, quadColor, dest, blend_buf, has_dst_alpha );
}
if (blend->rt[blend_buf].colormask != 0xf)
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 72117c233e..5590d40892 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -82,7 +82,7 @@ get_depth_stencil_values( struct depth_data *data,
data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
}
- break;
+ break;
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
@@ -92,6 +92,14 @@ get_depth_stencil_values( struct depth_data *data,
data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
}
break;
+ case PIPE_FORMAT_S8_USCALED:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = 0;
+ data->stencilVals[j] = tile->data.stencil8[y][x];
+ }
+ break;
default:
assert(0);
}
@@ -227,6 +235,14 @@ write_depth_stencil_values( struct depth_data *data,
tile->data.depth32[y][x] = data->bzzzz[j] << 8;
}
break;
+ case PIPE_FORMAT_S8_USCALED:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.stencil8[y][x] = data->stencilVals[j];
+ }
+ break;
+
default:
assert(0);
}
@@ -661,20 +677,6 @@ static unsigned mask_count[16] =
-/** helper to get number of Z buffer bits */
-static unsigned
-get_depth_bits(struct quad_stage *qs)
-{
- struct pipe_surface *zsurf = qs->softpipe->framebuffer.zsbuf;
- if (zsurf)
- return util_format_get_component_bits(zsurf->format,
- UTIL_FORMAT_COLORSPACE_ZS, 0);
- else
- return 0;
-}
-
-
-
/**
* General depth/stencil test function. Used when there's no fast-path.
*/
@@ -693,9 +695,8 @@ depth_test_quads_fallback(struct quad_stage *qs,
nr = alpha_test_quads(qs, quads, nr);
}
- if (get_depth_bits(qs) > 0 &&
- (qs->softpipe->depth_stencil->depth.enabled ||
- qs->softpipe->depth_stencil->stencil[0].enabled)) {
+ if (qs->softpipe->depth_stencil->depth.enabled ||
+ qs->softpipe->depth_stencil->stencil[0].enabled) {
data.ps = qs->softpipe->framebuffer.zsbuf;
data.format = data.ps->format;
@@ -794,8 +795,7 @@ choose_depth_test(struct quad_stage *qs,
boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
- boolean depth = (get_depth_bits(qs) > 0 &&
- qs->softpipe->depth_stencil->depth.enabled);
+ boolean depth = qs->softpipe->depth_stencil->depth.enabled;
unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 907e94b59b..d240bcbf3b 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -109,7 +109,7 @@ shade_quads(struct quad_stage *qs,
{
struct softpipe_context *softpipe = qs->softpipe;
struct tgsi_exec_machine *machine = softpipe->fs_machine;
- unsigned i, pass = 0;
+ unsigned i, nr_quads = 0;
for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
machine->Consts[i] = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT][i];
@@ -123,11 +123,11 @@ shade_quads(struct quad_stage *qs,
if (/*do_coverage*/ 0)
coverage_quad( qs, quads[i] );
- quads[pass++] = quads[i];
+ quads[nr_quads++] = quads[i];
}
- if (pass)
- qs->next->run(qs->next, quads, pass);
+ if (nr_quads)
+ qs->next->run(qs->next, quads, nr_quads);
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index fc57d3eb61..93af6ee5b0 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -149,6 +149,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 0;
+
+ case PIPE_CAP_GEOMETRY_SHADER4:
+ return 1;
default:
return 0;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index ff83c66d8b..cf7ab81405 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -71,7 +71,7 @@ lerp(float a, float v0, float v1)
/**
- * Do 2D/biliner interpolation of float values.
+ * Do 2D/bilinear interpolation of float values.
* v00, v10, v01 and v11 are typically four texture samples in a square/box.
* a and b are the horizontal and vertical interpolants.
* It's important that this function is inlined when compiled with
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index b3e1c49406..eb74f14a7b 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -63,19 +63,21 @@ sp_create_tex_tile_cache( struct pipe_context *pipe )
+/**
+ * Destroy a texture tile cache.
+ *
+ * Destroys tc->transfer and tc->tex_trans through tc->pipe when they are
+ * set, then frees the cache structure itself.
+ * A NULL \p tc is accepted and ignored.
+ */
void
sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc)
{
- uint pos;
+ if (tc) {
+ uint pos;
- for (pos = 0; pos < NUM_ENTRIES; pos++) {
- /*assert(tc->entries[pos].x < 0);*/
- }
- if (tc->transfer) {
- tc->pipe->transfer_destroy(tc->pipe, tc->transfer);
- }
- if (tc->tex_trans) {
- tc->pipe->transfer_destroy(tc->pipe, tc->tex_trans);
- }
+ /* the only statement in this loop is a commented-out assert */
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ /*assert(tc->entries[pos].x < 0);*/
+ }
+ if (tc->transfer) {
+ tc->pipe->transfer_destroy(tc->pipe, tc->transfer);
+ }
+ if (tc->tex_trans) {
+ tc->pipe->transfer_destroy(tc->pipe, tc->tex_trans);
+ }
- FREE( tc );
+ FREE( tc );
+ }
}
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index f4db6f6ef0..bf33fd9417 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -115,16 +115,18 @@ sp_create_tile_cache( struct pipe_context *pipe )
+/**
+ * Destroy a tile cache.
+ *
+ * Destroys tc->transfer through tc->pipe when it is set, then frees the
+ * cache structure itself.
+ * A NULL \p tc is accepted and ignored.
+ */
void
sp_destroy_tile_cache(struct softpipe_tile_cache *tc)
{
- uint pos;
+ if (tc) {
+ uint pos;
- for (pos = 0; pos < NUM_ENTRIES; pos++) {
- /*assert(tc->entries[pos].x < 0);*/
- }
- if (tc->transfer) {
- tc->pipe->transfer_destroy(tc->pipe, tc->transfer);
- }
+ /* the only statement in this loop is a commented-out assert */
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ /*assert(tc->entries[pos].x < 0);*/
+ }
+ if (tc->transfer) {
+ tc->pipe->transfer_destroy(tc->pipe, tc->transfer);
+ }
- FREE( tc );
+ FREE( tc );
+ }
}
@@ -284,7 +286,11 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc)
assert(pt->resource);
/* clear the scratch tile to the clear value */
- clear_tile(&tc->tile, pt->resource->format, tc->clear_val);
+ if (tc->depth_stencil) {
+ clear_tile(&tc->tile, pt->resource->format, tc->clear_val);
+ } else {
+ clear_tile_rgba(&tc->tile, pt->resource->format, tc->clear_color);
+ }
/* push the tile to all positions marked as clear */
for (y = 0; y < h; y += TILE_SIZE) {
@@ -292,11 +298,18 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc)
union tile_address addr = tile_address(x, y);
if (is_clear_flag_set(tc->clear_flags, addr)) {
- pipe_put_tile_raw(tc->pipe,
- pt,
- x, y, TILE_SIZE, TILE_SIZE,
- tc->tile.data.color32, 0/*STRIDE*/);
-
+ /* write the scratch tile to the surface */
+ if (tc->depth_stencil) {
+ pipe_put_tile_raw(tc->pipe,
+ pt,
+ x, y, TILE_SIZE, TILE_SIZE,
+ tc->tile.data.any, 0/*STRIDE*/);
+ }
+ else {
+ pipe_put_tile_rgba(tc->pipe, pt,
+ x, y, TILE_SIZE, TILE_SIZE,
+ (float *) tc->tile.data.color);
+ }
numCleared++;
}
}
diff --git a/src/gallium/drivers/svga/svga_public.h b/src/gallium/drivers/svga/svga_public.h
new file mode 100644
index 0000000000..ded2e2482a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_public.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2010 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * VMware SVGA public interface. Used by targets to create a stack.
+ *
+ * @author Jakob Bornecrantz Fonseca <jakob@vmware.com>
+ */
+
+#ifndef SVGA_PUBLIC_H_
+#define SVGA_PUBLIC_H_
+
+/* Forward declarations: this header only refers to both types by pointer. */
+struct pipe_screen;
+struct svga_winsys_screen;
+
+/**
+ * Entry point used by targets to obtain a pipe_screen from an SVGA winsys
+ * screen.
+ *
+ * \param sws  winsys screen handed to the driver
+ * \return pointer to a pipe_screen.
+ *         NOTE(review): what is returned on failure (presumably NULL) is not
+ *         visible from this header - confirm in svga_screen.c.
+ */
+struct pipe_screen *
+svga_screen_create(struct svga_winsys_screen *sws);
+
+#endif /* SVGA_PUBLIC_H_ */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 54d9faeb72..077ff9a2cf 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -29,6 +29,7 @@
#include "util/u_math.h"
#include "svga_winsys.h"
+#include "svga_public.h"
#include "svga_context.h"
#include "svga_screen.h"
#include "svga_resource_texture.h"
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index a2dcc84f7d..5e4bdeff2e 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -288,9 +288,6 @@ struct svga_winsys_screen
};
-struct pipe_screen *
-svga_screen_create(struct svga_winsys_screen *sws);
-
struct svga_winsys_screen *
svga_winsys_screen(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/trace/Makefile b/src/gallium/drivers/trace/Makefile
index 1b0c087a2a..99e5fb81c2 100644
--- a/src/gallium/drivers/trace/Makefile
+++ b/src/gallium/drivers/trace/Makefile
@@ -8,7 +8,6 @@ C_SOURCES = \
tr_dump.c \
tr_dump_state.c \
tr_screen.c \
- tr_drm.c \
tr_texture.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript
index 0dc43a9ec4..06b0c4863a 100644
--- a/src/gallium/drivers/trace/SConscript
+++ b/src/gallium/drivers/trace/SConscript
@@ -6,7 +6,6 @@ trace = env.ConvenienceLibrary(
target = 'trace',
source = [
'tr_context.c',
- 'tr_drm.c',
'tr_dump.c',
'tr_dump_state.c',
'tr_screen.c',
diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c
deleted file mode 100644
index e685033212..0000000000
--- a/src/gallium/drivers/trace/tr_drm.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "state_tracker/drm_api.h"
-
-#include "util/u_memory.h"
-#include "rbug/rbug_public.h"
-#include "tr_drm.h"
-#include "tr_screen.h"
-#include "tr_public.h"
-
-struct trace_drm_api
-{
- struct drm_api base;
-
- struct drm_api *api;
-};
-
-static INLINE struct trace_drm_api *
-trace_drm_api(struct drm_api *_api)
-{
- return (struct trace_drm_api *)_api;
-}
-
-static struct pipe_screen *
-trace_drm_create_screen(struct drm_api *_api, int fd)
-{
- struct trace_drm_api *tr_api = trace_drm_api(_api);
- struct drm_api *api = tr_api->api;
- struct pipe_screen *screen;
-
- /* TODO trace call */
-
- screen = api->create_screen(api, fd);
-
- return trace_screen_create(rbug_screen_create(screen));
-}
-
-static void
-trace_drm_destroy(struct drm_api *_api)
-{
- struct trace_drm_api *tr_api = trace_drm_api(_api);
- struct drm_api *api = tr_api->api;
-
- if (api->destroy)
- api->destroy(api);
-
- FREE(tr_api);
-}
-
-struct drm_api *
-trace_drm_create(struct drm_api *api)
-{
- struct trace_drm_api *tr_api;
-
- if (!api)
- goto error;
-
- if (!trace_enabled() && !rbug_enabled())
- goto error;
-
- tr_api = CALLOC_STRUCT(trace_drm_api);
-
- if (!tr_api)
- goto error;
-
- tr_api->base.name = api->name;
- tr_api->base.driver_name = api->driver_name;
- tr_api->base.create_screen = trace_drm_create_screen;
- tr_api->base.destroy = trace_drm_destroy;
- tr_api->api = api;
-
- return &tr_api->base;
-
-error:
- return api;
-}