summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/Makefile5
-rw-r--r--src/gallium/auxiliary/SConscript5
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c37
-rw-r--r--src/gallium/auxiliary/draw/draw_decompose_tmp.h26
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_gs_tmp.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c17
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c43
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h30
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c52
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h47
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c89
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c18
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c18
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c21
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c21
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_so_emit.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c200
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp.h238
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h98
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c610
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h19
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit.c208
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h307
-rw-r--r--src/gallium/auxiliary/draw/draw_so_emit_tmp.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_split_tmp.h176
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c369
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c14
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c26
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c21
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c24
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c14
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c6
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c498
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h99
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.h2
-rw-r--r--src/gallium/auxiliary/translate/translate.c8
-rw-r--r--src/gallium/auxiliary/translate/translate.h15
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c305
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c1288
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c140
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.h13
-rw-r--r--src/gallium/auxiliary/util/u_debug.c2
-rw-r--r--src/gallium/auxiliary/util/u_format.h38
-rw-r--r--src/gallium/auxiliary/util/u_framebuffer.c6
-rw-r--r--src/gallium/auxiliary/util/u_split_prim.h105
-rw-r--r--src/gallium/auxiliary/util/u_sse.h29
-rw-r--r--src/gallium/auxiliary/util/u_staging.c95
-rw-r--r--src/gallium/auxiliary/util/u_staging.h37
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.c79
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.h18
-rw-r--r--src/gallium/docs/source/conf.py2
-rw-r--r--src/gallium/docs/source/debugging.rst101
-rw-r--r--src/gallium/docs/source/distro.rst5
-rw-r--r--src/gallium/docs/source/exts/formatting.py31
-rw-r--r--src/gallium/docs/source/exts/tgsi.py17
-rw-r--r--src/gallium/docs/source/index.rst1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c17
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c190
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h112
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c115
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py29
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h10
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h100
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h5
-rw-r--r--src/gallium/drivers/nv50/nv50_push.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c8
-rw-r--r--src/gallium/drivers/r300/r300_blit.c90
-rw-r--r--src/gallium/drivers/r300/r300_context.c89
-rw-r--r--src/gallium/drivers/r300/r300_context.h11
-rw-r--r--src/gallium/drivers/r300/r300_emit.c41
-rw-r--r--src/gallium/drivers/r300/r300_emit.h1
-rw-r--r--src/gallium/drivers/r300/r300_flush.c3
-rw-r--r--src/gallium/drivers/r300/r300_fs.c19
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c71
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h2
-rw-r--r--src/gallium/drivers/r300/r300_reg.h21
-rw-r--r--src/gallium/drivers/r300/r300_render.c38
-rw-r--r--src/gallium/drivers/r300/r300_screen.c6
-rw-r--r--src/gallium/drivers/r300/r300_shader_semantics.h2
-rw-r--r--src/gallium/drivers/r300/r300_state.c20
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c88
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c6
-rw-r--r--src/gallium/drivers/r300/r300_vs.c5
-rw-r--r--src/gallium/drivers/r600/r600_asm.c10
-rw-r--r--src/gallium/drivers/r600/r600_asm.h1
-rw-r--r--src/gallium/drivers/r600/r600_context.c29
-rw-r--r--src/gallium/drivers/r600/r600_context.h5
-rw-r--r--src/gallium/drivers/r600/r600_draw.c2
-rw-r--r--src/gallium/drivers/r600/r600_screen.c105
-rw-r--r--src/gallium/drivers/r600/r600_screen.h2
-rw-r--r--src/gallium/drivers/r600/r600_shader.c111
-rw-r--r--src/gallium/drivers/r600/r600_shader.h1
-rw-r--r--src/gallium/drivers/r600/r600_state.c117
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h4
-rw-r--r--src/gallium/drivers/r600/r600_texture.c248
-rw-r--r--src/gallium/drivers/r600/r600d.h135
-rw-r--r--src/gallium/drivers/r600/radeon.h61
-rw-r--r--src/gallium/include/pipe/p_compiler.h10
-rw-r--r--src/gallium/state_trackers/dri/common/dri_drawable.c57
-rw-r--r--src/gallium/state_trackers/dri/common/dri_drawable.h4
-rw-r--r--src/gallium/state_trackers/dri/drm/dri2.c86
-rw-r--r--src/gallium/state_trackers/dri/sw/drisw.c4
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.c3
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.h20
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_api.c8
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_sync.c284
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_sync.h53
-rw-r--r--src/gallium/targets/dri-r600/Makefile2
-rw-r--r--src/gallium/targets/dri-r600/SConscript2
-rw-r--r--src/gallium/targets/egl/st_GLESv1_CM.c1
-rw-r--r--src/gallium/targets/egl/st_GLESv2.c1
-rw-r--r--src/gallium/tests/graw/SConscript5
-rw-r--r--src/gallium/tests/unit/Makefile3
-rw-r--r--src/gallium/tests/unit/SConscript8
-rw-r--r--src/gallium/tests/unit/translate_test.c310
-rw-r--r--src/gallium/winsys/r600/drm/SConscript25
-rw-r--r--src/gallium/winsys/r600/drm/r600_drm.c3
-rw-r--r--src/gallium/winsys/r600/drm/r600_states.h96
-rw-r--r--src/gallium/winsys/r600/drm/radeon.c1
-rw-r--r--src/gallium/winsys/r600/drm/radeon_bo_pb.c186
-rw-r--r--src/gallium/winsys/r600/drm/radeon_ctx.c3
-rw-r--r--src/gallium/winsys/r600/drm/radeon_priv.h30
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm.c36
-rw-r--r--src/gallium/winsys/svga/drm/vmw_screen_dri.c2
135 files changed, 5927 insertions, 3010 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 843b72bc38..eb2a40cbaa 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -26,7 +26,6 @@ C_SOURCES = \
draw/draw_pipe_wide_line.c \
draw/draw_pipe_wide_point.c \
draw/draw_pt.c \
- draw/draw_pt_elts.c \
draw/draw_pt_emit.c \
draw/draw_pt_fetch.c \
draw/draw_pt_fetch_emit.c \
@@ -35,8 +34,7 @@ C_SOURCES = \
draw/draw_pt_post_vs.c \
draw/draw_pt_so_emit.c \
draw/draw_pt_util.c \
- draw/draw_pt_varray.c \
- draw/draw_pt_vcache.c \
+ draw/draw_pt_vsplit.c \
draw/draw_vertex.c \
draw/draw_vs.c \
draw/draw_vs_varient.c \
@@ -131,6 +129,7 @@ C_SOURCES = \
util/u_sampler.c \
util/u_simple_shaders.c \
util/u_snprintf.c \
+ util/u_staging.c \
util/u_surface.c \
util/u_surfaces.c \
util/u_texture.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 1f09198721..30e5d02c9b 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -71,7 +71,6 @@ source = [
'draw/draw_pipe_wide_line.c',
'draw/draw_pipe_wide_point.c',
'draw/draw_pt.c',
- 'draw/draw_pt_elts.c',
'draw/draw_pt_emit.c',
'draw/draw_pt_fetch.c',
'draw/draw_pt_fetch_emit.c',
@@ -80,8 +79,7 @@ source = [
'draw/draw_pt_post_vs.c',
'draw/draw_pt_so_emit.c',
'draw/draw_pt_util.c',
- 'draw/draw_pt_varray.c',
- 'draw/draw_pt_vcache.c',
+ 'draw/draw_pt_vsplit.c',
'draw/draw_vertex.c',
'draw/draw_vs.c',
'draw/draw_vs_aos.c',
@@ -180,6 +178,7 @@ source = [
'util/u_sampler.c',
'util/u_simple_shaders.c',
'util/u_snprintf.c',
+ 'util/u_staging.c',
'util/u_surface.c',
'util/u_surfaces.c',
'util/u_texture.c',
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 995b675b9a..d118a8db52 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -34,6 +34,7 @@
#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
@@ -41,6 +42,25 @@
#if HAVE_LLVM
#include "gallivm/lp_bld_init.h"
#include "draw_llvm.h"
+
+static boolean
+draw_get_option_use_llvm(void)
+{
+ static boolean first = TRUE;
+ static boolean value;
+ if (first) {
+ first = FALSE;
+ value = debug_get_bool_option("DRAW_USE_LLVM", TRUE);
+
+#ifdef PIPE_ARCH_X86
+ util_cpu_detect();
+ /* require SSE2 due to LLVM PR6960. */
+ if (!util_cpu_caps.has_sse2)
+ value = FALSE;
+#endif
+ }
+ return value;
+}
#endif
struct draw_context *draw_create( struct pipe_context *pipe )
@@ -50,10 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe )
goto fail;
#if HAVE_LLVM
- lp_build_init();
- assert(lp_build_engine);
- draw->engine = lp_build_engine;
- draw->llvm = draw_llvm_create(draw);
+ if(draw_get_option_use_llvm())
+ {
+ lp_build_init();
+ assert(lp_build_engine);
+ draw->engine = lp_build_engine;
+ draw->llvm = draw_llvm_create(draw);
+ }
#endif
if (!draw_init(draw))
@@ -135,7 +158,8 @@ void draw_destroy( struct draw_context *draw )
draw_vs_destroy( draw );
draw_gs_destroy( draw );
#ifdef HAVE_LLVM
- draw_llvm_destroy( draw->llvm );
+ if(draw->llvm)
+ draw_llvm_destroy( draw->llvm );
#endif
FREE( draw );
@@ -659,7 +683,8 @@ draw_set_mapped_texture(struct draw_context *draw,
const void *data[DRAW_MAX_TEXTURE_LEVELS])
{
#ifdef HAVE_LLVM
- draw_llvm_set_mapped_texture(draw,
+ if(draw->llvm)
+ draw_llvm_set_mapped_texture(draw,
sampler_idx,
width, height, depth, last_level,
row_stride, img_stride, data);
diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
index a52d2b5058..a142563af9 100644
--- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -54,10 +54,10 @@ FUNC(FUNC_VARS)
FUNC_ENTER;
- /* prim, count, and last_vertex_last should have been defined */
+ /* prim, prim_flags, count, and last_vertex_last should have been defined */
if (0) {
- debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n",
- __FUNCTION__, prim, count, last_vertex_last);
+ debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n",
+ __FUNCTION__, prim, prim_flags, count, last_vertex_last);
}
switch (prim) {
@@ -80,7 +80,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_LOOP:
case PIPE_PRIM_LINE_STRIP:
if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = idx[1];
@@ -90,7 +90,7 @@ FUNC(FUNC_VARS)
LINE(flags, idx[0], idx[1]);
}
/* close the loop */
- if (prim == PIPE_PRIM_LINE_LOOP)
+ if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags)
LINE(flags, idx[1], idx[2]);
}
break;
@@ -255,17 +255,23 @@ FUNC(FUNC_VARS)
if (last_vertex_last) {
flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_2 |
DRAW_PIPE_EDGE_FLAG_0);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_2;
+
edge_next = DRAW_PIPE_EDGE_FLAG_0;
- edge_finish = DRAW_PIPE_EDGE_FLAG_1;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_1;
}
else {
flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
DRAW_PIPE_EDGE_FLAG_1);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_0;
+
edge_next = DRAW_PIPE_EDGE_FLAG_1;
- edge_finish = DRAW_PIPE_EDGE_FLAG_2;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2;
}
idx[0] = GET_ELT(0);
@@ -300,7 +306,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
if (count >= 4) {
- flags = DRAW_PIPE_RESET_STIPPLE;
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = GET_ELT(1);
idx[3] = GET_ELT(2);
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 4a1013e79a..50a03ac95a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
#define FUNC gs_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[idx])
#include "draw_gs_tmp.h"
@@ -457,6 +457,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
output_prims->start = 0;
output_prims->count = shader->emitted_vertices;
output_prims->prim = shader->output_primitive;
+ output_prims->flags = 0x0;
output_prims->primitive_lengths = shader->primitive_lengths;
output_prims->primitive_count = shader->emitted_primitives;
output_verts->count = shader->emitted_vertices;
diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
index 4a17af0dea..de7b02655a 100644
--- a/src/gallium/auxiliary/draw/draw_gs_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -6,12 +6,10 @@
#define FUNC_ENTER \
/* declare more local vars */ \
- struct draw_context *draw = gs->draw; \
const unsigned prim = input_prims->prim; \
+ const unsigned prim_flags = input_prims->flags; \
const unsigned count = input_prims->count; \
- const boolean last_vertex_last = \
- !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
+ const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index de99b00a81..58d3e345e5 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -210,13 +210,6 @@ draw_llvm_create(struct draw_context *draw)
{
struct draw_llvm *llvm;
-#ifdef PIPE_ARCH_X86
- util_cpu_detect();
- /* require SSE2 due to LLVM PR6960. */
- if (!util_cpu_caps.has_sse2)
- return NULL;
-#endif
-
llvm = CALLOC_STRUCT( draw_llvm );
if (!llvm)
return NULL;
@@ -683,7 +676,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
void *code;
@@ -732,7 +724,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld, builder, lp_type_int(32));
end = lp_build_add(&bld, start, count);
@@ -845,9 +837,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
- struct lp_build_context bld_int;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef fetch_max;
@@ -899,8 +889,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
- lp_build_context_init(&bld_int, builder, lp_type_int(32));
+ lp_build_context_init(&bld, builder, lp_type_int(32));
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
@@ -935,7 +924,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
/* make sure we're not out of bounds which can happen
* if fetch_count % 4 != 0, because on the last iteration
* a few of the 4 vertex fetches will be out of bounds */
- true_index = lp_build_min(&bld_int, true_index, fetch_max);
+ true_index = lp_build_min(&bld, true_index, fetch_max);
fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
&true_index, 1, "");
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 070ac803c8..b75262a357 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -169,27 +169,29 @@ static void do_triangle( struct draw_context *draw,
/*
* Set up macros for draw_pt_decompose.h template code.
* This code uses vertex indexes / elements.
- *
- * Flags are needed by the stipple and unfilled stages. When the two stages
- * are active, vcache_run_extras is called and the flags are stored in the
- * higher bits of i0. Otherwise, flags do not matter.
*/
-#define TRIANGLE(flags,i0,i1,i2) \
- do_triangle( draw, \
- i0, /* flags */ \
- verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i2) )
-
-#define LINE(flags,i0,i1) \
- do_line( draw, \
- i0, /* flags */ \
- verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1) )
+#define TRIANGLE(flags,i0,i1,i2) \
+ do { \
+ do_triangle( draw, \
+ flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1), \
+ verts + stride * (i2) ); \
+ } while (0)
+
+#define LINE(flags,i0,i1) \
+ do { \
+ do_line( draw, \
+ flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1) ); \
+ } while (0)
#define POINT(i0) \
- do_point( draw, verts + stride * (i0) )
+ do { \
+ do_point( draw, verts + stride * (i0) ); \
+ } while (0)
#define GET_ELT(idx) (elts[idx])
@@ -197,6 +199,7 @@ static void do_triangle( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
const ushort *elts, \
@@ -240,8 +243,7 @@ void draw_pipeline_run( struct draw_context *draw,
unsigned max_index = 0x0, i;
/* find the largest element index */
for (i = 0; i < count; i++) {
- unsigned int index = (prim_info->elts[start + i]
- & ~DRAW_PIPE_FLAG_MASK);
+ unsigned int index = prim_info->elts[start + i];
if (index > max_index)
max_index = index;
}
@@ -251,6 +253,7 @@ void draw_pipeline_run( struct draw_context *draw,
pipe_run_elts(draw,
prim_info->prim,
+ prim_info->flags,
vert_info->verts,
vert_info->stride,
prim_info->elts + start,
@@ -288,6 +291,7 @@ void draw_pipeline_run( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
unsigned count
@@ -320,6 +324,7 @@ void draw_pipeline_run_linear( struct draw_context *draw,
pipe_run_linear(draw,
prim_info->prim,
+ prim_info->flags,
(struct vertex_header*)verts,
vert_info->stride,
count);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 3c93c9014a..58c5858734 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -353,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
- /* even number */
- vbuf->max_vertices = vbuf->max_vertices & ~1;
-
if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 397d4bf653..854c45f060 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -140,8 +140,7 @@ struct draw_context
} middle;
struct {
- struct draw_pt_front_end *vcache;
- struct draw_pt_front_end *varray;
+ struct draw_pt_front_end *vsplit;
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -296,6 +295,10 @@ struct draw_vertex_info {
unsigned count;
};
+/* these flags are set if the primitive is a segment of a larger one */
+#define DRAW_SPLIT_BEFORE 0x1
+#define DRAW_SPLIT_AFTER 0x2
+
struct draw_prim_info {
boolean linear;
unsigned start;
@@ -304,6 +307,7 @@ struct draw_prim_info {
unsigned count;
unsigned prim;
+ unsigned flags;
unsigned *primitive_lengths;
unsigned primitive_count;
};
@@ -369,21 +373,15 @@ void draw_pipeline_destroy( struct draw_context *draw );
-/* We use the top few bits in the elts[] parameter to convey a little
- * API information. This limits the number of vertices we can address
- * to only 4096 -- if that becomes a problem, we can switch to 32-bit
- * draw indices.
- *
- * These flags expected at first vertex of lines & triangles when
- * unfilled and/or line stipple modes are operational.
+/*
+ * These flags are used by the pipeline when unfilled and/or line stipple modes
+ * are operational.
*/
-#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
-#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
-#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
-#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
-#define DRAW_PIPE_FLAG_MASK (0xf<<12)
+#define DRAW_PIPE_EDGE_FLAG_0 0x1
+#define DRAW_PIPE_EDGE_FLAG_1 0x2
+#define DRAW_PIPE_EDGE_FLAG_2 0x4
+#define DRAW_PIPE_EDGE_FLAG_ALL 0x7
+#define DRAW_PIPE_RESET_STIPPLE 0x8
void draw_pipeline_run( struct draw_context *draw,
const struct draw_vertex_info *vert,
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 248927505d..feacd8258b 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -43,21 +43,9 @@
DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
-#ifdef HAVE_LLVM
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
-#endif
-
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- if (count < first)
- return 0;
- return count - (count - first) % incr;
-}
-
-
/* Overall we split things into:
- * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - frontend -- prepare fetch_elts, draw_elts - eg vsplit
* - middle -- fetch, shade, cliptest, viewport
* - pipeline -- the prim pipeline: clipping, wide lines, etc
* - backend -- the vbuf_render provided by the driver.
@@ -77,7 +65,7 @@ draw_pt_arrays(struct draw_context *draw,
{
unsigned first, incr;
draw_pt_split_prim(prim, &first, &incr);
- count = trim(count, first, incr);
+ count = draw_pt_trim_count(count, first, incr);
if (count < first)
return TRUE;
}
@@ -115,22 +103,11 @@ draw_pt_arrays(struct draw_context *draw,
middle = draw->pt.middle.general;
}
-
- /* Pick the right frontend
- */
- if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
- frontend = draw->pt.front.vcache;
- } else {
- frontend = draw->pt.front.varray;
- }
+ frontend = draw->pt.front.vsplit;
frontend->prepare( frontend, prim, middle, opt );
- frontend->run(frontend,
- draw_pt_elt_func(draw),
- draw_pt_elt_ptr(draw, start),
- draw->pt.user.eltBias,
- count);
+ frontend->run(frontend, start, count);
frontend->finish( frontend );
@@ -143,12 +120,8 @@ boolean draw_pt_init( struct draw_context *draw )
draw->pt.test_fse = debug_get_option_draw_fse();
draw->pt.no_fse = debug_get_option_draw_no_fse();
- draw->pt.front.vcache = draw_pt_vcache( draw );
- if (!draw->pt.front.vcache)
- return FALSE;
-
- draw->pt.front.varray = draw_pt_varray(draw);
- if (!draw->pt.front.varray)
+ draw->pt.front.vsplit = draw_pt_vsplit(draw);
+ if (!draw->pt.front.vsplit)
return FALSE;
draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
@@ -164,7 +137,7 @@ boolean draw_pt_init( struct draw_context *draw )
return FALSE;
#if HAVE_LLVM
- if (debug_get_option_draw_use_llvm())
+ if (draw->llvm)
draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
#endif
@@ -194,14 +167,9 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = NULL;
}
- if (draw->pt.front.vcache) {
- draw->pt.front.vcache->destroy( draw->pt.front.vcache );
- draw->pt.front.vcache = NULL;
- }
-
- if (draw->pt.front.varray) {
- draw->pt.front.varray->destroy( draw->pt.front.varray );
- draw->pt.front.varray = NULL;
+ if (draw->pt.front.vsplit) {
+ draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
+ draw->pt.front.vsplit = NULL;
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 44356fba4c..0db5666529 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -35,8 +35,6 @@
#include "pipe/p_compiler.h"
-typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
-
struct draw_pt_middle_end;
struct draw_context;
struct draw_prim_info;
@@ -52,13 +50,18 @@ struct draw_vertex_info;
/* The "front end" - prepare sets of fetch, draw elements for the
* middle end.
*
- * Currenly one version of this:
- * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
- * Later:
- * - varray, varray_split
- * - velement, velement_split
+ * The fetch elements are indices to the vertices. The draw elements are
+ * indices to the fetched vertices. When both arrays of elements are both
+ * linear, middle->run_linear is called; When only the fetch elements are
+ * linear, middle->run_linear_elts is called; Otherwise, middle->run is
+ * called.
+ *
+ * When the number of the draw elements exceeds max_vertex of the middle end,
+ * the draw elements (as well as the fetch elements) are splitted and the
+ * middle end is called multiple times.
*
- * Currenly only using the vcache version.
+ * Currenly there is:
+ * - vsplit - catchall implementation, splits big prims
*/
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
@@ -67,9 +70,7 @@ struct draw_pt_front_end {
unsigned opt );
void (*run)( struct draw_pt_front_end *,
- pt_elt_func elt_func,
- const void *elt_ptr,
- int elt_bias,
+ unsigned start,
unsigned count );
void (*finish)( struct draw_pt_front_end * );
@@ -80,6 +81,8 @@ struct draw_pt_front_end {
/* The "middle end" - prepares actual hardware vertices for the
* hardware backend.
*
+ * prim_flags is as defined by pipe_draw_info::flags.
+ *
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
@@ -94,11 +97,13 @@ struct draw_pt_middle_end {
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
void (*run_linear)(struct draw_pt_middle_end *,
unsigned start,
- unsigned count);
+ unsigned count,
+ unsigned prim_flags );
/* Transform all vertices in a linear range and then draw them with
* the supplied element list. May fail and return FALSE.
@@ -107,7 +112,8 @@ struct draw_pt_middle_end {
unsigned fetch_start,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
int (*get_max_vertex_count)( struct draw_pt_middle_end * );
@@ -122,19 +128,11 @@ struct vbuf_render;
struct vertex_header;
-/* Helper functions.
- */
-pt_elt_func draw_pt_elt_func( struct draw_context *draw );
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start );
-
/* Frontends:
*
- * Currently only the general-purpose vcache implementation, could add
- * a special case for tiny vertex buffers.
+ * Currently only the general-purpose vsplit implementation.
*/
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
/* Middle-ends:
@@ -237,6 +235,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
deleted file mode 100644
index 88f4d9f495..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "draw/draw_pt.h"
-#include "draw/draw_private.h"
-
-/* Neat get_elt func that also works for varrays drawing by encoding
- * the start value into a pointer.
- */
-
-static unsigned elt_uint( const void *elts, unsigned idx )
-{
- return *(((const uint *)elts) + idx);
-}
-
-static unsigned elt_ushort( const void *elts, unsigned idx )
-{
- return *(((const ushort *)elts) + idx);
-}
-
-static unsigned elt_ubyte( const void *elts, unsigned idx )
-{
- return *(((const ubyte *)elts) + idx);
-}
-
-static unsigned elt_vert( const void *elts, unsigned idx )
-{
- /* unsigned index is packed in the pointer */
- return (unsigned)(uintptr_t)elts + idx;
-}
-
-pt_elt_func draw_pt_elt_func( struct draw_context *draw )
-{
- switch (draw->pt.user.eltSize) {
- case 0: return &elt_vert;
- case 1: return &elt_ubyte;
- case 2: return &elt_ushort;
- case 4: return &elt_uint;
- default: return NULL;
- }
-}
-
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start )
-{
- const char *elts = draw->pt.user.elts;
-
- switch (draw->pt.user.eltSize) {
- case 0:
- return (const void *)(((const ubyte *)NULL) + start);
- case 1:
- return (const void *)(((const ubyte *)elts) + start);
- case 2:
- return (const void *)(((const ushort *)elts) + start);
- case 4:
- return (const void *)(((const uint *)elts) + start);
- default:
- return NULL;
- }
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 5568fbb9f8..89d96c4235 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -120,9 +120,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* even number */
- *max_vertices = *max_vertices & ~1;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 5c8af17c8e..80a89428b6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -191,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
}
@@ -210,7 +201,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -273,7 +265,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -334,7 +327,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index b8270280b6..a31d3feb16 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -175,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
-
/* Probably need to do this somewhere (or fix exec shader not to
* need it):
*/
@@ -197,7 +188,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
static void fse_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -265,7 +257,8 @@ fse_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -327,7 +320,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 5b16c3788e..96b40fb363 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -112,16 +112,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
gs_out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
- /* return even number */
- *max_vertices = *max_vertices & ~1;
-
/* No need to prepare the shader.
*/
vs->prepare(vs, draw);
@@ -295,7 +292,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -311,6 +309,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -320,7 +319,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -336,6 +336,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +349,8 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +366,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 4b99bee86a..78b1bf988c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -118,16 +118,13 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
- /* return even number */
- *max_vertices = *max_vertices & ~1;
-
draw_llvm_make_variant_key(fpme->llvm, &key);
li = first_elem(&shader->variants);
@@ -294,7 +291,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -310,6 +308,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -319,7 +318,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -335,6 +335,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +349,8 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +366,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index f7f4f24d35..c86bdd99a3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
#define FUNC so_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[start + (idx)])
#include "draw_so_emit_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 3236d38e6a..513bbbed21 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -53,7 +53,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_LINES_ADJACENCY:
*first = 4;
- *incr = 2;
+ *incr = 4;
break;
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
*first = 4;
@@ -65,7 +65,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLES_ADJACENCY:
*first = 6;
- *incr = 3;
+ *incr = 6;
break;
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_TRIANGLE_FAN:
@@ -75,7 +75,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
*first = 6;
- *incr = 1;
+ *incr = 2;
break;
case PIPE_PRIM_QUADS:
*first = 4;
@@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
}
}
+
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr)
+{
+ if (count < first)
+ return 0;
+ return count - (count - first) % incr;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
deleted file mode 100644
index cd7bb7bf25..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-#define FETCH_MAX 256
-#define DRAW_MAX (FETCH_MAX+8)
-
-struct varray_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned driver_fetch_max;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-};
-
-
-static void varray_flush_linear(struct varray_frontend *varray,
- unsigned start, unsigned count)
-{
- if (count) {
- assert(varray->middle->run_linear);
- varray->middle->run_linear(varray->middle, start, count);
- }
-}
-
-static void varray_line_loop_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count,
- boolean end )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 1) {
- unsigned nr = 0, i;
-
- for (i = 0; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- if (end)
- varray->fetch_elts[nr++] = start;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-static void varray_fan_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 2) {
- unsigned nr = 0, i;
-
- if (segment_start != 0)
- varray->fetch_elts[nr++] = start;
-
- for (i = 0 ; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-
-#define FUNC varray_run
-#include "draw_pt_varray_tmp_linear.h"
-
-static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
- PIPE_PRIM_LINE_STRIP,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLE_STRIP,
- PIPE_PRIM_TRIANGLE_FAN,
- PIPE_PRIM_QUADS,
- PIPE_PRIM_QUAD_STRIP,
- PIPE_PRIM_POLYGON,
- PIPE_PRIM_LINES_ADJACENCY,
- PIPE_PRIM_LINE_STRIP_ADJACENCY,
- PIPE_PRIM_TRIANGLES_ADJACENCY,
- PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
-};
-
-
-
-static void varray_prepare(struct draw_pt_front_end *frontend,
- unsigned in_prim,
- struct draw_pt_middle_end *middle,
- unsigned opt)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
-
- varray->base.run = varray_run;
-
- varray->input_prim = in_prim;
- assert(in_prim < Elements(decompose_prim));
- varray->output_prim = decompose_prim[in_prim];
-
- varray->middle = middle;
- middle->prepare(middle,
- varray->output_prim,
- opt, &varray->driver_fetch_max );
-
- /* check that the max is even */
- assert((varray->driver_fetch_max & 1) == 0);
-
- varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
-}
-
-
-
-
-static void varray_finish(struct draw_pt_front_end *frontend)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- varray->middle->finish(varray->middle);
- varray->middle = NULL;
-}
-
-static void varray_destroy(struct draw_pt_front_end *frontend)
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
-{
- ushort i;
- struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
- if (varray == NULL)
- return NULL;
-
- varray->base.prepare = varray_prepare;
- varray->base.run = NULL;
- varray->base.finish = varray_finish;
- varray->base.destroy = varray_destroy;
- varray->draw = draw;
-
- for (i = 0; i < DRAW_MAX; i++) {
- varray->draw_elts[i] = i;
- }
-
- return &varray->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
deleted file mode 100644
index 7c722457c3..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ /dev/null
@@ -1,238 +0,0 @@
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- struct draw_context *draw = varray->draw;
- unsigned start = (unsigned)elts;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i, j;
- ushort flags;
- unsigned first, incr;
-
- varray->fetch_start = start;
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i < end; i++) {
- POINT(varray, i + 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+1 < end; i += 2) {
- LINE(varray, DRAW_PIPE_RESET_STIPPLE,
- i + 0, i + 1);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- LINE(varray, flags, i - 1, 0);
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i += 3) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- else {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i + 2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- }
- break;
-
- case PIPE_PRIM_QUADS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- varray_flush(varray);
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
deleted file mode 100644
index a292346be9..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ /dev/null
@@ -1,98 +0,0 @@
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- return count - (count - first) % incr;
-}
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- unsigned start = (unsigned) ((char *) elts - (char *) NULL);
-
- unsigned j;
- unsigned first, incr;
-
- assert(elt_bias == 0);
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
- /* Sanitize primitive length:
- */
- count = trim(count, first, incr);
- if (count < first)
- return;
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_QUADS:
- case PIPE_PRIM_QUAD_STRIP:
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
- varray_flush_linear(varray, start + j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- /* Always have to decompose as we've stated that this will be
- * emitted as a line-strip.
- */
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_line_loop_segment(varray, start, j, nr, nr == remaining);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
-
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count < varray->driver_fetch_max) {
- varray_flush_linear(varray, start, count);
- }
- else {
- for ( j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_fan_segment(varray, start, j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
deleted file mode 100644
index a848b54f7d..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ /dev/null
@@ -1,610 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "util/u_memory.h"
-#include "util/u_prim.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-
-#define CACHE_MAX 256
-#define FETCH_MAX 256
-#define DRAW_MAX (16*1024)
-
-
-struct vcache_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- unsigned in[CACHE_MAX];
- ushort out[CACHE_MAX];
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned draw_count;
- unsigned fetch_count;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-
- unsigned middle_prim;
- unsigned opt;
-};
-
-
-static INLINE void
-vcache_flush( struct vcache_frontend *vcache )
-{
- if (vcache->middle_prim != vcache->output_prim) {
- vcache->middle_prim = vcache->output_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- if (vcache->draw_count) {
- vcache->middle->run( vcache->middle,
- vcache->fetch_elts,
- vcache->fetch_count,
- vcache->draw_elts,
- vcache->draw_count );
- }
-
- memset(vcache->in, ~0, sizeof(vcache->in));
- vcache->fetch_count = 0;
- vcache->draw_count = 0;
-}
-
-
-static INLINE void
-vcache_check_flush( struct vcache_frontend *vcache )
-{
- if (vcache->draw_count + 6 >= DRAW_MAX ||
- vcache->fetch_count + 6 >= FETCH_MAX) {
- vcache_flush( vcache );
- }
-}
-
-
-static INLINE void
-vcache_elt( struct vcache_frontend *vcache,
- unsigned felt,
- ushort flags )
-{
- unsigned idx = felt % CACHE_MAX;
-
- if (vcache->in[idx] != felt) {
- assert(vcache->fetch_count < FETCH_MAX);
-
- vcache->in[idx] = felt;
- vcache->out[idx] = (ushort)vcache->fetch_count;
- vcache->fetch_elts[vcache->fetch_count++] = felt;
- }
-
- vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
-}
-
-
-
-static INLINE void
-vcache_triangle( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_point( struct vcache_frontend *vcache,
- unsigned i0 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj_flags( struct vcache_frontend *vcache,
- unsigned flags,
- unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj( struct vcache_frontend *vcache,
- unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj_flags( struct vcache_frontend *vcache,
- unsigned flags,
- unsigned i0, unsigned a0,
- unsigned i1, unsigned a1,
- unsigned i2, unsigned a2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_elt(vcache, a2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj( struct vcache_frontend *vcache,
- unsigned i0, unsigned a0,
- unsigned i1, unsigned a1,
- unsigned i2, unsigned a2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_elt(vcache, a2, 0);
- vcache_check_flush(vcache);
-}
-
-
-/* At least for now, we're back to using a template include file for
- * this. The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2)
-#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1)
-#define POINT(i0) vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
- vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
- vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run_extras
-#include "draw_pt_vcache_tmp.h"
-
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2)
-#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1)
-#define POINT(i0) vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
- vcache_line_adj(vcache,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
- vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run
-#include "draw_pt_vcache_tmp.h"
-
-static INLINE void
-rebase_uint_elts( const unsigned *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-rebase_ushort_elts( const ushort *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-rebase_ubyte_elts( const ubyte *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-translate_uint_elts( const unsigned *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void
-translate_ushort_elts( const ushort *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void
-translate_ubyte_elts( const ubyte *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-
-
-#if 0
-static INLINE enum pipe_format
-format_from_get_elt( pt_elt_func get_elt )
-{
- switch (draw->pt.user.eltSize) {
- case 1: return PIPE_FORMAT_R8_UNORM;
- case 2: return PIPE_FORMAT_R16_UNORM;
- case 4: return PIPE_FORMAT_R32_UNORM;
- default: return PIPE_FORMAT_NONE;
- }
-}
-#endif
-
-
-/**
- * Check if any vertex attributes use instance divisors.
- * Note that instance divisors complicate vertex fetching so we need
- * to take the vcache path when they're in use.
- */
-static boolean
-any_instance_divisors(const struct draw_context *draw)
-{
- uint i;
-
- for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- uint div = draw->pt.vertex_element[i].instance_divisor;
- if (div)
- return TRUE;
- }
- return FALSE;
-}
-
-
-static INLINE void
-vcache_check_run( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned draw_count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
- const unsigned min_index = draw->pt.user.min_index;
- const unsigned max_index = draw->pt.user.max_index;
- const unsigned index_size = draw->pt.user.eltSize;
- unsigned fetch_count;
- const ushort *transformed_elts;
- ushort *storage = NULL;
- boolean ok = FALSE;
-
- /* debug: verify indexes are in range [min_index, max_index] */
- if (0) {
- unsigned i;
- for (i = 0; i < draw_count; i++) {
- if (index_size == 1) {
- assert( ((const ubyte *) elts)[i] >= min_index);
- assert( ((const ubyte *) elts)[i] <= max_index);
- }
- else if (index_size == 2) {
- assert( ((const ushort *) elts)[i] >= min_index);
- assert( ((const ushort *) elts)[i] <= max_index);
- }
- else {
- assert(index_size == 4);
- assert( ((const uint *) elts)[i] >= min_index);
- assert( ((const uint *) elts)[i] <= max_index);
- }
- }
- }
-
- /* Note: max_index is frequently 0xffffffff so we have to be sure
- * that any arithmetic involving max_index doesn't overflow!
- */
- if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
- goto fail;
-
- if (any_instance_divisors(draw))
- goto fail;
-
- fetch_count = max_index + 1 - min_index;
-
- if (0)
- debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
- vcache->fetch_max,
- draw_count);
-
- if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
- fetch_count >= UNDEFINED_VERTEX_ID ||
- fetch_count > draw_count) {
- if (0) debug_printf("fail\n");
- goto fail;
- }
-
- if (vcache->middle_prim != vcache->input_prim) {
- vcache->middle_prim = vcache->input_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
- (elt_bias < 0 && min_index + elt_bias < min_index));
-
- if (min_index == 0 &&
- index_size == 2) {
- transformed_elts = (const ushort *)elts;
- }
- else {
- storage = MALLOC( draw_count * sizeof(ushort) );
- if (!storage)
- goto fail;
-
- if (min_index == 0) {
- switch(index_size) {
- case 1:
- translate_ubyte_elts( (const ubyte *)elts,
- draw_count,
- storage );
- break;
-
- case 2:
- translate_ushort_elts( (const ushort *)elts,
- draw_count,
- storage );
- break;
-
- case 4:
- translate_uint_elts( (const uint *)elts,
- draw_count,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- else {
- switch(index_size) {
- case 1:
- rebase_ubyte_elts( (const ubyte *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 2:
- rebase_ushort_elts( (const ushort *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 4:
- rebase_uint_elts( (const uint *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- transformed_elts = storage;
- }
-
- if (fetch_count < UNDEFINED_VERTEX_ID)
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index + elt_bias, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
-
- FREE(storage);
-
- if (ok)
- return;
-
- debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
- fetch_count, draw_count);
-
-fail:
- vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
-}
-
-
-
-
-static void
-vcache_prepare( struct draw_pt_front_end *frontend,
- unsigned in_prim,
- struct draw_pt_middle_end *middle,
- unsigned opt )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-
- if (opt & PT_PIPELINE) {
- vcache->base.run = vcache_run_extras;
- }
- else {
- vcache->base.run = vcache_check_run;
- }
-
- /* VCache will always emit the reduced version of its input
- * primitive, ie STRIP/FANS become TRIS, etc.
- *
- * This is not to be confused with what the GS might be up to,
- * which is a separate issue.
- */
- vcache->input_prim = in_prim;
- switch (in_prim) {
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY;
- break;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY;
- break;
- default:
- vcache->output_prim = u_reduced_prim(in_prim);
- }
-
- vcache->middle = middle;
- vcache->opt = opt;
-
- /* Have to run prepare here, but try and guess a good prim for
- * doing so:
- */
- vcache->middle_prim = (opt & PT_PIPELINE)
- ? vcache->output_prim : vcache->input_prim;
-
- middle->prepare( middle,
- vcache->middle_prim,
- opt, &vcache->fetch_max );
-}
-
-
-static void
-vcache_finish( struct draw_pt_front_end *frontend )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- vcache->middle->finish( vcache->middle );
- vcache->middle = NULL;
-}
-
-
-static void
-vcache_destroy( struct draw_pt_front_end *frontend )
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
-{
- struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
- if (vcache == NULL)
- return NULL;
-
- vcache->base.prepare = vcache_prepare;
- vcache->base.run = NULL;
- vcache->base.finish = vcache_finish;
- vcache->base.destroy = vcache_destroy;
- vcache->draw = draw;
-
- memset(vcache->in, ~0, sizeof(vcache->in));
-
- return &vcache->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
deleted file mode 100644
index 1a3748d5f0..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#define FUNC_VARS \
- struct draw_pt_front_end *frontend, \
- pt_elt_func get_elt, \
- const void *elts, \
- int elt_bias, \
- unsigned count
-
-#define LOCAL_VARS \
- struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \
- struct draw_context *draw = vcache->draw; \
- const unsigned prim = vcache->input_prim; \
- const boolean last_vertex_last = !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first);
-
-#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias)
-
-#define FUNC_EXIT do { vcache_flush(vcache); } while (0)
-
-#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
new file mode 100644
index 0000000000..a687525309
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -0,0 +1,208 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define SEGMENT_SIZE 1024
+#define MAP_SIZE 256
+
+struct vsplit_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ unsigned prim;
+
+ struct draw_pt_middle_end *middle;
+
+ unsigned max_vertices;
+ ushort segment_size;
+
+ /* buffers for splitting */
+ unsigned fetch_elts[SEGMENT_SIZE];
+ ushort draw_elts[SEGMENT_SIZE];
+ ushort identity_draw_elts[SEGMENT_SIZE];
+
+ struct {
+ /* map a fetch element to a draw element */
+ unsigned fetches[MAP_SIZE];
+ ushort draws[MAP_SIZE];
+ boolean has_max_fetch;
+
+ ushort num_fetch_elts;
+ ushort num_draw_elts;
+ } cache;
+};
+
+
+static void
+vsplit_clear_cache(struct vsplit_frontend *vsplit)
+{
+ memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches));
+ vsplit->cache.has_max_fetch = FALSE;
+ vsplit->cache.num_fetch_elts = 0;
+ vsplit->cache.num_draw_elts = 0;
+}
+
+static void
+vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
+{
+ vsplit->middle->run(vsplit->middle,
+ vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
+ vsplit->draw_elts, vsplit->cache.num_draw_elts, flags);
+}
+
+/**
+ * Add a fetch element and add it to the draw elements.
+ */
+static INLINE void
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ unsigned hash = fetch % MAP_SIZE;
+
+ if (vsplit->cache.fetches[hash] != fetch) {
+ /* update cache */
+ vsplit->cache.fetches[hash] = fetch;
+ vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
+
+ /* add fetch */
+ assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
+ vsplit->fetch_elts[vsplit->cache.num_fetch_elts++] = fetch;
+ }
+
+ vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash];
+}
+
+
+/**
+ * Add a fetch element and add it to the draw elements. The fetch element is
+ * in full range (uint).
+ */
+static INLINE void
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ /* special care for 0xffffffff */
+ if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
+ unsigned hash = fetch % MAP_SIZE;
+ vsplit->cache.fetches[hash] = fetch - 1; /* force update */
+ vsplit->cache.has_max_fetch = TRUE;
+ }
+
+ vsplit_add_cache(vsplit, fetch);
+}
+
+
+#define FUNC vsplit_run_linear
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ubyte
+#define ELT_TYPE ubyte
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ushort
+#define ELT_TYPE ushort
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_uint
+#define ELT_TYPE uint
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+
+static void vsplit_prepare(struct draw_pt_front_end *frontend,
+ unsigned in_prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+
+ switch (vsplit->draw->pt.user.eltSize) {
+ case 0:
+ vsplit->base.run = vsplit_run_linear;
+ break;
+ case 1:
+ vsplit->base.run = vsplit_run_ubyte;
+ break;
+ case 2:
+ vsplit->base.run = vsplit_run_ushort;
+ break;
+ case 4:
+ vsplit->base.run = vsplit_run_uint;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* split only */
+ vsplit->prim = in_prim;
+
+ vsplit->middle = middle;
+ middle->prepare(middle, vsplit->prim, opt, &vsplit->max_vertices);
+
+ vsplit->segment_size = MIN2(SEGMENT_SIZE, vsplit->max_vertices);
+}
+
+
+static void vsplit_finish(struct draw_pt_front_end *frontend)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+ vsplit->middle->finish(vsplit->middle);
+ vsplit->middle = NULL;
+}
+
+
+static void vsplit_destroy(struct draw_pt_front_end *frontend)
+{
+ FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
+{
+ struct vsplit_frontend *vsplit = CALLOC_STRUCT(vsplit_frontend);
+ ushort i;
+
+ if (!vsplit)
+ return NULL;
+
+ vsplit->base.prepare = vsplit_prepare;
+ vsplit->base.run = NULL;
+ vsplit->base.finish = vsplit_finish;
+ vsplit->base.destroy = vsplit_destroy;
+ vsplit->draw = draw;
+
+ for (i = 0; i < SEGMENT_SIZE; i++)
+ vsplit->identity_draw_elts[i] = i;
+
+ return &vsplit->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
new file mode 100644
index 0000000000..4bb57b1493
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -0,0 +1,307 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define CONCAT2(name, elt_type) name ## elt_type
+#define CONCAT(name, elt_type) CONCAT2(name, elt_type)
+
+#ifdef ELT_TYPE
+
+/**
+ * Fetch all elements in [min_index, max_index] with bias, and use the
+ * (rebased) index buffer as the draw elements.
+ */
+static boolean
+CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned istart, unsigned icount)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts;
+ const unsigned min_index = draw->pt.user.min_index;
+ const unsigned max_index = draw->pt.user.max_index;
+ const int elt_bias = draw->pt.user.eltBias;
+ unsigned fetch_start, fetch_count;
+ const ushort *draw_elts = NULL;
+ unsigned i;
+
+ /* use the ib directly */
+ if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+ if (icount > vsplit->max_vertices)
+ return FALSE;
+
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+ assert(idx >= min_index && idx <= max_index);
+ }
+ draw_elts = (const ushort *) ib;
+ }
+ else {
+ /* have to go through vsplit->draw_elts */
+ if (icount > vsplit->segment_size)
+ return FALSE;
+ }
+
+ /* this is faster only when we fetch less elements than the normal path */
+ if (max_index - min_index > icount - 1)
+ return FALSE;
+
+ if (elt_bias < 0 && min_index < -elt_bias)
+ return FALSE;
+
+ /* why this check? */
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ if (draw->pt.vertex_element[i].instance_divisor)
+ return FALSE;
+ }
+
+ fetch_start = min_index + elt_bias;
+ fetch_count = max_index - min_index + 1;
+
+ if (!draw_elts) {
+ if (min_index == 0) {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) idx;
+ }
+ }
+ else {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) (idx - min_index);
+ }
+ }
+
+ draw_elts = vsplit->draw_elts;
+ }
+
+ return vsplit->middle->run_linear_elts(vsplit->middle,
+ fetch_start, fetch_count,
+ draw_elts, icount, 0x0);
+}
+
+/**
+ * Use the cache to prepare the fetch and draw elements, and flush.
+ *
+ * When spoken is TRUE, ispoken replaces istart; When close is TRUE, iclose is
+ * appended.
+ */
+static INLINE void
+CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart, unsigned icount,
+ boolean spoken, unsigned ispoken,
+ boolean close, unsigned iclose)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts;
+ const int ibias = draw->pt.user.eltBias;
+ unsigned i;
+
+ assert(icount + !!close <= vsplit->segment_size);
+
+ vsplit_clear_cache(vsplit);
+
+ spoken = !!spoken;
+ if (ibias == 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, ib[ispoken]);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, ib[istart + i]);
+
+ if (close)
+ ADD_CACHE(vsplit, ib[iclose]);
+ }
+ else if (ibias > 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, (uint) ib[istart + i] + ibias);
+
+ if (close)
+ ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
+ }
+ else {
+ if (spoken) {
+ if (ib[ispoken] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[ispoken] + ibias);
+ }
+
+ for (i = spoken; i < icount; i++) {
+ if (ib[istart + i] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[istart + i] + ibias);
+ }
+
+ if (close) {
+ if (ib[iclose] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[iclose] + ibias);
+ }
+ }
+
+ vsplit_flush_cache(vsplit, flags);
+}
+
+static void
+CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount)
+{
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, FALSE, 0);
+}
+
+static void
+CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, close_loop, i0);
+}
+
+static void
+CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, use_spoken, i0, FALSE, 0);
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->segment_size; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) \
+ CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount)
+
+#else /* ELT_TYPE */
+
+static void
+vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount)
+{
+ assert(icount <= vsplit->max_vertices);
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+}
+
+static void
+vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean close_loop = (flags == DRAW_SPLIT_BEFORE);
+ unsigned nr;
+
+ assert(icount + !!close_loop <= vsplit->segment_size);
+
+ if (close_loop) {
+ for (nr = 0; nr < icount; nr++)
+ vsplit->fetch_elts[nr] = istart + nr;
+ vsplit->fetch_elts[nr++] = i0;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+static void
+vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
+ unsigned nr = 0, i;
+
+ assert(icount + !!use_spoken <= vsplit->segment_size);
+
+ if (use_spoken) {
+ vsplit->fetch_elts[nr++] = i0;
+ for (i = 1 ; i < icount; i++)
+ vsplit->fetch_elts[nr++] = istart + i;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->max_vertices; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) FALSE
+
+#define ELT_TYPE linear
+
+#endif /* ELT_TYPE */
+
+#define FUNC_VARS \
+ struct draw_pt_front_end *frontend, \
+ unsigned start, \
+ unsigned count
+
+#define SEGMENT_SIMPLE(flags, istart, icount) \
+ CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
+
+#define SEGMENT_LOOP(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#define SEGMENT_FAN(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#include "draw_split_tmp.h"
+
+#undef CONCAT2
+#undef CONCAT
+
+#undef ELT_TYPE
+#undef ADD_CACHE
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
index 6d8937a0b4..7fafde9d5e 100644
--- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -7,11 +7,9 @@
#define FUNC_ENTER \
/* declare more local vars */ \
- struct draw_context *draw = so->draw; \
const unsigned prim = input_prims->prim; \
- const boolean last_vertex_last = \
- !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
+ const unsigned prim_flags = input_prims->flags; \
+ const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
new file mode 100644
index 0000000000..47defc62b9
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -0,0 +1,176 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned first, incr;
+ LOCAL_VARS
+
+ /*
+ * prim, start, count, and max_count_{simple,loop,fan} should have been
+ * defined
+ */
+ if (0) {
+ debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
+ "max_count_loop %d, max_count_fan %d\n",
+ __FUNCTION__, prim, start, count, max_count_simple,
+ max_count_loop, max_count_fan);
+ }
+
+ draw_pt_split_prim(prim, &first, &incr);
+ /* sanitize primitive length */
+ count = draw_pt_trim_count(count, first, incr);
+ if (count < first)
+ return;
+
+ /* try flushing the entire primitive */
+ if (PRIMITIVE(start, count))
+ return;
+
+ /* must be able to at least flush two complete primitives */
+ assert(max_count_simple >= first + incr &&
+ max_count_loop >= first + incr &&
+ max_count_fan >= first + incr);
+
+ /* no splitting required */
+ if (count <= max_count_simple) {
+ SEGMENT_SIMPLE(0x0, start, count);
+ }
+ else {
+ const unsigned rollback = first - incr;
+ unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
+
+ /*
+ * Both count and seg_max below are explicitly trimmed. Because
+ *
+ * seg_start = N * (seg_max - rollback) = N' * incr,
+ *
+ * we have
+ *
+ * remaining = count - seg_start = first + N'' * incr.
+ *
+ * That is, remaining is implicitly trimmed.
+ */
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
+ if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
+ prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
+ /* make sure we flush even number of triangles at a time */
+ if (seg_max < count && !(((seg_max - first) / incr) & 1))
+ seg_max -= incr;
+ }
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_SIMPLE(flags, start + seg_start, remaining);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_LOOP(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_FAN(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_FAN(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+#undef FUNC
+#undef FUNC_VARS
+#undef LOCAL_VARS
+
+#undef PRIMITIVE
+#undef SEGMENT_SIMPLE
+#undef SEGMENT_LOOP
+#undef SEGMENT_FAN
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index f5f2623e46..7b35dd4bb4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -59,6 +59,19 @@
#include "lp_bld_arit.h"
+/*
+ * XXX: Increasing eliminates some artifacts, but adds others, most
+ * noticeably corruption in the Earth halo in Google Earth.
+ */
+#define RCP_NEWTON_STEPS 0
+
+#define RSQRT_NEWTON_STEPS 0
+
+#define EXP_POLY_DEGREE 3
+
+#define LOG_POLY_DEGREE 5
+
+
/**
* Generate min(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
@@ -72,6 +85,9 @@ lp_build_min_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -118,6 +134,9 @@ lp_build_max_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -160,6 +179,8 @@ lp_build_comp(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->one)
return bld->zero;
if(a == bld->zero)
@@ -173,9 +194,15 @@ lp_build_comp(struct lp_build_context *bld,
}
if(LLVMIsConstant(a))
- return LLVMConstSub(bld->one, a);
+ if (type.floating)
+ return LLVMConstFSub(bld->one, a);
+ else
+ return LLVMConstSub(bld->one, a);
else
- return LLVMBuildSub(bld->builder, bld->one, a, "");
+ if (type.floating)
+ return LLVMBuildFSub(bld->builder, bld->one, a, "");
+ else
+ return LLVMBuildSub(bld->builder, bld->one, a, "");
}
@@ -190,6 +217,9 @@ lp_build_add(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return b;
if(b == bld->zero)
@@ -217,9 +247,15 @@ lp_build_add(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstAdd(a, b);
+ if (type.floating)
+ res = LLVMConstFAdd(a, b);
+ else
+ res = LLVMConstAdd(a, b);
else
- res = LLVMBuildAdd(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, a, b, "");
+ else
+ res = LLVMBuildAdd(bld->builder, a, b, "");
/* clamp to ceiling of 1.0 */
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
@@ -240,6 +276,8 @@ lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef index, res;
unsigned i;
+ assert(lp_check_value(type, a));
+
if (a == bld->zero)
return bld->zero;
if (a == bld->undef)
@@ -253,9 +291,16 @@ lp_build_sum_vector(struct lp_build_context *bld,
for (i = 1; i < type.length; i++) {
index = LLVMConstInt(LLVMInt32Type(), i, 0);
- res = LLVMBuildAdd(bld->builder, res,
- LLVMBuildExtractElement(bld->builder, a, index, ""),
- "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
+ else
+ res = LLVMBuildAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
}
return res;
@@ -273,6 +318,9 @@ lp_build_sub(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(b == bld->zero)
return a;
if(a == bld->undef || b == bld->undef)
@@ -300,9 +348,15 @@ lp_build_sub(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstSub(a, b);
+ if (type.floating)
+ res = LLVMConstFSub(a, b);
+ else
+ res = LLVMConstSub(a, b);
else
- res = LLVMBuildSub(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFSub(bld->builder, a, b, "");
+ else
+ res = LLVMBuildSub(bld->builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
res = lp_build_max_simple(bld, res, bld->zero);
@@ -360,6 +414,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
LLVMValueRef c8;
LLVMValueRef ab;
+ assert(!i16_type.floating);
+ assert(lp_check_value(i16_type, a));
+ assert(lp_check_value(i16_type, b));
+
c8 = lp_build_const_int_vec(i16_type, 8);
#if 0
@@ -395,6 +453,9 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef shift;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -433,7 +494,10 @@ lp_build_mul(struct lp_build_context *bld,
shift = NULL;
if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
- res = LLVMConstMul(a, b);
+ if (type.floating)
+ res = LLVMConstFMul(a, b);
+ else
+ res = LLVMConstMul(a, b);
if(shift) {
if(type.sign)
res = LLVMConstAShr(res, shift);
@@ -442,7 +506,10 @@ lp_build_mul(struct lp_build_context *bld,
}
}
else {
- res = LLVMBuildMul(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFMul(bld->builder, a, b, "");
+ else
+ res = LLVMBuildMul(bld->builder, a, b, "");
if(shift) {
if(type.sign)
res = LLVMBuildAShr(bld->builder, res, shift, "");
@@ -465,6 +532,8 @@ lp_build_mul_imm(struct lp_build_context *bld,
{
LLVMValueRef factor;
+ assert(lp_check_value(bld->type, a));
+
if(b == 0)
return bld->zero;
@@ -472,7 +541,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
return a;
if(b == -1)
- return LLVMBuildNeg(bld->builder, a, "");
+ return lp_build_negate(bld, a);
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
@@ -518,6 +587,9 @@ lp_build_div(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -529,13 +601,24 @@ lp_build_div(struct lp_build_context *bld,
if(a == bld->undef || b == bld->undef)
return bld->undef;
- if(LLVMIsConstant(a) && LLVMIsConstant(b))
- return LLVMConstFDiv(a, b);
+ if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+ if (type.floating)
+ return LLVMConstFDiv(a, b);
+ else if (type.sign)
+ return LLVMConstSDiv(a, b);
+ else
+ return LLVMConstUDiv(a, b);
+ }
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_mul(bld, a, lp_build_rcp(bld, b));
- return LLVMBuildFDiv(bld->builder, a, b, "");
+ if (type.floating)
+ return LLVMBuildFDiv(bld->builder, a, b, "");
+ else if (type.sign)
+ return LLVMBuildSDiv(bld->builder, a, b, "");
+ else
+ return LLVMBuildUDiv(bld->builder, a, b, "");
}
@@ -555,6 +638,10 @@ lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef delta;
LLVMValueRef res;
+ assert(lp_check_value(bld->type, x));
+ assert(lp_check_value(bld->type, v0));
+ assert(lp_check_value(bld->type, v1));
+
delta = lp_build_sub(bld, v1, v0);
res = lp_build_mul(bld, x, delta);
@@ -596,6 +683,9 @@ lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -624,6 +714,9 @@ lp_build_max(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -653,6 +746,10 @@ lp_build_clamp(struct lp_build_context *bld,
LLVMValueRef min,
LLVMValueRef max)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, min));
+ assert(lp_check_value(bld->type, max));
+
a = lp_build_min(bld, a, max);
a = lp_build_max(bld, a, min);
return a;
@@ -669,6 +766,8 @@ lp_build_abs(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
+ assert(lp_check_value(type, a));
+
if(!type.sign)
return a;
@@ -702,7 +801,16 @@ LLVMValueRef
lp_build_negate(struct lp_build_context *bld,
LLVMValueRef a)
{
- return LLVMBuildNeg(bld->builder, a, "");
+ assert(lp_check_value(bld->type, a));
+
+#if HAVE_LLVM >= 0x0207
+ if (bld->type.floating)
+ a = LLVMBuildFNeg(bld->builder, a, "");
+ else
+#endif
+ a = LLVMBuildNeg(bld->builder, a, "");
+
+ return a;
}
@@ -715,6 +823,8 @@ lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef cond;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+
/* Handle non-zero case */
if(!type.sign) {
/* if not zero then sign must be positive */
@@ -773,6 +883,7 @@ lp_build_set_sign(struct lp_build_context *bld,
LLVMValueRef val, res;
assert(type.floating);
+ assert(lp_check_value(type, a));
/* val = reinterpret_cast<int>(a) */
val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
@@ -1021,7 +1132,7 @@ lp_build_iround(struct lp_build_context *bld,
half = LLVMBuildOr(bld->builder, sign, half, "");
half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
- res = LLVMBuildAdd(bld->builder, a, half, "");
+ res = LLVMBuildFAdd(bld->builder, a, half, "");
}
res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
@@ -1070,7 +1181,7 @@ lp_build_ifloor(struct lp_build_context *bld,
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res");
+ res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
}
/* round to nearest (toward zero) */
@@ -1120,7 +1231,7 @@ lp_build_iceil(struct lp_build_context *bld,
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res");
+ res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
/* round to nearest (toward zero) */
@@ -1138,6 +1249,8 @@ lp_build_sqrt(struct lp_build_context *bld,
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
+ assert(lp_check_value(type, a));
+
/* TODO: optimize the constant case */
/* TODO: optimize the constant case */
@@ -1148,12 +1261,39 @@ lp_build_sqrt(struct lp_build_context *bld,
}
+/**
+ * Do one Newton-Raphson step to improve reciprocate precision:
+ *
+ * x_{i+1} = x_i * (2 - a * x_i)
+ *
+ * See also:
+ * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rcp_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rcp_a)
+{
+ LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, a, rcp_a, "");
+ res = LLVMBuildFSub(bld->builder, two, res, "");
+ res = LLVMBuildFMul(bld->builder, rcp_a, res, "");
+
+ return res;
+}
+
+
LLVMValueRef
lp_build_rcp(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->zero)
return bld->undef;
if(a == bld->one)
@@ -1167,32 +1307,16 @@ lp_build_rcp(struct lp_build_context *bld,
return LLVMConstFDiv(bld->one, a);
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
- /*
- * XXX: Added precision is not always necessary, so only enable this
- * when we have a better system in place to track minimum precision.
- */
-
-#if 0
- /*
- * Do one Newton-Raphson step to improve precision:
- *
- * x1 = (2 - a * rcp(a)) * rcp(a)
- */
-
- LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
- LLVMValueRef rcp_a;
LLVMValueRef res;
+ unsigned i;
- rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
- res = LLVMBuildMul(bld->builder, a, rcp_a, "");
- res = LLVMBuildSub(bld->builder, two, res, "");
- res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+ for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+ res = lp_build_rcp_refine(bld, a, res);
+ }
- return rcp_a;
-#else
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
-#endif
+ return res;
}
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
@@ -1200,6 +1324,33 @@ lp_build_rcp(struct lp_build_context *bld,
/**
+ * Do one Newton-Raphson step to improve rsqrt precision:
+ *
+ * x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i)
+ *
+ * See also:
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rsqrt_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rsqrt_a)
+{
+ LLVMValueRef half = lp_build_const_vec(bld->type, 0.5);
+ LLVMValueRef three = lp_build_const_vec(bld->type, 3.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, "");
+ res = LLVMBuildFMul(bld->builder, a, res, "");
+ res = LLVMBuildFSub(bld->builder, three, res, "");
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, res, "");
+ res = LLVMBuildFMul(bld->builder, half, res, "");
+
+ return res;
+}
+
+
+/**
* Generate 1/sqrt(a)
*/
LLVMValueRef
@@ -1208,10 +1359,22 @@ lp_build_rsqrt(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
assert(type.floating);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ LLVMValueRef res;
+ unsigned i;
+
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
+
+ for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+ res = lp_build_rsqrt_refine(bld, a, res);
+ }
+
+ return res;
+ }
return lp_build_rcp(bld, lp_build_sqrt(bld, a));
}
@@ -1270,7 +1433,7 @@ lp_build_sin(struct lp_build_context *bld,
*/
LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
- LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
/*
* store the integer part of y in mm0
@@ -1344,9 +1507,9 @@ lp_build_sin(struct lp_build_context *bld,
* xmm2 = _mm_mul_ps(y, xmm2);
* xmm3 = _mm_mul_ps(y, xmm3);
*/
- LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
- LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
- LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
/*
* x = _mm_add_ps(x, xmm1);
@@ -1354,16 +1517,16 @@ lp_build_sin(struct lp_build_context *bld,
* x = _mm_add_ps(x, xmm3);
*/
- LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
- LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
- LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
/*
* Evaluate the first polynom (0 <= x <= Pi/4)
*
* z = _mm_mul_ps(x,x);
*/
- LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
/*
* _PS_CONST(coscof_p0, 2.443315711809948E-005);
@@ -1378,12 +1541,12 @@ lp_build_sin(struct lp_build_context *bld,
* y = *(v4sf*)_ps_coscof_p0;
* y = _mm_mul_ps(y, z);
*/
- LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
- LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
- LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
- LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
- LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
- LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
/*
@@ -1392,10 +1555,10 @@ lp_build_sin(struct lp_build_context *bld,
* y = _mm_add_ps(y, *(v4sf*)_ps_1);
*/
LLVMValueRef half = lp_build_const_v4sf(0.5);
- LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
- LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
LLVMValueRef one = lp_build_const_v4sf(1.0);
- LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
/*
* _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1419,13 +1582,13 @@ lp_build_sin(struct lp_build_context *bld,
* y2 = _mm_add_ps(y2, x);
*/
- LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
- LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
- LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
- LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
- LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
- LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
- LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
/*
* select the correct result from the two polynoms
@@ -1481,7 +1644,7 @@ lp_build_cos(struct lp_build_context *bld,
*/
LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
- LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
/*
* store the integer part of y in mm0
@@ -1561,9 +1724,9 @@ lp_build_cos(struct lp_build_context *bld,
* xmm2 = _mm_mul_ps(y, xmm2);
* xmm3 = _mm_mul_ps(y, xmm3);
*/
- LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
- LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
- LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
/*
* x = _mm_add_ps(x, xmm1);
@@ -1571,16 +1734,16 @@ lp_build_cos(struct lp_build_context *bld,
* x = _mm_add_ps(x, xmm3);
*/
- LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
- LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
- LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
/*
* Evaluate the first polynom (0 <= x <= Pi/4)
*
* z = _mm_mul_ps(x,x);
*/
- LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
/*
* _PS_CONST(coscof_p0, 2.443315711809948E-005);
@@ -1595,12 +1758,12 @@ lp_build_cos(struct lp_build_context *bld,
* y = *(v4sf*)_ps_coscof_p0;
* y = _mm_mul_ps(y, z);
*/
- LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
- LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
- LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
- LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
- LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
- LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
/*
@@ -1609,10 +1772,10 @@ lp_build_cos(struct lp_build_context *bld,
* y = _mm_add_ps(y, *(v4sf*)_ps_1);
*/
LLVMValueRef half = lp_build_const_v4sf(0.5);
- LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
- LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
LLVMValueRef one = lp_build_const_v4sf(1.0);
- LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
/*
* _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1636,13 +1799,13 @@ lp_build_cos(struct lp_build_context *bld,
* y2 = _mm_add_ps(y2, x);
*/
- LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
- LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
- LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
- LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
- LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
- LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
- LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
/*
* select the correct result from the two polynoms
@@ -1695,6 +1858,8 @@ lp_build_exp(struct lp_build_context *bld,
/* log2(e) = 1/log(2) */
LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
}
@@ -1709,14 +1874,12 @@ lp_build_log(struct lp_build_context *bld,
/* log(2) */
LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
}
-#define EXP_POLY_DEGREE 3
-#define LOG_POLY_DEGREE 5
-
-
/**
* Generate polynomial.
* Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
@@ -1731,6 +1894,8 @@ lp_build_polynomial(struct lp_build_context *bld,
LLVMValueRef res = NULL;
unsigned i;
+ assert(lp_check_value(bld->type, x));
+
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
@@ -1802,6 +1967,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef expfpart = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
@@ -1817,7 +1984,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
ipart = lp_build_floor(bld, x);
/* fpart = x - ipart */
- fpart = LLVMBuildSub(bld->builder, x, ipart, "");
+ fpart = LLVMBuildFSub(bld->builder, x, ipart, "");
}
if(p_exp2_int_part || p_exp2) {
@@ -1832,7 +1999,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
Elements(lp_build_exp2_polynomial));
- res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+ res = LLVMBuildFMul(bld->builder, expipart, expfpart, "");
}
if(p_exp2_int_part)
@@ -1915,6 +2082,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMValueRef logmant = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
@@ -1945,9 +2114,9 @@ lp_build_log2_approx(struct lp_build_context *bld,
Elements(lp_build_log2_polynomial));
/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+ logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), "");
- res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ res = LLVMBuildFAdd(bld->builder, logmant, logexp, "");
}
if(p_exp) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 77012f1fac..8b477313d4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -117,8 +117,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
scale = (double)mask/ubound;
bias = (double)((unsigned long long)1 << (mantissa - n));
- res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), "");
- res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), "");
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
if(dst_width > n) {
@@ -175,6 +175,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
double scale;
double bias;
+ assert(dst_type.floating);
+
mantissa = lp_mantissa(dst_type);
n = MIN2(mantissa, src_width);
@@ -199,8 +201,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
res = LLVMBuildBitCast(builder, res, vec_type, "");
- res = LLVMBuildSub(builder, res, bias_, "");
- res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), "");
+ res = LLVMBuildFSub(builder, res, bias_, "");
+ res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
return res;
}
@@ -296,7 +298,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (dst_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
/* Use an equally sized integer for intermediate computations */
@@ -391,7 +393,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (src_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 0f01fc1d75..247cb83ce6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -240,7 +240,7 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
*/
if (normalized)
- scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
else
scaled = casted;
@@ -322,7 +322,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
}
if (normalized)
- scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
else
scaled = unswizzled;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 9f405921b0..c724a4453e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -197,7 +197,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
if (format_desc->channel[chan].normalized) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
}
else {
@@ -227,7 +227,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
else {
/* FIXME */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index ef0888079c..60d8bcfa55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -46,7 +46,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
DEBUG_NAMED_VALUE_END
};
-DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0);
+DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0)
#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index ab4ddb81c4..7d7db3b0d9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -83,6 +83,8 @@ lp_build_compare(LLVMBuilderRef builder,
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
if(func == PIPE_FUNC_NEVER)
return zeros;
@@ -374,6 +376,9 @@ lp_build_select_bitwise(struct lp_build_context *bld,
struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if (a == b) {
return a;
}
@@ -419,6 +424,9 @@ lp_build_select(struct lp_build_context *bld,
struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
@@ -484,6 +492,9 @@ lp_build_select_aos(struct lp_build_context *bld,
const unsigned n = type.length;
unsigned i, j;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
if(cond[0] && cond[1] && cond[2] && cond[3])
@@ -539,7 +550,22 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
{
+ const struct lp_type type = bld->type;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ /* can't do bitwise ops on floating-point values */
+ if(type.floating) {
+ a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
+ }
+
b = LLVMBuildNot(bld->builder, b, "");
b = LLVMBuildAnd(bld->builder, a, b, "");
+
+ if(type.floating) {
+ b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+ }
return b;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 7748f8f099..b7b630f2e8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -171,14 +171,13 @@ lp_build_unpack2(LLVMBuilderRef builder,
msb = lp_build_zero(src_type);
/* Interleave bits */
- if(util_cpu_caps.little_endian) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
*dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
*dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
- }
- else {
+#else
*dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
*dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
- }
+#endif
/* Cast the result into the new type (twice as wide) */
@@ -261,13 +260,14 @@ lp_build_pack2(LLVMBuilderRef builder,
#endif
LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
LLVMValueRef shuffle;
- LLVMValueRef res;
+ LLVMValueRef res = NULL;
assert(!src_type.floating);
assert(!dst_type.floating);
assert(src_type.width == dst_type.width * 2);
assert(src_type.length * 2 == dst_type.length);
+ /* Check for special cases first */
if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
switch(src_type.width) {
case 32:
@@ -283,8 +283,8 @@ lp_build_pack2(LLVMBuilderRef builder,
return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
}
else {
- assert(0);
- return LLVMGetUndef(dst_vec_type);
+ /* use generic shuffle below */
+ res = NULL;
}
}
break;
@@ -310,10 +310,13 @@ lp_build_pack2(LLVMBuilderRef builder,
break;
}
- res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
- return res;
+ if (res) {
+ res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+ return res;
+ }
}
+ /* generic shuffle */
lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index ca36046d22..7b1088939b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -85,7 +85,7 @@ lp_build_scalar_ddx(struct lp_build_context *bld,
LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
- return LLVMBuildSub(bld->builder, a_right, a_left, "");
+ return lp_build_sub(bld, a_right, a_left);
}
@@ -97,5 +97,5 @@ lp_build_scalar_ddy(struct lp_build_context *bld,
LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
- return LLVMBuildSub(bld->builder, a_bottom, a_top, "");
+ return lp_build_sub(bld, a_bottom, a_top);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1a20d74cac..806c7d56a8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -40,7 +40,6 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
-#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -811,7 +810,7 @@ lp_build_minify(struct lp_build_sample_context *bld,
LLVMValueRef base_size,
LLVMValueRef level)
{
- LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+ LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
return size;
}
@@ -888,17 +887,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
/* Compute rho = max of all partial derivatives scaled by texture size.
* XXX this could be vectorized somewhat
*/
- rho = LLVMBuildMul(bld->builder,
+ rho = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dsdx, dsdy),
lp_build_int_to_float(float_bld, width), "");
if (dims > 1) {
LLVMValueRef max;
- max = LLVMBuildMul(bld->builder,
+ max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dtdx, dtdy),
lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
if (dims > 2) {
- max = LLVMBuildMul(bld->builder,
+ max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, drdx, drdy),
lp_build_int_to_float(float_bld, depth), "");
rho = lp_build_max(float_bld, rho, max);
@@ -912,12 +911,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
if (lod_bias) {
lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
index0, "");
- lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+ lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
}
}
/* add sampler lod bias */
- lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
/* clamp lod */
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -1219,8 +1218,7 @@ lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
/* ima = -0.5 / abs(coord); */
LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
- lp_build_rcp(coord_bld, absCoord));
+ LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
return ima;
}
@@ -1841,7 +1839,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
- unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned subindex = 0;
+#else
+ unsigned subindex = 1;
+#endif
LLVMValueRef index;
index = LLVMConstInt(elem_type, j/2 + subindex, 0);
@@ -2029,6 +2031,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
debug_printf("Sample from %s\n", util_format_name(fmt));
}
+ assert(type.floating);
+
/* Setup our build context */
memset(&bld, 0, sizeof bld);
bld.builder = builder;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 21236839fb..0aa64affac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -489,7 +489,7 @@ get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
int_vec_type, "");
/* addr_vec = addr_vec * 4 */
- addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
+ addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);
return addr_vec;
}
@@ -533,7 +533,7 @@ emit_fetch(
reg->Register.Index * 4 + swizzle);
/* index_vec = index_vec + addr_vec */
- index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
+ index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
/* Gather values from the constant buffer */
res = build_gather(bld, bld->consts_ptr, index_vec);
@@ -612,11 +612,9 @@ emit_fetch(
case TGSI_UTIL_SIGN_SET:
/* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
- res = LLVMBuildNeg( bld->base.builder, res, "" );
- break;
-
+ /* fall through */
case TGSI_UTIL_SIGN_TOGGLE:
- res = LLVMBuildNeg( bld->base.builder, res, "" );
+ res = lp_build_negate( &bld->base, res );
break;
case TGSI_UTIL_SIGN_KEEP:
@@ -773,7 +771,9 @@ emit_store(
addr = LLVMBuildExtractElement(bld->base.builder,
addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
"");
- addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ addr = LLVMBuildMul(bld->base.builder,
+ addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
+ "");
}
switch( reg->Register.File ) {
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index 2e15751e50..0461c81550 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -30,7 +30,7 @@
#include "rtasm_cpu.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
static boolean rtasm_sse_enabled(void)
{
static boolean firsttime = 1;
@@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
@@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 9f70b73698..0fe6ebfcb4 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -22,8 +22,9 @@
**************************************************************************/
#include "pipe/p_config.h"
+#include "util/u_cpu_detect.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
@@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
assert(reg.mod == mod_REG);
+ /* TODO: support extended x86-64 registers */
+ assert(reg.idx < 8);
+ assert(regmem.idx < 8);
+
val |= regmem.mod << 6; /* mod field */
val |= reg.idx << 3; /* reg field */
val |= regmem.idx; /* r/m field */
@@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
*/
+void x64_rexw(struct x86_function *p)
+{
+ if(x86_target(p) != X86_32)
+ emit_1ub(p, 0x48);
+}
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label )
@@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
+void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ if(dst.mod == mod_REG)
+ x86_mov_reg_imm(p, dst, imm);
+ else
+ {
+ emit_1ub(p, 0xc7);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1i(p, imm);
+ }
+}
+
+void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
+{
+ DUMP_RI( dst, imm );
+ emit_1ub(p, 0x66);
+ if(dst.mod == mod_REG)
+ {
+ emit_1ub(p, 0xb8 + dst.idx);
+ emit_2ub(p, imm & 0xff, imm >> 8);
+ }
+ else
+ {
+ emit_1ub(p, 0xc7);
+ emit_modrm_noreg(p, 0, dst);
+ emit_2ub(p, imm & 0xff, imm >> 8);
+ }
+}
+
+void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
+{
+ DUMP_RI( dst, imm );
+ if(dst.mod == mod_REG)
+ {
+ emit_1ub(p, 0xb0 + dst.idx);
+ emit_1ub(p, imm);
+ }
+ else
+ {
+ emit_1ub(p, 0xc6);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1ub(p, imm);
+ }
+}
+
/**
* Immediate group 1 instructions.
*/
@@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
}
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
void x86_push_imm32( struct x86_function *p,
@@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
emit_1ub(p, 0x68);
emit_1i(p, imm32);
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
@@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
- p->stack_offset -= 4;
+ p->stack_offset -= sizeof(void*);
}
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x40 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+ {
+ emit_1ub(p, 0x40 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 0, reg);
}
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x48 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+ {
+ emit_1ub(p, 0x48 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 1, reg);
}
void x86_ret( struct x86_function *p )
@@ -583,9 +650,82 @@ void x86_mov( struct x86_function *p,
struct x86_reg src )
{
DUMP_RR( dst, src );
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ uint8_t rex = 0x40;
+ if(dst.idx >= 8)
+ {
+ rex |= 4;
+ dst.idx -= 8;
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1;
+ src.idx -= 8;
+ }
+ emit_1ub(p, rex);
+ }
+ emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov16( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_1ub(p, 0x66);
+ emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov8( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_op_modrm( p, 0x8a, 0x88, dst, src );
+}
+
+void x64_mov64( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ uint8_t rex = 0x48;
+ DUMP_RR( dst, src );
+ assert(x86_target(p) != X86_32);
+
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ if(dst.idx >= 8)
+ {
+ rex |= 4;
+ dst.idx -= 8;
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1;
+ src.idx -= 8;
+ }
+ }
+ emit_1ub(p, rex);
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb6);
+ emit_modrm(p, dst, src);
+}
+
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb7);
+ emit_modrm(p, dst, src);
+}
+
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -680,6 +820,61 @@ void x86_div( struct x86_function *p,
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
}
+void x86_bswap( struct x86_function *p, struct x86_reg reg )
+{
+ DUMP_R(reg);
+ assert(reg.file == file_REG32);
+ assert(reg.mod == mod_REG);
+ emit_2ub(p, 0x0f, 0xc8 + reg.idx);
+}
+
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 5, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 5, reg);
+ emit_1ub(p, imm);
+ }
+}
+
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 7, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 7, reg);
+ emit_1ub(p, imm);
+ }
+}
+
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 4, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 4, reg);
+ emit_1ub(p, imm);
+ }
+}
/***********************************************************************
@@ -1013,6 +1208,77 @@ void sse_movmskps( struct x86_function *p,
* SSE2 instructions
*/
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ if(dst.mod == mod_REG && dst.file == file_REG32)
+ {
+ emit_1ub(p, 0x7e);
+ emit_modrm(p, src, dst);
+ }
+ else
+ {
+ emit_op_modrm(p, 0x6e, 0x7e, dst, src);
+ }
+}
+
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ switch (dst.mod) {
+ case mod_REG:
+ emit_3ub(p, 0xf3, 0x0f, 0x7e);
+ emit_modrm(p, dst, src);
+ break;
+ case mod_INDIRECT:
+ case mod_DISP32:
+ case mod_DISP8:
+ assert(src.mod == mod_REG);
+ emit_3ub(p, 0x66, 0x0f, 0xd6);
+ emit_modrm(p, src, dst);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf3, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf2, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x28, 0x29, dst, src);
+}
+
/**
* Perform a reduced swizzle:
*/
@@ -1027,6 +1293,28 @@ void sse2_pshufd( struct x86_function *p,
emit_1ub(p, shuf);
}
+void sse2_pshuflw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf2, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src);
+ emit_1ub(p, shuf);
+}
+
+void sse2_pshufhw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf3, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src);
+ emit_1ub(p, shuf);
+}
+
void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1045,6 +1333,24 @@ void sse2_cvtps2dq( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_cvtsd2ss( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0xf2, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_cvtpd2ps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1081,6 +1387,97 @@ void sse2_punpcklbw( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x61);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x62);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x6c);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 4, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 4, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_3ub(p, 0x66, 0x0f, 0xeb);
+ emit_modrm(p, dst, src);
+}
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
@@ -1100,18 +1497,6 @@ void sse2_rcpss( struct x86_function *p,
emit_modrm( p, dst, src );
}
-void sse2_movd( struct x86_function *p,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( dst, src );
- emit_2ub(p, 0x66, X86_TWOB);
- emit_op_modrm( p, 0x6e, 0x7e, dst, src );
-}
-
-
-
-
/***********************************************************************
* x87 instructions
*/
@@ -1702,23 +2087,79 @@ void x86_cdecl_caller_pop_regs( struct x86_function *p )
}
-/* Retreive a reference to one of the function arguments, taking into
- * account any push/pop activity:
- */
struct x86_reg x86_fn_arg( struct x86_function *p,
- unsigned arg )
+ unsigned arg )
{
- return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ switch(x86_target(p))
+ {
+ case X86_64_WIN64_ABI:
+ /* Microsoft uses a different calling convention than the rest of the world */
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 2:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 3:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 4:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + (arg - 4) * 8); /* ??? */
+ }
+ case X86_64_STD_ABI:
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_DI);
+ case 2:
+ return x86_make_reg(file_REG32, reg_SI);
+ case 3:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 4:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 5:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 6:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + (arg - 6) * 8); /* ??? */
+ }
+ case X86_32:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
+ default:
+ abort();
+ }
}
+static void x86_init_func_common( struct x86_function *p )
+{
+ util_cpu_detect();
+ p->caps = 0;
+ if(util_cpu_caps.has_mmx)
+ p->caps |= X86_MMX;
+ if(util_cpu_caps.has_mmx2)
+ p->caps |= X86_MMX2;
+ if(util_cpu_caps.has_sse)
+ p->caps |= X86_SSE;
+ if(util_cpu_caps.has_sse2)
+ p->caps |= X86_SSE2;
+ if(util_cpu_caps.has_sse3)
+ p->caps |= X86_SSE3;
+ if(util_cpu_caps.has_sse4_1)
+ p->caps |= X86_SSE4_1;
+ p->csr = p->store;
+ DUMP_START();
+}
void x86_init_func( struct x86_function *p )
{
p->size = 0;
p->store = NULL;
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p);
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
@@ -1728,8 +2169,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size )
if (p->store == NULL) {
p->store = p->error_overflow;
}
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p);
}
void x86_release_func( struct x86_function *p )
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 6208e8f707..aa77892b2d 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -26,20 +26,28 @@
#include "pipe/p_config.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
struct x86_reg {
- unsigned file:3;
- unsigned idx:3;
+ unsigned file:2;
+ unsigned idx:4;
unsigned mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */
};
+#define X86_MMX 1
+#define X86_MMX2 2
+#define X86_SSE 4
+#define X86_SSE2 8
+#define X86_SSE3 0x10
+#define X86_SSE4_1 0x20
+
struct x86_function {
+ unsigned caps;
unsigned size;
unsigned char *store;
unsigned char *csr;
@@ -75,7 +83,15 @@ enum x86_reg_name {
reg_SP,
reg_BP,
reg_SI,
- reg_DI
+ reg_DI,
+ reg_R8,
+ reg_R9,
+ reg_R10,
+ reg_R11,
+ reg_R12,
+ reg_R13,
+ reg_R14,
+ reg_R15
};
@@ -110,6 +126,29 @@ typedef void (*x86_func)(void);
/* Begin/end/retrieve function creation:
*/
+enum x86_target
+{
+ X86_32,
+ X86_64_STD_ABI,
+ X86_64_WIN64_ABI
+};
+
+/* make this read a member of x86_function if target != host is desired */
+static INLINE enum x86_target x86_target( struct x86_function* p )
+{
+#ifdef PIPE_ARCH_X86
+ return X86_32;
+#elif defined(_WIN64)
+ return X86_64_WIN64_ABI;
+#elif defined(PIPE_ARCH_X86_64)
+ return X86_64_STD_ABI;
+#endif
+}
+
+static INLINE unsigned x86_target_caps( struct x86_function* p )
+{
+ return p->caps;
+}
void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size );
@@ -138,6 +177,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
*/
int x86_get_label( struct x86_function *p );
+void x64_rexw(struct x86_function *p);
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label );
@@ -178,18 +219,54 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
@@ -227,7 +304,6 @@ void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
-void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -237,6 +313,14 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
+void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
+void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -250,7 +334,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_div( struct x86_function *p, struct x86_reg src );
-
+void x86_bswap( struct x86_function *p, struct x86_reg src );
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 9e02040f6c..acbff103ef 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -34,7 +34,7 @@
#include "tgsi_iterate.h"
-DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE);
+DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE)
typedef struct {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
index 46d8d18419..73f0f414e3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
@@ -36,7 +36,7 @@ extern "C" {
/* Check the given token stream for errors and common mistakes.
* Diagnostic messages are printed out to the debug output, and is
- * controlled by the debug option TGSI_PRINT_SANITY (default true).
+ * controlled by the debug option TGSI_PRINT_SANITY (default false).
* Returns TRUE if there are no errors, even though there could be some warnings.
*/
boolean
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index a9b7253bf4..03a7f050aa 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -38,7 +38,8 @@ struct translate *translate_create( const struct translate_key *key )
{
struct translate *translate = NULL;
-#if defined(PIPE_ARCH_X86)
+/* TODO: enable Win64 once it has actually been tested */
+#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(_WIN64))
translate = translate_sse2_create( key );
if (translate)
return translate;
@@ -48,3 +49,8 @@ struct translate *translate_create( const struct translate_key *key )
return translate_generic_create( key );
}
+
+boolean translate_is_output_format_supported(enum pipe_format format)
+{
+ return translate_generic_is_output_format_supported(format);
+}
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index edd95e0788..a75380228b 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -85,6 +85,18 @@ struct translate {
unsigned instance_id,
void *output_buffer);
+ void (PIPE_CDECL *run_elts16)( struct translate *,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
+ void (PIPE_CDECL *run_elts8)( struct translate *,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
@@ -105,6 +117,8 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx,
struct translate *translate_create( const struct translate_key *key );
+boolean translate_is_output_format_supported(enum pipe_format format);
+
static INLINE int translate_keysize( const struct translate_key *key )
{
return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
@@ -138,5 +152,6 @@ struct translate *translate_sse2_create( const struct translate_key *key );
struct translate *translate_generic_create( const struct translate_key *key );
+boolean translate_generic_is_output_format_supported(enum pipe_format format);
#endif
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 4d1977229e..ad809db720 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -64,6 +64,14 @@ struct translate_generic {
unsigned input_stride;
unsigned max_index;
+ /* this value is set to -1 if this is a normal element with output_format != input_format:
+ * in this case, u_format is used to do a full conversion
+ *
+ * this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
+ * in this case, memcpy is used to copy this amount of bytes
+ */
+ int copy_size;
+
} attrib[PIPE_MAX_ATTRIBS];
unsigned nr_attrib;
@@ -187,9 +195,15 @@ ATTRIB( R8G8B8_SNORM, 3, char, TO_8_SNORM )
ATTRIB( R8G8_SNORM, 2, char, TO_8_SNORM )
ATTRIB( R8_SNORM, 1, char, TO_8_SNORM )
-ATTRIB( A8R8G8B8_UNORM, 4, ubyte, TO_8_UNORM )
-/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM )*/
-
+static void
+emit_A8R8G8B8_UNORM( const float *attrib, void *ptr)
+{
+ ubyte *out = (ubyte *)ptr;
+ out[0] = TO_8_UNORM(attrib[3]);
+ out[1] = TO_8_UNORM(attrib[0]);
+ out[2] = TO_8_UNORM(attrib[1]);
+ out[3] = TO_8_UNORM(attrib[2]);
+}
static void
emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
@@ -348,7 +362,65 @@ static emit_func get_emit_func( enum pipe_format format )
}
}
+static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
+ unsigned elt,
+ unsigned instance_id,
+ void *vert )
+{
+ unsigned nr_attrs = tg->nr_attrib;
+ unsigned attr;
+
+ for (attr = 0; attr < nr_attrs; attr++) {
+ float data[4];
+ uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
+ if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ const uint8_t *src;
+ unsigned index;
+ int copy_size;
+
+ if (tg->attrib[attr].instance_divisor) {
+ index = instance_id / tg->attrib[attr].instance_divisor;
+ }
+ else {
+ index = elt;
+ }
+
+ /* clamp to void going out of bounds */
+ index = MIN2(index, tg->attrib[attr].max_index);
+
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * index;
+
+ copy_size = tg->attrib[attr].copy_size;
+ if(likely(copy_size >= 0))
+ memcpy(dst, src, copy_size);
+ else
+ {
+ tg->attrib[attr].fetch( data, src, 0, 0 );
+
+ if (0)
+ debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+ " %f, %f, %f, %f \n",
+ attr,
+ tg->attrib[attr].input_ptr,
+ tg->attrib[attr].input_stride,
+ index,
+ data[0], data[1],data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
+ } else {
+ if(likely(tg->attrib[attr].copy_size >= 0))
+ memcpy(data, &instance_id, 4);
+ else
+ {
+ data[0] = (float)instance_id;
+ tg->attrib[attr].emit( data, dst );
+ }
+ }
+ }
+}
/**
* Fetch vertex attributes for 'count' vertices.
@@ -361,62 +433,45 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
- unsigned nr_attrs = tg->nr_attrib;
- unsigned attr;
unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
for (i = 0; i < count; i++) {
- const unsigned elt = *elts++;
-
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
- char *dst = vert + tg->attrib[attr].output_offset;
-
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
- const uint8_t *src;
- unsigned index;
-
- if (tg->attrib[attr].instance_divisor) {
- index = instance_id / tg->attrib[attr].instance_divisor;
- } else {
- index = elt;
- }
-
- /* clamp to void going out of bounds */
- index = MIN2(index, tg->attrib[attr].max_index);
-
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * index;
-
- tg->attrib[attr].fetch( data, src, 0, 0 );
-
- if (0)
- debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
- " %f, %f, %f, %f \n",
- attr,
- tg->attrib[attr].input_ptr,
- tg->attrib[attr].input_stride,
- tg->attrib[attr].instance_divisor,
- tg->attrib[attr].max_index,
- index,
- data[0], data[1],data[2], data[3]);
- } else {
- data[0] = (float)instance_id;
- }
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
- if (0)
- debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
- i, elt, attr, data[0], data[1], data[2], data[3]);
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
- tg->attrib[attr].emit( data, dst );
- }
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
@@ -426,57 +481,10 @@ static void PIPE_CDECL generic_run( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
- unsigned nr_attrs = tg->nr_attrib;
- unsigned attr;
unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
for (i = 0; i < count; i++) {
- unsigned elt = start + i;
-
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
- char *dst = vert + tg->attrib[attr].output_offset;
-
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
- const uint8_t *src;
- unsigned index;
-
- if (tg->attrib[attr].instance_divisor) {
- index = instance_id / tg->attrib[attr].instance_divisor;
- }
- else {
- index = elt;
- }
-
- /* clamp to void going out of bounds */
- index = MIN2(index, tg->attrib[attr].max_index);
-
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * index;
-
- tg->attrib[attr].fetch( data, src, 0, 0 );
-
- if (0)
- debug_printf("Fetch linear attr %d from %p stride %d index %d: "
- " %f, %f, %f, %f \n",
- attr,
- tg->attrib[attr].input_ptr,
- tg->attrib[attr].input_stride,
- index,
- data[0], data[1],data[2], data[3]);
- } else {
- data[0] = (float)instance_id;
- }
-
- if (0)
- debug_printf("vert %d attr %d: %f %f %f %f\n",
- i, attr, data[0], data[1], data[2], data[3]);
-
- tg->attrib[attr].emit( data, dst );
- }
-
+ generic_run_one(tg, start + i, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -522,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
+ tg->translate.run_elts16 = generic_run_elts16;
+ tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
@@ -538,9 +548,28 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->attrib[i].input_offset = key->element[i].input_offset;
tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
- tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
+ tg->attrib[i].copy_size = -1;
+ if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
+ {
+ if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
+ || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
+ tg->attrib[i].copy_size = 4;
+ }
+ else
+ {
+ if(key->element[i].input_format == key->element[i].output_format
+ && format_desc->block.width == 1
+ && format_desc->block.height == 1
+ && !(format_desc->block.bits & 7))
+ tg->attrib[i].copy_size = format_desc->block.bits >> 3;
+ }
+
+ if(tg->attrib[i].copy_size < 0)
+ tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+ else
+ tg->attrib[i].emit = NULL;
}
tg->nr_attrib = key->nr_elements;
@@ -548,3 +577,83 @@ struct translate *translate_generic_create( const struct translate_key *key )
return &tg->translate;
}
+
+boolean translate_generic_is_output_format_supported(enum pipe_format format)
+{
+ switch(format)
+ {
+ case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64G64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64_FLOAT: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32G32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32_FLOAT: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32G32B32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32G32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32G32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32G32B32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32G32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32G32B32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32G32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32_SNORM: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16G16B16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16G16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16G16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16G16B16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16G16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16G16B16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16G16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16_SNORM: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8G8B8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8G8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8G8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8G8B8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8G8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8G8B8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8G8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8_SNORM: return TRUE;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE;
+ case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE;
+ default: return FALSE;
+ }
+}
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index ef3aa674a3..56c5b36ce2 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -30,11 +30,12 @@
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_format.h"
#include "translate.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
@@ -46,21 +47,9 @@
#define W 3
-typedef void (PIPE_CDECL *run_func)( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
-typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
struct translate_buffer {
const void *base_ptr;
- unsigned stride;
+ uintptr_t stride;
unsigned max_index;
};
@@ -79,15 +68,15 @@ struct translate_sse {
struct x86_function linear_func;
struct x86_function elt_func;
+ struct x86_function elt16_func;
+ struct x86_function elt8_func;
struct x86_function *func;
boolean loaded_identity;
- boolean loaded_255;
- boolean loaded_inv_255;
+ boolean loaded_const[5];
float identity[4];
- float float_255[4];
- float inv_255[4];
+ float const_value[5][4];
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
unsigned nr_buffers;
@@ -102,17 +91,16 @@ struct translate_sse {
boolean use_instancing;
unsigned instance_id;
- run_func gen_run;
- run_elts_func gen_run_elts;
-
/* these are actually known values, but putting them in a struct
* like this is helpful to keep them in sync across the file.
*/
struct x86_reg tmp_EAX;
- struct x86_reg idx_EBX; /* either start+i or &elt[i] */
- struct x86_reg outbuf_ECX;
- struct x86_reg machine_EDX;
- struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg tmp2_EDX;
+ struct x86_reg tmp3_ECX;
+ struct x86_reg idx_ESI; /* either start+i or &elt[i] */
+ struct x86_reg machine_EDI;
+ struct x86_reg outbuf_EBX;
+ struct x86_reg count_EBP; /* decrements to zero */
};
static int get_offset( const void *a, const void *b )
@@ -124,7 +112,7 @@ static int get_offset( const void *a, const void *b )
static struct x86_reg get_identity( struct translate_sse *p )
{
- struct x86_reg reg = x86_make_reg(file_XMM, 6);
+ struct x86_reg reg = x86_make_reg(file_XMM, 7);
if (!p->loaded_identity) {
p->loaded_identity = TRUE;
@@ -134,267 +122,924 @@ static struct x86_reg get_identity( struct translate_sse *p )
p->identity[3] = 1;
sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->identity[0])));
}
return reg;
}
-static struct x86_reg get_255( struct translate_sse *p )
+static struct x86_reg get_const( struct translate_sse *p, unsigned i, float v)
{
- struct x86_reg reg = x86_make_reg(file_XMM, 7);
-
- if (!p->loaded_255) {
- p->loaded_255 = TRUE;
- p->float_255[0] =
- p->float_255[1] =
- p->float_255[2] =
- p->float_255[3] = 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->float_255[0])));
+ struct x86_reg reg = x86_make_reg(file_XMM, 2 + i);
+
+ if (!p->loaded_const[i]) {
+ p->loaded_const[i] = TRUE;
+ p->const_value[i][0] =
+ p->const_value[i][1] =
+ p->const_value[i][2] =
+ p->const_value[i][3] = v;
+
+ sse_movups(p->func, reg,
+ x86_make_disp(p->machine_EDI,
+ get_offset(p, &p->const_value[i][0])));
}
return reg;
}
-static struct x86_reg get_inv_255( struct translate_sse *p )
+static struct x86_reg get_inv_127( struct translate_sse *p )
{
- struct x86_reg reg = x86_make_reg(file_XMM, 5);
-
- if (!p->loaded_inv_255) {
- p->loaded_inv_255 = TRUE;
- p->inv_255[0] =
- p->inv_255[1] =
- p->inv_255[2] =
- p->inv_255[3] = 1.0f / 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->inv_255[0])));
- }
-
- return reg;
+ return get_const(p, 0, 1.0f / 127.0f);
}
-
-static void emit_load_R32G32B32A32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static struct x86_reg get_inv_255( struct translate_sse *p )
{
- sse_movups(p->func, data, arg0);
+ return get_const(p, 1, 1.0f / 255.0f);
}
-static void emit_load_R32G32B32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static struct x86_reg get_inv_32767( struct translate_sse *p )
{
- /* Have to jump through some hoops:
- *
- * c 0 0 0
- * c 0 0 1
- * 0 0 c 1
- * a b c 1
- */
- sse_movss(p->func, data, x86_make_disp(arg0, 8));
- sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
- sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
- sse_movlps(p->func, data, arg0);
+ return get_const(p, 2, 1.0f / 32767.0f);
}
-static void emit_load_R32G32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static struct x86_reg get_inv_65535( struct translate_sse *p )
{
- /* 0 0 0 1
- * a b 0 1
- */
- sse_movups(p->func, data, get_identity(p) );
- sse_movlps(p->func, data, arg0);
+ return get_const(p, 3, 1.0f / 65535.0f);
}
-
-static void emit_load_R32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static struct x86_reg get_inv_2147483647( struct translate_sse *p )
{
- /* a 0 0 0
- * a 0 0 1
- */
- sse_movss(p->func, data, arg0);
- sse_orps(p->func, data, get_identity(p) );
+ return get_const(p, 4, 1.0f / 2147483647.0f);
}
-
-static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
+/* load the data in a SSE2 register, padding with zeros */
+static boolean emit_load_sse2( struct translate_sse *p,
struct x86_reg data,
- struct x86_reg src )
+ struct x86_reg src,
+ unsigned size)
{
-
- /* Load and unpack twice:
- */
- sse_movss(p->func, data, src);
- sse2_punpcklbw(p->func, data, get_identity(p));
- sse2_punpcklbw(p->func, data, get_identity(p));
-
- /* Convert to float:
- */
- sse2_cvtdq2ps(p->func, data, data);
-
-
- /* Scale by 1/255.0
- */
- sse_mulps(p->func, data, get_inv_255(p));
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ switch(size)
+ {
+ case 1:
+ x86_movzx8(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 2:
+ x86_movzx16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ case 3:
+ x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
+ x86_shl_imm(p->func, tmp, 16);
+ x86_mov16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ case 4:
+ sse2_movd(p->func, data, src);
+ break;
+ case 6:
+ sse2_movd(p->func, data, src);
+ x86_movzx16(p->func, tmp, x86_make_disp(src, 4));
+ sse2_movd(p->func, tmpXMM, tmp);
+ sse2_punpckldq(p->func, data, tmpXMM);
+ break;
+ case 8:
+ sse2_movq(p->func, data, src);
+ break;
+ case 12:
+ sse2_movq(p->func, data, src);
+ sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8));
+ sse2_punpcklqdq(p->func, data, tmpXMM);
+ break;
+ case 16:
+ sse2_movdqu(p->func, data, src);
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
+/* this value can be passed for the out_chans argument */
+#define CHANNELS_0001 5
+/* this function will load #chans float values, and will
+ * pad the register with zeroes at least up to out_chans.
+ *
+ * If out_chans is set to CHANNELS_0001, then the fourth
+ * value will be padded with 1. Only pass this value if
+ * chans < 4 or results are undefined.
+ */
+static void emit_load_float32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
+{
+ switch(chans)
+ {
+ case 1:
+ /* a 0 0 0
+ * a 0 0 1
+ */
+ sse_movss(p->func, data, arg0);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_identity(p) );
+ break;
+ case 2:
+ /* 0 0 0 1
+ * a b 0 1
+ */
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_identity(p) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 3:
+ /* Have to jump through some hoops:
+ *
+ * c 0 0 0
+ * c 0 0 1 if out_chans == CHANNELS_0001
+ * 0 0 c 0/1
+ * a b c 0/1
+ */
+ sse_movss(p->func, data, x86_make_disp(arg0, 8));
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
+ sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 4:
+ sse_movups(p->func, data, arg0);
+ break;
+ }
+}
+/* this function behaves like emit_load_float32, but loads
+ 64-bit floating point numbers, converting them to 32-bit
+ ones */
+static void emit_load_float64to32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
+{
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ switch(chans)
+ {
+ case 1:
+ sse2_movsd(p->func, data, arg0);
+ if(out_chans > 1)
+ sse2_cvtpd2ps(p->func, data, data);
+ else
+ sse2_cvtsd2ss(p->func, data, data);
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W) );
+ break;
+ case 2:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_identity(p) );
+ break;
+ case 3:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ if(out_chans > 3)
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ else
+ sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_identity(p) );
+ break;
+ case 4:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ break;
+ }
+}
-static void emit_store_R32G32B32A32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
+static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- sse_movups(p->func, dest, dataXMM);
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, dst_gpr, src_gpr);
+ else
+ {
+ /* TODO: when/on which CPUs is SSE2 actually better than SSE? */
+ if(x86_target_caps(p->func) & X86_SSE2)
+ sse2_movq(p->func, dst_xmm, src_xmm);
+ else
+ sse_movlps(p->func, dst_xmm, src_xmm);
+ }
}
-static void emit_store_R32G32B32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
+static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src)
{
- /* Emit two, shuffle, emit one.
- */
- sse_movlps(p->func, dest, dataXMM);
- sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
- sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
+ emit_mov64(p, dst_gpr, dst_xmm, src, src);
}
-static void emit_store_R32G32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
+static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- sse_movlps(p->func, dest, dataXMM);
+ emit_mov64(p, dst, dst, src_gpr, src_xmm);
}
-static void emit_store_R32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
+static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src)
{
- sse_movss(p->func, dest, dataXMM);
+ if(x86_target_caps(p->func) & X86_SSE2)
+ sse2_movdqu(p->func, dst, src);
+ else
+ sse_movups(p->func, dst, src);
}
+/* TODO: this uses unaligned accesses liberally, which is great on Nehalem,
+ * but may or may not be good on older processors
+ * TODO: may perhaps want to use non-temporal stores here if possible
+ */
+static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size)
+{
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
+ struct x86_reg dataGPR = p->tmp_EAX;
+ struct x86_reg dataGPR2 = p->tmp2_EDX;
+
+ if(size < 8)
+ {
+ switch (size)
+ {
+ case 1:
+ x86_mov8(p->func, dataGPR, src);
+ x86_mov8(p->func, dst, dataGPR);
+ break;
+ case 2:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov16(p->func, dst, dataGPR);
+ break;
+ case 3:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2));
+ x86_mov16(p->func, dst, dataGPR);
+ x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2);
+ break;
+ case 4:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov(p->func, dst, dataGPR);
+ break;
+ case 6:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4));
+ x86_mov(p->func, dst, dataGPR);
+ x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2);
+ break;
+ }
+ }
+ else if(!(x86_target_caps(p->func) & X86_SSE))
+ {
+ unsigned i = 0;
+ assert((size & 3) == 0);
+ for(i = 0; i < size; i += 4)
+ {
+ x86_mov(p->func, dataGPR, x86_make_disp(src, i));
+ x86_mov(p->func, x86_make_disp(dst, i), dataGPR);
+ }
+ }
+ else
+ {
+ switch(size)
+ {
+ case 8:
+ emit_load64(p, dataGPR, dataXMM, src);
+ emit_store64(p, dst, dataGPR, dataXMM);
+ break;
+ case 12:
+ emit_load64(p, dataGPR2, dataXMM, src);
+ x86_mov(p->func, dataGPR, x86_make_disp(src, 8));
+ emit_store64(p, dst, dataGPR2, dataXMM);
+ x86_mov(p->func, x86_make_disp(dst, 8), dataGPR);
+ break;
+ case 16:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dst, dataXMM);
+ break;
+ case 24:
+ emit_mov128(p, dataXMM, src);
+ emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2);
+ break;
+ case 32:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_mov128(p, x86_make_disp(dst, 16), dataXMM2);
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+static boolean translate_attr_convert( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg src,
+ struct x86_reg dst)
-static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
{
- /* Scale by 255.0
- */
- sse_mulps(p->func, dataXMM, get_255(p));
+ const struct util_format_description* input_desc = util_format_description(a->input_format);
+ const struct util_format_description* output_desc = util_format_description(a->output_format);
+ unsigned i;
+ boolean id_swizzle = TRUE;
+ unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE};
+ unsigned needed_chans = 0;
+ unsigned imms[2] = {0, 0x3f800000};
- /* Pack and emit:
- */
- sse2_cvtps2dq(p->func, dataXMM, dataXMM);
- sse2_packssdw(p->func, dataXMM, dataXMM);
- sse2_packuswb(p->func, dataXMM, dataXMM);
- sse_movss(p->func, dest, dataXMM);
-}
+ if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE)
+ return FALSE;
+
+ if(input_desc->channel[0].size & 7)
+ return FALSE;
+ if(input_desc->colorspace != output_desc->colorspace)
+ return FALSE;
+ for(i = 1; i < input_desc->nr_channels; ++i)
+ {
+ if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 1; i < output_desc->nr_channels; ++i)
+ {
+ if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(output_desc->swizzle[i] < 4)
+ swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i];
+ }
-/* Extended swizzles? Maybe later.
- */
-static void emit_swizzle( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg src,
- unsigned char shuffle )
-{
- sse_shufps(p->func, dest, src, shuffle);
-}
+ if((x86_target_caps(p->func) & X86_SSE) && (0
+ || a->output_format == PIPE_FORMAT_R32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
-static boolean translate_attr( struct translate_sse *p,
- const struct translate_element *a,
- struct x86_reg srcECX,
- struct x86_reg dstEAX)
-{
- struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
- switch (a->input_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_load_R32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_load_R32G32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_load_R32G32B32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_load_R32G32B32A32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- break;
- default:
- return FALSE;
+ if(needed_chans > 0)
+ {
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovzx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ /* TODO: this may be inefficient due to get_identity() being used both as a float and integer register */
+ sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+ sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, get_identity(p));
+ break;
+ case 32: /* we lose precision here */
+ sse2_psrld_imm(p->func, dataXMM, 1);
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_inv_255(p);
+ break;
+ case 16:
+ factor = get_inv_65535(p);
+ break;
+ case 32:
+ factor = get_inv_2147483647(p);
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ else if(input_desc->channel[0].size == 32)
+ sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovsx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 24);
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 16);
+ break;
+ case 32: /* we lose precision here */
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_inv_127(p);
+ break;
+ case 16:
+ factor = get_inv_32767(p);
+ break;
+ case 32:
+ factor = get_inv_2147483647(p);
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ break;
+
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64)
+ return FALSE;
+ if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3)
+ {
+ swizzle[3] = UTIL_FORMAT_SWIZZLE_W;
+ needed_chans = CHANNELS_0001;
+ }
+ switch(input_desc->channel[0].size)
+ {
+ case 32:
+ emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ case 64: /* we lose precision here */
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ default:
+ return FALSE;
+ }
+ break;
+ default:
+ return FALSE;
+ }
+
+ if(!id_swizzle)
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) );
+ }
+
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse_movups(p->func, dst, dataXMM);
+ else
+ {
+ if(output_desc->nr_channels >= 2
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movlps(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movss(p->func, dst, dataXMM);
+ else
+ x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 2)
+ {
+ if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+
+ if(output_desc->nr_channels >= 3)
+ {
+ if(output_desc->nr_channels >= 4
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM);
+ else
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 8), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3));
+ sse_movss(p->func, x86_make_disp(dst, 12), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ }
+ return TRUE;
}
+ else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16
+ && output_desc->channel[0].normalized == input_desc->channel[0].normalized
+ && (0
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ ))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned imms[2] = {0, 1};
+
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
- switch (a->output_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_store_R32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_store_R32G32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_store_R32G32B32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_store_R32G32B32A32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- default:
- return FALSE;
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
+
+ if(needed_chans > 0)
+ {
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ sse2_psrlw_imm(p->func, dataXMM, 1);
+ }
+ else
+ sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_movq(p->func, tmpXMM, get_identity(p));
+ sse2_punpcklbw(p->func, tmpXMM, dataXMM);
+ sse2_psllw_imm(p->func, dataXMM, 9);
+ sse2_psrlw_imm(p->func, dataXMM, 8);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ sse2_psrlw_imm(p->func, dataXMM, 7);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ {
+ struct x86_reg t = dataXMM;
+ dataXMM = tmpXMM;
+ tmpXMM = t;
+ }
+ }
+ else
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psraw_imm(p->func, dataXMM, 8);
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ if(output_desc->channel[0].normalized)
+ imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7ffff;
+
+ if(!id_swizzle)
+ sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6));
+ }
+
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse2_movq(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse2_movd(p->func, dst, dataXMM);
+ else
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, dst, tmp);
+ if(output_desc->nr_channels >= 2)
+ x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ if(output_desc->nr_channels >= 2)
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_shr_imm(p->func, tmp, 16);
+ x86_mov16(p->func, x86_make_disp(dst, 2), tmp);
+ }
+ }
+ }
+
+ if(output_desc->nr_channels >= 3)
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 4), tmp);
+ if(output_desc->nr_channels >= 4)
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 48);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 6), tmp);
+ }
+ }
+ }
+ }
+ }
+ return TRUE;
}
+ else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0])))
+ {
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned i;
+ if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4
+ && swizzle[0] == UTIL_FORMAT_SWIZZLE_W
+ && swizzle[1] == UTIL_FORMAT_SWIZZLE_Z
+ && swizzle[2] == UTIL_FORMAT_SWIZZLE_Y
+ && swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
+ {
+ /* TODO: support movbe */
+ x86_mov(p->func, tmp, src);
+ x86_bswap(p->func, tmp);
+ x86_mov(p->func, dst, tmp);
+ return TRUE;
+ }
- return TRUE;
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ switch(output_desc->channel[0].size)
+ {
+ case 8:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[0].normalized ? 0xff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[0].normalized ? 0x7f : 1;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v);
+ }
+ else
+ {
+ x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1));
+ x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp);
+ }
+ break;
+ case 16:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3c00;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v);
+ }
+ else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0)
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0);
+ else
+ {
+ x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2));
+ x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp);
+ }
+ break;
+ case 32:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3f800000;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 4), tmp);
+ }
+ break;
+ case 64:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned l = 0;
+ unsigned h = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ h = output_desc->channel[1].normalized ? 0xffffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ h = output_desc->channel[1].normalized ? 0x7fffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ h = 0x3ff00000;
+ l = 0;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l);
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h);
+ }
+ else
+ {
+ if(x86_target_caps(p->func) & X86_SSE)
+ {
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0);
+ emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8));
+ emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8));
+ x86_mov(p->func, x86_make_disp(dst, i * 8), tmp);
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp);
+ }
+ }
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ return TRUE;
+ }
+ return FALSE;
}
+static boolean translate_attr( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg src,
+ struct x86_reg dst)
+{
+ if(a->input_format == a->output_format)
+ {
+ emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1));
+ return TRUE;
+ }
+
+ return translate_attr_convert(p, a, src, dst);
+}
static boolean init_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
unsigned i;
- struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
+ struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
- if (linear || varient->instance_divisor) {
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ if (!index_size || varient->instance_divisor) {
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->stride));
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->base_ptr));
- struct x86_reg elt = p->idx_EBX;
+ struct x86_reg elt = p->idx_ESI;
struct x86_reg tmp_EAX = p->tmp_EAX;
/* Calculate pointer to first attrib:
@@ -406,20 +1051,16 @@ static boolean init_inputs( struct translate_sse *p,
x86_mov(p->func, tmp_EAX, instance_id);
if (varient->instance_divisor != 1) {
- struct x86_reg tmp_EDX = p->machine_EDX;
- struct x86_reg tmp_ECX = p->outbuf_ECX;
+ struct x86_reg tmp_EDX = p->tmp2_EDX;
+ struct x86_reg tmp_ECX = p->tmp3_ECX;
/* TODO: Add x86_shr() to rtasm and use it whenever
* instance divisor is power of two.
*/
- x86_push(p->func, tmp_EDX);
- x86_push(p->func, tmp_ECX);
x86_xor(p->func, tmp_EDX, tmp_EDX);
x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
- x86_pop(p->func, tmp_ECX);
- x86_pop(p->func, tmp_EDX);
}
} else {
x86_mov(p->func, tmp_EAX, elt);
@@ -430,16 +1071,23 @@ static boolean init_inputs( struct translate_sse *p,
*/
x86_imul(p->func, tmp_EAX, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, tmp_EAX, buf_base_ptr);
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
- if (linear && p->nr_buffer_varients == 1)
+ if (!index_size && p->nr_buffer_varients == 1)
+ {
+ x64_rexw(p->func);
x86_mov(p->func, elt, tmp_EAX);
+ }
else
+ {
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, tmp_EAX);
+ }
}
}
@@ -448,23 +1096,24 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
- boolean linear,
+ unsigned index_size,
unsigned var_idx,
struct x86_reg elt )
{
if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
- return x86_make_disp(p->machine_EDX,
+ return x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
}
- if (linear && p->nr_buffer_varients == 1) {
- return p->idx_EBX;
+ if (!index_size && p->nr_buffer_varients == 1) {
+ return p->idx_ESI;
}
- else if (linear || p->buffer_varient[var_idx].instance_divisor) {
+ else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
struct x86_reg ptr = p->tmp_EAX;
struct x86_reg buf_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer_varient[var_idx].ptr));
+ x64_rexw(p->func);
x86_mov(p->func, ptr, buf_ptr);
return ptr;
}
@@ -473,19 +1122,31 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
struct x86_reg buf_stride =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
struct x86_reg buf_base_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
/* Calculate pointer to current attrib:
*/
- x86_mov(p->func, ptr, buf_stride);
- x86_imul(p->func, ptr, elt);
+ switch(index_size)
+ {
+ case 1:
+ x86_movzx8(p->func, ptr, elt);
+ break;
+ case 2:
+ x86_movzx16(p->func, ptr, elt);
+ break;
+ case 4:
+ x86_mov(p->func, ptr, elt);
+ break;
+ }
+ x86_imul(p->func, ptr, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, ptr, buf_base_ptr);
return ptr;
}
@@ -494,39 +1155,42 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
- if (linear && p->nr_buffer_varients == 1) {
- struct x86_reg stride = x86_make_disp(p->machine_EDX,
+ if (!index_size && p->nr_buffer_varients == 1) {
+ struct x86_reg stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[0].stride));
if (p->buffer_varient[0].instance_divisor == 0) {
- x86_add(p->func, p->idx_EBX, stride);
- sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ x64_rexw(p->func);
+ x86_add(p->func, p->idx_ESI, stride);
+ sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
}
}
- else if (linear) {
+ else if (!index_size) {
unsigned i;
/* Is this worthwhile??
*/
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
if (varient->instance_divisor == 0) {
- x86_mov(p->func, p->tmp_EAX, buf_ptr);
- x86_add(p->func, p->tmp_EAX, buf_stride);
+ x86_mov(p->func, p->tmp_EAX, buf_stride);
+ x64_rexw(p->func);
+ x86_add(p->func, p->tmp_EAX, buf_ptr);
if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, p->tmp_EAX);
}
}
}
else {
- x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
+ x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
}
return TRUE;
@@ -551,35 +1215,51 @@ static boolean incr_inputs( struct translate_sse *p,
*/
static boolean build_vertex_emit( struct translate_sse *p,
struct x86_function *func,
- boolean linear )
+ unsigned index_size )
{
int fixup, label;
unsigned j;
p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
- p->idx_EBX = x86_make_reg(file_REG32, reg_BX);
- p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
- p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
- p->count_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->idx_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX);
+ p->machine_EDI = x86_make_reg(file_REG32, reg_DI);
+ p->count_EBP = x86_make_reg(file_REG32, reg_BP);
+ p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX);
+ p->tmp3_ECX = x86_make_reg(file_REG32, reg_CX);
p->func = func;
- p->loaded_inv_255 = FALSE;
- p->loaded_255 = FALSE;
+ memset(&p->loaded_const, 0, sizeof(p->loaded_const));
p->loaded_identity = FALSE;
x86_init_func(p->func);
- /* Push a few regs?
- */
- x86_push(p->func, p->idx_EBX);
- x86_push(p->func, p->count_ESI);
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ /* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
+ }
- /* Load arguments into regs:
- */
- x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
- x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
- x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
- x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
+ x86_push(p->func, p->outbuf_EBX);
+ x86_push(p->func, p->count_EBP);
+
+/* on non-Win64 x86-64, these are already in the right registers */
+ if(x86_target(p->func) != X86_64_STD_ABI)
+ {
+ x86_push(p->func, p->machine_EDI);
+ x86_push(p->func, p->idx_ESI);
+
+ x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
+ x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
+ }
+
+ x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
+
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
+ else
+ x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
/* Load instance ID.
*/
@@ -588,25 +1268,25 @@ static boolean build_vertex_emit( struct translate_sse *p,
p->tmp_EAX,
x86_fn_arg(p->func, 4));
x86_mov(p->func,
- x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
+ x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
p->tmp_EAX);
}
/* Get vertex count, compare to zero
*/
x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
- x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
+ x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
fixup = x86_jcc_forward(p->func, cc_E);
/* always load, needed or not:
*/
- init_inputs(p, linear);
+ init_inputs(p, index_size);
/* Note address for loop jump
*/
label = x86_get_label(p->func);
{
- struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
+ struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
int last_varient = -1;
struct x86_reg vb;
@@ -618,30 +1298,31 @@ static boolean build_vertex_emit( struct translate_sse *p,
*/
if (varient != last_varient) {
last_varient = varient;
- vb = get_buffer_ptr(p, linear, varient, elt);
+ vb = get_buffer_ptr(p, index_size, varient, elt);
}
if (!translate_attr( p, a,
x86_make_disp(vb, a->input_offset),
- x86_make_disp(p->outbuf_ECX, a->output_offset)))
+ x86_make_disp(p->outbuf_EBX, a->output_offset)))
return FALSE;
}
/* Next output vertex:
*/
+ x64_rexw(p->func);
x86_lea(p->func,
- p->outbuf_ECX,
- x86_make_disp(p->outbuf_ECX,
+ p->outbuf_EBX,
+ x86_make_disp(p->outbuf_EBX,
p->translate.key.output_stride));
/* Incr index
*/
- incr_inputs( p, linear );
+ incr_inputs( p, index_size );
}
/* decr count, loop if not zero
*/
- x86_dec(p->func, p->count_ESI);
+ x86_dec(p->func, p->count_EBP);
x86_jcc(p->func, cc_NZ, label);
/* Exit mmx state?
@@ -656,8 +1337,20 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* Pop regs and return
*/
- x86_pop(p->func, p->count_ESI);
- x86_pop(p->func, p->idx_EBX);
+ if(x86_target(p->func) != X86_64_STD_ABI)
+ {
+ x86_pop(p->func, p->idx_ESI);
+ x86_pop(p->func, p->machine_EDI);
+ }
+
+ x86_pop(p->func, p->count_EBP);
+ x86_pop(p->func, p->outbuf_EBX);
+
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
+ }
x86_ret(p->func);
return TRUE;
@@ -700,43 +1393,14 @@ static void translate_sse_release( struct translate *translate )
FREE(p);
}
-static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run_elts( translate,
- elts,
- count,
- instance_id,
- output_buffer);
-}
-
-static void PIPE_CDECL translate_sse_run( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run( translate,
- start,
- count,
- instance_id,
- output_buffer);
-}
-
struct translate *translate_sse2_create( const struct translate_key *key )
{
struct translate_sse *p = NULL;
unsigned i;
- if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
+ /* this is misnamed, it actually refers to whether rtasm is enabled or not */
+ if (!rtasm_cpu_has_sse())
goto fail;
p = CALLOC_STRUCT( translate_sse );
@@ -746,8 +1410,6 @@ struct translate *translate_sse2_create( const struct translate_key *key )
p->translate.key = *key;
p->translate.release = translate_sse_release;
p->translate.set_buffer = translate_sse_set_buffer;
- p->translate.run_elts = translate_sse_run_elts;
- p->translate.run = translate_sse_run;
for (i = 0; i < key->nr_elements; i++) {
if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
@@ -783,18 +1445,32 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
- if (!build_vertex_emit(p, &p->linear_func, TRUE))
+ if (!build_vertex_emit(p, &p->linear_func, 0))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt_func, 4))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt16_func, 2))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt8_func, 1))
+ goto fail;
+
+ p->translate.run = (void*)x86_get_func(&p->linear_func);
+ if (p->translate.run == NULL)
goto fail;
- if (!build_vertex_emit(p, &p->elt_func, FALSE))
+ p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
+ if (p->translate.run_elts == NULL)
goto fail;
- p->gen_run = (run_func)x86_get_func(&p->linear_func);
- if (p->gen_run == NULL)
+ p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
+ if (p->translate.run_elts16 == NULL)
goto fail;
- p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
- if (p->gen_run_elts == NULL)
+ p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
+ if (p->translate.run_elts8 == NULL)
goto fail;
return &p->translate;
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 6f38d22285..b9b9f9257a 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -73,7 +73,7 @@
#endif
-DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE);
+DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
struct util_cpu_caps util_cpu_caps;
@@ -194,123 +194,8 @@ check_os_altivec_support(void)
}
#endif /* PIPE_ARCH_PPC */
-/* If we're running on a processor that can do SSE, let's see if we
- * are allowed to or not. This will catch 2.4.0 or later kernels that
- * haven't been configured for a Pentium III but are running on one,
- * and RedHat patched 2.2 kernels that have broken exception handling
- * support for user space apps that do SSE.
- */
-#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
-static void
-check_os_katmai_support(void)
-{
-#if defined(PIPE_ARCH_X86)
-#if defined(PIPE_OS_FREEBSD)
- int has_sse=0, ret;
- int len = sizeof (has_sse);
-
- ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
- if (ret || !has_sse)
- util_cpu_caps.has_sse=0;
-
-#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
- int has_sse, has_sse2, ret, mib[2];
- int varlen;
-
- mib[0] = CTL_MACHDEP;
- mib[1] = CPU_SSE;
- varlen = sizeof (has_sse);
-
- ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
- if (ret < 0 || !has_sse) {
- util_cpu_caps.has_sse = 0;
- } else {
- util_cpu_caps.has_sse = 1;
- }
-
- mib[1] = CPU_SSE2;
- varlen = sizeof (has_sse2);
- ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
- if (ret < 0 || !has_sse2) {
- util_cpu_caps.has_sse2 = 0;
- } else {
- util_cpu_caps.has_sse2 = 1;
- }
- util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
-
-#elif defined(PIPE_OS_WINDOWS)
- LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
- if (util_cpu_caps.has_sse) {
- exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
-#if defined(PIPE_CC_GCC)
- __asm __volatile ("xorps %xmm0, %xmm0");
-#elif defined(PIPE_CC_MSVC)
- __asm {
- xorps xmm0, xmm0 /* executing SSE instruction */
- }
-#else
-#error Unsupported compiler
-#endif
- SetUnhandledExceptionFilter(exc_fil);
- }
-#elif defined(PIPE_OS_LINUX)
- struct sigaction saved_sigill;
- struct sigaction saved_sigfpe;
-
- /* Save the original signal handlers.
- */
- sigaction(SIGILL, NULL, &saved_sigill);
- sigaction(SIGFPE, NULL, &saved_sigfpe);
-
- signal(SIGILL, (void (*)(int))sigill_handler_sse);
- signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
-
- /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
- * supports the extended FPU save and restore required for SSE. If
- * we execute an SSE instruction on a PIII and get a SIGILL, the OS
- * doesn't support Streaming SIMD Exceptions, even if the processor
- * does.
- */
- if (util_cpu_caps.has_sse) {
- __asm __volatile ("xorps %xmm1, %xmm0");
- }
-
- /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
- * it supports unmasked SIMD FPU exceptions. If we unmask the
- * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
- * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
- * as expected, we're okay but we need to clean up after it.
- *
- * Are we being too stringent in our requirement that the OS support
- * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
- * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
- * doesn't even support them. We at least know the user-space SSE
- * support is good in kernels that do support unmasked exceptions,
- * and therefore to be safe I'm going to leave this test in here.
- */
- if (util_cpu_caps.has_sse) {
- /* test_os_katmai_exception_support(); */
- }
-
- /* Restore the original signal handlers.
- */
- sigaction(SIGILL, &saved_sigill, NULL);
- sigaction(SIGFPE, &saved_sigfpe, NULL);
-
-#else
- /* We can't use POSIX signal handling to test the availability of
- * SSE, so we disable it by default.
- */
- util_cpu_caps.has_sse = 0;
-#endif /* __linux__ */
-#endif
-
-#if defined(PIPE_ARCH_X86_64)
- util_cpu_caps.has_sse = 1;
-#endif
-}
-
+#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
static int has_cpuid(void)
{
#if defined(PIPE_ARCH_X86)
@@ -391,23 +276,6 @@ util_cpu_detect(void)
memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
- /* Check for arch type */
-#if defined(PIPE_ARCH_MIPS)
- util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS;
-#elif defined(PIPE_ARCH_ALPHA)
- util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA;
-#elif defined(PIPE_ARCH_SPARC)
- util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC;
-#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- util_cpu_caps.arch = UTIL_CPU_ARCH_X86;
- util_cpu_caps.little_endian = 1;
-#elif defined(PIPE_ARCH_PPC)
- util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC;
- util_cpu_caps.little_endian = 0;
-#else
- util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN;
-#endif
-
/* Count the number of CPUs in system */
#if defined(PIPE_OS_WINDOWS)
{
@@ -486,9 +354,6 @@ util_cpu_detect(void)
util_cpu_caps.cacheline = regs2[2] & 0xFF;
}
- if (util_cpu_caps.has_sse)
- check_os_katmai_support();
-
if (!util_cpu_caps.has_sse) {
util_cpu_caps.has_sse2 = 0;
util_cpu_caps.has_sse3 = 0;
@@ -504,7 +369,6 @@ util_cpu_detect(void)
#ifdef DEBUG
if (debug_get_option_dump_cpu()) {
- debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch);
debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index 4b3dc39c34..f3bef0993c 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -36,26 +36,15 @@
#define _UTIL_CPU_DETECT_H
#include "pipe/p_compiler.h"
-
-enum util_cpu_arch {
- UTIL_CPU_ARCH_UNKNOWN = 0,
- UTIL_CPU_ARCH_MIPS,
- UTIL_CPU_ARCH_ALPHA,
- UTIL_CPU_ARCH_SPARC,
- UTIL_CPU_ARCH_X86,
- UTIL_CPU_ARCH_POWERPC
-};
+#include "pipe/p_config.h"
struct util_cpu_caps {
- enum util_cpu_arch arch;
unsigned nr_cpus;
/* Feature flags */
int x86_cpu_type;
unsigned cacheline;
- unsigned little_endian:1;
-
unsigned has_tsc:1;
unsigned has_mmx:1;
unsigned has_mmx2:1;
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index ad162558bc..504e6d2a18 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -88,7 +88,7 @@ debug_get_option_should_print(void)
* but its cool since we set first to false
*/
first = FALSE;
- value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE);
+ value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", FALSE);
/* XXX should we print this option? Currently it wont */
return value;
}
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 38254b1096..8e786a390a 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -631,6 +631,44 @@ util_format_has_alpha(enum pipe_format format)
}
/**
+ * Return the matching SRGB format, or PIPE_FORMAT_NONE if none.
+ */
+static INLINE enum pipe_format
+util_format_srgb(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_L8_UNORM:
+ return PIPE_FORMAT_L8_SRGB;
+ case PIPE_FORMAT_L8A8_UNORM:
+ return PIPE_FORMAT_L8A8_SRGB;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return PIPE_FORMAT_R8G8B8_SRGB;
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ return PIPE_FORMAT_A8B8G8R8_SRGB;
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ return PIPE_FORMAT_X8B8G8R8_SRGB;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return PIPE_FORMAT_B8G8R8A8_SRGB;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ return PIPE_FORMAT_B8G8R8X8_SRGB;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return PIPE_FORMAT_A8R8G8B8_SRGB;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return PIPE_FORMAT_X8R8G8B8_SRGB;
+ case PIPE_FORMAT_DXT1_RGB:
+ return PIPE_FORMAT_DXT1_SRGB;
+ case PIPE_FORMAT_DXT1_RGBA:
+ return PIPE_FORMAT_DXT1_SRGBA;
+ case PIPE_FORMAT_DXT3_RGBA:
+ return PIPE_FORMAT_DXT3_SRGBA;
+ case PIPE_FORMAT_DXT5_RGBA:
+ return PIPE_FORMAT_DXT5_SRGBA;
+ default:
+ return PIPE_FORMAT_NONE;
+ }
+}
+
+/**
* Return the number of components stored.
* Formats with block size != 1x1 will always have 1 component (the block).
*/
diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c
index 768ae9ceb5..7803ec6a8b 100644
--- a/src/gallium/auxiliary/util/u_framebuffer.c
+++ b/src/gallium/auxiliary/util/u_framebuffer.c
@@ -85,9 +85,11 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst,
dst->width = src->width;
dst->height = src->height;
- for (i = 0; i < Elements(src->cbufs); i++) {
+ for (i = 0; i < src->nr_cbufs; i++)
pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
- }
+
+ for (i = src->nr_cbufs; i < dst->nr_cbufs; i++)
+ pipe_surface_reference(&dst->cbufs[i], NULL);
dst->nr_cbufs = src->nr_cbufs;
diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
new file mode 100644
index 0000000000..206e1ec311
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -0,0 +1,105 @@
+/* Originally written by Ben Skeggs for the nv50 driver*/
+#include <pipe/p_defines.h>
+
+struct util_split_prim {
+ void *priv;
+ void (*emit)(void *priv, unsigned start, unsigned count);
+ void (*edge)(void *priv, boolean enabled);
+
+ unsigned mode;
+ unsigned start;
+ unsigned p_start;
+ unsigned p_end;
+
+ uint repeat_first:1;
+ uint close_first:1;
+ uint edgeflag_off:1;
+};
+
+static INLINE void
+util_split_prim_init(struct util_split_prim *s,
+ unsigned mode, unsigned start, unsigned count)
+{
+ if (mode == PIPE_PRIM_LINE_LOOP) {
+ s->mode = PIPE_PRIM_LINE_STRIP;
+ s->close_first = 1;
+ } else {
+ s->mode = mode;
+ s->close_first = 0;
+ }
+ s->start = start;
+ s->p_start = start;
+ s->p_end = start + count;
+ s->edgeflag_off = 0;
+ s->repeat_first = 0;
+}
+
+static INLINE boolean
+util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
+{
+ int repeat = 0;
+
+ if (s->repeat_first) {
+ s->emit(s->priv, s->start, 1);
+ max_verts--;
+ if (s->edgeflag_off) {
+ s->edge(s->priv, TRUE);
+ s->edgeflag_off = FALSE;
+ }
+ }
+
+ if (s->p_start + s->close_first + max_verts >= s->p_end) {
+ s->emit(s->priv, s->p_start, s->p_end - s->p_start);
+ if (s->close_first)
+ s->emit(s->priv, s->start, 1);
+ return TRUE;
+ }
+
+ switch (s->mode) {
+ case PIPE_PRIM_LINES:
+ max_verts &= ~1;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ repeat = 1;
+ break;
+ case PIPE_PRIM_POLYGON:
+ max_verts--;
+ s->emit(s->priv, s->p_start, max_verts);
+ s->edge(s->priv, FALSE);
+ s->emit(s->priv, s->p_start + max_verts, 1);
+ s->p_start += max_verts;
+ s->repeat_first = TRUE;
+ s->edgeflag_off = TRUE;
+ return FALSE;
+ case PIPE_PRIM_TRIANGLES:
+ max_verts = max_verts - (max_verts % 3);
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* to ensure winding stays correct, always split
+ * on an even number of generated triangles
+ */
+ max_verts = max_verts & ~1;
+ repeat = 2;
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ s->repeat_first = TRUE;
+ repeat = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ max_verts &= ~3;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ max_verts &= ~1;
+ repeat = 2;
+ break;
+ case PIPE_PRIM_POINTS:
+ break;
+ default:
+ /* TODO: implement adjacency primitives */
+ assert(0);
+ }
+
+ s->emit (s->priv, s->p_start, max_verts);
+ s->p_start += (max_verts - repeat);
+ return FALSE;
+}
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index 6145e34aa3..87959ab0aa 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -71,6 +71,35 @@ _mm_castps_si128(__m128 a)
#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+
+#if defined(PIPE_ARCH_SSSE3)
+
+#include <tmmintrin.h>
+
+#else /* !PIPE_ARCH_SSSE3 */
+
+#include <emmintrin.h>
+
+/**
+ * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
+ * where -mssse3 is not supported/enabled.
+ *
+ * MSVC will never get in here as its intrinsics support do not rely on
+ * compiler command line options.
+ */
+static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8(__m128i a, __m128i mask)
+{
+ __m128i result;
+ __asm__("pshufb %1, %0"
+ : "=x" (result)
+ : "xm" (mask), "0" (a));
+ return result;
+}
+
+#endif /* !PIPE_ARCH_SSSE3 */
+
+
#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
new file mode 100644
index 0000000000..607c31f5ee
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -0,0 +1,95 @@
+#include "util/u_staging.h"
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+static void
+util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template)
+{
+ memset(template, 0, sizeof(struct pipe_resource));
+ if(pt->target != PIPE_BUFFER && depth <= 1)
+ template->target = PIPE_TEXTURE_2D;
+ else
+ template->target = pt->target;
+ template->format = pt->format;
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = depth;
+ template->last_level = 0;
+ template->nr_samples = pt->nr_samples;
+ template->bind = 0;
+ template->usage = PIPE_USAGE_STAGING;
+ template->flags = 0;
+}
+
+struct util_staging_transfer *
+util_staging_transfer_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ bool direct)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+ struct util_staging_transfer *tx;
+ struct pipe_resource staging_resource_template;
+
+ tx = CALLOC_STRUCT(util_staging_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->base.resource, pt);
+ tx->base.sr = sr;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ if (direct)
+ {
+ tx->staging_resource = pt;
+ return tx;
+ }
+
+ util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template);
+ tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template);
+ if (!tx->staging_resource)
+ {
+ pipe_resource_reference(&tx->base.resource, NULL);
+ FREE(tx);
+ return NULL;
+ }
+
+ if (usage & PIPE_TRANSFER_READ)
+ {
+ struct pipe_subresource dstsr;
+ unsigned zi;
+ dstsr.face = 0;
+ dstsr.level = 0;
+ for(zi = 0; zi < box->depth; ++zi)
+ pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height);
+ }
+
+ return tx;
+}
+
+void
+util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx;
+
+ if (tx->staging_resource != tx->base.resource)
+ {
+ if(tx->base.usage & PIPE_TRANSFER_WRITE) {
+ struct pipe_subresource srcsr;
+ unsigned zi;
+ srcsr.face = 0;
+ srcsr.level = 0;
+ for(zi = 0; zi < tx->base.box.depth; ++zi)
+ pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height);
+ }
+
+ pipe_resource_reference(&tx->staging_resource, NULL);
+ }
+
+ pipe_resource_reference(&ptx->resource, NULL);
+ FREE(ptx);
+}
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
new file mode 100644
index 0000000000..602faa2971
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -0,0 +1,37 @@
+/* Direct3D 10/11 has no concept of transfers. Applications instead
+ * create resources with a STAGING or DYNAMIC usage, copy between them
+ * and the real resource and use Map to map the STAGING/DYNAMIC resource.
+ *
+ * This util module allows to implement Gallium drivers as a Direct3D
+ * driver would be implemented: transfers allocate a resource with
+ * PIPE_USAGE_STAGING, and copy the data between it and the real resource
+ * with resource_copy_region.
+ */
+
+#ifndef U_STAGING_H
+#define U_STAGING_H
+
+#include "pipe/p_state.h"
+
+struct util_staging_transfer {
+ struct pipe_transfer base;
+
+ /* if direct, same as base.resource, otherwise the temporary staging resource */
+ struct pipe_resource *staging_resource;
+};
+
+/* user must be stride, slice_stride and offset */
+/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */
+/* staging resource is currently created with PIPE_USAGE_DYNAMIC */
+struct util_staging_transfer *
+util_staging_transfer_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ bool direct);
+
+void
+util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
index b5d21570d5..7733ad24d0 100644
--- a/src/gallium/auxiliary/util/u_surfaces.c
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -3,40 +3,22 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
-/* TODO: ouch, util_hash_table should do these by default when passed a null function pointer
- * this indirect function call is quite bad
- */
-static unsigned
-hash(void *key)
-{
- return (unsigned)(uintptr_t)key;
-}
-
-static int
-compare(void *key1, void *key2)
-{
- return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2;
-}
-
struct pipe_surface *
util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
{
struct pipe_surface *ps;
- void *key = NULL;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- if(!us->u.table)
- us->u.table = util_hash_table_create(hash, compare);
- key = (void *)(uintptr_t)(((zslice + face) << 8) | level);
- /* TODO: ouch, should have a get-reference function...
- * also, shouldn't allocate a two-pointer structure for each item... */
- ps = util_hash_table_get(us->u.table, key);
+ { /* or 2D array */
+ if(!us->u.hash)
+ us->u.hash = cso_hash_create();
+
+ ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level));
}
else
{
if(!us->u.array)
- us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
+ us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
ps = us->u.array[level];
}
@@ -54,7 +36,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str
ps->offset = ~0;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- util_hash_table_set(us->u.table, key, ps);
+ cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps);
else
us->u.array[level] = ps;
@@ -66,47 +48,44 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps)
{
struct pipe_resource *pt = ps->texture;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level);
- util_hash_table_remove(us->u.table, key);
+ { /* or 2D array */
+ cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, ((ps->zslice + ps->face) << 8) | ps->level));
}
else
us->u.array[ps->level] = 0;
}
-static enum pipe_error
-util_surfaces_destroy_callback(void *key, void *value, void *data)
-{
- void (*destroy_surface) (struct pipe_surface * ps) = data;
- destroy_surface((struct pipe_surface *)value);
- return PIPE_OK;
-}
-
void
util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *))
{
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- if(us->u.table)
+ { /* or 2D array */
+ if(us->u.hash)
{
- util_hash_table_foreach(us->u.table, util_surfaces_destroy_callback, destroy_surface);
- util_hash_table_destroy(us->u.table);
- us->u.table = NULL;
+ struct cso_hash_iter iter;
+ iter = cso_hash_first_node(us->u.hash);
+ while (!cso_hash_iter_is_null(iter)) {
+ destroy_surface(cso_hash_iter_data(iter));
+ iter = cso_hash_iter_next(iter);
+ }
+
+ cso_hash_delete(us->u.hash);
+ us->u.hash = NULL;
}
}
else
{
if(us->u.array)
{
- unsigned i;
- for(i = 0; i < pt->last_level; ++i)
- {
- struct pipe_surface *ps = us->u.array[i];
- if(ps)
- destroy_surface(ps);
- }
- FREE(us->u.array);
- us->u.array = NULL;
+ unsigned i;
+ for(i = 0; i <= pt->last_level; ++i)
+ {
+ struct pipe_surface *ps = us->u.array[i];
+ if(ps)
+ destroy_surface(ps);
+ }
+ FREE(us->u.array);
+ us->u.array = NULL;
}
}
}
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index 0195bf5afb..af978c7057 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -4,15 +4,15 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "util/u_atomic.h"
-
-struct util_hash_table;
+#include "cso_cache/cso_hash.h"
struct util_surfaces
{
union
{
- struct util_hash_table *table;
+ struct cso_hash *hash;
struct pipe_surface **array;
+ void* pv;
} u;
};
@@ -35,6 +35,18 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct
return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags);
}
+static INLINE struct pipe_surface *
+util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice)
+{
+ if(!us->u.pv)
+ return 0;
+
+ if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE))
+ return cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level));
+ else
+ return us->u.array[level];
+}
+
void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
static INLINE void
diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py
index 99e665234e..0846e7d0ec 100644
--- a/src/gallium/docs/source/conf.py
+++ b/src/gallium/docs/source/conf.py
@@ -22,7 +22,7 @@ sys.path.append(os.path.abspath('exts'))
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.pngmath', 'tgsi']
+extensions = ['sphinx.ext.pngmath', 'formatting']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
diff --git a/src/gallium/docs/source/debugging.rst b/src/gallium/docs/source/debugging.rst
new file mode 100644
index 0000000000..42bda5aee9
--- /dev/null
+++ b/src/gallium/docs/source/debugging.rst
@@ -0,0 +1,101 @@
+Debugging
+=========
+
+Debugging utilities in gallium.
+
+Debug Variables
+^^^^^^^^^^^^^^^
+
+All drivers respond to a set of common debug environment variables, as well as
+some driver-specific variables. Set them as normal environment variables for
+the platform or operating system you are running. For example, for Linux this
+can be done by typing "export var=value" into a console and then running the
+program from that console.
+
+Common
+""""""
+
+.. envvar:: GALLIUM_PRINT_OPTIONS <bool> (false)
+
+This option controls if the debug variables should be printed to stderr. This
+is probably the most useful variable, since it allows you to find which
+variables a driver uses.
+
+.. envvar:: GALLIUM_RBUG <bool> (false)
+
+Controls if the :ref:`rbug` should be used.
+
+.. envvar:: GALLIUM_TRACE <string> ("")
+
+If set, this variable will cause the :ref:`Trace` output to be written to the
+specified file. Paths may be relative or absolute; relative paths are relative
+to the working directory. For example, setting it to "trace.xml" will cause
+the trace to be written to a file of the same name in the working directory.
+
+.. envvar:: GALLIUM_DUMP_CPU <bool> (false)
+
+Dump information about the current CPU that the driver is running on.
+
+.. envvar:: TGSI_PRINT_SANITY <bool> (false)
+
+Gallium has a built-in shader sanity checker. This option controls whether
+the shader sanity checker prints its warnings and errors to stderr.
+
+.. envvar:: DRAW_USE_LLVM <bool> (false)
+
+Whether the :ref:`Draw` module will attempt to use LLVM for vertex and geometry shaders.
+
+
+State tracker-specific
+""""""""""""""""""""""
+
+.. envvar:: ST_DEBUG <flags> (0x0)
+
+Debug :ref:`flags` for the GL state tracker.
+
+
+Driver-specific
+"""""""""""""""
+
+.. envvar:: I915_DEBUG <flags> (0x0)
+
+Debug :ref:`flags` for the i915 driver.
+
+.. envvar:: I915_NO_HW <bool> (false)
+
+Stop the i915 driver from submitting commands to the hardware.
+
+.. envvar:: I915_DUMP_CMD <bool> (false)
+
+Dump all commands going to the hardware.
+
+.. envvar:: LP_DEBUG <flags> (0x0)
+
+Debug :ref:`flags` for the llvmpipe driver.
+
+.. envvar:: LP_NUM_THREADS <int> (number of CPUs)
+
+Number of threads that the llvmpipe driver should use.
+
+
+.. _flags:
+
+Flags
+"""""
+
+The variables of type "flags" all take a string with comma-separated flags to
+enable different debugging for different parts of the drivers or state
+tracker. If set to "help", the driver will print a list of flags which the
+variable accepts. Order does not matter.
+
+
+.. _rbug:
+
+Remote Debugger
+^^^^^^^^^^^^^^^
+
+The remote debugger, commonly known as rbug, allows for runtime inspections of
+:ref:`Context`, :ref:`Screen`, :ref:`Resource` and :ref:`Shader` objects; and
+pausing and stepping of :ref:`Draw` calls. Is used with rbug-gui which is
+hosted outside of the main mesa repository. rbug is can be used over a network
+connection, so the debugger does not need to be on the same machine.
diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst
index e379ad3271..70d75b51e6 100644
--- a/src/gallium/docs/source/distro.rst
+++ b/src/gallium/docs/source/distro.rst
@@ -74,6 +74,11 @@ Trace
Wrapper driver. Trace dumps an XML record of the calls made to the
:ref:`Context` and :ref:`Screen` objects that it wraps.
+Rbug
+^^^^
+
+Wrapper driver. :ref:`rbug` driver used with stand alone rbug-gui.
+
State Trackers
--------------
diff --git a/src/gallium/docs/source/exts/formatting.py b/src/gallium/docs/source/exts/formatting.py
new file mode 100644
index 0000000000..14865f3603
--- /dev/null
+++ b/src/gallium/docs/source/exts/formatting.py
@@ -0,0 +1,31 @@
+# formatting.py
+# Sphinx extension providing formatting for Gallium-specific data
+# (c) Corbin Simpson 2010
+# Public domain to the extent permitted; contact author for special licensing
+
+import docutils.nodes
+import sphinx.addnodes
+
+def parse_envvar(env, sig, signode):
+ envvar, t, default = sig.split(" ", 2)
+ envvar = envvar.strip().upper()
+ t = " Type: %s" % t.strip(" <>").lower()
+ default = " Default: %s" % default.strip(" ()")
+ signode += sphinx.addnodes.desc_name(envvar, envvar)
+ signode += sphinx.addnodes.desc_type(t, t)
+ signode += sphinx.addnodes.desc_annotation(default, default)
+ return envvar
+
+def parse_opcode(env, sig, signode):
+ opcode, desc = sig.split("-", 1)
+ opcode = opcode.strip().upper()
+ desc = " (%s)" % desc.strip()
+ signode += sphinx.addnodes.desc_name(opcode, opcode)
+ signode += sphinx.addnodes.desc_annotation(desc, desc)
+ return opcode
+
+def setup(app):
+ app.add_description_unit("envvar", "envvar", "%s (environment variable)",
+ parse_envvar)
+ app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)",
+ parse_opcode)
diff --git a/src/gallium/docs/source/exts/tgsi.py b/src/gallium/docs/source/exts/tgsi.py
deleted file mode 100644
index e92cd5c4d1..0000000000
--- a/src/gallium/docs/source/exts/tgsi.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# tgsi.py
-# Sphinx extension providing formatting for TGSI opcodes
-# (c) Corbin Simpson 2010
-
-import docutils.nodes
-import sphinx.addnodes
-
-def parse_opcode(env, sig, signode):
- opcode, desc = sig.split("-", 1)
- opcode = opcode.strip().upper()
- desc = " (%s)" % desc.strip()
- signode += sphinx.addnodes.desc_name(opcode, opcode)
- signode += sphinx.addnodes.desc_annotation(desc, desc)
- return opcode
-
-def setup(app):
- app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", parse_opcode)
diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst
index 54bc883fce..6c19842dac 100644
--- a/src/gallium/docs/source/index.rst
+++ b/src/gallium/docs/source/index.rst
@@ -12,6 +12,7 @@ Contents:
:maxdepth: 2
intro
+ debugging
tgsi
screen
context
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 78744da500..2cf6f38c4b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -141,7 +141,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
else {
dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
- dadxy = LLVMBuildAdd(builder, dadx, dady, "");
+ dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
attrib_name(dadx, attrib, chan, ".dadx");
attrib_name(dady, attrib, chan, ".dady");
attrib_name(dadxy, attrib, chan, ".dadxy");
@@ -177,7 +177,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* dadq2 = 2 * dq
*/
- dadq2 = LLVMBuildAdd(builder, dadq, dadq, "");
+ dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
/*
* a = a0 + x * dadx + y * dady
@@ -193,12 +193,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
a = a0;
if (interp != LP_INTERP_CONSTANT &&
interp != LP_INTERP_FACING) {
- a = LLVMBuildAdd(builder, a,
- LLVMBuildMul(builder, bld->x, dadx, ""),
- "");
- a = LLVMBuildAdd(builder, a,
- LLVMBuildMul(builder, bld->y, dady, ""),
- "");
+ LLVMValueRef tmp;
+ tmp = LLVMBuildFMul(builder, bld->x, dadx, "");
+ a = LLVMBuildFAdd(builder, a, tmp, "");
+ tmp = LLVMBuildFMul(builder, bld->y, dady, "");
+ a = LLVMBuildFAdd(builder, a, tmp, "");
}
}
@@ -212,7 +211,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* Compute the attrib values on the upper-left corner of each quad.
*/
- a = LLVMBuildAdd(builder, a, dadq2, "");
+ a = LLVMBuildFAdd(builder, a, dadq2, "");
/*
* a *= 1 / w
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 28793682ed..7543bd7b2b 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -47,7 +47,7 @@
#include "lp_setup.h"
-DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE)
static void llvmpipe_destroy( struct pipe_context *pipe )
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index eaf2a6f334..102e902d02 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -104,9 +104,6 @@ struct lp_rast_plane {
int dcdx;
int dcdy;
-
- /* edge/step info for 3 edges and 4x4 block of pixels */
- const int *step;
};
/**
@@ -119,8 +116,6 @@ struct lp_rast_triangle {
/* inputs for the shader */
struct lp_rast_shader_inputs inputs;
- int step[3][16];
-
#ifdef DEBUG
float v[3][2];
#endif
@@ -261,5 +256,9 @@ void lp_rast_begin_query(struct lp_rasterizer_task *,
void lp_rast_end_query(struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg);
+
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index ebe9a8e92b..673f67386b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -37,52 +37,6 @@
#include "lp_tile_soa.h"
-/**
- * Map an index in [0,15] to an x,y position, multiplied by 4.
- * This is used to get the position of each subtile in a 4x4
- * grid of edge step values.
- * Note: we can use some bit twiddling to compute these values instead
- * of using a look-up table, but there's no measurable performance
- * difference.
- */
-static const int pos_table4[16][2] = {
- { 0, 0 },
- { 4, 0 },
- { 0, 4 },
- { 4, 4 },
- { 8, 0 },
- { 12, 0 },
- { 8, 4 },
- { 12, 4 },
- { 0, 8 },
- { 4, 8 },
- { 0, 12 },
- { 4, 12 },
- { 8, 8 },
- { 12, 8 },
- { 8, 12 },
- { 12, 12 }
-};
-
-
-static const int pos_table16[16][2] = {
- { 0, 0 },
- { 16, 0 },
- { 0, 16 },
- { 16, 16 },
- { 32, 0 },
- { 48, 0 },
- { 32, 16 },
- { 48, 16 },
- { 0, 32 },
- { 16, 32 },
- { 0, 48 },
- { 16, 48 },
- { 32, 32 },
- { 48, 32 },
- { 32, 48 },
- { 48, 48 }
-};
/**
@@ -113,6 +67,68 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
+
+static INLINE unsigned
+build_mask(int c, int dcdx, int dcdy)
+{
+ int mask = 0;
+
+ int c0 = c;
+ int c1 = c0 + dcdx;
+ int c2 = c1 + dcdx;
+ int c3 = c2 + dcdx;
+
+ mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0);
+ mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2);
+ mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8);
+ mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10);
+ mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1);
+ mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3);
+ mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9);
+ mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11);
+ mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4);
+ mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6);
+ mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12);
+ mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14);
+ mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5);
+ mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7);
+ mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13);
+ mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15);
+
+ return mask;
+}
+
+static INLINE unsigned
+build_mask_linear(int c, int dcdx, int dcdy)
+{
+ int mask = 0;
+
+ int c0 = c;
+ int c1 = c0 + dcdy;
+ int c2 = c1 + dcdy;
+ int c3 = c2 + dcdy;
+
+ mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0);
+ mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1);
+ mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2);
+ mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3);
+ mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4);
+ mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5);
+ mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6);
+ mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7);
+ mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8);
+ mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9);
+ mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10);
+ mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11);
+ mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12);
+ mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13);
+ mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14);
+ mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15);
+
+ return mask;
+}
+
+
#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"
@@ -141,3 +157,85 @@ block_full_16(struct lp_rasterizer_task *task,
#define NR_PLANES 7
#include "lp_rast_tri_tmp.h"
+
+/* Special case for 3 plane triangle which is contained entirely
+ * within a 16x16 block.
+ */
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = tri->plane;
+ unsigned mask = arg.triangle.plane_mask;
+ const int x = task->x + (mask & 0xf) * 16;
+ const int y = task->y + (mask >> 4) * 16;
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned j;
+ int c[3];
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < 3; j++) {
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+
+ {
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ const int cox = c[j] + plane[j].eo * 4;
+ const int cio = c[j] + plane[j].ei * 4 - 1;
+
+ outmask |= build_mask_linear(cox, dcdx, dcdy);
+ partmask |= build_mask_linear(cio, dcdx, dcdy);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ int cx[3];
+
+ partial_mask &= ~(1 << i);
+
+ for (j = 0; j < 3; j++)
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
+
+ do_block_4_3(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+
+ inmask &= ~(1 << i);
+
+ block_full_4(task, tri, px, py);
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index a410c611a3..43f72d8ca8 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -46,19 +46,13 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
int x, int y,
const int *c)
{
- unsigned mask = 0;
- int i;
+ unsigned mask = 0xffff;
+ int j;
- for (i = 0; i < 16; i++) {
- int any_negative = 0;
- int j;
-
- for (j = 0; j < NR_PLANES; j++)
- any_negative |= (c[j] - 1 + plane[j].step[i]);
-
- any_negative >>= 31;
-
- mask |= (~any_negative) & (1 << i);
+ for (j = 0; j < NR_PLANES; j++) {
+ mask &= ~build_mask(c[j] - 1,
+ -plane[j].dcdx,
+ plane[j].dcdy);
}
/* Now pass to the shader:
@@ -79,24 +73,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int *c)
{
unsigned outmask, inmask, partmask, partial_mask;
- unsigned i, j;
+ unsigned j;
outmask = 0; /* outside one or more trivial reject planes */
partmask = 0; /* outside one or more trivial accept planes */
for (j = 0; j < NR_PLANES; j++) {
- const int *step = plane[j].step;
- const int eo = plane[j].eo * 4;
- const int ei = plane[j].ei * 4;
- const int cox = c[j] + eo;
- const int cio = ei - 1 - eo;
-
- for (i = 0; i < 16; i++) {
- int out = cox + step[i] * 4;
- int part = out + cio;
- outmask |= (out >> 31) & (1 << i);
- partmask |= (part >> 31) & (1 << i);
- }
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ const int cox = c[j] + plane[j].eo * 4;
+ const int cio = c[j] + plane[j].ei * 4 - 1;
+
+ outmask |= build_mask_linear(cox, dcdx, dcdy);
+ partmask |= build_mask_linear(cio, dcdx, dcdy);
}
if (outmask == 0xffff)
@@ -117,15 +106,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
*/
while (partial_mask) {
int i = ffs(partial_mask) - 1;
- int px = x + pos_table4[i][0];
- int py = y + pos_table4[i][1];
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
int cx[NR_PLANES];
- for (j = 0; j < NR_PLANES; j++)
- cx[j] = c[j] + plane[j].step[i] * 4;
-
partial_mask &= ~(1 << i);
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
+
TAG(do_block_4)(task, tri, plane, px, py, cx);
}
@@ -133,8 +126,10 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
*/
while (inmask) {
int i = ffs(inmask) - 1;
- int px = x + pos_table4[i][0];
- int py = y + pos_table4[i][1];
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
inmask &= ~(1 << i);
@@ -157,35 +152,28 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
struct lp_rast_plane plane[NR_PLANES];
int c[NR_PLANES];
unsigned outmask, inmask, partmask, partial_mask;
- unsigned i, j, nr_planes = 0;
+ unsigned j = 0;
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
while (plane_mask) {
int i = ffs(plane_mask) - 1;
- plane[nr_planes] = tri->plane[i];
+ plane[j] = tri->plane[i];
plane_mask &= ~(1 << i);
- nr_planes++;
- };
-
- assert(nr_planes == NR_PLANES);
- outmask = 0; /* outside one or more trivial reject planes */
- partmask = 0; /* outside one or more trivial accept planes */
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
- for (j = 0; j < NR_PLANES; j++) {
- const int *step = plane[j].step;
- const int eo = plane[j].eo * 16;
- const int ei = plane[j].ei * 16;
- int cox, cio;
+ {
+ const int dcdx = -plane[j].dcdx * 16;
+ const int dcdy = plane[j].dcdy * 16;
+ const int cox = c[j] + plane[j].eo * 16;
+ const int cio = c[j] + plane[j].ei * 16 - 1;
- c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
- cox = c[j] + eo;
- cio = ei - 1 - eo;
-
- for (i = 0; i < 16; i++) {
- int out = cox + step[i] * 16;
- int part = out + cio;
- outmask |= (out >> 31) & (1 << i);
- partmask |= (part >> 31) & (1 << i);
+ outmask |= build_mask_linear(cox, dcdx, dcdy);
+ partmask |= build_mask_linear(cio, dcdx, dcdy);
}
+
+ j++;
}
if (outmask == 0xffff)
@@ -206,12 +194,16 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
*/
while (partial_mask) {
int i = ffs(partial_mask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
+ int ix = (i & 3) * 16;
+ int iy = (i >> 2) * 16;
+ int px = x + ix;
+ int py = y + iy;
int cx[NR_PLANES];
for (j = 0; j < NR_PLANES; j++)
- cx[j] = c[j] + plane[j].step[i] * 16;
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
partial_mask &= ~(1 << i);
@@ -223,8 +215,10 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
*/
while (inmask) {
int i = ffs(inmask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
+ int ix = (i & 3) * 16;
+ int iy = (i >> 2) * 16;
+ int px = x + ix;
+ int py = y + iy;
inmask &= ~(1 << i);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 7e432503c1..614a6372b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -61,36 +61,6 @@ struct tri_info {
-static const int step_scissor_minx[16] = {
- 0, 1, 0, 1,
- 2, 3, 2, 3,
- 0, 1, 0, 1,
- 2, 3, 2, 3
-};
-
-static const int step_scissor_maxx[16] = {
- 0, -1, 0, -1,
- -2, -3, -2, -3,
- 0, -1, 0, -1,
- -2, -3, -2, -3
-};
-
-static const int step_scissor_miny[16] = {
- 0, 0, 1, 1,
- 0, 0, 1, 1,
- 2, 2, 3, 3,
- 2, 2, 3, 3
-};
-
-static const int step_scissor_maxy[16] = {
- 0, 0, -1, -1,
- 0, 0, -1, -1,
- -2, -2, -3, -3,
- -2, -2, -3, -3
-};
-
-
-
static INLINE int
subpixel_snap(float a)
@@ -260,13 +230,13 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
{
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
+ unsigned i;
/* setup interpolation for all the remaining attributes:
*/
for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
unsigned vert_attr = setup->fs.input[slot].src_index;
unsigned usage_mask = setup->fs.input[slot].usage_mask;
- unsigned i;
switch (setup->fs.input[slot].interp) {
case LP_INTERP_CONSTANT:
@@ -316,6 +286,34 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
+
+ if (0) {
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ float a0 = tri->inputs.a0 [0][i];
+ float dadx = tri->inputs.dadx[0][i];
+ float dady = tri->inputs.dady[0][i];
+
+ debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
+ "xyzw"[i],
+ a0, dadx, dady);
+ }
+
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ float a0 = tri->inputs.a0 [1 + slot][i];
+ float dadx = tri->inputs.dadx[1 + slot][i];
+ float dady = tri->inputs.dady[1 + slot][i];
+
+ debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
+ slot,
+ "xyzw"[i],
+ a0, dadx, dady);
+ }
+ }
+ }
+ }
}
@@ -525,7 +523,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
info.dx20 = info.v2[0][0] - info.v0[0][0];
info.dy01 = info.v0[0][1] - info.v1[0][1];
info.dy20 = info.v2[0][1] - info.v0[0][1];
- info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
+ info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
info.frontfacing = frontfacing;
/* Setup parameter interpolants:
@@ -590,35 +588,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Calculate trivial accept offsets from the above.
*/
plane->ei = plane->dcdy - plane->dcdx - plane->eo;
-
- plane->step = tri->step[i];
-
- /* Fill in the inputs.step[][] arrays.
- * We've manually unrolled some loops here.
- */
-#define SETUP_STEP(j, x, y) \
- tri->step[i][j] = y * plane->dcdy - x * plane->dcdx
-
- SETUP_STEP(0, 0, 0);
- SETUP_STEP(1, 1, 0);
- SETUP_STEP(2, 0, 1);
- SETUP_STEP(3, 1, 1);
-
- SETUP_STEP(4, 2, 0);
- SETUP_STEP(5, 3, 0);
- SETUP_STEP(6, 2, 1);
- SETUP_STEP(7, 3, 1);
-
- SETUP_STEP(8, 0, 2);
- SETUP_STEP(9, 1, 2);
- SETUP_STEP(10, 0, 3);
- SETUP_STEP(11, 1, 3);
-
- SETUP_STEP(12, 2, 2);
- SETUP_STEP(13, 3, 2);
- SETUP_STEP(14, 2, 3);
- SETUP_STEP(15, 3, 3);
-#undef STEP
}
@@ -641,28 +610,24 @@ do_triangle_ccw(struct lp_setup_context *setup,
* these planes elsewhere.
*/
if (nr_planes == 7) {
- tri->plane[3].step = step_scissor_minx;
tri->plane[3].dcdx = -1;
tri->plane[3].dcdy = 0;
tri->plane[3].c = 1-minx;
tri->plane[3].ei = 0;
tri->plane[3].eo = 1;
- tri->plane[4].step = step_scissor_maxx;
tri->plane[4].dcdx = 1;
tri->plane[4].dcdy = 0;
tri->plane[4].c = maxx;
tri->plane[4].ei = -1;
tri->plane[4].eo = 0;
- tri->plane[5].step = step_scissor_miny;
tri->plane[5].dcdx = 0;
tri->plane[5].dcdy = 1;
tri->plane[5].c = 1-miny;
tri->plane[5].ei = 0;
tri->plane[5].eo = 1;
- tri->plane[6].step = step_scissor_maxy;
tri->plane[6].dcdx = 0;
tri->plane[6].dcdy = -1;
tri->plane[6].c = maxy;
@@ -678,6 +643,26 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Convert to tile coordinates, and inclusive ranges:
*/
+ if (nr_planes == 3) {
+ int ix0 = minx / 16;
+ int iy0 = miny / 16;
+ int ix1 = (maxx-1) / 16;
+ int iy1 = (maxy-1) / 16;
+
+ if (iy0 == iy1 && ix0 == ix1)
+ {
+
+ /* Triangle is contained in a single 16x16 block:
+ */
+ int mask = (ix0 & 3) | ((iy0 & 3) << 4);
+
+ lp_scene_bin_command( scene, ix0/4, iy0/4,
+ lp_rast_triangle_3_16,
+ lp_rast_arg_triangle(tri, mask) );
+ return;
+ }
+ }
+
ix0 = minx / TILE_SIZE;
iy0 = miny / TILE_SIZE;
ix1 = (maxx-1) / TILE_SIZE;
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index c71ec8066c..2ba39052ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -293,34 +293,7 @@ def generate_ssse3():
print '''
#if defined(PIPE_ARCH_SSE)
-
-#if defined(PIPE_ARCH_SSSE3)
-
-#include <tmmintrin.h>
-
-#else
-
-#include <emmintrin.h>
-
-/**
- * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
- * where -mssse3 is not supported/enabled.
- *
- * MSVC will never get in here as its intrinsics support do not rely on
- * compiler command line options.
- */
-static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_epi8(__m128i a, __m128i mask)
-{
- __m128i result;
- __asm__("pshufb %1, %0"
- : "=x" (result)
- : "xm" (mask), "0" (a));
- return result;
-}
-
-#endif
-
+#include "util/u_sse.h"
static void
lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index f5c1c5ca2c..e920cf9f3b 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -151,9 +151,9 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
if (so->start_alloc <= so->cur_start) {
debug_printf("exceeding num_start size\n");
assert(0);
- } else
+ }
#endif /* DEBUG_NOUVEAU_STATEOBJ */
- start = so->start;
+ start = so->start;
#ifdef DEBUG_NOUVEAU_STATEOBJ
if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) {
@@ -162,7 +162,6 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
}
#endif /* DEBUG_NOUVEAU_STATEOBJ */
- so->start = start;
start[so->cur_start].gr = gr;
start[so->cur_start].mthd = mthd;
start[so->cur_start].size = size;
@@ -193,11 +192,10 @@ so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo,
if (so->reloc_alloc <= so->cur_reloc) {
debug_printf("exceeding num_reloc size\n");
assert(0);
- } else
+ }
#endif /* DEBUG_NOUVEAU_STATEOBJ */
- r = so->reloc;
+ r = so->reloc;
- so->reloc = r;
r[so->cur_reloc].bo = NULL;
nouveau_bo_ref(bo, &(r[so->cur_reloc].bo));
r[so->cur_reloc].gr = so->start[so->cur_start-1].gr;
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
index a5e8537533..b165f7a611 100644
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -88,104 +88,4 @@ static INLINE unsigned log2i(unsigned i)
return r;
}
-struct u_split_prim {
- void *priv;
- void (*emit)(void *priv, unsigned start, unsigned count);
- void (*edge)(void *priv, boolean enabled);
-
- unsigned mode;
- unsigned start;
- unsigned p_start;
- unsigned p_end;
-
- uint repeat_first:1;
- uint close_first:1;
- uint edgeflag_off:1;
-};
-
-static INLINE void
-u_split_prim_init(struct u_split_prim *s,
- unsigned mode, unsigned start, unsigned count)
-{
- if (mode == PIPE_PRIM_LINE_LOOP) {
- s->mode = PIPE_PRIM_LINE_STRIP;
- s->close_first = 1;
- } else {
- s->mode = mode;
- s->close_first = 0;
- }
- s->start = start;
- s->p_start = start;
- s->p_end = start + count;
- s->edgeflag_off = 0;
- s->repeat_first = 0;
-}
-
-static INLINE boolean
-u_split_prim_next(struct u_split_prim *s, unsigned max_verts)
-{
- int repeat = 0;
-
- if (s->repeat_first) {
- s->emit(s->priv, s->start, 1);
- max_verts--;
- if (s->edgeflag_off) {
- s->edge(s->priv, TRUE);
- s->edgeflag_off = FALSE;
- }
- }
-
- if (s->p_start + s->close_first + max_verts >= s->p_end) {
- s->emit(s->priv, s->p_start, s->p_end - s->p_start);
- if (s->close_first)
- s->emit(s->priv, s->start, 1);
- return TRUE;
- }
-
- switch (s->mode) {
- case PIPE_PRIM_LINES:
- max_verts &= ~1;
- break;
- case PIPE_PRIM_LINE_STRIP:
- repeat = 1;
- break;
- case PIPE_PRIM_POLYGON:
- max_verts--;
- s->emit(s->priv, s->p_start, max_verts);
- s->edge(s->priv, FALSE);
- s->emit(s->priv, s->p_start + max_verts, 1);
- s->p_start += max_verts;
- s->repeat_first = TRUE;
- s->edgeflag_off = TRUE;
- return FALSE;
- case PIPE_PRIM_TRIANGLES:
- max_verts = max_verts - (max_verts % 3);
- break;
- case PIPE_PRIM_TRIANGLE_STRIP:
- /* to ensure winding stays correct, always split
- * on an even number of generated triangles
- */
- max_verts = max_verts & ~1;
- repeat = 2;
- break;
- case PIPE_PRIM_TRIANGLE_FAN:
- s->repeat_first = TRUE;
- repeat = 1;
- break;
- case PIPE_PRIM_QUADS:
- max_verts &= ~3;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- max_verts &= ~1;
- repeat = 2;
- break;
- default:
- break;
- }
-
- s->emit (s->priv, s->p_start, max_verts);
- s->p_start += (max_verts - repeat);
- return FALSE;
-}
-
#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index df79ca89ca..c6c93d40b8 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -24,11 +24,10 @@ nouveau_screen_transfer_flags(unsigned pipe)
flags |= NOUVEAU_BO_WR;
if (pipe & PIPE_TRANSFER_DISCARD)
flags |= NOUVEAU_BO_INVAL;
- if (pipe & PIPE_TRANSFER_DONTBLOCK)
- flags |= NOUVEAU_BO_NOWAIT;
- else
if (pipe & PIPE_TRANSFER_UNSYNCHRONIZED)
flags |= NOUVEAU_BO_NOSYNC;
+ else if (pipe & PIPE_TRANSFER_DONTBLOCK)
+ flags |= NOUVEAU_BO_NOWAIT;
return flags;
}
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
index c3ac804146..6a2ffd5a3c 100644
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -2,8 +2,8 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_split_prim.h"
-#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
#include "nv50_resource.h"
@@ -217,7 +217,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
4; /* potential edgeflag enable/disable */
const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
2; /* potential edgeflag modification */
- struct u_split_prim s;
+ struct util_split_prim s;
unsigned vtx_size;
boolean nzi = FALSE;
int i;
@@ -335,7 +335,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride;
}
- u_split_prim_init(&s, mode, start, count);
+ util_split_prim_init(&s, mode, start, count);
do {
if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
FIRE_RING(chan);
@@ -351,7 +351,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
- done = u_split_prim_next(&s, max_verts);
+ done = util_split_prim_next(&s, max_verts);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
} while (!done);
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index e7f8fe33ed..1f11950199 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -24,8 +24,8 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_split_prim.h"
-#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
#include "nv50_resource.h"
@@ -311,7 +311,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
struct pipe_transfer *transfer;
struct instance a[16];
struct inline_ctx ctx;
- struct u_split_prim s;
+ struct util_split_prim s;
boolean nzi = FALSE;
unsigned overhead;
@@ -347,7 +347,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
unsigned max_verts;
boolean done;
- u_split_prim_init(&s, mode, start, count);
+ util_split_prim_init(&s, mode, start, count);
do {
if (AVAIL_RING(chan) < (overhead + 6)) {
FIRE_RING(chan);
@@ -366,7 +366,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
- done = u_split_prim_next(&s, max_verts);
+ done = util_split_prim_next(&s, max_verts);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
} while (!done);
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 6f8d9abfc8..47ffc0cb3c 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -21,6 +21,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_hyperz.h"
#include "r300_texture.h"
#include "r300_winsys.h"
@@ -99,9 +101,6 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- if (r300->z_fastfill)
- clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
-
/* Only color clear allowed, and only one colorbuffer. */
if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
return FALSE;
@@ -173,22 +172,25 @@ static void r300_clear(struct pipe_context* pipe,
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
+ struct r300_texture *zstex =
+ fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
uint32_t width = fb->width;
uint32_t height = fb->height;
boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
- uint32_t hyperz_dcv = 0;
+ uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
- if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) {
-
+ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && has_hyperz) {
hyperz_dcv = hyperz->zb_depthclearvalue =
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
- if (r300->z_compression || r300->z_fastfill)
+ if (zstex->zmask_mem[fb->zsbuf->level]) {
r300->zmask_clear.dirty = TRUE;
- if (r300->hiz_enable)
+ buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
+ }
+ if (zstex->hiz_mem[fb->zsbuf->level])
r300->hiz_clear.dirty = TRUE;
}
@@ -207,13 +209,43 @@ static void r300_clear(struct pipe_context* pipe,
}
/* Clear. */
- r300_blitter_begin(r300, R300_CLEAR);
- util_blitter_clear(r300->blitter,
- width,
- height,
- fb->nr_cbufs,
- buffers, rgba, depth, stencil);
- r300_blitter_end(r300);
+ if (buffers) {
+ /* Clear using the blitter. */
+ r300_blitter_begin(r300, R300_CLEAR);
+ util_blitter_clear(r300->blitter,
+ width,
+ height,
+ fb->nr_cbufs,
+ buffers, rgba, depth, stencil);
+ r300_blitter_end(r300);
+ } else if (r300->zmask_clear.dirty) {
+ /* Just clear zmask and hiz now, this does not use a standard draw
+ * procedure. */
+ unsigned dwords;
+
+ /* Calculate zmask_clear and hiz_clear atom sizes. */
+ r300_update_hyperz_state(r300);
+ dwords = r300->zmask_clear.size +
+ (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
+ r300_get_num_cs_end_dwords(r300);
+
+ /* Reserve CS space. */
+ if (dwords > (r300->cs->ndw - r300->cs->cdw)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ }
+
+ /* Emit clear packets. */
+ r300_emit_zmask_clear(r300, r300->zmask_clear.size,
+ r300->zmask_clear.state);
+ r300->zmask_clear.dirty = FALSE;
+ if (r300->hiz_clear.dirty) {
+ r300_emit_hiz_clear(r300, r300->hiz_clear.size,
+ r300->hiz_clear.state);
+ r300->hiz_clear.dirty = FALSE;
+ }
+ } else {
+ assert(0);
+ }
/* Disable CBZB clear. */
if (r300->cbzb_clear) {
@@ -222,6 +254,16 @@ static void r300_clear(struct pipe_context* pipe,
r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
}
+ /* Enable fastfill and/or hiz.
+ *
+ * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
+ * looks if zmask/hiz is in use and enables fastfill accordingly. */
+ if (zstex &&
+ (zstex->zmask_in_use[fb->zsbuf->level] ||
+ zstex->hiz_in_use[fb->zsbuf->level])) {
+ r300->hyperz_state.dirty = TRUE;
+ }
+
/* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
if (r300->flush_counter == 0)
pipe->flush(pipe, 0, NULL);
@@ -259,27 +301,31 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
r300_blitter_end(r300);
}
-/* Clear a region of a depth stencil surface. */
-static void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- struct pipe_subresource subdst)
+/* Flush a depth stencil buffer. */
+void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned zslice)
{
struct r300_context *r300 = r300_context(pipe);
struct pipe_surface *dstsurf;
struct r300_texture *tex = r300_texture(dst);
- /* only flush the zmask if we have one attached to this texture */
if (!tex->zmask_mem[subdst.level])
return;
+ if (!tex->zmask_in_use[subdst.level])
+ return;
dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst,
- subdst.face, subdst.level, 0,
+ subdst.face, subdst.level, zslice,
PIPE_BIND_DEPTH_STENCIL);
r300->z_decomp_rd = TRUE;
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
r300_blitter_end(r300);
r300->z_decomp_rd = FALSE;
+
+ tex->zmask_in_use[subdst.level] = FALSE;
}
/* Copy a block of pixels from one surface to another using HW. */
@@ -342,7 +388,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
if (is_depth) {
- r300_flush_depth_stencil(pipe, src, subsrc);
+ r300_flush_depth_stencil(pipe, src, subsrc, srcz);
}
if (old_format != new_format) {
dst->format = new_format;
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e8b6c4f7af..a83ad892ea 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -99,8 +99,10 @@ static void r300_destroy_context(struct pipe_context* context)
struct r300_context* r300 = r300_context(context);
struct r300_atom *atom;
- util_blitter_destroy(r300->blitter);
- draw_destroy(r300->draw);
+ if (r300->blitter)
+ util_blitter_destroy(r300->blitter);
+ if (r300->draw)
+ draw_destroy(r300->draw);
/* Print stats, if enabled. */
if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
@@ -112,40 +114,48 @@ static void r300_destroy_context(struct pipe_context* context)
}
}
- u_upload_destroy(r300->upload_vb);
- u_upload_destroy(r300->upload_ib);
+ if (r300->upload_vb)
+ u_upload_destroy(r300->upload_vb);
+ if (r300->upload_ib)
+ u_upload_destroy(r300->upload_ib);
- /* setup hyper-z mm */
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
+ if (r300->zmask_mm)
r300_hyperz_destroy_mm(r300);
- translate_cache_destroy(r300->tran.translate_cache);
+ if (r300->tran.translate_cache)
+ translate_cache_destroy(r300->tran.translate_cache);
+ /* XXX: This function assumes r300->query_list was initialized */
r300_release_referenced_objects(r300);
- r300->rws->cs_destroy(r300->cs);
+ if (r300->cs)
+ r300->rws->cs_destroy(r300->cs);
+ /* XXX: No way to tell if this was initialized or not? */
util_mempool_destroy(&r300->pool_transfers);
r300_update_num_contexts(r300->screen, -1);
- FREE(r300->aa_state.state);
- FREE(r300->blend_color_state.state);
- FREE(r300->clip_state.state);
- FREE(r300->fb_state.state);
- FREE(r300->gpu_flush.state);
- FREE(r300->hyperz_state.state);
- FREE(r300->invariant_state.state);
- FREE(r300->rs_block_state.state);
- FREE(r300->scissor_state.state);
- FREE(r300->textures_state.state);
- FREE(r300->vap_invariant_state.state);
- FREE(r300->viewport_state.state);
- FREE(r300->ztop_state.state);
- FREE(r300->fs_constants.state);
- FREE(r300->vs_constants.state);
- if (!r300->screen->caps.has_tcl) {
- FREE(r300->vertex_stream_state.state);
+ /* Free the structs allocated in r300_setup_atoms() */
+ if (r300->aa_state.state) {
+ FREE(r300->aa_state.state);
+ FREE(r300->blend_color_state.state);
+ FREE(r300->clip_state.state);
+ FREE(r300->fb_state.state);
+ FREE(r300->gpu_flush.state);
+ FREE(r300->hyperz_state.state);
+ FREE(r300->invariant_state.state);
+ FREE(r300->rs_block_state.state);
+ FREE(r300->scissor_state.state);
+ FREE(r300->textures_state.state);
+ FREE(r300->vap_invariant_state.state);
+ FREE(r300->viewport_state.state);
+ FREE(r300->ztop_state.state);
+ FREE(r300->fs_constants.state);
+ FREE(r300->vs_constants.state);
+ if (!r300->screen->caps.has_tcl) {
+ FREE(r300->vertex_stream_state.state);
+ }
}
FREE(r300);
}
@@ -158,12 +168,14 @@ void r300_flush_cb(void *data)
}
#define R300_INIT_ATOM(atomname, atomsize) \
+ do { \
r300->atomname.name = #atomname; \
r300->atomname.state = NULL; \
r300->atomname.size = atomsize; \
r300->atomname.emit = r300_emit_##atomname; \
r300->atomname.dirty = FALSE; \
- insert_at_tail(&r300->atom_list, &r300->atomname);
+ insert_at_tail(&r300->atom_list, &r300->atomname); \
+ } while (0)
static void r300_setup_atoms(struct r300_context* r300)
{
@@ -404,12 +416,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.destroy = r300_destroy_context;
- r300->cs = rws->cs_create(rws);
+ make_empty_list(&r300->query_list);
util_mempool_create(&r300->pool_transfers,
sizeof(struct pipe_transfer), 64,
UTIL_MEMPOOL_SINGLETHREADED);
+ r300->cs = rws->cs_create(rws);
+ if (r300->cs == NULL)
+ goto fail;
+
if (!r300screen->caps.has_tcl) {
/* Create a Draw. This is used for SW TCL. */
r300->draw = draw_create(&r300->context);
@@ -424,8 +440,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_setup_atoms(r300);
- make_empty_list(&r300->query_list);
-
r300_init_blit_functions(r300);
r300_init_flush_functions(r300);
r300_init_query_functions(r300);
@@ -433,6 +447,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_init_resource_functions(r300);
r300->blitter = util_blitter_create(&r300->context);
+ if (r300->blitter == NULL)
+ goto fail;
/* Render functions must be initialized after blitter. */
r300_init_render_functions(r300);
@@ -441,22 +457,25 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
/* setup hyper-z mm */
if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_hyperz_init_mm(r300);
+ if (!r300_hyperz_init_mm(r300))
+ goto fail;
r300->upload_ib = u_upload_create(&r300->context,
32 * 1024, 16,
PIPE_BIND_INDEX_BUFFER);
if (r300->upload_ib == NULL)
- goto no_upload_ib;
+ goto fail;
r300->upload_vb = u_upload_create(&r300->context,
128 * 1024, 16,
PIPE_BIND_VERTEX_BUFFER);
if (r300->upload_vb == NULL)
- goto no_upload_vb;
+ goto fail;
r300->tran.translate_cache = translate_cache_create();
+ if (r300->tran.translate_cache == NULL)
+ goto fail;
r300_init_states(&r300->context);
@@ -486,10 +505,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
return &r300->context;
- no_upload_ib:
- u_upload_destroy(r300->upload_ib);
- no_upload_vb:
- FREE(r300);
+ fail:
+ r300_destroy_context(&r300->context);
return NULL;
}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index d86a5c8fc9..6fa7f470f9 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -397,6 +397,8 @@ struct r300_texture {
/* hyper-z memory allocs */
struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS];
+ boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS];
+ boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
/* This is the level tiling flags were last time set for.
* It's used to prevent redundant tiling-flags changes from happening.*/
@@ -564,12 +566,9 @@ struct r300_context {
boolean two_sided_color;
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
- /* Whether fast zclear is enabled. */
- boolean z_fastfill;
#define R300_Z_COMPRESS_44 1
#define RV350_Z_COMPRESS_88 2
int z_compression;
- boolean hiz_enable;
boolean cbzb_clear;
boolean z_decomp_rd;
@@ -628,6 +627,12 @@ void r300_init_render_functions(struct r300_context *r300);
void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
+/* r300_blit.c */
+void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned zslice);
+
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
struct r300_query *query);
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 17e180a79a..d0fd45349e 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -393,7 +393,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
/* HiZ RAM. */
if (r300->screen->caps.hiz_ram) {
if (tex->hiz_mem[level]) {
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs);
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2);
OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch);
} else {
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
@@ -402,7 +402,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
}
/* Z Mask RAM. (compressed zbuffer) */
if (tex->zmask_mem[level]) {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs);
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2);
OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
} else {
OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
@@ -936,6 +936,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_TABLE(data, 4);
}
}
+
+ /* Emit flow control instructions. */
+ if (code->num_fc_ops) {
+
+ OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
+ if (r300screen->caps.is_r500) {
+ OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2);
+ OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2);
+ } else {
+ OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops);
+ OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops);
+ }
+ OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops);
+ OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops);
+ }
+
END_CS;
}
@@ -1008,6 +1024,8 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
int i;
tex = r300_texture(fb->zsbuf->texture);
+
+ offset = tex->hiz_mem[fb->zsbuf->level]->ofs;
stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
/* convert from pixels to 4x4 blocks */
@@ -1028,6 +1046,9 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff);
}
z->current_func = -1;
+
+ /* Mark the current zbuffer's hiz ram as in use. */
+ tex->hiz_in_use[fb->zsbuf->level] = TRUE;
}
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
@@ -1043,6 +1064,8 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
tex = r300_texture(fb->zsbuf->texture);
stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
+ offset = tex->zmask_mem[fb->zsbuf->level]->ofs;
+
if (r300->z_compression == RV350_Z_COMPRESS_88)
mult = 8;
else
@@ -1065,6 +1088,9 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
offset <<= offset_shift;
r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff);
}
+
+ /* Mark the current zbuffer's zmask as in use. */
+ tex->zmask_in_use[fb->zsbuf->level] = TRUE;
}
void r300_emit_ztop_state(struct r300_context* r300,
@@ -1186,6 +1212,17 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
return dwords;
}
+unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
+{
+ unsigned dwords = 0;
+
+ /* Emitted in flush. */
+ dwords += 26; /* emit_query_end */
+ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
+
+ return dwords;
+}
+
/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300)
{
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 2f2c2f2dcb..bae2525634 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -116,6 +116,7 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state);
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state);
unsigned r300_get_num_dirty_dwords(struct r300_context *r300);
+unsigned r300_get_num_cs_end_dwords(struct r300_context *r300);
/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 7fed9b5d07..fe182b6615 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -44,8 +44,7 @@ static void r300_flush(struct pipe_context* pipe,
u_upload_flush(r300->upload_ib);
if (r300->dirty_hw) {
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_emit_hyperz_end(r300);
+ r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
r300->flush_counter++;
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 87ff49a90c..2a0c30620a 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -72,6 +72,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
fs_inputs->wpos = i;
break;
+ case TGSI_SEMANTIC_FACE:
+ assert(index == 0);
+ fs_inputs->face = i;
+ break;
+
default:
fprintf(stderr, "r300: FP: Unknown input semantic: %i\n",
info->input_semantic_name[i]);
@@ -120,6 +125,9 @@ static void allocate_hardware_inputs(
allocate(mydata, inputs->color[i], reg++);
}
}
+ if (inputs->face != ATTR_UNUSED) {
+ allocate(mydata, inputs->face, reg++);
+ }
for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
if (inputs->generic[i] != ATTR_UNUSED) {
allocate(mydata, inputs->generic[i], reg++);
@@ -360,13 +368,14 @@ static void r300_translate_fragment_shader(
{
struct r300_fragment_program_compiler compiler;
struct tgsi_to_rc ttr;
- int wpos;
+ int wpos, face;
unsigned i;
tgsi_scan_shader(tokens, &shader->info);
r300_shader_read_fs_inputs(&shader->info, &shader->inputs);
wpos = shader->inputs.wpos;
+ face = shader->inputs.face;
/* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
@@ -383,7 +392,7 @@ static void r300_translate_fragment_shader(
find_output_registers(&compiler, shader);
if (compiler.Base.Debug) {
- debug_printf("r300: Initial fragment program\n");
+ DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
}
@@ -406,6 +415,10 @@ static void r300_translate_fragment_shader(
rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
}
+ if (face != ATTR_UNUSED) {
+ rc_transform_fragment_face(&compiler.Base, face);
+ }
+
/* Invoke the compiler */
r3xx_compile_fragment_program(&compiler);
@@ -418,7 +431,7 @@ static void r300_translate_fragment_shader(
}
if (compiler.Base.Error) {
- fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
+ DBG(r300, DBG_FP, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
" instead.\nIf there's an 'unknown opcode' message, please"
" file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 10e440ce30..a471b7353b 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -21,12 +21,14 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "util/u_format.h"
-#include "util/u_mm.h"
#include "r300_context.h"
#include "r300_hyperz.h"
#include "r300_reg.h"
#include "r300_fs.h"
+#include "r300_winsys.h"
+
+#include "util/u_format.h"
+#include "util/u_mm.h"
/*
HiZ rules - taken from various docs
@@ -127,6 +129,12 @@ static void r300_update_hyperz(struct r300_context* r300)
{
struct r300_hyperz_state *z =
(struct r300_hyperz_state*)r300->hyperz_state.state;
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_texture *zstex =
+ fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
+ boolean zmask_in_use = FALSE;
+ boolean hiz_in_use = FALSE;
z->gb_z_peq_config = 0;
z->zb_bw_cntl = 0;
@@ -138,22 +146,32 @@ static void r300_update_hyperz(struct r300_context* r300)
return;
}
+ if (!zstex)
+ return;
+
+ if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
+ return;
+
+ zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level];
+ hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level];
+
+ /* Z fastfill. */
+ if (zmask_in_use) {
+ z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
+ }
+
/* Zbuffer compression. */
- if (r300->z_compression) {
+ if (zmask_in_use && r300->z_compression) {
z->zb_bw_cntl |= R300_RD_COMP_ENABLE;
if (r300->z_decomp_rd == false)
z->zb_bw_cntl |= R300_WR_COMP_ENABLE;
- /* RV350 and up optimizations. */
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
- }
-
- /* Z fastfill. */
- if (r300->z_fastfill) {
- z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
}
+ /* RV350 and up optimizations. */
+ /* The section 10.4.9 in the docs is a lie. */
+ if (r300->z_compression == RV350_Z_COMPRESS_88)
+ z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
- if (r300->hiz_enable) {
+ if (hiz_in_use) {
bool can_hiz = r300_can_hiz(r300);
if (can_hiz) {
z->zb_bw_cntl |= R300_HIZ_ENABLE;
@@ -163,8 +181,8 @@ static void r300_update_hyperz(struct r300_context* r300)
}
}
+ /* R500-specific features and optimizations. */
if (r300->screen->caps.is_r500) {
- /* XXX Are these bits really available on RV350? */
z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3;
z->zb_bw_cntl |=
R500_HIZ_EQUAL_REJECT_ENABLE |
@@ -333,6 +351,12 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
tex = r300_texture(surf->base.texture);
+ /* We currently don't handle decompression for 3D textures and cubemaps
+ * correctly. */
+ if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
+ tex->desc.b.b.target != PIPE_TEXTURE_2D)
+ return;
+
if (tex->zmask_mem[level])
return;
@@ -349,23 +373,36 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
return;
}
-void r300_hyperz_init_mm(struct r300_context *r300)
+boolean r300_hyperz_init_mm(struct r300_context *r300)
{
struct r300_screen* r300screen = r300->screen;
int frag_pipes = r300screen->caps.num_frag_pipes;
- if (r300screen->caps.hiz_ram)
+ r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
+ if (!r300->zmask_mm)
+ return FALSE;
+
+ if (r300screen->caps.hiz_ram) {
r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
+ if (!r300->hiz_mm) {
+ u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
+ return FALSE;
+ }
+ }
- r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
+ return TRUE;
}
void r300_hyperz_destroy_mm(struct r300_context *r300)
{
struct r300_screen* r300screen = r300->screen;
- if (r300screen->caps.hiz_ram)
+ if (r300screen->caps.hiz_ram) {
u_mmDestroy(r300->hiz_mm);
+ r300->hiz_mm = NULL;
+ }
u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
index 09e1ff6625..30a23ec649 100644
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -30,6 +30,6 @@ void r300_update_hyperz_state(struct r300_context* r300);
void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
-void r300_hyperz_init_mm(struct r300_context *r300);
+boolean r300_hyperz_init_mm(struct r300_context *r300);
void r300_hyperz_destroy_mm(struct r300_context *r300);
#endif
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 99a9d65055..60d3b600cb 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -496,6 +496,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
+#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
+#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
+#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
+
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
@@ -514,6 +520,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
+#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
+
/* gap */
/* Addresses are relative to the vertex program instruction area of the
@@ -548,6 +558,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -564,6 +577,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
+#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)
+
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
+#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
+#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
+
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 910f5f7113..86b11ca045 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -186,20 +186,14 @@ enum r300_prepare_flags {
* \param cs_dwords The number of dwords to reserve in CS.
* \param aos_offset The offset passed to emit_aos.
* \param index_bias The index bias to emit.
- * \param end_cs_dwords The number of free dwords which must be available
- * at the end of CS after drawing in case the CS space
- * management is performed by a draw_* function manually.
- * The parameter may be NULL.
*/
static void r300_prepare_for_rendering(struct r300_context *r300,
enum r300_prepare_flags flags,
struct pipe_resource *index_buffer,
unsigned cs_dwords,
int aos_offset,
- int index_bias,
- unsigned *end_cs_dwords)
+ int index_bias)
{
- unsigned end_dwords = 0;
boolean flushed = FALSE;
boolean first_draw = flags & PREP_FIRST_DRAW;
boolean emit_aos = flags & PREP_EMIT_AOS;
@@ -221,12 +215,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
cs_dwords += 7; /* emit_aos_swtcl */
}
- /* Emitted in flush. */
- end_dwords += 26; /* emit_query_end */
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
-
- cs_dwords += end_dwords;
+ cs_dwords += r300_get_num_cs_end_dwords(r300);
/* Reserve requested CS space. */
if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) {
@@ -251,9 +240,6 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
if (emit_aos_swtcl)
r300_emit_aos_swtcl(r300, indexed);
}
-
- if (end_cs_dwords)
- *end_cs_dwords = end_dwords;
}
static boolean immd_is_good_idea(struct r300_context *r300,
@@ -354,7 +340,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
dwords = 9 + count * vertex_size;
- r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0);
BEGIN_CS(dwords);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
@@ -534,7 +520,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
/* 15 dwords for emit_draw_elements */
r300_prepare_for_rendering(r300,
PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
- indexBuffer, 15, buffer_offset, indexBias, NULL);
+ indexBuffer, 15, buffer_offset, indexBias);
if (alt_num_verts || count <= 65535) {
r300_emit_draw_elements(r300, indexBuffer, indexSize,
@@ -553,7 +539,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
if (count) {
r300_prepare_for_rendering(r300,
PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
- indexBuffer, 15, buffer_offset, indexBias, NULL);
+ indexBuffer, 15, buffer_offset, indexBias);
}
} while (count);
}
@@ -598,7 +584,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
} else {
/* 9 spare dwords for emit_draw_arrays. */
r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
- NULL, 9, start, 0, NULL);
+ NULL, 9, start, 0);
if (alt_num_verts || count <= 65535) {
r300_emit_draw_arrays(r300, mode, count);
@@ -614,7 +600,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
if (count) {
r300_prepare_for_rendering(r300,
PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
- start, 0, NULL);
+ start, 0);
}
} while (count);
}
@@ -855,7 +841,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
(void) i; (void) ptr;
r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
- NULL, dwords, 0, 0, NULL);
+ NULL, dwords, 0, 0);
DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count);
@@ -908,7 +894,8 @@ static void r300_render_draw_elements(struct vbuf_render* render,
* indices than it can fit in CS. */
r300_prepare_for_rendering(r300,
PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
- NULL, 256, 0, 0, &end_cs_dwords);
+ NULL, 256, 0, 0);
+ end_cs_dwords = r300_get_num_cs_end_dwords(r300);
while (count) {
free_dwords = r300->cs->ndw - r300->cs->cdw;
@@ -938,7 +925,8 @@ static void r300_render_draw_elements(struct vbuf_render* render,
if (count) {
r300_prepare_for_rendering(r300,
PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
- NULL, 256, 0, 0, &end_cs_dwords);
+ NULL, 256, 0, 0);
+ end_cs_dwords = r300_get_num_cs_end_dwords(r300);
}
}
}
@@ -1032,7 +1020,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
r300->clip_state.dirty = FALSE;
r300->viewport_state.dirty = FALSE;
- r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0);
DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 6268001054..1e4edcdbc3 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -115,7 +115,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_DEPTH_CLAMP:
return 1;
/* Unsupported features (boolean caps). */
@@ -124,6 +123,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_CONT_SUPPORTED:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 0;
/* Texturing. */
@@ -150,9 +151,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_CONST_BUFFER_SIZE:
return 256;
- case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
- return 1;
-
/* Fragment coordinate conventions. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
index cb7a37033f..4be23e64ce 100644
--- a/src/gallium/drivers/r300/r300_shader_semantics.h
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -38,6 +38,7 @@ struct r300_shader_semantics {
int psize;
int color[ATTR_COLOR_COUNT];
int bcolor[ATTR_COLOR_COUNT];
+ int face;
int generic[ATTR_GENERIC_COUNT];
int fog;
int wpos;
@@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset(
info->pos = ATTR_UNUSED;
info->psize = ATTR_UNUSED;
+ info->face = ATTR_UNUSED;
info->fog = ATTR_UNUSED;
info->wpos = ATTR_UNUSED;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 1e6b81d798..239edd98e3 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -689,8 +689,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
/* What is marked as dirty depends on the enum r300_fb_state_change. */
r300->gpu_flush.dirty = TRUE;
r300->fb_state.dirty = TRUE;
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300->hyperz_state.dirty = TRUE;
+ r300->hyperz_state.dirty = TRUE;
if (change == R300_CHANGED_FB_STATE) {
r300->aa_state.dirty = TRUE;
@@ -753,8 +752,6 @@ static void
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
- r300->hiz_enable = false;
- r300->z_fastfill = false;
r300->z_compression = false;
if (state->zsbuf) {
@@ -781,23 +778,18 @@ static void
/* work out whether we can support zmask features on this buffer */
r300_zmask_alloc_block(r300, zs_surf, compress);
- if (tex->hiz_mem[level]) {
- r300->hiz_enable = 1;
- }
-
if (tex->zmask_mem[level]) {
- r300->z_fastfill = 1;
/* compression causes hangs on 16-bit */
if (zbuffer_bpp == 24)
r300->z_compression = compress;
}
DBG(r300, DBG_HYPERZ,
- "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable,
+ "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef,
- r300->z_compression, r300->z_fastfill,
+ r300->z_compression, tex->zmask_mem[level] ? 1 : 0,
tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef);
}
-
+
/* Polygon offset depends on the zbuffer bit depth. */
if (r300->zbuffer_bpp != zbuffer_bpp) {
r300->zbuffer_bpp = zbuffer_bpp;
@@ -1759,10 +1751,12 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
if (r300->screen->caps.has_tcl) {
+ unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
r300->vs_state.dirty = TRUE;
r300->vs_state.size =
vs->code.length + 9 +
- (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
+ (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) +
+ (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0);
if (vs->externals_count) {
r300->vs_constants.dirty = TRUE;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index f3dad4c292..c8de3e1c52 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -35,7 +35,6 @@
#include "r300_state_inlines.h"
#include "r300_texture.h"
#include "r300_vs.h"
-#include "r300_winsys.h"
/* r300_state_derived: Various bits of state which are dependent upon
* currently bound CSO data. */
@@ -47,6 +46,11 @@ enum r300_rs_swizzle {
SWIZ_0001,
};
+enum r300_rs_col_write_type {
+ WRITE_COLOR = 0,
+ WRITE_FACE
+};
+
static void r300_draw_emit_attrib(struct r300_context* r300,
enum attrib_emit emit,
enum interp_mode interp,
@@ -204,8 +208,10 @@ static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
rs->inst[id] |= R300_RS_INST_COL_ID(id);
}
-static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
+ enum r300_rs_col_write_type type)
{
+ assert(type == WRITE_COLOR);
rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
R300_RS_INST_COL_ADDR(fp_offset);
}
@@ -253,10 +259,16 @@ static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
rs->inst[id] |= R500_RS_INST_COL_ID(id);
}
-static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
+ enum r300_rs_col_write_type type)
{
- rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
- R500_RS_INST_COL_ADDR(fp_offset);
+ if (type == WRITE_FACE)
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE_BACKFACE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+ else
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+
}
static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
@@ -306,7 +318,7 @@ static void r300_update_rs_block(struct r300_context *r300)
struct r300_rs_block rs = {0};
int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0;
void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
- void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
+ void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type);
void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
@@ -325,6 +337,11 @@ static void r300_update_rs_block(struct r300_context *r300)
rX00_rs_tex_write = r300_rs_tex_write;
}
+ /* 0x5555 copied from classic, which means:
+ * Select user color 0 for COLOR0 up to COLOR7.
+ * What the hell does that mean? */
+ rs.vap_vtx_state_cntl = 0x5555;
+
/* The position is always present in VAP. */
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
@@ -351,7 +368,7 @@ static void r300_update_rs_block(struct r300_context *r300)
/* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
- rX00_rs_col_write(&rs, col_count, fp_offset);
+ rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR);
fp_offset++;
DBG(r300, DBG_RS,
@@ -399,6 +416,24 @@ static void r300_update_rs_block(struct r300_context *r300)
}
}
+ /* gl_FrontFacing.
+ * Note that we can use either the two-sided color selection based on
+ * the front and back vertex shader colors, or gl_FrontFacing,
+ * but not both! It locks up otherwise.
+ *
+ * In Direct3D 9, the two-sided color selection can be used
+ * with shaders 2.0 only, while gl_FrontFacing can be used
+ * with shaders 3.0 only. The hardware apparently hasn't been designed
+ * to support both at the same time. */
+ if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED &&
+ !(any_bcolor_used && r300->two_sided_color)) {
+ rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
+ rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE);
+ fp_offset++;
+ col_count++;
+ DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n");
+ }
+
/* Rasterize texture coordinates. */
for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) {
bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
@@ -677,8 +712,44 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
+/* We can't use compressed zbuffers as samplers. */
+static void r300_flush_depth_textures(struct r300_context *r300)
+{
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ unsigned i, level;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count);
+
+ if (r300->z_decomp_rd)
+ return;
+
+ for (i = 0; i < count; i++)
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ struct pipe_resource *tex = state->sampler_views[i]->base.texture;
+
+ if (tex->target == PIPE_TEXTURE_3D ||
+ tex->target == PIPE_TEXTURE_CUBE)
+ continue;
+
+ /* Ignore non-depth textures.
+ * Also ignore reinterpreted depth textures, e.g. resource_copy. */
+ if (!util_format_is_depth_or_stencil(tex->format))
+ continue;
+
+ for (level = 0; level <= tex->last_level; level++)
+ if (r300_texture(tex)->zmask_in_use[level]) {
+ /* We don't handle 3D textures and cubemaps yet. */
+ r300_flush_depth_stencil(&r300->context, tex,
+ u_subresource(0, level), 0);
+ }
+ }
+}
+
void r300_update_derived_state(struct r300_context* r300)
{
+ r300_flush_depth_textures(r300);
+
if (r300->textures_state.dirty) {
r300_merge_textures_and_samplers(r300);
}
@@ -694,6 +765,5 @@ void r300_update_derived_state(struct r300_context* r300)
}
}
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_update_hyperz_state(r300);
+ r300_update_hyperz_state(r300);
}
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 51b2c55550..a4911b9a2a 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -97,13 +97,13 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */
/* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
- /* case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; */
+ case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
/* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
- /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
+ case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
case TGSI_OPCODE_IF: return RC_OPCODE_IF;
@@ -126,7 +126,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */
/* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */
/* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */
- /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */
+ case TGSI_OPCODE_CONT: return RC_OPCODE_CONT;
/* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */
/* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */
/* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index b25c786d6b..54c8de1241 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -207,7 +207,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.Base.max_temp_regs = 32;
if (compiler.Base.Debug) {
- debug_printf("r300: Initial vertex program\n");
+ DBG(r300, DBG_VP, "r300: Initial vertex program\n");
tgsi_dump(vs->state.tokens, 0);
}
@@ -227,8 +227,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
/* Invoke the compiler */
r3xx_compile_vertex_program(&compiler);
if (compiler.Base.Error) {
- /* XXX We should fallback using Draw. */
- fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader"
+ DBG(r300, DBG_VP, "r300 VP: Compiler error:\n%sUsing a dummy shader"
" instead.\nIf there's an 'unknown opcode' message, please"
" file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f1dc3dc3a9..9ea9d4354d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -167,8 +167,7 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
struct r600_bc_alu *alu;
if (bc->cf_last == NULL) {
- R600_ERR("no last CF\n");
- return -EINVAL;
+ return 0;
}
if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
return 0;
@@ -179,12 +178,13 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
return -EINVAL;
}
alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!alu->last || !alu->nliteral) {
+ if (!alu->last || !alu->nliteral || alu->literal_added) {
return 0;
}
memcpy(alu->value, value, 4 * 4);
bc->cf_last->ndw += alu->nliteral;
bc->ndw += alu->nliteral;
+ alu->literal_added = 1;
return 0;
}
@@ -287,7 +287,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
return 0;
}
-int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
unsigned i;
@@ -331,7 +331,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
return 0;
}
-int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
unsigned id = cf->id;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 3fd94dbda0..10d98afaf0 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -48,6 +48,7 @@ struct r600_bc_alu {
unsigned last;
unsigned is_op3;
unsigned nliteral;
+ unsigned literal_added;
u32 value[4];
};
diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c
index ae1780a1d4..edde80c660 100644
--- a/src/gallium/drivers/r600/r600_context.c
+++ b/src/gallium/drivers/r600/r600_context.c
@@ -47,20 +47,25 @@ void r600_flush(struct pipe_context *ctx, unsigned flags,
struct r600_context *rctx = r600_context(ctx);
struct r600_screen *rscreen = rctx->screen;
static int dc = 0;
+ char dname[256];
if (radeon_ctx_pm4(rctx->ctx))
return;
/* FIXME dumping should be removed once shader support instructions
* without throwing bad code
*/
- if (!dc)
- radeon_ctx_dump_bof(rctx->ctx, "gallium.bof");
+ if (!rctx->ctx->cpm4)
+ goto out;
+ sprintf(dname, "gallium-%08d.bof", dc);
+ if (dc < 1)
+ radeon_ctx_dump_bof(rctx->ctx, dname);
#if 1
radeon_ctx_submit(rctx->ctx);
#endif
+ dc++;
+out:
rctx->ctx = radeon_ctx_decref(rctx->ctx);
rctx->ctx = radeon_ctx(rscreen->rw);
- dc++;
}
static void r600_init_config(struct r600_context *rctx)
@@ -202,24 +207,6 @@ static void r600_init_config(struct r600_context *rctx)
num_es_stack_entries = 0;
break;
}
- printf("ps_prio : %d\n", ps_prio);
- printf("vs_prio : %d\n", vs_prio);
- printf("gs_prio : %d\n", gs_prio);
- printf("es_prio : %d\n", es_prio);
- printf("num_ps_gprs : %d\n", num_ps_gprs);
- printf("num_vs_gprs : %d\n", num_vs_gprs);
- printf("num_gs_gprs : %d\n", num_gs_gprs);
- printf("num_es_gprs : %d\n", num_es_gprs);
- printf("num_temp_gprs : %d\n", num_temp_gprs);
- printf("num_ps_threads : %d\n", num_ps_threads);
- printf("num_vs_threads : %d\n", num_vs_threads);
- printf("num_gs_threads : %d\n", num_gs_threads);
- printf("num_es_threads : %d\n", num_es_threads);
- printf("num_ps_stack_entries : %d\n", num_ps_stack_entries);
- printf("num_vs_stack_entries : %d\n", num_vs_stack_entries);
- printf("num_gs_stack_entries : %d\n", num_gs_stack_entries);
- printf("num_es_stack_entries : %d\n", num_es_stack_entries);
-
rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG);
rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h
index 431f8951b2..76d5de8653 100644
--- a/src/gallium/drivers/r600/r600_context.h
+++ b/src/gallium/drivers/r600/r600_context.h
@@ -94,7 +94,7 @@ struct r600_context_hw_states {
struct radeon_state *dsa;
struct radeon_state *blend;
struct radeon_state *viewport;
- struct radeon_state *cb[7];
+ struct radeon_state *cb[8];
struct radeon_state *config;
struct radeon_state *cb_cntl;
struct radeon_state *db;
@@ -175,4 +175,7 @@ extern int r600_pipe_shader_update(struct pipe_context *ctx,
#define R600_ERR(fmt, args...) \
fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
+uint32_t r600_translate_texformat(enum pipe_format format,
+ const unsigned char *swizzle_view,
+ uint32_t *word4_p, uint32_t *yuv_format_p);
#endif
diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c
index 2420b76318..f058455162 100644
--- a/src/gallium/drivers/r600/r600_draw.c
+++ b/src/gallium/drivers/r600/r600_draw.c
@@ -127,7 +127,7 @@ static int r600_draw_common(struct r600_draw *draw)
draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count;
draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
if (draw->index_buffer) {
- rbuffer = (struct r600_buffer*)draw->index_buffer;
+ rbuffer = (struct r600_resource*)draw->index_buffer;
draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT;
draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT;
diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c
index 4b87327a7c..cdaca9ed7d 100644
--- a/src/gallium/drivers/r600/r600_screen.c
+++ b/src/gallium/drivers/r600/r600_screen.c
@@ -53,59 +53,100 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
switch (param) {
- case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 16;
+ /* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
- return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
- return 1;
case PIPE_CAP_GLSL:
- return 1;
case PIPE_CAP_DUAL_SOURCE_BLEND:
- return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
- return 1;
case PIPE_CAP_POINT_SPRITE:
- return 1;
- case PIPE_CAP_MAX_RENDER_TARGETS:
- /* FIXME some r6xx are buggy and can only do 4 */
- return 8;
case PIPE_CAP_OCCLUSION_QUERY:
- return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 1;
- case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- /* FIXME not sure here */
- return 13;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- return 1;
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
- return 1;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- /* FIXME allow this once infrastructure is there */
- return 0;
- case PIPE_CAP_TGSI_CONT_SUPPORTED:
- return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- return 1;
case PIPE_CAP_SM3:
- return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
- return 1;
- case PIPE_CAP_INDEP_BLEND_FUNC:
- /* FIXME allow this */
- return 0;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 1;
+
+ /* Unsupported features (boolean caps). */
+ case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_TGSI_CONT_SUPPORTED:
+ case PIPE_CAP_STREAM_OUTPUT:
+ case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
+ case PIPE_CAP_GEOMETRY_SHADER4:
+ case PIPE_CAP_DEPTH_CLAMP: /* FIXME allow this */
+ return 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 14;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ /* FIXME allow this once infrastructure is there */
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 16;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ /* FIXME some r6xx are buggy and can only do 4 */
+ return 8;
+
+ /* Fragment coordinate conventions. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
+
+ /* Shader limits. */
+ case PIPE_CAP_MAX_VS_INSTRUCTIONS:
+ return 16384; //max native instructions, not greater than max instructions
+ case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_CAP_MAX_FS_INSTRUCTIONS:
+ return 16384; //max program native instructions
+ case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
+ return 16384; //max program native ALU instructions
+ case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
+ return 16384; //max program native texture instructions
+ case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
+ return 2048; //max program native texture indirections
+ case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
+ case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
+ return 8; /* FIXME */
+ case PIPE_CAP_MAX_VS_INPUTS:
+ return 16; //max native attributes
+ case PIPE_CAP_MAX_FS_INPUTS:
+ return 10; //max native attributes
+ case PIPE_CAP_MAX_VS_TEMPS:
+ return 256; //max native temporaries
+ case PIPE_CAP_MAX_FS_TEMPS:
+ return 256; //max native temporaries
+ case PIPE_CAP_MAX_VS_ADDRS:
+ case PIPE_CAP_MAX_FS_ADDRS:
+ return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */
+ case PIPE_CAP_MAX_VS_CONSTS:
+ return 256; //max native parameters
+ case PIPE_CAP_MAX_FS_CONSTS:
+ return 256; //max program native parameters
+ case PIPE_CAP_MAX_CONST_BUFFERS:
+ return 1;
+ case PIPE_CAP_MAX_CONST_BUFFER_SIZE: /* in bytes */
+ return 4096;
+ case PIPE_CAP_MAX_PREDICATE_REGISTERS:
+ case PIPE_CAP_MAX_VS_PREDS:
+ case PIPE_CAP_MAX_FS_PREDS:
+ return 0; /* FIXME */
+
default:
R600_ERR("r600: unknown param %d\n", param);
return 0;
diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h
index 9a452ecfe3..53b560c617 100644
--- a/src/gallium/drivers/r600/r600_screen.h
+++ b/src/gallium/drivers/r600/r600_screen.h
@@ -80,4 +80,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
int r600_conv_pipe_format(unsigned pformat, unsigned *format);
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
+void r600_init_screen_texture_functions(struct pipe_screen *screen);
+
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index dc8d4cb315..956c7e7930 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -155,11 +155,14 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
+ const struct pipe_rasterizer_state *rasterizer;
struct r600_screen *rscreen = r600_screen(ctx->screen);
struct r600_shader *rshader = &rpshader->shader;
+ struct r600_context *rctx = r600_context(ctx);
struct radeon_state *state;
unsigned i, tmp, exports_ps, num_cout;
+ rasterizer = &rctx->rasterizer->state.rasterizer;
rpshader->rstate = radeon_state_decref(rpshader->rstate);
state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
if (state == NULL)
@@ -171,6 +174,9 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
}
+ if (rasterizer->sprite_coord_enable & (1 << i)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
}
@@ -339,7 +345,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
{
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
- struct r600_bc_output output;
+ struct r600_bc_output output[32];
+ unsigned output_done, noutput;
unsigned opcode;
int i, r = 0, pos0;
@@ -417,34 +424,41 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
}
}
/* export output */
- for (i = 0, pos0 = 0; i < shader->noutput; i++) {
- memset(&output, 0, sizeof(struct r600_bc_output));
- output.gpr = shader->output[i].gpr;
- output.elem_size = 3;
- output.swizzle_x = 0;
- output.swizzle_y = 1;
- output.swizzle_z = 2;
- output.swizzle_w = 3;
- output.barrier = 1;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
- output.array_base = i - pos0;
- output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
- switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
+ noutput = shader->noutput;
+ for (i = 0, pos0 = 0; i < noutput; i++) {
+ memset(&output[i], 0, sizeof(struct r600_bc_output));
+ output[i].gpr = shader->output[i].gpr;
+ output[i].elem_size = 3;
+ output[i].swizzle_x = 0;
+ output[i].swizzle_y = 1;
+ output[i].swizzle_z = 2;
+ output[i].swizzle_w = 3;
+ output[i].barrier = 1;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[i].array_base = i - pos0;
+ output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+ switch (ctx.type) {
case TGSI_PROCESSOR_VERTEX:
if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output.array_base = 60;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output[i].array_base = 60;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
/* position doesn't count in array_base */
- pos0 = 1;
+ pos0++;
+ }
+ if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
+ output[i].array_base = 61;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ /* position doesn't count in array_base */
+ pos0++;
}
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
- output.array_base = 0;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[i].array_base = shader->output[i].sid;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output.array_base = 61;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[i].array_base = 61;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
r = -EINVAL;
@@ -456,10 +470,58 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = -EINVAL;
goto out_err;
}
- if (i == (shader->noutput - 1)) {
- output.end_of_program = 1;
+ }
+ /* add fake param output for vertex shader if no param is exported */
+ if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+ for (i = 0, pos0 = 0; i < noutput; i++) {
+ if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
+ pos0 = 1;
+ break;
+ }
+ }
+ if (!pos0) {
+ memset(&output[i], 0, sizeof(struct r600_bc_output));
+ output[i].gpr = 0;
+ output[i].elem_size = 3;
+ output[i].swizzle_x = 0;
+ output[i].swizzle_y = 1;
+ output[i].swizzle_z = 2;
+ output[i].swizzle_w = 3;
+ output[i].barrier = 1;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[i].array_base = 0;
+ output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+ noutput++;
}
- r = r600_bc_add_output(ctx.bc, &output);
+ }
+ /* add fake pixel export */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
+ memset(&output[0], 0, sizeof(struct r600_bc_output));
+ output[0].gpr = 0;
+ output[0].elem_size = 3;
+ output[0].swizzle_x = 7;
+ output[0].swizzle_y = 7;
+ output[0].swizzle_z = 7;
+ output[0].swizzle_w = 7;
+ output[0].barrier = 1;
+ output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[0].array_base = 0;
+ output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+ noutput++;
+ }
+ /* set export done on last export of each type */
+ for (i = noutput - 1, output_done = 0; i >= 0; i--) {
+ if (i == (noutput - 1)) {
+ output[i].end_of_program = 1;
+ }
+ if (!(output_done & (1 << output[i].type))) {
+ output_done |= (1 << output[i].type);
+ output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
+ }
+ }
+ /* add output to bytecode */
+ for (i = 0; i < noutput; i++) {
+ r = r600_bc_add_output(ctx.bc, &output[i]);
if (r)
goto out_err;
}
@@ -490,6 +552,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
r600_src->sel = 0;
}
+ r600_src->neg = tgsi_src->Register.Negate;
r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index ee0381e8bd..2ee7780ead 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -28,6 +28,7 @@
struct r600_shader_io {
unsigned name;
unsigned gpr;
+ unsigned done;
int sid;
unsigned interpolate;
};
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index deb9bf3395..3efd409ae0 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -379,6 +379,8 @@ static void r600_set_scissor_state(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_scissor_type, state);
r600_bind_state(ctx, rstate);
+ /* refcount is taken care of this */
+ r600_delete_state(ctx, rstate);
}
static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -389,6 +391,8 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_stencil_ref_type, state);
r600_bind_state(ctx, rstate);
+ /* refcount is taken care of this */
+ r600_delete_state(ctx, rstate);
}
static void r600_set_vertex_buffers(struct pipe_context *ctx,
@@ -433,6 +437,7 @@ static void r600_set_viewport_state(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_viewport_type, state);
r600_bind_state(ctx, rstate);
+ r600_delete_state(ctx, rstate);
}
void r600_init_state_functions(struct r600_context *rctx)
@@ -675,9 +680,8 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
unsigned color_info;
unsigned format, swap, ntype;
const struct util_format_description *desc;
- int id = R600_CB0 + cb;
- rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, id);
+ rstate = radeon_state(rscreen->rw, R600_CB0_TYPE + cb, R600_CB0 + cb);
if (rstate == NULL)
return NULL;
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
@@ -728,7 +732,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx)
struct r600_resource *rbuffer;
struct radeon_state *rstate;
const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
- unsigned level = state->cbufs[0]->level;
+ unsigned level;
unsigned pitch, slice, format;
if (state->zsbuf == NULL)
@@ -770,7 +774,8 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
float offset_units = 0, offset_scale = 0;
char depth = 0;
unsigned offset_db_fmt_cntl = 0;
-
+ unsigned tmp;
+ unsigned prov_vtx = 1;
if (fb->zsbuf) {
offset_units = state->offset_units;
offset_scale = state->offset_scale * 12.0f;
@@ -796,23 +801,43 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
}
offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
+ if (state->flatshade_first)
+ prov_vtx = 0;
+
rctx->flat_shade = state->flatshade;
rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER);
if (rstate == NULL)
return NULL;
rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001;
+ if (state->sprite_coord_enable) {
+ rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
+ S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(2) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(3) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(1);
+ if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
+ rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
+ S_0286D4_PNT_SPRITE_TOP_1(1);
+ }
+ }
rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000;
- rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 |
- S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
- S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
- S_028814_FACE(!state->front_ccw) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri);
- rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000;
+ rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] =
+ S_028814_PROVOKING_VTX_LAST(prov_vtx) |
+ S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
+ S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
+ S_028814_FACE(!state->front_ccw) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri);
+ rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] =
+ S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000;
- rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008;
- rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x00000000;
+ /* point size 12.4 fixed point */
+ tmp = (unsigned)(state->point_size * 8.0 / 2.0);
+ rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp);
+ rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000;
rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008;
rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005;
rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000;
@@ -837,12 +862,25 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
static struct radeon_state *r600_scissor(struct r600_context *rctx)
{
const struct pipe_scissor_state *state = &rctx->scissor->state.scissor;
+ const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
struct r600_screen *rscreen = rctx->screen;
struct radeon_state *rstate;
+ unsigned minx, maxx, miny, maxy;
u32 tl, br;
- tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
- br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
+ if (state == NULL) {
+ minx = 0;
+ miny = 0;
+ maxx = fb->cbufs[0]->width;
+ maxy = fb->cbufs[0]->height;
+ } else {
+ minx = state->minx;
+ miny = state->miny;
+ maxx = state->maxx;
+ maxy = state->maxy;
+ }
+ tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
+ br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy);
rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR);
if (rstate == NULL)
return NULL;
@@ -1140,8 +1178,16 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
struct r600_resource *rbuffer;
struct radeon_state *rstate;
unsigned format;
-
- format = r600_translate_colorformat(view->texture->format);
+ uint32_t word4 = 0, yuv_format = 0;
+ unsigned char swizzle[4];
+
+ swizzle[0] = view->swizzle_r;
+ swizzle[1] = view->swizzle_g;
+ swizzle[2] = view->swizzle_b;
+ swizzle[3] = view->swizzle_a;
+ format = r600_translate_texformat(view->texture->format,
+ swizzle,
+ &word4, &yuv_format);
if (format == ~0)
return NULL;
desc = util_format_description(view->texture->format);
@@ -1175,18 +1221,10 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] =
- S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
- S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
- S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
- S_038010_FORMAT_COMP_W(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
+ word4 |
S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
S_038010_REQUEST_SIZE(1) |
- S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_b)) |
- S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) |
- S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) |
- S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) |
- S_038010_FORCE_DEGAMMA(desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ? 1 : 0) |
S_038010_BASE_LEVEL(view->first_level);
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] =
S_038014_LAST_LEVEL(view->last_level) |
@@ -1206,7 +1244,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
struct r600_screen *rscreen = rctx->screen;
struct radeon_state *rstate;
const struct pipe_blend_state *pbs = &rctx->blend->state.blend;
- int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
+ int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
uint32_t color_control, target_mask, shader_mask;
int i;
@@ -1215,20 +1253,29 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
color_control = S_028808_PER_MRT_BLEND(1);
for (i = 0; i < nr_cbufs; i++) {
- shader_mask |= 0xf << i;
+ shader_mask |= 0xf << (i * 4);
}
if (pbs->logicop_enable) {
color_control |= (pbs->logicop_func) << 16;
- } else
+ } else {
color_control |= (0xcc << 16);
+ }
- for (i = 0; i < 8; i++) {
- if (pbs->rt[i].blend_enable) {
- color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ if (pbs->independent_blend_enable) {
+ for (i = 0; i < nr_cbufs; i++) {
+ if (pbs->rt[i].blend_enable) {
+ color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ }
+ target_mask |= (pbs->rt[i].colormask << (4 * i));
+ }
+ } else {
+ for (i = 0; i < nr_cbufs; i++) {
+ if (pbs->rt[0].blend_enable) {
+ color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ }
+ target_mask |= (pbs->rt[0].colormask << (4 * i));
}
- target_mask |= (pbs->rt[i].colormask << (4 * i));
-
}
rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL);
rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask;
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 8271ad19fb..f93c20da35 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -110,7 +110,7 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op)
case PIPE_STENCIL_OP_DECR:
return V_028800_STENCIL_DECR;
case PIPE_STENCIL_OP_INCR_WRAP:
- return V_028800_STENCIL_INVERT;
+ return V_028800_STENCIL_INCR_WRAP;
case PIPE_STENCIL_OP_DECR_WRAP:
return V_028800_STENCIL_DECR_WRAP;
case PIPE_STENCIL_OP_INVERT:
@@ -289,7 +289,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
{
- return r600_translate_colorformat(format) != ~0;
+ return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 1bce911306..30d79ebdd6 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -33,6 +33,7 @@
#include "r600_screen.h"
#include "r600_context.h"
#include "r600_resource.h"
+#include "r600d.h"
extern struct u_resource_vtbl r600_texture_vtbl;
@@ -277,3 +278,250 @@ void r600_init_screen_texture_functions(struct pipe_screen *screen)
screen->get_tex_surface = r600_get_tex_surface;
screen->tex_surface_destroy = r600_tex_surface_destroy;
}
+
+static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+ const unsigned char *swizzle_view)
+{
+ unsigned i;
+ unsigned char swizzle[4];
+ unsigned result = 0;
+ const uint32_t swizzle_shift[4] = {
+ 16, 19, 22, 25,
+ };
+ const uint32_t swizzle_bit[4] = {
+ 0, 1, 2, 3,
+ };
+
+ if (swizzle_view) {
+ /* Combine two sets of swizzles. */
+ for (i = 0; i < 4; i++) {
+ swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
+ swizzle_format[swizzle_view[i]] : swizzle_view[i];
+ }
+ } else {
+ memcpy(swizzle, swizzle_format, 4);
+ }
+
+ /* Get swizzle. */
+ for (i = 0; i < 4; i++) {
+ switch (swizzle[i]) {
+ case UTIL_FORMAT_SWIZZLE_Y:
+ result |= swizzle_bit[1] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_Z:
+ result |= swizzle_bit[2] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_W:
+ result |= swizzle_bit[3] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ result |= V_038010_SQ_SEL_0 << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ result |= V_038010_SQ_SEL_1 << swizzle_shift[i];
+ break;
+ default: /* UTIL_FORMAT_SWIZZLE_X */
+ result |= swizzle_bit[0] << swizzle_shift[i];
+ }
+ }
+ return result;
+}
+
+/* texture format translate */
+uint32_t r600_translate_texformat(enum pipe_format format,
+ const unsigned char *swizzle_view,
+ uint32_t *word4_p, uint32_t *yuv_format_p)
+{
+ uint32_t result = 0, word4 = 0, yuv_format = 0;
+ const struct util_format_description *desc;
+ boolean uniform = TRUE;
+ int i;
+ const uint32_t sign_bit[4] = {
+ S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED),
+ S_038010_FORMAT_COMP_Y(V_038010_SQ_FORMAT_COMP_SIGNED),
+ S_038010_FORMAT_COMP_Z(V_038010_SQ_FORMAT_COMP_SIGNED),
+ S_038010_FORMAT_COMP_W(V_038010_SQ_FORMAT_COMP_SIGNED)
+ };
+ desc = util_format_description(format);
+
+ /* Colorspace (return non-RGB formats directly). */
+ switch (desc->colorspace) {
+ /* Depth stencil formats */
+ case UTIL_FORMAT_COLORSPACE_ZS:
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ result = V_028010_DEPTH_16;
+ goto out_word4;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ result = V_028010_DEPTH_X8_24;
+ goto out_word4;
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ result = V_028010_DEPTH_8_24;
+ goto out_word4;
+ default:
+ goto out_unknown;
+ }
+
+ case UTIL_FORMAT_COLORSPACE_YUV:
+ yuv_format |= (1 << 30);
+ switch (format) {
+ case PIPE_FORMAT_UYVY:
+ case PIPE_FORMAT_YUYV:
+ default:
+ break;
+ }
+ goto out_unknown; /* TODO */
+
+ case UTIL_FORMAT_COLORSPACE_SRGB:
+ word4 |= S_038010_FORCE_DEGAMMA(1);
+ if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB)
+ goto out_unknown; /* fails for some reason - TODO */
+ break;
+
+ default:
+ break;
+ }
+
+ word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
+
+ /* S3TC formats. TODO */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ goto out_unknown;
+ }
+
+
+ for (i = 0; i < desc->nr_channels; i++) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ word4 |= sign_bit[i];
+ }
+ }
+
+ /* R8G8Bx_SNORM - TODO CxV8U8 */
+
+ /* RGTC - TODO */
+
+ /* See whether the components are of the same size. */
+ for (i = 1; i < desc->nr_channels; i++) {
+ uniform = uniform && desc->channel[0].size == desc->channel[i].size;
+ }
+
+ /* Non-uniform formats. */
+ if (!uniform) {
+ switch(desc->nr_channels) {
+ case 3:
+ if (desc->channel[0].size == 5 &&
+ desc->channel[1].size == 6 &&
+ desc->channel[2].size == 5) {
+ result |= V_0280A0_COLOR_5_6_5;
+ goto out_word4;
+ }
+ goto out_unknown;
+ case 4:
+ if (desc->channel[0].size == 5 &&
+ desc->channel[1].size == 5 &&
+ desc->channel[2].size == 5 &&
+ desc->channel[3].size == 1) {
+ result |= V_0280A0_COLOR_1_5_5_5;
+ goto out_word4;
+ }
+ if (desc->channel[0].size == 10 &&
+ desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 &&
+ desc->channel[3].size == 2) {
+ result |= V_0280A0_COLOR_10_10_10_2;
+ goto out_word4;
+ }
+ goto out_unknown;
+ }
+ goto out_unknown;
+ }
+
+ /* uniform formats */
+ switch (desc->channel[0].type) {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (!desc->channel[0].normalized &&
+ desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
+ goto out_unknown;
+ }
+
+ switch (desc->channel[0].size) {
+ case 4:
+ switch (desc->nr_channels) {
+ case 2:
+ result |= V_0280A0_COLOR_4_4;
+ goto out_word4;
+ case 4:
+ result |= V_0280A0_COLOR_4_4_4_4;
+ goto out_word4;
+ }
+ goto out_unknown;
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ result |= V_0280A0_COLOR_8;
+ goto out_word4;
+ case 2:
+ result |= V_0280A0_COLOR_8_8;
+ goto out_word4;
+ case 4:
+ result |= V_0280A0_COLOR_8_8_8_8;
+ goto out_word4;
+ }
+ goto out_unknown;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ result |= V_0280A0_COLOR_16;
+ goto out_word4;
+ case 2:
+ result |= V_0280A0_COLOR_16_16;
+ goto out_word4;
+ case 4:
+ result |= V_0280A0_COLOR_16_16_16_16;
+ goto out_word4;
+ }
+ }
+ goto out_unknown;
+
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[0].size) {
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ result |= V_0280A0_COLOR_16_FLOAT;
+ goto out_word4;
+ case 2:
+ result |= V_0280A0_COLOR_16_16_FLOAT;
+ goto out_word4;
+ case 4:
+ result |= V_0280A0_COLOR_16_16_16_16_FLOAT;
+ goto out_word4;
+ }
+ goto out_unknown;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ result |= V_0280A0_COLOR_32_FLOAT;
+ goto out_word4;
+ case 2:
+ result |= V_0280A0_COLOR_32_32_FLOAT;
+ goto out_word4;
+ case 4:
+ result |= V_0280A0_COLOR_32_32_32_32_FLOAT;
+ goto out_word4;
+ }
+ }
+
+ }
+out_word4:
+ if (word4_p)
+ *word4_p = word4;
+ if (yuv_format_p)
+ *yuv_format_p = yuv_format;
+// fprintf(stderr,"returning %08x %08x %08x\n", result, word4, yuv_format);
+ return result;
+out_unknown:
+// R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format));
+ return ~0;
+}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index fb71b1e5d1..53388f822e 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -352,6 +352,61 @@
#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
#define C_028808_ROP3 0xFF00FFFF
+#define R_028810_PA_CL_CLIP_CNTL 0x028810
+#define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0)
+#define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1)
+#define C_028810_UCP_ENA_0 0xFFFFFFFE
+#define S_028810_UCP_ENA_1(x) (((x) & 0x1) << 1)
+#define G_028810_UCP_ENA_1(x) (((x) >> 1) & 0x1)
+#define C_028810_UCP_ENA_1 0xFFFFFFFD
+#define S_028810_UCP_ENA_2(x) (((x) & 0x1) << 2)
+#define G_028810_UCP_ENA_2(x) (((x) >> 2) & 0x1)
+#define C_028810_UCP_ENA_2 0xFFFFFFFB
+#define S_028810_UCP_ENA_3(x) (((x) & 0x1) << 3)
+#define G_028810_UCP_ENA_3(x) (((x) >> 3) & 0x1)
+#define C_028810_UCP_ENA_3 0xFFFFFFF7
+#define S_028810_UCP_ENA_4(x) (((x) & 0x1) << 4)
+#define G_028810_UCP_ENA_4(x) (((x) >> 4) & 0x1)
+#define C_028810_UCP_ENA_4 0xFFFFFFEF
+#define S_028810_UCP_ENA_5(x) (((x) & 0x1) << 5)
+#define G_028810_UCP_ENA_5(x) (((x) >> 5) & 0x1)
+#define C_028810_UCP_ENA_5 0xFFFFFFDF
+#define S_028810_PS_UCP_Y_SCALE_NEG(x) (((x) & 0x1) << 13)
+#define G_028810_PS_UCP_Y_SCALE_NEG(x) (((x) >> 13) & 0x1)
+#define C_028810_PS_UCP_Y_SCALE_NEG 0xFFFFDFFF
+#define S_028810_PS_UCP_MODE(x) (((x) & 0x3) << 14)
+#define G_028810_PS_UCP_MODE(x) (((x) >> 14) & 0x3)
+#define C_028810_PS_UCP_MODE 0xFFFF3FFF
+#define S_028810_CLIP_DISABLE(x) (((x) & 0x1) << 16)
+#define G_028810_CLIP_DISABLE(x) (((x) >> 16) & 0x1)
+#define C_028810_CLIP_DISABLE 0xFFFEFFFF
+#define S_028810_UCP_CULL_ONLY_ENA(x) (((x) & 0x1) << 17)
+#define G_028810_UCP_CULL_ONLY_ENA(x) (((x) >> 17) & 0x1)
+#define C_028810_UCP_CULL_ONLY_ENA 0xFFFDFFFF
+#define S_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) & 0x1) << 18)
+#define G_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) >> 18) & 0x1)
+#define C_028810_BOUNDARY_EDGE_FLAG_ENA 0xFFFBFFFF
+#define S_028810_DX_CLIP_SPACE_DEF(x) (((x) & 0x1) << 19)
+#define G_028810_DX_CLIP_SPACE_DEF(x) (((x) >> 19) & 0x1)
+#define C_028810_DX_CLIP_SPACE_DEF 0xFFF7FFFF
+#define S_028810_DIS_CLIP_ERR_DETECT(x) (((x) & 0x1) << 20)
+#define G_028810_DIS_CLIP_ERR_DETECT(x) (((x) >> 20) & 0x1)
+#define C_028810_DIS_CLIP_ERR_DETECT 0xFFEFFFFF
+#define S_028810_VTX_KILL_OR(x) (((x) & 0x1) << 21)
+#define G_028810_VTX_KILL_OR(x) (((x) >> 21) & 0x1)
+#define C_028810_VTX_KILL_OR 0xFFDFFFFF
+#define S_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) & 0x1) << 24)
+#define G_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) >> 24) & 0x1)
+#define C_028810_DX_LINEAR_ATTR_CLIP_ENA 0xFEFFFFFF
+#define S_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) & 0x1) << 25)
+#define G_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) >> 25) & 0x1)
+#define C_028810_VTE_VPORT_PROVOKE_DISABLE 0xFDFFFFFF
+#define S_028810_ZCLIP_NEAR_DISABLE(x) (((x) & 0x1) << 26)
+#define G_028810_ZCLIP_NEAR_DISABLE(x) (((x) >> 26) & 0x1)
+#define C_028810_ZCLIP_NEAR_DISABLE 0xFBFFFFFF
+#define S_028810_ZCLIP_FAR_DISABLE(x) (((x) & 0x1) << 27)
+#define G_028810_ZCLIP_FAR_DISABLE(x) (((x) >> 27) & 0x1)
+#define C_028810_ZCLIP_FAR_DISABLE 0xF7FFFFFF
#define R_028010_DB_DEPTH_INFO 0x028010
#define S_028010_FORMAT(x) (((x) & 0x7) << 0)
#define G_028010_FORMAT(x) (((x) >> 0) & 0x7)
@@ -599,6 +654,13 @@
#define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0)
#define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF)
#define C_028E0C_OFFSET 0x00000000
+#define R_028A00_PA_SU_POINT_SIZE 0x028A00
+#define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0)
+#define G_028A00_HEIGHT(x) (((x) >> 0) & 0xFFFF)
+#define C_028A00_HEIGHT 0xFFFF0000
+#define S_028A00_WIDTH(x) (((x) & 0xFFFF) << 16)
+#define G_028A00_WIDTH(x) (((x) >> 16) & 0xFFFF)
+#define C_028A00_WIDTH 0x0000FFFF
#define R_028A40_VGT_GS_MODE 0x028A40
#define S_028A40_MODE(x) (((x) & 0x3) << 0)
#define G_028A40_MODE(x) (((x) >> 0) & 0x3)
@@ -1098,6 +1160,79 @@
#define V_008958_DI_PT_2D_FILL_RECT_LIST 0x0000001A
#define V_008958_DI_PT_2D_LINE_STRIP 0x0000001B
#define V_008958_DI_PT_2D_TRI_STRIP 0x0000001C
+#define R_02881C_PA_CL_VS_OUT_CNTL 0x02881C
+#define S_02881C_CLIP_DIST_ENA_0(x) (((x) & 0x1) << 0)
+#define G_02881C_CLIP_DIST_ENA_0(x) (((x) >> 0) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_0 0xFFFFFFFE
+#define S_02881C_CLIP_DIST_ENA_1(x) (((x) & 0x1) << 1)
+#define G_02881C_CLIP_DIST_ENA_1(x) (((x) >> 1) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_1 0xFFFFFFFD
+#define S_02881C_CLIP_DIST_ENA_2(x) (((x) & 0x1) << 2)
+#define G_02881C_CLIP_DIST_ENA_2(x) (((x) >> 2) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_2 0xFFFFFFFB
+#define S_02881C_CLIP_DIST_ENA_3(x) (((x) & 0x1) << 3)
+#define G_02881C_CLIP_DIST_ENA_3(x) (((x) >> 3) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_3 0xFFFFFFF7
+#define S_02881C_CLIP_DIST_ENA_4(x) (((x) & 0x1) << 4)
+#define G_02881C_CLIP_DIST_ENA_4(x) (((x) >> 4) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_4 0xFFFFFFEF
+#define S_02881C_CLIP_DIST_ENA_5(x) (((x) & 0x1) << 5)
+#define G_02881C_CLIP_DIST_ENA_5(x) (((x) >> 5) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_5 0xFFFFFFDF
+#define S_02881C_CLIP_DIST_ENA_6(x) (((x) & 0x1) << 6)
+#define G_02881C_CLIP_DIST_ENA_6(x) (((x) >> 6) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_6 0xFFFFFFBF
+#define S_02881C_CLIP_DIST_ENA_7(x) (((x) & 0x1) << 7)
+#define G_02881C_CLIP_DIST_ENA_7(x) (((x) >> 7) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_7 0xFFFFFF7F
+#define S_02881C_CULL_DIST_ENA_0(x) (((x) & 0x1) << 8)
+#define G_02881C_CULL_DIST_ENA_0(x) (((x) >> 8) & 0x1)
+#define C_02881C_CULL_DIST_ENA_0 0xFFFFFEFF
+#define S_02881C_CULL_DIST_ENA_1(x) (((x) & 0x1) << 9)
+#define G_02881C_CULL_DIST_ENA_1(x) (((x) >> 9) & 0x1)
+#define C_02881C_CULL_DIST_ENA_1 0xFFFFFDFF
+#define S_02881C_CULL_DIST_ENA_2(x) (((x) & 0x1) << 10)
+#define G_02881C_CULL_DIST_ENA_2(x) (((x) >> 10) & 0x1)
+#define C_02881C_CULL_DIST_ENA_2 0xFFFFFBFF
+#define S_02881C_CULL_DIST_ENA_3(x) (((x) & 0x1) << 11)
+#define G_02881C_CULL_DIST_ENA_3(x) (((x) >> 11) & 0x1)
+#define C_02881C_CULL_DIST_ENA_3 0xFFFFF7FF
+#define S_02881C_CULL_DIST_ENA_4(x) (((x) & 0x1) << 12)
+#define G_02881C_CULL_DIST_ENA_4(x) (((x) >> 12) & 0x1)
+#define C_02881C_CULL_DIST_ENA_4 0xFFFFEFFF
+#define S_02881C_CULL_DIST_ENA_5(x) (((x) & 0x1) << 13)
+#define G_02881C_CULL_DIST_ENA_5(x) (((x) >> 13) & 0x1)
+#define C_02881C_CULL_DIST_ENA_5 0xFFFFDFFF
+#define S_02881C_CULL_DIST_ENA_6(x) (((x) & 0x1) << 14)
+#define G_02881C_CULL_DIST_ENA_6(x) (((x) >> 14) & 0x1)
+#define C_02881C_CULL_DIST_ENA_6 0xFFFFBFFF
+#define S_02881C_CULL_DIST_ENA_7(x) (((x) & 0x1) << 15)
+#define G_02881C_CULL_DIST_ENA_7(x) (((x) >> 15) & 0x1)
+#define C_02881C_CULL_DIST_ENA_7 0xFFFF7FFF
+#define S_02881C_USE_VTX_POINT_SIZE(x) (((x) & 0x1) << 16)
+#define G_02881C_USE_VTX_POINT_SIZE(x) (((x) >> 16) & 0x1)
+#define C_02881C_USE_VTX_POINT_SIZE 0xFFFEFFFF
+#define S_02881C_USE_VTX_EDGE_FLAG(x) (((x) & 0x1) << 17)
+#define G_02881C_USE_VTX_EDGE_FLAG(x) (((x) >> 17) & 0x1)
+#define C_02881C_USE_VTX_EDGE_FLAG 0xFFFDFFFF
+#define S_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) & 0x1) << 18)
+#define G_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) >> 18) & 0x1)
+#define C_02881C_USE_VTX_RENDER_TARGET_INDX 0xFFFBFFFF
+#define S_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) & 0x1) << 19)
+#define G_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) >> 19) & 0x1)
+#define C_02881C_USE_VTX_VIEWPORT_INDX 0xFFF7FFFF
+#define S_02881C_USE_VTX_KILL_FLAG(x) (((x) & 0x1) << 20)
+#define G_02881C_USE_VTX_KILL_FLAG(x) (((x) >> 20) & 0x1)
+#define C_02881C_USE_VTX_KILL_FLAG 0xFFEFFFFF
+#define S_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) & 0x1) << 21)
+#define G_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) >> 21) & 0x1)
+#define C_02881C_VS_OUT_MISC_VEC_ENA 0xFFDFFFFF
+#define S_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) & 0x1) << 22)
+#define G_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) >> 22) & 0x1)
+#define C_02881C_VS_OUT_CCDIST0_VEC_ENA 0xFFBFFFFF
+#define S_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) & 0x1) << 23)
+#define G_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) >> 23) & 0x1)
+#define C_02881C_VS_OUT_CCDIST1_VEC_ENA 0xFF7FFFFF
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
#define S_028868_NUM_GPRS(x) (((x) & 0xFF) << 0)
#define G_028868_NUM_GPRS(x) (((x) >> 0) & 0xFF)
diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h
index 3a8405f9b4..8f00a4895a 100644
--- a/src/gallium/drivers/r600/radeon.h
+++ b/src/gallium/drivers/r600/radeon.h
@@ -157,11 +157,42 @@ int radeon_ctx_submit(struct radeon_ctx *ctx);
void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
/*
+ * radeon context functions
+ */
+#pragma pack(1)
+struct radeon_cs_reloc {
+ uint32_t handle;
+ uint32_t read_domain;
+ uint32_t write_domain;
+ uint32_t flags;
+};
+#pragma pack()
+
+struct radeon_ctx {
+ int refcount;
+ struct radeon *radeon;
+ u32 *pm4;
+ u32 cpm4;
+ u32 draw_cpm4;
+ unsigned id;
+ unsigned next_id;
+ unsigned nreloc;
+ struct radeon_cs_reloc *reloc;
+ unsigned nbo;
+ struct radeon_bo **bo;
+ unsigned ndraw;
+ struct radeon_draw *cdraw;
+ struct radeon_draw **draw;
+ unsigned nstate;
+ struct radeon_state **state;
+};
+
+/*
* R600/R700
*/
-#define R600_NSTATE 1273
-#define R600_NTYPE 25
+#define R600_NSTATE 1280
+#define R600_NTYPE 32
#define R600_CONFIG 0
#define R600_CONFIG_TYPE 0
@@ -207,12 +238,26 @@ void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
#define R600_GS_SAMPLER_BORDER_TYPE 20
#define R600_CB0 1269
#define R600_CB0_TYPE 21
-#define R600_DB 1270
-#define R600_DB_TYPE 22
-#define R600_VGT 1271
-#define R600_VGT_TYPE 23
-#define R600_DRAW 1272
-#define R600_DRAW_TYPE 24
+#define R600_CB1 1270
+#define R600_CB1_TYPE 22
+#define R600_CB2 1271
+#define R600_CB2_TYPE 23
+#define R600_CB3 1272
+#define R600_CB3_TYPE 24
+#define R600_CB4 1273
+#define R600_CB4_TYPE 25
+#define R600_CB5 1274
+#define R600_CB5_TYPE 26
+#define R600_CB6 1275
+#define R600_CB6_TYPE 27
+#define R600_CB7 1276
+#define R600_CB7_TYPE 28
+#define R600_DB 1277
+#define R600_DB_TYPE 29
+#define R600_VGT 1278
+#define R600_VGT_TYPE 30
+#define R600_DRAW 1279
+#define R600_DRAW_TYPE 31
/* R600_CONFIG */
#define R600_CONFIG__SQ_CONFIG 0
#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 0358c14e24..1fa3ec8300 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -102,6 +102,16 @@ typedef unsigned char boolean;
# endif
#endif
+/* Forced function inlining */
+#ifndef ALWAYS_INLINE
+# ifdef __GNUC__
+# define ALWAYS_INLINE inline __attribute__((always_inline))
+# elif defined(_MSC_VER)
+# define ALWAYS_INLINE __forceinline
+# else
+# define ALWAYS_INLINE INLINE
+# endif
+#endif
/* Function visibility */
#ifndef PUBLIC
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c
index c67ca2224d..1bdfdccf43 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.c
@@ -30,6 +30,7 @@
*/
#include "dri_screen.h"
+#include "dri_context.h"
#include "dri_drawable.h"
#include "pipe/p_screen.h"
@@ -157,9 +158,9 @@ dri_destroy_buffer(__DRIdrawable * dPriv)
/**
* Validate the texture at an attachment. Allocate the texture if it does not
- * exist.
+ * exist. Used by the TFP extension.
*/
-void
+static void
dri_drawable_validate_att(struct dri_drawable *drawable,
enum st_attachment_type statt)
{
@@ -180,11 +181,61 @@ dri_drawable_validate_att(struct dri_drawable *drawable,
drawable->texture_stamp = drawable->dPriv->lastStamp - 1;
- /* this calles into the manager */
drawable->base.validate(&drawable->base, statts, count, NULL);
}
/**
+ * These are used for GLX_EXT_texture_from_pixmap
+ */
+static void
+dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
+ GLint format, __DRIdrawable *dPriv)
+{
+ struct dri_context *ctx = dri_context(pDRICtx);
+ struct dri_drawable *drawable = dri_drawable(dPriv);
+ struct pipe_resource *pt;
+
+ dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT);
+
+ pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT];
+
+ if (pt) {
+ enum pipe_format internal_format = pt->format;
+
+ if (format == __DRI_TEXTURE_FORMAT_RGB) {
+ /* only need to cover the formats recognized by dri_fill_st_visual */
+ switch (internal_format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
+ break;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
+ break;
+ default:
+ break;
+ }
+ }
+
+ ctx->st->teximage(ctx->st,
+ (target == GL_TEXTURE_2D) ? ST_TEXTURE_2D : ST_TEXTURE_RECT,
+ 0, internal_format, pt, FALSE);
+ }
+}
+
+static void
+dri_set_tex_buffer(__DRIcontext *pDRICtx, GLint target,
+ __DRIdrawable *dPriv)
+{
+ dri_set_tex_buffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
+}
+
+const __DRItexBufferExtension driTexBufferExtension = {
+ { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+ dri_set_tex_buffer,
+ dri_set_tex_buffer2,
+};
+
+/**
* Get the format and binding of an attachment.
*/
void
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h
index 3f2e24fc15..74e662d36c 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.h
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.h
@@ -89,9 +89,7 @@ dri_drawable_get_format(struct dri_drawable *drawable,
enum pipe_format *format,
unsigned *bind);
-void
-dri_drawable_validate_att(struct dri_drawable *drawable,
- enum st_attachment_type statt);
+extern const __DRItexBufferExtension driTexBufferExtension;
#endif
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index 1fb8996337..47005c17e2 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -67,86 +67,6 @@ static const __DRI2flushExtension dri2FlushExtension = {
};
/**
- * These are used for GLX_EXT_texture_from_pixmap
- */
-static void
-dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
- GLint format, __DRIdrawable *dPriv)
-{
- struct dri_context *ctx = dri_context(pDRICtx);
- struct dri_drawable *drawable = dri_drawable(dPriv);
- struct pipe_resource *pt;
-
- dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT);
-
- pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT];
-
- if (pt) {
- enum pipe_format internal_format = pt->format;
-
- if (format == __DRI_TEXTURE_FORMAT_RGB) {
- /* only need to cover the formats recognized by dri_fill_st_visual */
- switch (internal_format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
- break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
- break;
- default:
- break;
- }
- }
-
- ctx->st->teximage(ctx->st,
- (target == GL_TEXTURE_2D) ? ST_TEXTURE_2D : ST_TEXTURE_RECT,
- 0, internal_format, pt, FALSE);
- }
-}
-
-static void
-dri2_set_tex_buffer(__DRIcontext *pDRICtx, GLint target,
- __DRIdrawable *dPriv)
-{
- dri2_set_tex_buffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
-}
-
-static const __DRItexBufferExtension dri2TexBufferExtension = {
- { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
- dri2_set_tex_buffer,
- dri2_set_tex_buffer2,
-};
-
-/**
- * Get the format and binding of an attachment.
- */
-static INLINE void
-dri2_drawable_get_format(struct dri_drawable *drawable,
- enum st_attachment_type statt,
- enum pipe_format *format,
- unsigned *bind)
-{
- switch (statt) {
- case ST_ATTACHMENT_FRONT_LEFT:
- case ST_ATTACHMENT_BACK_LEFT:
- case ST_ATTACHMENT_FRONT_RIGHT:
- case ST_ATTACHMENT_BACK_RIGHT:
- *format = drawable->stvis.color_format;
- *bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
- break;
- case ST_ATTACHMENT_DEPTH_STENCIL:
- *format = drawable->stvis.depth_stencil_format;
- *bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */
- break;
- default:
- *format = PIPE_FORMAT_NONE;
- *bind = 0;
- break;
- }
-}
-
-
-/**
* Retrieve __DRIbuffer from the DRI loader.
*/
static __DRIbuffer *
@@ -176,7 +96,7 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
unsigned bind;
int att, bpp;
- dri2_drawable_get_format(drawable, statts[i], &format, &bind);
+ dri_drawable_get_format(drawable, statts[i], &format, &bind);
if (format == PIPE_FORMAT_NONE)
continue;
@@ -318,7 +238,7 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable,
break;
}
- dri2_drawable_get_format(drawable, statt, &format, &bind);
+ dri_drawable_get_format(drawable, statt, &format, &bind);
if (statt == ST_ATTACHMENT_INVALID || format == PIPE_FORMAT_NONE)
continue;
@@ -483,7 +403,7 @@ static const __DRIextension *dri_screen_extensions[] = {
&driCopySubBufferExtension.base,
&driSwapControlExtension.base,
&driMediaStreamCounterExtension.base,
- &dri2TexBufferExtension.base,
+ &driTexBufferExtension.base,
&dri2FlushExtension.base,
&dri2ImageExtension.base,
&dri2ConfigQueryExtension.base,
diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c
index ae96f1b20e..249ccd7fcf 100644
--- a/src/gallium/state_trackers/dri/sw/drisw.c
+++ b/src/gallium/state_trackers/dri/sw/drisw.c
@@ -201,7 +201,7 @@ drisw_allocate_textures(struct dri_drawable *drawable,
struct pipe_resource templ;
unsigned width, height;
boolean resized;
- int i;
+ unsigned i;
width = drawable->dPriv->w;
height = drawable->dPriv->h;
@@ -222,7 +222,7 @@ drisw_allocate_textures(struct dri_drawable *drawable,
templ.depth0 = 1;
templ.last_level = 0;
- for (i = 0; i < ST_ATTACHMENT_COUNT; i++) {
+ for (i = 0; i < count; i++) {
enum pipe_format format;
unsigned bind;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 56d575ffe0..02b9f6aec4 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -530,6 +530,9 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy,
if (gdpy->native->get_param(gdpy->native, NATIVE_PARAM_USE_NATIVE_BUFFER))
dpy->Extensions.KHR_image_pixmap = EGL_TRUE;
+ dpy->Extensions.KHR_reusable_sync = EGL_TRUE;
+ dpy->Extensions.KHR_fence_sync = EGL_TRUE;
+
if (egl_g3d_add_configs(drv, dpy, 1) == 1) {
_eglError(EGL_NOT_INITIALIZED, "eglInitialize(unable to add configs)");
goto fail;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h
index f33dc91cf9..be450bbede 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.h
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.h
@@ -30,12 +30,14 @@
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_format.h"
+#include "os/os_thread.h"
#include "egldriver.h"
#include "egldisplay.h"
#include "eglcontext.h"
#include "eglsurface.h"
#include "eglconfig.h"
#include "eglimage.h"
+#include "eglsync.h"
#include "eglscreen.h"
#include "eglmode.h"
@@ -99,6 +101,24 @@ struct egl_g3d_image {
_EGL_DRIVER_STANDARD_TYPECASTS(egl_g3d)
_EGL_DRIVER_TYPECAST(egl_g3d_image, _EGLImage, obj)
+#ifdef EGL_KHR_reusable_sync
+
+struct egl_g3d_sync {
+ _EGLSync base;
+
+ int refs;
+
+ /* the mutex protects only the condvar, not the struct */
+ pipe_mutex mutex;
+ pipe_condvar condvar;
+
+ /* for fence sync */
+ struct pipe_fence_handle *fence;
+};
+_EGL_DRIVER_TYPECAST(egl_g3d_sync, _EGLSync, obj)
+
+#endif /* EGL_KHR_reusable_sync */
+
#ifdef EGL_MESA_screen_surface
struct egl_g3d_screen {
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
index edac72a822..1120945edc 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
@@ -34,6 +34,7 @@
#include "egl_g3d.h"
#include "egl_g3d_api.h"
#include "egl_g3d_image.h"
+#include "egl_g3d_sync.h"
#include "egl_g3d_st.h"
#include "egl_g3d_loader.h"
#include "native.h"
@@ -806,6 +807,13 @@ egl_g3d_init_driver_api(_EGLDriver *drv)
drv->API.CreateImageKHR = egl_g3d_create_image;
drv->API.DestroyImageKHR = egl_g3d_destroy_image;
+#ifdef EGL_KHR_reusable_sync
+ drv->API.CreateSyncKHR = egl_g3d_create_sync;
+ drv->API.DestroySyncKHR = egl_g3d_destroy_sync;
+ drv->API.ClientWaitSyncKHR = egl_g3d_client_wait_sync;
+ drv->API.SignalSyncKHR = egl_g3d_signal_sync;
+#endif
+
#ifdef EGL_MESA_screen_surface
drv->API.CreateScreenSurfaceMESA = egl_g3d_create_screen_surface;
drv->API.ShowScreenSurfaceMESA = egl_g3d_show_screen_surface;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_sync.c b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c
new file mode 100644
index 0000000000..ec74e9eb94
--- /dev/null
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c
@@ -0,0 +1,284 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_atomic.h"
+#include "os/os_thread.h"
+#include "eglsync.h"
+#include "eglcurrent.h"
+
+#include "egl_g3d.h"
+#include "egl_g3d_sync.h"
+
+#ifdef EGL_KHR_reusable_sync
+
+/**
+ * Wait for the conditional variable.
+ */
+static EGLint
+egl_g3d_wait_sync_condvar(struct egl_g3d_sync *gsync, EGLTimeKHR timeout)
+{
+ _EGLDisplay *dpy = gsync->base.Resource.Display;
+
+ pipe_mutex_lock(gsync->mutex);
+
+ /* unlock display lock just before waiting */
+ _eglUnlockMutex(&dpy->Mutex);
+
+ /* No timed wait. Always treat timeout as EGL_FOREVER_KHR */
+ pipe_condvar_wait(gsync->condvar, gsync->mutex);
+
+ _eglLockMutex(&dpy->Mutex);
+
+ pipe_mutex_unlock(gsync->mutex);
+
+ return EGL_CONDITION_SATISFIED_KHR;
+}
+
+/**
+ * Signal the conditional variable.
+ */
+static void
+egl_g3d_signal_sync_condvar(struct egl_g3d_sync *gsync)
+{
+ pipe_mutex_lock(gsync->mutex);
+ pipe_condvar_broadcast(gsync->condvar);
+ pipe_mutex_unlock(gsync->mutex);
+}
+
+/**
+ * Insert a fence command to the command stream of the current context.
+ */
+static EGLint
+egl_g3d_insert_fence_sync(struct egl_g3d_sync *gsync)
+{
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct egl_g3d_context *gctx = egl_g3d_context(ctx);
+
+ /* already checked in egl_g3d_create_sync */
+ assert(gctx);
+
+ /* insert the fence command */
+ gctx->stctxi->flush(gctx->stctxi, 0x0, &gsync->fence);
+ if (!gsync->fence)
+ gsync->base.SyncStatus = EGL_SIGNALED_KHR;
+
+ return EGL_SUCCESS;
+}
+
+/**
+ * Wait for the fence sync to be signaled.
+ */
+static EGLint
+egl_g3d_wait_fence_sync(struct egl_g3d_sync *gsync, EGLTimeKHR timeout)
+{
+ EGLint ret;
+
+ if (gsync->fence) {
+ _EGLDisplay *dpy = gsync->base.Resource.Display;
+ struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+ struct pipe_screen *screen = gdpy->native->screen;
+ struct pipe_fence_handle *fence = gsync->fence;
+
+ gsync->fence = NULL;
+
+ _eglUnlockMutex(&dpy->Mutex);
+ /* no timed finish? */
+ screen->fence_finish(screen, fence, 0x0);
+ ret = EGL_CONDITION_SATISFIED_KHR;
+ _eglLockMutex(&dpy->Mutex);
+
+ gsync->base.SyncStatus = EGL_SIGNALED_KHR;
+
+ screen->fence_reference(screen, &fence, NULL);
+ egl_g3d_signal_sync_condvar(gsync);
+ }
+ else {
+ ret = egl_g3d_wait_sync_condvar(gsync, timeout);
+ }
+
+ return ret;
+}
+
+static INLINE void
+egl_g3d_ref_sync(struct egl_g3d_sync *gsync)
+{
+ p_atomic_inc(&gsync->refs);
+}
+
+static INLINE void
+egl_g3d_unref_sync(struct egl_g3d_sync *gsync)
+{
+ if (p_atomic_dec_zero(&gsync->refs)) {
+ pipe_condvar_destroy(gsync->condvar);
+ pipe_mutex_destroy(gsync->mutex);
+
+ if (gsync->fence) {
+ struct egl_g3d_display *gdpy =
+ egl_g3d_display(gsync->base.Resource.Display);
+ struct pipe_screen *screen = gdpy->native->screen;
+
+ screen->fence_reference(screen, &gsync->fence, NULL);
+ }
+
+ FREE(gsync);
+ }
+}
+
+_EGLSync *
+egl_g3d_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
+ EGLenum type, const EGLint *attrib_list)
+{
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct egl_g3d_sync *gsync;
+ EGLint err;
+
+ if (!ctx || ctx->Resource.Display != dpy) {
+ _eglError(EGL_BAD_MATCH, "eglCreateSyncKHR");
+ return NULL;
+ }
+
+ gsync = CALLOC_STRUCT(egl_g3d_sync);
+ if (!gsync) {
+ _eglError(EGL_BAD_ALLOC, "eglCreateSyncKHR");
+ return NULL;
+ }
+
+ if (!_eglInitSync(&gsync->base, dpy, type, attrib_list)) {
+ FREE(gsync);
+ return NULL;
+ }
+
+ switch (type) {
+ case EGL_SYNC_REUSABLE_KHR:
+ err = EGL_SUCCESS;
+ break;
+ case EGL_SYNC_FENCE_KHR:
+ err = egl_g3d_insert_fence_sync(gsync);
+ break;
+ default:
+ err = EGL_BAD_ATTRIBUTE;
+ break;
+ }
+
+ if (err != EGL_SUCCESS) {
+ _eglError(err, "eglCreateSyncKHR");
+ FREE(gsync);
+ return NULL;
+ }
+
+ pipe_mutex_init(gsync->mutex);
+ pipe_condvar_init(gsync->condvar);
+ p_atomic_set(&gsync->refs, 1);
+
+ return &gsync->base;
+}
+
+EGLBoolean
+egl_g3d_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync)
+{
+ struct egl_g3d_sync *gsync = egl_g3d_sync(sync);
+
+ switch (gsync->base.Type) {
+ case EGL_SYNC_REUSABLE_KHR:
+ /* signal the waiters */
+ if (gsync->base.SyncStatus != EGL_SIGNALED_KHR) {
+ gsync->base.SyncStatus = EGL_SIGNALED_KHR;
+ egl_g3d_signal_sync_condvar(gsync);
+ }
+ break;
+ default:
+ break;
+ }
+
+ egl_g3d_unref_sync(gsync);
+
+ return EGL_TRUE;
+}
+
+EGLint
+egl_g3d_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLint flags, EGLTimeKHR timeout)
+{
+ struct egl_g3d_sync *gsync = egl_g3d_sync(sync);
+ EGLint ret = EGL_CONDITION_SATISFIED_KHR;
+
+ if (gsync->base.SyncStatus != EGL_SIGNALED_KHR) {
+ /* flush if there is a current context */
+ if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR) {
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct egl_g3d_context *gctx = egl_g3d_context(ctx);
+
+ if (gctx)
+ gctx->stctxi->flush(gctx->stctxi, PIPE_FLUSH_RENDER_CACHE , NULL);
+ }
+
+ if (timeout) {
+ /* reference the sync object in case it is destroyed while waiting */
+ egl_g3d_ref_sync(gsync);
+
+ switch (gsync->base.Type) {
+ case EGL_SYNC_REUSABLE_KHR:
+ ret = egl_g3d_wait_sync_condvar(gsync, timeout);
+ break;
+ case EGL_SYNC_FENCE_KHR:
+ ret = egl_g3d_wait_fence_sync(gsync, timeout);
+ default:
+ break;
+ }
+
+ egl_g3d_unref_sync(gsync);
+ }
+ else {
+ ret = EGL_TIMEOUT_EXPIRED_KHR;
+ }
+ }
+
+ return ret;
+}
+
+EGLBoolean
+egl_g3d_signal_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLenum mode)
+{
+ struct egl_g3d_sync *gsync = egl_g3d_sync(sync);
+
+ /* only for reusable sync */
+ if (sync->Type != EGL_SYNC_REUSABLE_KHR)
+ return _eglError(EGL_BAD_MATCH, "eglSignalSyncKHR");
+
+ if (gsync->base.SyncStatus != mode) {
+ gsync->base.SyncStatus = mode;
+ if (mode == EGL_SIGNALED_KHR)
+ egl_g3d_signal_sync_condvar(gsync);
+ }
+
+ return EGL_TRUE;
+}
+
+#endif /* EGL_KHR_reusable_sync */
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_sync.h b/src/gallium/state_trackers/egl/common/egl_g3d_sync.h
new file mode 100644
index 0000000000..3179ca04e1
--- /dev/null
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_sync.h
@@ -0,0 +1,53 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef _EGL_G3D_SYNC_H_
+#define _EGL_G3D_SYNC_H_
+
+#include "egl_g3d.h"
+
+#ifdef EGL_KHR_reusable_sync
+
+_EGLSync *
+egl_g3d_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
+ EGLenum type, const EGLint *attrib_list);
+
+EGLBoolean
+egl_g3d_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync);
+
+EGLint
+egl_g3d_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLint flags, EGLTimeKHR timeout);
+
+EGLBoolean
+egl_g3d_signal_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+ EGLenum mode);
+
+#endif /* EGL_KHR_reusable_sync */
+
+#endif /* _EGL_G3D_SYNC_H_ */
diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile
index 932303d194..ff886b37f2 100644
--- a/src/gallium/targets/dri-r600/Makefile
+++ b/src/gallium/targets/dri-r600/Makefile
@@ -21,6 +21,6 @@ DRIVER_DEFINES = \
include ../Makefile.dri
-DRI_LIB_DEPS += -ldrm_radeon
+DRI_LIB_DEPS +=
symlinks:
diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript
index 97c5df01fe..64d6d2a7f6 100644
--- a/src/gallium/targets/dri-r600/SConscript
+++ b/src/gallium/targets/dri-r600/SConscript
@@ -12,7 +12,7 @@ env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE'])
env.Prepend(LIBS = [
st_dri,
- r600drm,
+ r600winsys,
r600,
trace,
rbug,
diff --git a/src/gallium/targets/egl/st_GLESv1_CM.c b/src/gallium/targets/egl/st_GLESv1_CM.c
index 0c8de8992f..c1652d5131 100644
--- a/src/gallium/targets/egl/st_GLESv1_CM.c
+++ b/src/gallium/targets/egl/st_GLESv1_CM.c
@@ -1,3 +1,4 @@
+#include "state_tracker/st_api.h"
#include "state_tracker/st_gl_api.h"
PUBLIC struct st_api *
diff --git a/src/gallium/targets/egl/st_GLESv2.c b/src/gallium/targets/egl/st_GLESv2.c
index 87b3e65e23..9c26989008 100644
--- a/src/gallium/targets/egl/st_GLESv2.c
+++ b/src/gallium/targets/egl/st_GLESv2.c
@@ -1,3 +1,4 @@
+#include "state_tracker/st_api.h"
#include "state_tracker/st_gl_api.h"
PUBLIC struct st_api *
diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript
index 7e39ec21a4..860a17e13e 100644
--- a/src/gallium/tests/graw/SConscript
+++ b/src/gallium/tests/graw/SConscript
@@ -11,9 +11,12 @@ env = env.Clone()
env.Prepend(LIBPATH = [graw.dir])
env.Prepend(LIBS = ['graw'] + gallium)
-if platform == 'sunos5':
+if platform in ('freebsd8', 'sunos5'):
env.Append(LIBS = ['m'])
+if platform == 'freebsd8':
+ env.Append(LIBS = ['pthread'])
+
progs = [
'clear',
'tri',
diff --git a/src/gallium/tests/unit/Makefile b/src/gallium/tests/unit/Makefile
index f65958dadd..345bd1f694 100644
--- a/src/gallium/tests/unit/Makefile
+++ b/src/gallium/tests/unit/Makefile
@@ -22,7 +22,8 @@ SOURCES = \
pipe_barrier_test.c \
u_cache_test.c \
u_half_test.c \
- u_format_test.c
+ u_format_test.c \
+ translate_test.c
OBJECTS = $(SOURCES:.c=.o)
diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript
index a200123f44..edc68e34d9 100644
--- a/src/gallium/tests/unit/SConscript
+++ b/src/gallium/tests/unit/SConscript
@@ -4,14 +4,18 @@ env = env.Clone()
env.Prepend(LIBS = [gallium])
-if platform == 'sunos5':
+if platform in ('freebsd8', 'sunos5'):
env.Append(LIBS = ['m'])
+if platform == 'freebsd8':
+ env.Append(LIBS = ['pthread'])
+
progs = [
'pipe_barrier_test',
'u_cache_test',
'u_format_test',
- 'u_half_test'
+ 'u_half_test',
+ 'translate_test'
]
for prog in progs:
diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c
new file mode 100644
index 0000000000..d0946a91a2
--- /dev/null
+++ b/src/gallium/tests/unit/translate_test.c
@@ -0,0 +1,310 @@
+/**************************************************************************
+ *
+ * Copyright © 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+#include <translate/translate.h>
+#include <util/u_memory.h>
+#include <util/u_format.h>
+#include <util/u_cpu_detect.h>
+#include <rtasm/rtasm_cpu.h>
+
+/* don't use this for serious use */
+static double rand_double()
+{
+ const double rm = (double)RAND_MAX + 1;
+ double div = 1;
+ double v = 0;
+ unsigned i;
+ for(i = 0; i < 4; ++i)
+ {
+ div *= rm;
+ v += (double)rand() / div;
+ }
+ return v;
+}
+
+int main(int argc, char** argv)
+{
+ struct translate *(*create_fn)(const struct translate_key *key) = 0;
+
+ struct translate_key key;
+ unsigned output_format;
+ unsigned input_format;
+ unsigned buffer_size = 4096;
+ unsigned char* buffer[5];
+ unsigned char* byte_buffer;
+ float* float_buffer;
+ double* double_buffer;
+ unsigned count = 4;
+ unsigned i, j, k;
+ unsigned passed = 0;
+ unsigned total = 0;
+ const float error = 0.03125;
+
+ create_fn = 0;
+
+ util_cpu_detect();
+
+ if(argc <= 1)
+ {}
+ else if (!strcmp(argv[1], "generic"))
+ create_fn = translate_generic_create;
+ else if (!strcmp(argv[1], "x86"))
+ create_fn = translate_sse2_create;
+ else if (!strcmp(argv[1], "nosse"))
+ {
+ util_cpu_caps.has_sse = 0;
+ util_cpu_caps.has_sse2 = 0;
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+ create_fn = translate_sse2_create;
+ }
+ else if (!strcmp(argv[1], "sse"))
+ {
+ if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse())
+ {
+ printf("Error: CPU doesn't support SSE (test with qemu)\n");
+ return 2;
+ }
+ util_cpu_caps.has_sse2 = 0;
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+ create_fn = translate_sse2_create;
+ }
+ else if (!strcmp(argv[1], "sse2"))
+ {
+ if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse())
+ {
+ printf("Error: CPU doesn't support SSE2 (test with qemu)\n");
+ return 2;
+ }
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+ create_fn = translate_sse2_create;
+ }
+ else if (!strcmp(argv[1], "sse3"))
+ {
+ if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse())
+ {
+ printf("Error: CPU doesn't support SSE3 (test with qemu)\n");
+ return 2;
+ }
+ util_cpu_caps.has_sse4_1 = 0;
+ create_fn = translate_sse2_create;
+ }
+ else if (!strcmp(argv[1], "sse4.1"))
+ {
+ if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse())
+ {
+ printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n");
+ return 2;
+ }
+ create_fn = translate_sse2_create;
+ }
+
+ if (!create_fn)
+ {
+ printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
+ return 2;
+ }
+
+ for (i = 1; i < Elements(buffer); ++i)
+ buffer[i] = align_malloc(buffer_size, 4096);
+
+ byte_buffer = align_malloc(buffer_size, 4096);
+ float_buffer = align_malloc(buffer_size, 4096);
+ double_buffer = align_malloc(buffer_size, 4096);
+
+ key.nr_elements = 1;
+ key.element[0].input_buffer = 0;
+ key.element[0].input_offset = 0;
+ key.element[0].output_offset = 0;
+ key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
+ key.element[0].instance_divisor = 0;
+
+ srand(4359025);
+
+ /* avoid negative values that work badly when converted to unsigned format*/
+ for (i = 0; i < buffer_size; ++i)
+ byte_buffer[i] = rand() & 0x7f7f7f7f;
+
+ for (i = 0; i < buffer_size / sizeof(float); ++i)
+ float_buffer[i] = (float)rand_double();
+
+ for (i = 0; i < buffer_size / sizeof(double); ++i)
+ double_buffer[i] = rand_double();
+
+ for (output_format = 1; output_format < PIPE_FORMAT_COUNT; ++output_format)
+ {
+ const struct util_format_description* output_format_desc = util_format_description(output_format);
+ unsigned output_format_size;
+ unsigned output_normalized = 0;
+
+ if (!output_format_desc
+ || !output_format_desc->fetch_rgba_float
+ || !output_format_desc->pack_rgba_float
+ || output_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB
+ || output_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN
+ || !translate_is_output_format_supported(output_format))
+ continue;
+
+ for(i = 0; i < output_format_desc->nr_channels; ++i)
+ {
+ if(output_format_desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT)
+ output_normalized |= (1 << output_format_desc->channel[i].normalized);
+ }
+
+ output_format_size = util_format_get_stride(output_format, 1);
+
+ for (input_format = 1; input_format < PIPE_FORMAT_COUNT; ++input_format)
+ {
+ const struct util_format_description* input_format_desc = util_format_description(input_format);
+ unsigned input_format_size;
+ struct translate* translate[2];
+ unsigned fail = 0;
+ unsigned used_generic = 0;
+ unsigned input_normalized = 0;
+ boolean input_is_float = FALSE;
+
+ if (!input_format_desc
+ || !input_format_desc->fetch_rgba_float
+ || !input_format_desc->pack_rgba_float
+ || input_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB
+ || input_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN
+ || !translate_is_output_format_supported(input_format))
+ continue;
+
+ input_format_size = util_format_get_stride(input_format, 1);
+
+ for(i = 0; i < input_format_desc->nr_channels; ++i)
+ {
+ if(input_format_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
+ {
+ input_is_float = 1;
+ input_normalized |= 1 << 1;
+ }
+ else
+ input_normalized |= (1 << input_format_desc->channel[i].normalized);
+ }
+
+ if(((input_normalized | output_normalized) == 3)
+ || ((input_normalized & 1) && (output_normalized & 1)
+ && input_format_size * output_format_desc->nr_channels > output_format_size * input_format_desc->nr_channels))
+ continue;
+
+ key.element[0].input_format = input_format;
+ key.element[0].output_format = output_format;
+ key.output_stride = output_format_size;
+ translate[0] = create_fn(&key);
+ if (!translate[0])
+ continue;
+
+ key.element[0].input_format = output_format;
+ key.element[0].output_format = input_format;
+ key.output_stride = input_format_size;
+ translate[1] = create_fn(&key);
+ if(!translate[1])
+ {
+ used_generic = 1;
+ translate[1] = translate_generic_create(&key);
+ if(!translate[1])
+ continue;
+ }
+
+ for(i = 1; i < 5; ++i)
+ memset(buffer[i], 0xcd - (0x22 * i), 4096);
+
+ if(input_is_float && input_format_desc->channel[0].size == 32)
+ buffer[0] = (unsigned char*)float_buffer;
+ else if(input_is_float && input_format_desc->channel[0].size == 64)
+ buffer[0] = (unsigned char*)double_buffer;
+ else if(input_is_float)
+ abort();
+ else
+ buffer[0] = byte_buffer;
+
+ translate[0]->set_buffer(translate[0], 0, buffer[0], input_format_size, ~0);
+ translate[0]->run(translate[0], 0, count, 0, buffer[1]);
+ translate[1]->set_buffer(translate[1], 0, buffer[1], output_format_size, ~0);
+ translate[1]->run(translate[1], 0, count, 0, buffer[2]);
+ translate[0]->set_buffer(translate[0], 0, buffer[2], input_format_size, ~0);
+ translate[0]->run(translate[0], 0, count, 0, buffer[3]);
+ translate[1]->set_buffer(translate[1], 0, buffer[3], output_format_size, ~0);
+ translate[1]->run(translate[1], 0, count, 0, buffer[4]);
+
+ for (i = 0; i < count; ++i)
+ {
+ float a[4];
+ float b[4];
+ input_format_desc->fetch_rgba_float(a, buffer[2] + i * input_format_size, 0, 0);
+ input_format_desc->fetch_rgba_float(b, buffer[4] + i * input_format_size, 0, 0);
+
+ for (j = 0; j < count; ++j)
+ {
+ float d = a[j] - b[j];
+ if (d > error || d < -error)
+ {
+ fail = 1;
+ break;
+ }
+ }
+ }
+
+ printf("%s%s: %s -> %s -> %s -> %s -> %s\n",
+ fail ? "FAIL" : "PASS",
+ used_generic ? "[GENERIC]" : "",
+ input_format_desc->name, output_format_desc->name, input_format_desc->name, output_format_desc->name, input_format_desc->name);
+
+ if (1)
+ {
+ for (i = 0; i < Elements(buffer); ++i)
+ {
+ unsigned format_size = (i & 1) ? output_format_size : input_format_size;
+ printf("%c ", (i == 2 || i == 4) ? '*' : ' ');
+ for (j = 0; j < count; ++j)
+ {
+ for (k = 0; k < format_size; ++k)
+ {
+ printf("%02x", buffer[i][j * format_size + k]);
+ }
+ printf(" ");
+ }
+ printf("\n");
+ }
+ }
+
+ if (!fail)
+ ++passed;
+ ++total;
+
+ if(translate[1])
+ translate[1]->release(translate[1]);
+ translate[0]->release(translate[0]);
+ }
+ }
+
+ printf("%u/%u tests passed for translate_%s\n", passed, total, argv[1]);
+ return passed != total;
+}
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
new file mode 100644
index 0000000000..2f20d9f895
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -0,0 +1,25 @@
+Import('*')
+
+env = env.Clone()
+
+r600_sources = [
+ 'bof.c',
+ 'r600_state.c',
+ 'radeon_ctx.c',
+ 'radeon_draw.c',
+ 'radeon_state.c',
+ 'radeon_bo.c',
+ 'radeon_pciid.c',
+ 'radeon.c',
+ 'r600_drm.c'
+]
+
+env.ParseConfig('pkg-config --cflags libdrm_radeon')
+env.Append(CPPPATH = '#/src/gallium/drivers/r600')
+
+r600winsys = env.ConvenienceLibrary(
+ target ='r600winsys',
+ source = r600_sources,
+)
+
+Export('r600winsys')
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 9520792f54..c76e7f5fa5 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -31,7 +31,6 @@
#include "radeon_priv.h"
#include "r600_screen.h"
#include "r600_resource.h"
-#include "r600_public.h"
#include "r600_drm_public.h"
#include "state_tracker/drm_driver.h"
@@ -45,7 +44,7 @@ boolean r600_buffer_get_handle(struct radeon *rw,
struct winsys_handle *whandle)
{
struct drm_gem_flink flink;
- struct r600_resource* rbuffer = (struct r600_buffer*)buf;
+ struct r600_resource* rbuffer = (struct r600_resource*)buf;
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
if (!rbuffer->flink) {
diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h
index 5896df21b2..e40c77d8f6 100644
--- a/src/gallium/winsys/r600/drm/r600_states.h
+++ b/src/gallium/winsys/r600/drm/r600_states.h
@@ -372,6 +372,76 @@ static const struct radeon_register R600_CB0_names[] = {
{0x00028100, 0, 0, "CB_COLOR0_MASK"},
};
+static const struct radeon_register R600_CB1_names[] = {
+ {0x00028044, 1, 0, "CB_COLOR1_BASE"},
+ {0x000280A4, 0, 0, "CB_COLOR1_INFO"},
+ {0x00028064, 0, 0, "CB_COLOR1_SIZE"},
+ {0x00028084, 0, 0, "CB_COLOR1_VIEW"},
+ {0x000280E4, 1, 1, "CB_COLOR1_FRAG"},
+ {0x000280C4, 1, 2, "CB_COLOR1_TILE"},
+ {0x00028104, 0, 0, "CB_COLOR1_MASK"},
+};
+
+static const struct radeon_register R600_CB2_names[] = {
+ {0x00028048, 1, 0, "CB_COLOR2_BASE"},
+ {0x000280A8, 0, 0, "CB_COLOR2_INFO"},
+ {0x00028068, 0, 0, "CB_COLOR2_SIZE"},
+ {0x00028088, 0, 0, "CB_COLOR2_VIEW"},
+ {0x000280E8, 1, 1, "CB_COLOR2_FRAG"},
+ {0x000280C8, 1, 2, "CB_COLOR2_TILE"},
+ {0x00028108, 0, 0, "CB_COLOR2_MASK"},
+};
+
+static const struct radeon_register R600_CB3_names[] = {
+ {0x0002804C, 1, 0, "CB_COLOR3_BASE"},
+ {0x000280AC, 0, 0, "CB_COLOR3_INFO"},
+ {0x0002806C, 0, 0, "CB_COLOR3_SIZE"},
+ {0x0002808C, 0, 0, "CB_COLOR3_VIEW"},
+ {0x000280EC, 1, 1, "CB_COLOR3_FRAG"},
+ {0x000280CC, 1, 2, "CB_COLOR3_TILE"},
+ {0x0002810C, 0, 0, "CB_COLOR3_MASK"},
+};
+
+static const struct radeon_register R600_CB4_names[] = {
+ {0x00028050, 1, 0, "CB_COLOR4_BASE"},
+ {0x000280B0, 0, 0, "CB_COLOR4_INFO"},
+ {0x00028070, 0, 0, "CB_COLOR4_SIZE"},
+ {0x00028090, 0, 0, "CB_COLOR4_VIEW"},
+ {0x000280F0, 1, 1, "CB_COLOR4_FRAG"},
+ {0x000280D0, 1, 2, "CB_COLOR4_TILE"},
+ {0x00028110, 0, 0, "CB_COLOR4_MASK"},
+};
+
+static const struct radeon_register R600_CB5_names[] = {
+ {0x00028054, 1, 0, "CB_COLOR5_BASE"},
+ {0x000280B4, 0, 0, "CB_COLOR5_INFO"},
+ {0x00028074, 0, 0, "CB_COLOR5_SIZE"},
+ {0x00028094, 0, 0, "CB_COLOR5_VIEW"},
+ {0x000280F4, 1, 1, "CB_COLOR5_FRAG"},
+ {0x000280D4, 1, 2, "CB_COLOR5_TILE"},
+ {0x00028114, 0, 0, "CB_COLOR5_MASK"},
+};
+
+static const struct radeon_register R600_CB6_names[] = {
+ {0x00028058, 1, 0, "CB_COLOR6_BASE"},
+ {0x000280B8, 0, 0, "CB_COLOR6_INFO"},
+ {0x00028078, 0, 0, "CB_COLOR6_SIZE"},
+ {0x00028098, 0, 0, "CB_COLOR6_VIEW"},
+ {0x000280F8, 1, 1, "CB_COLOR6_FRAG"},
+ {0x000280D8, 1, 2, "CB_COLOR6_TILE"},
+ {0x00028118, 0, 0, "CB_COLOR6_MASK"},
+};
+
+static const struct radeon_register R600_CB7_names[] = {
+ {0x0002805C, 1, 0, "CB_COLOR7_BASE"},
+ {0x000280BC, 0, 0, "CB_COLOR7_INFO"},
+ {0x0002807C, 0, 0, "CB_COLOR7_SIZE"},
+ {0x0002809C, 0, 0, "CB_COLOR7_VIEW"},
+ {0x000280FC, 1, 1, "CB_COLOR7_FRAG"},
+ {0x000280DC, 1, 2, "CB_COLOR7_TILE"},
+ {0x0002811C, 0, 0, "CB_COLOR7_MASK"},
+};
+
static const struct radeon_register R600_DB_names[] = {
{0x0002800C, 1, 0, "DB_DEPTH_BASE"},
{0x00028000, 0, 0, "DB_DEPTH_SIZE"},
@@ -425,9 +495,16 @@ static struct radeon_type R600_types[] = {
{ 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names},
{ 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names},
{ 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r600_state_pm4_cb0, R600_CB0_names},
- { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r600_state_pm4_db, R600_DB_names},
- { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names},
- { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names},
+ { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names},
+ { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, R600_CB2_names},
+ { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names},
+ { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, r600_state_pm4_cb0, R600_CB4_names},
+ { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names},
+ { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names},
+ { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names},
+ { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r600_state_pm4_db, R600_DB_names},
+ { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names},
+ { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names},
};
static struct radeon_type R700_types[] = {
@@ -453,9 +530,16 @@ static struct radeon_type R700_types[] = {
{ 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names},
{ 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names},
{ 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r700_state_pm4_cb0, R600_CB0_names},
- { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r700_state_pm4_db, R600_DB_names},
- { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names},
- { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names},
+ { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names},
+ { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, R600_CB2_names},
+ { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names},
+ { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, r600_state_pm4_cb0, R600_CB4_names},
+ { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names},
+ { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names},
+ { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names},
+ { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r700_state_pm4_db, R600_DB_names},
+ { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names},
+ { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names},
};
#endif
diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c
index 7e65669806..80b0a1d397 100644
--- a/src/gallium/winsys/r600/drm/radeon.c
+++ b/src/gallium/winsys/r600/drm/radeon.c
@@ -23,7 +23,6 @@
#include "xf86drm.h"
#include "radeon_priv.h"
#include "radeon_drm.h"
-#include "r600d.h"
enum radeon_family radeon_get_family(struct radeon *radeon)
{
diff --git a/src/gallium/winsys/r600/drm/radeon_bo_pb.c b/src/gallium/winsys/r600/drm/radeon_bo_pb.c
new file mode 100644
index 0000000000..e8e53a971f
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/radeon_bo_pb.c
@@ -0,0 +1,186 @@
+#include "radeon_priv.h"
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+struct radeon_bo_pb {
+ struct pb_buffer b;
+ struct radeon_bo *bo;
+
+ struct radeon_bo_pbmgr *mgr;
+ struct list_head maplist;
+};
+
+extern const struct pb_vtbl radeon_bo_pb_vtbl;
+
+static INLINE struct radeon_bo_pb *radeon_bo_pb(struct pb_buffer *buf)
+{
+ assert(buf);
+ assert(buf->vtbl == &radeon_bo_pb_vtbl);
+ return (struct radeon_bo_pb *)buf;
+}
+
+struct radeon_bo_pbmgr {
+ struct pb_manager b;
+ struct radeon *radeon;
+ struct list_head buffer_map_list;
+};
+
+static INLINE struct radeon_bo_pbmgr *radeon_bo_pbmgr(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct radeon_bo_pbmgr *)mgr;
+}
+
+static void radeon_bo_pb_destroy(struct pb_buffer *_buf)
+{
+ struct radeon_bo_pb *buf = radeon_bo_pb(_buf);
+
+ if (buf->bo->data != NULL) {
+ LIST_DEL(&buf->maplist);
+ radeon_bo_unmap(buf->mgr->radeon, buf->bo);
+ }
+ radeon_bo_decref(buf->mgr->radeon, buf->bo);
+ FREE(buf);
+}
+
+static void *
+radeon_bo_pb_map_internal(struct pb_buffer *_buf,
+ unsigned flags)
+{
+ struct radeon_bo_pb *buf = radeon_bo_pb(_buf);
+
+ if (buf->bo->data != NULL)
+ return buf->bo->data;
+
+ if (flags & PB_USAGE_DONTBLOCK) {
+ uint32_t domain;
+ if (radeon_bo_busy(buf->mgr->radeon, buf->bo, &domain))
+ return NULL;
+ }
+
+ if (radeon_bo_map(buf->mgr->radeon, buf->bo)) {
+ return NULL;
+ }
+ LIST_ADDTAIL(&buf->maplist, &buf->mgr->buffer_map_list);
+ return buf->bo->data;
+}
+
+static void radeon_bo_pb_unmap_internal(struct pb_buffer *_buf)
+{
+ (void)_buf;
+}
+
+static void
+radeon_bo_pb_get_base_buffer(struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ *base_buf = buf;
+ *offset = 0;
+}
+
+static enum pipe_error
+radeon_bo_pb_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ /* Always pinned */
+ return PIPE_OK;
+}
+
+static void
+radeon_bo_pb_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+}
+
+const struct pb_vtbl radeon_bo_pb_vtbl = {
+ radeon_bo_pb_destroy,
+ radeon_bo_pb_map_internal,
+ radeon_bo_pb_unmap_internal,
+ radeon_bo_pb_validate,
+ radeon_bo_pb_fence,
+ radeon_bo_pb_get_base_buffer,
+};
+
+static struct pb_buffer *
+radeon_bo_pb_create_buffer(struct pb_manager *_mgr,
+ pb_size size,
+ const struct pb_desc *desc)
+{
+ struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr);
+ struct radeon *radeon = mgr->radeon;
+ struct radeon_bo_pb *bo;
+ uint32_t domain;
+
+ bo = CALLOC_STRUCT(radeon_bo_pb);
+ if (!bo)
+ goto error1;
+
+ pipe_reference_init(&bo->b.base.reference, 1);
+ bo->b.base.alignment = desc->alignment;
+ bo->b.base.usage = desc->usage;
+ bo->b.base.size = size;
+ bo->b.vtbl = &radeon_bo_pb_vtbl;
+ bo->mgr = mgr;
+
+ LIST_INITHEAD(&bo->maplist);
+
+ bo->bo = radeon_bo(radeon, 0, size,
+ desc->alignment, NULL);
+ if (bo->bo == NULL)
+ goto error2;
+ return &bo->b;
+
+error2:
+ FREE(bo);
+error1:
+ return NULL;
+}
+
+static void
+radeon_bo_pbmgr_flush(struct pb_manager *mgr)
+{
+ /* NOP */
+}
+
+static void
+radeon_bo_pbmgr_destroy(struct pb_manager *_mgr)
+{
+ struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr);
+ FREE(mgr);
+}
+
+struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon)
+{
+ struct radeon_bo_pbmgr *mgr;
+
+ mgr = CALLOC_STRUCT(radeon_bo_pbmgr);
+ if (!mgr)
+ return NULL;
+
+ mgr->b.destroy = radeon_bo_pbmgr_destroy;
+ mgr->b.create_buffer = radeon_bo_pb_create_buffer;
+ mgr->b.flush = radeon_bo_pbmgr_flush;
+
+ mgr->radeon = radeon;
+ LIST_INITHEAD(&mgr->buffer_map_list);
+ return &mgr->b;
+}
+
+void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr)
+{
+ struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr);
+ struct radeon_bo_pb *rpb, *t_rpb;
+
+ LIST_FOR_EACH_ENTRY_SAFE(rpb, t_rpb, &mgr->buffer_map_list, maplist) {
+ radeon_bo_unmap(mgr->radeon, rpb->bo);
+ LIST_DEL(&rpb->maplist);
+ }
+
+ LIST_INITHEAD(&mgr->buffer_map_list);
+}
diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c
index 6b0eba0b28..45b706bb0f 100644
--- a/src/gallium/winsys/r600/drm/radeon_ctx.c
+++ b/src/gallium/winsys/r600/drm/radeon_ctx.c
@@ -112,6 +112,7 @@ struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx)
ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]);
}
ctx->radeon = radeon_decref(ctx->radeon);
+ free(ctx->state);
free(ctx->draw);
free(ctx->bo);
free(ctx->pm4);
@@ -151,6 +152,8 @@ int radeon_ctx_submit(struct radeon_ctx *ctx)
uint64_t chunk_array[2];
int r = 0;
+ if (!ctx->cpm4)
+ return 0;
#if 0
for (r = 0; r < ctx->cpm4; r++) {
fprintf(stderr, "0x%08X\n", ctx->pm4[r]);
diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h
index b91421f438..96c0d060f7 100644
--- a/src/gallium/winsys/r600/drm/radeon_priv.h
+++ b/src/gallium/winsys/r600/drm/radeon_priv.h
@@ -68,36 +68,6 @@ extern int radeon_is_family_compatible(unsigned family1, unsigned family2);
extern int radeon_reg_id(struct radeon *radeon, unsigned offset, unsigned *typeid, unsigned *stateid, unsigned *id);
extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id);
-/*
- * radeon context functions
- */
-#pragma pack(1)
-struct radeon_cs_reloc {
- uint32_t handle;
- uint32_t read_domain;
- uint32_t write_domain;
- uint32_t flags;
-};
-#pragma pack()
-
-struct radeon_ctx {
- int refcount;
- struct radeon *radeon;
- u32 *pm4;
- u32 cpm4;
- u32 draw_cpm4;
- unsigned id;
- unsigned next_id;
- unsigned nreloc;
- struct radeon_cs_reloc *reloc;
- unsigned nbo;
- struct radeon_bo **bo;
- unsigned ndraw;
- struct radeon_draw *cdraw;
- struct radeon_draw **draw;
- unsigned nstate;
- struct radeon_state **state;
-};
int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo);
struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c
index ecaf096dea..86d4f94969 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm.c
@@ -39,7 +39,6 @@
#include "util/u_memory.h"
#include "xf86drm.h"
-#include <sys/ioctl.h>
static struct radeon_libdrm_winsys *
radeon_winsys_create(int fd)
@@ -55,6 +54,31 @@ radeon_winsys_create(int fd)
return rws;
}
+/* Enable/disable Hyper-Z access. Return TRUE on success. */
+static boolean radeon_set_hyperz_access(int fd, boolean enable)
+{
+#ifndef RADEON_INFO_WANT_HYPERZ
+#define RADEON_INFO_WANT_HYPERZ 7
+#endif
+
+ struct drm_radeon_info info = {0};
+ unsigned value = enable ? 1 : 0;
+
+ if (!debug_get_bool_option("RADEON_HYPERZ", FALSE))
+ return FALSE;
+
+ info.value = (unsigned long)&value;
+ info.request = RADEON_INFO_WANT_HYPERZ;
+
+ if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
+ return FALSE;
+
+ if (enable && !value)
+ return FALSE;
+
+ return TRUE;
+}
+
/* Helper function to do the ioctls needed for setup and init. */
static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys)
{
@@ -134,15 +158,7 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys)
}
winsys->z_pipes = target;
- winsys->hyperz = FALSE;
-#ifndef RADEON_INFO_WANT_HYPERZ
-#define RADEON_INFO_WANT_HYPERZ 7
-#endif
- info.request = RADEON_INFO_WANT_HYPERZ;
- retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
- if (!retval && target == 1) {
- winsys->hyperz = TRUE;
- }
+ winsys->hyperz = radeon_set_hyperz_access(fd, TRUE);
retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO,
&gem_info, sizeof(gem_info));
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
index 1b0d10f60d..7bd4407e9f 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
@@ -32,8 +32,6 @@
#include "vmw_screen.h"
#include "vmw_surface.h"
-#include "vmw_fence.h"
-#include "vmw_context.h"
#include "svga_drm_public.h"
#include "state_tracker/drm_driver.h"