summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile2
-rw-r--r--src/gallium/auxiliary/SConscript6
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c11
-rw-r--r--src/gallium/auxiliary/draw/draw_decompose_tmp.h425
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c42
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_gs_tmp.h186
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c24
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c206
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c40
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h7
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c20
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_decompose.h199
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_so_emit.c20
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h7
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c157
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h209
-rw-r--r--src/gallium/auxiliary/draw/draw_so_emit_tmp.h156
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c16
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c369
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_assert.c101
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_assert.h41
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c14
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c93
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h5
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp32
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c21
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c24
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c14
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c52
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h10
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c13
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.h3
-rw-r--r--src/gallium/auxiliary/translate/translate.c5
-rw-r--r--src/gallium/auxiliary/translate/translate.h3
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c155
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c44
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h2
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c70
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.h13
-rw-r--r--src/gallium/auxiliary/util/u_debug.c2
-rw-r--r--src/gallium/auxiliary/util/u_draw.h138
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c2
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.h6
-rw-r--r--src/gallium/auxiliary/util/u_format.h38
-rw-r--r--src/gallium/auxiliary/util/u_format_other.c15
-rw-r--r--src/gallium/auxiliary/util/u_framebuffer.c6
-rw-r--r--src/gallium/auxiliary/util/u_mempool.c6
-rw-r--r--src/gallium/auxiliary/util/u_network.c2
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h47
-rw-r--r--src/gallium/auxiliary/util/u_prim.h14
-rw-r--r--src/gallium/auxiliary/util/u_split_prim.h105
-rw-r--r--src/gallium/auxiliary/util/u_sse.h30
-rw-r--r--src/gallium/auxiliary/util/u_staging.c95
-rw-r--r--src/gallium/auxiliary/util/u_staging.h37
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.c79
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.h18
72 files changed, 2265 insertions, 1248 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index dcebab7c0f..9544e90a96 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -131,6 +131,7 @@ C_SOURCES = \
util/u_sampler.c \
util/u_simple_shaders.c \
util/u_snprintf.c \
+ util/u_staging.c \
util/u_surface.c \
util/u_surfaces.c \
util/u_texture.c \
@@ -149,6 +150,7 @@ C_SOURCES = \
GALLIVM_SOURCES = \
gallivm/lp_bld_arit.c \
+ gallivm/lp_bld_assert.c \
gallivm/lp_bld_const.c \
gallivm/lp_bld_conv.c \
gallivm/lp_bld_debug.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 72a16617db..3124e20ce8 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -34,14 +34,14 @@ env.CodeGenerate(
target = 'util/u_format_table.c',
script = '#src/gallium/auxiliary/util/u_format_table.py',
source = ['#src/gallium/auxiliary/util/u_format.csv'],
- command = 'python $SCRIPT $SOURCE > $TARGET'
+ command = python_cmd + ' $SCRIPT $SOURCE > $TARGET'
)
env.CodeGenerate(
target = 'util/u_half.c',
script = 'util/u_half.py',
source = [],
- command = 'python $SCRIPT > $TARGET'
+ command = python_cmd + ' $SCRIPT > $TARGET'
)
env.Depends('util/u_format_table.c', [
@@ -180,6 +180,7 @@ source = [
'util/u_sampler.c',
'util/u_simple_shaders.c',
'util/u_snprintf.c',
+ 'util/u_staging.c',
'util/u_surface.c',
'util/u_surfaces.c',
'util/u_texture.c',
@@ -198,6 +199,7 @@ source = [
if env['llvm']:
source += [
'gallivm/lp_bld_arit.c',
+ 'gallivm/lp_bld_assert.c',
'gallivm/lp_bld_const.c',
'gallivm/lp_bld_conv.c',
'gallivm/lp_bld_debug.c',
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index c127f74188..995b675b9a 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -288,12 +288,19 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
shader_type == PIPE_SHADER_GEOMETRY);
debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS);
- if (shader_type == PIPE_SHADER_VERTEX) {
+ switch (shader_type) {
+ case PIPE_SHADER_VERTEX:
draw->pt.user.vs_constants[slot] = buffer;
+ draw->pt.user.vs_constants_size[slot] = size;
draw_vs_set_constants(draw, slot, buffer, size);
- } else if (shader_type == PIPE_SHADER_GEOMETRY) {
+ break;
+ case PIPE_SHADER_GEOMETRY:
draw->pt.user.gs_constants[slot] = buffer;
+ draw->pt.user.gs_constants_size[slot] = size;
draw_gs_set_constants(draw, slot, buffer, size);
+ break;
+ default:
+ assert(0 && "invalid shader type in draw_set_mapped_constant_buffer");
}
}
diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
new file mode 100644
index 0000000000..a52d2b5058
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -0,0 +1,425 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+/* these macros are optional */
+#ifndef LOCAL_VARS
+#define LOCAL_VARS
+#endif
+#ifndef FUNC_ENTER
+#define FUNC_ENTER do {} while (0)
+#endif
+#ifndef FUNC_EXIT
+#define FUNC_EXIT do {} while (0)
+#endif
+#ifndef LINE_ADJ
+#define LINE_ADJ(flags, a0, i0, i1, a1) LINE(flags, i0, i1)
+#endif
+#ifndef TRIANGLE_ADJ
+#define TRIANGLE_ADJ(flags, i0, a0, i1, a1, i2, a2) TRIANGLE(flags, i0, i1, i2)
+#endif
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned idx[6], i;
+ ushort flags;
+ LOCAL_VARS
+
+ FUNC_ENTER;
+
+ /* prim, count, and last_vertex_last should have been defined */
+ if (0) {
+ debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n",
+ __FUNCTION__, prim, count, last_vertex_last);
+ }
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i++) {
+ idx[0] = GET_ELT(i);
+ POINT(idx[0]);
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 0; i + 1 < count; i += 2) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ LINE(flags, idx[0], idx[1]);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ case PIPE_PRIM_LINE_STRIP:
+ if (count >= 2) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ idx[1] = GET_ELT(0);
+ idx[2] = idx[1];
+
+ for (i = 1; i < count; i++, flags = 0) {
+ idx[0] = idx[1];
+ idx[1] = GET_ELT(i);
+ LINE(flags, idx[0], idx[1]);
+ }
+ /* close the loop */
+ if (prim == PIPE_PRIM_LINE_LOOP)
+ LINE(flags, idx[1], idx[2]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i + 2 < count; i += 3) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (count >= 3) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[1] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ if (last_vertex_last) {
+ for (i = 0; i + 2 < count; i++) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[2] last */
+ if (i & 1)
+ TRIANGLE(flags, idx[1], idx[0], idx[2]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ else {
+ for (i = 0; i + 2 < count; i++) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[0] first */
+ if (i & 1)
+ TRIANGLE(flags, idx[0], idx[2], idx[1]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[0] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ /* idx[0] is neither the first nor the last vertex */
+ if (last_vertex_last) {
+ for (i = 0; i + 2 < count; i++) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[2] last */
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ else {
+ for (i = 0; i + 2 < count; i++) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[1] first */
+ TRIANGLE(flags, idx[1], idx[2], idx[0]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ if (last_vertex_last) {
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ /* always emit idx[3] last */
+ TRIANGLE(flags, idx[0], idx[1], idx[3]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ TRIANGLE(flags, idx[1], idx[2], idx[3]);
+ }
+ }
+ else {
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ /* XXX should always emit idx[0] first */
+ /* always emit idx[3] first */
+ TRIANGLE(flags, idx[3], idx[0], idx[1]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_1 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[3], idx[1], idx[2]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ if (count >= 4) {
+ idx[2] = GET_ELT(0);
+ idx[3] = GET_ELT(1);
+
+ if (last_vertex_last) {
+ for (i = 0; i + 3 < count; i += 2) {
+ idx[0] = idx[2];
+ idx[1] = idx[3];
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ /* always emit idx[3] last */
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[2], idx[0], idx[3]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ TRIANGLE(flags, idx[0], idx[1], idx[3]);
+ }
+ }
+ else {
+ for (i = 0; i + 3 < count; i += 2) {
+ idx[0] = idx[2];
+ idx[1] = idx[3];
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ /* XXX should always emit idx[0] first */
+ /* always emit idx[3] first */
+ TRIANGLE(flags, idx[3], idx[2], idx[0]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_1 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[3], idx[0], idx[1]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ if (count >= 3) {
+ ushort edge_next, edge_finish;
+
+ if (last_vertex_last) {
+ flags = (DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_2 |
+ DRAW_PIPE_EDGE_FLAG_0);
+ edge_next = DRAW_PIPE_EDGE_FLAG_0;
+ edge_finish = DRAW_PIPE_EDGE_FLAG_1;
+ }
+ else {
+ flags = (DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1);
+ edge_next = DRAW_PIPE_EDGE_FLAG_1;
+ edge_finish = DRAW_PIPE_EDGE_FLAG_2;
+ }
+
+ idx[0] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ for (i = 0; i + 2 < count; i++, flags = edge_next) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+
+ if (i + 3 == count)
+ flags |= edge_finish;
+
+ /* idx[0] is both the first and the last vertex */
+ if (last_vertex_last)
+ TRIANGLE(flags, idx[1], idx[2], idx[0]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_LINES_ADJACENCY:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+ LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ if (count >= 4) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ idx[1] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+ idx[3] = GET_ELT(2);
+
+ for (i = 1; i + 2 < count; i++, flags = 0) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = idx[3];
+ idx[3] = GET_ELT(i + 2);
+ LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i + 5 < count; i += 6) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+ idx[4] = GET_ELT(i + 4);
+ idx[5] = GET_ELT(i + 5);
+ TRIANGLE_ADJ(flags, idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ if (count >= 6) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[0] = GET_ELT(1);
+ idx[2] = GET_ELT(0);
+ idx[4] = GET_ELT(2);
+ idx[3] = GET_ELT(4);
+
+ /*
+ * The vertices of the i-th triangle are stored in
+ * idx[0,2,4] = { 2*i, 2*i+2, 2*i+4 };
+ *
+ * The adjacent vertices are stored in
+ * idx[1,3,5] = { 2*i-2, 2*i+6, 2*i+3 }.
+ *
+ * However, there are two exceptions:
+ *
+ * For the first triangle, idx[1] = 1;
+ * For the last triangle, idx[3] = 2*i+5.
+ */
+ if (last_vertex_last) {
+ for (i = 0; i + 5 < count; i += 2) {
+ idx[1] = idx[0];
+
+ idx[0] = idx[2];
+ idx[2] = idx[4];
+ idx[4] = idx[3];
+
+ idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5));
+ idx[5] = GET_ELT(i + 3);
+
+ /*
+ * alternate the first two vertices (idx[0] and idx[2]) and the
+ * corresponding adjacent vertices (idx[3] and idx[5]) to have
+ * the correct orientation
+ */
+ if (i & 2) {
+ TRIANGLE_ADJ(flags,
+ idx[2], idx[1], idx[0], idx[5], idx[4], idx[3]);
+ }
+ else {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ }
+ }
+ else {
+ for (i = 0; i + 5 < count; i += 2) {
+ idx[1] = idx[0];
+
+ idx[0] = idx[2];
+ idx[2] = idx[4];
+ idx[4] = idx[3];
+
+ idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5));
+ idx[5] = GET_ELT(i + 3);
+
+ /*
+ * alternate the last two vertices (idx[2] and idx[4]) and the
+ * corresponding adjacent vertices (idx[1] and idx[5]) to have
+ * the correct orientation
+ */
+ if (i & 2) {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[5], idx[4], idx[3], idx[2], idx[1]);
+ }
+ else {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ FUNC_EXIT;
+}
+
+#undef LOCAL_VARS
+#undef FUNC_ENTER
+#undef FUNC_EXIT
+#undef LINE_ADJ
+#undef TRIANGLE_ADJ
+
+#undef FUNC
+#undef FUNC_VARS
+#undef GET_ELT
+#undef POINT
+#undef LINE
+#undef TRIANGLE
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 79a57a67f3..4a1013e79a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMWare Inc.
+ * Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -75,7 +75,10 @@ draw_gs_set_constants(struct draw_context *draw,
const void *constants,
unsigned size)
{
- /* noop */
+ /* noop. added here for symmetry with the VS
+ * code and in case we'll ever want to align
+ * the constants, e.g. when we'll change to a
+ * different interpreter */
}
@@ -370,32 +373,23 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
gs_flush(shader, 1);
}
-#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,i0,i1,i2)
-#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5)
-#define LINE(gs,i0,i1) gs_line(gs,i0,i1)
-#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3)
-#define POINT(gs,i0) gs_point(gs,i0)
-#define FUNC gs_run
-#define LOCAL_VARS
+#define FUNC gs_run
+#define GET_ELT(idx) (idx)
#include "draw_gs_tmp.h"
-#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,elts[i0],elts[i1],elts[i2])
-#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) \
- gs_tri_adj(gs,elts[i0],elts[i1],elts[i2],elts[i3], \
- elts[i4],elts[i5])
-#define LINE(gs,i0,i1) gs_line(gs,elts[i0],elts[i1])
-#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,elts[i0], \
- elts[i1], \
- elts[i2],elts[i3])
-#define POINT(gs,i0) gs_point(gs,elts[i0])
-#define FUNC gs_run_elts
-#define LOCAL_VARS \
- const ushort *elts = input_prims->elts;
+#define FUNC gs_run_elts
+#define LOCAL_VARS const ushort *elts = input_prims->elts;
+#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK)
#include "draw_gs_tmp.h"
+
+/**
+ * Execute geometry shader using TGSI interpreter.
+ */
int draw_geometry_shader_run(struct draw_geometry_shader *shader,
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
const struct draw_vertex_info *input_verts,
const struct draw_prim_info *input_prim,
struct draw_vertex_info *output_verts,
@@ -405,7 +399,6 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
unsigned input_stride = input_verts->vertex_size;
unsigned vertex_size = input_verts->vertex_size;
struct tgsi_exec_machine *machine = shader->machine;
- unsigned int i;
unsigned num_input_verts = input_prim->linear ?
input_verts->count :
input_prim->count;
@@ -447,9 +440,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
}
shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
- for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- machine->Consts[i] = constants[i];
- }
+ tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
+ constants, constants_size);
if (input_prim->linear)
gs_run(shader, input_prim, input_verts,
diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h
index 2cb634818c..67bc1aa73f 100644
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMWare Inc.
+ * Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -73,6 +73,7 @@ struct draw_geometry_shader {
*/
int draw_geometry_shader_run(struct draw_geometry_shader *shader,
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
const struct draw_vertex_info *input_verts,
const struct draw_prim_info *input_prim,
struct draw_vertex_info *output_verts,
diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
index 7a8683cf7c..4a17af0dea 100644
--- a/src/gallium/auxiliary/draw/draw_gs_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -1,152 +1,34 @@
-
-static void FUNC( struct draw_geometry_shader *shader,
- const struct draw_prim_info *input_prims,
- const struct draw_vertex_info *input_verts,
- struct draw_prim_info *output_prims,
- struct draw_vertex_info *output_verts)
-{
- struct draw_context *draw = shader->draw;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i, j;
- unsigned count = input_prims->count;
- LOCAL_VARS
-
- if (0) debug_printf("%s %d\n", __FUNCTION__, count);
-
- debug_assert(input_prims->primitive_count == 1);
-
- switch (input_prims->prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i++) {
- POINT( shader, i + 0 );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( shader , i + 0 , i + 1 );
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
-
- for (i = 1; i < count; i++) {
- LINE( shader, i - 1, i );
- }
-
- LINE( shader, i - 1, 0 );
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- for (i = 1; i < count; i++) {
- LINE( shader, i - 1, i );
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE( shader, i + 0, i + 1, i + 2 );
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( shader,
- i + 0,
- i + 1 + (i&1),
- i + 2 - (i&1) );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( shader,
- i + 0 + (i&1),
- i + 1 - (i&1),
- i + 2 );
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( shader,
- i + 1,
- i + 2,
- 0 );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( shader,
- 0,
- i + 1,
- i + 2 );
- }
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- for (i = 0; i+2 < count; i++) {
-
- if (flatfirst) {
- TRIANGLE( shader, 0, i + 1, i + 2 );
- }
- else {
- TRIANGLE( shader, i + 1, i + 2, 0 );
- }
- }
- }
- break;
-
- case PIPE_PRIM_LINES_ADJACENCY:
- for (i = 0; i+3 < count; i += 4) {
- LINE_ADJ( shader , i + 0 , i + 1, i + 2, i + 3 );
- }
- break;
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- for (i = 1; i + 2 < count; i++) {
- LINE_ADJ( shader, i - 1, i, i + 1, i + 2 );
- }
- break;
-
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- for (i = 0; i+5 < count; i += 5) {
- TRI_ADJ( shader, i + 0, i + 1, i + 2,
- i + 3, i + 4, i + 5);
- }
- break;
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- for (i = 0, j = 0; i+5 < count; i += 2, ++j) {
- TRI_ADJ( shader,
- i + 0,
- i + 1 + 2*(j&1),
- i + 2 + 2*(j&1),
- i + 3 - 2*(j&1),
- i + 4 - 2*(j&1),
- i + 5);
- }
- break;
-
- default:
- debug_assert(!"Unsupported primitive in geometry shader");
- break;
- }
-}
-
-
-#undef TRIANGLE
-#undef TRI_ADJ
-#undef POINT
-#undef LINE
-#undef LINE_ADJ
-#undef FUNC
-#undef LOCAL_VARS
+#define FUNC_VARS struct draw_geometry_shader *gs, \
+ const struct draw_prim_info *input_prims, \
+ const struct draw_vertex_info *input_verts, \
+ struct draw_prim_info *output_prims, \
+ struct draw_vertex_info *output_verts
+
+#define FUNC_ENTER \
+ /* declare more local vars */ \
+ struct draw_context *draw = gs->draw; \
+ const unsigned prim = input_prims->prim; \
+ const unsigned count = input_prims->count; \
+ const boolean last_vertex_last = \
+ !(draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ do { \
+ debug_assert(input_prims->primitive_count == 1); \
+ switch (prim) { \
+ case PIPE_PRIM_QUADS: \
+ case PIPE_PRIM_QUAD_STRIP: \
+ case PIPE_PRIM_POLYGON: \
+ debug_assert(!"unexpected primitive type in GS"); \
+ return; \
+ default: \
+ break; \
+ } \
+ } while (0) \
+
+#define POINT(i0) gs_point(gs,i0)
+#define LINE(flags,i0,i1) gs_line(gs,i0,i1)
+#define TRIANGLE(flags,i0,i1,i2) gs_tri(gs,i0,i1,i2)
+#define LINE_ADJ(flags,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3)
+#define TRIANGLE_ADJ(flags,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5)
+
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 19f96c37ab..8d53601d19 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -37,6 +37,8 @@
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_tgsi.h"
#include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_init.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_dump.h"
@@ -681,7 +683,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
void *code;
@@ -730,7 +731,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld, builder, lp_type_int(32));
end = lp_build_add(&bld, start, count);
@@ -793,6 +794,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
sampler->destroy(sampler);
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -820,6 +826,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
lp_disassemble(code);
}
+ lp_func_delete_body(variant->function);
}
@@ -837,9 +844,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
- struct lp_build_context bld_int;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef fetch_max;
@@ -891,8 +896,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
- lp_build_context_init(&bld_int, builder, lp_type_int(32));
+ lp_build_context_init(&bld, builder, lp_type_int(32));
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
@@ -927,7 +931,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
/* make sure we're not out of bounds which can happen
* if fetch_count % 4 != 0, because on the last iteration
* a few of the 4 vertex fetches will be out of bounds */
- true_index = lp_build_min(&bld_int, true_index, fetch_max);
+ true_index = lp_build_min(&bld, true_index, fetch_max);
fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
&true_index, 1, "");
@@ -963,6 +967,11 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
sampler->destroy(sampler);
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -990,6 +999,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
lp_disassemble(code);
}
+ lp_func_delete_body(variant->function_elts);
}
void
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 8cd75ecf9a..58995e0724 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -169,77 +169,50 @@ static void do_triangle( struct draw_context *draw,
/*
* Set up macros for draw_pt_decompose.h template code.
* This code uses vertex indexes / elements.
+ *
+ * Flags are needed by the stipple and unfilled stages. When the two stages
+ * are active, vcache_run_extras is called and the flags are stored in the
+ * higher bits of i0. Otherwise, flags do not matter.
*/
-/* emit first quad vertex as first vertex in triangles */
-#define QUAD_FIRST_PV(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_1 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK))
-
-/* emit last quad vertex as last vertex in triangles */
-#define QUAD_LAST_PV(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK))
-
-#define TRIANGLE(flags,i0,i1,i2) \
- do_triangle( draw, \
- elts[i0], /* flags */ \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) );
-
-#define LINE(flags,i0,i1) \
- do_line( draw, \
- elts[i0], \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) );
+#define TRIANGLE(flags,i0,i1,i2) \
+ do { \
+ assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
+ assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \
+ do_triangle( draw, \
+ i0, /* flags */ \
+ verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
+ verts + stride * (i2) ); \
+ } while (0)
+
+#define LINE(flags,i0,i1) \
+ do { \
+ assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
+ do_line( draw, \
+ i0, /* flags */ \
+ verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1) ); \
+ } while (0)
#define POINT(i0) \
- do_point( draw, \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) )
+ do { \
+ assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \
+ do_point( draw, verts + stride * (i0) ); \
+ } while (0)
+
+#define GET_ELT(idx) (elts[idx])
-#define FUNC pipe_run
-#define ARGS \
+#define FUNC pipe_run_elts
+#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
struct vertex_header *vertices, \
unsigned stride, \
- const ushort *elts
-
-#define LOCAL_VARS \
- char *verts = (char *)vertices; \
- boolean flatfirst = (draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
- unsigned i; \
- ushort flags
-
-#define FLUSH
+ const ushort *elts, \
+ unsigned count
#include "draw_pt_decompose.h"
-#undef ARGS
-#undef LOCAL_VARS
@@ -269,14 +242,29 @@ void draw_pipeline_run( struct draw_context *draw,
i < prim_info->primitive_count;
start += prim_info->primitive_lengths[i], i++)
{
- unsigned count = prim_info->primitive_lengths[i];
-
- pipe_run(draw,
- prim_info->prim,
- vert_info->verts,
- vert_info->stride,
- prim_info->elts + start,
- count);
+ const unsigned count = prim_info->primitive_lengths[i];
+
+#if DEBUG
+ /* make sure none of the element indexes go outside the vertex buffer */
+ {
+ unsigned max_index = 0x0, i;
+ /* find the largest element index */
+ for (i = 0; i < count; i++) {
+ unsigned int index = (prim_info->elts[start + i]
+ & ~DRAW_PIPE_FLAG_MASK);
+ if (index > max_index)
+ max_index = index;
+ }
+ assert(max_index <= vert_info->count);
+ }
+#endif
+
+ pipe_run_elts(draw,
+ prim_info->prim,
+ vert_info->verts,
+ vert_info->stride,
+ prim_info->elts + start,
+ count);
}
draw->pipeline.verts = NULL;
@@ -289,70 +277,30 @@ void draw_pipeline_run( struct draw_context *draw,
* This code is for non-indexed (aka linear) rendering (no elts).
*/
-/* emit first quad vertex as first vertex in triangles */
-#define QUAD_FIRST_PV(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_1 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK))
-
-/* emit last quad vertex as last vertex in triangles */
-#define QUAD_LAST_PV(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK))
-
-#define TRIANGLE(flags,i0,i1,i2) \
- do_triangle( draw, \
- flags, /* flags */ \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK))
-
-#define LINE(flags,i0,i1) \
- do_line( draw, \
- flags, \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK))
+#define TRIANGLE(flags,i0,i1,i2) \
+ do_triangle( draw, flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1), \
+ verts + stride * (i2) )
-#define POINT(i0) \
- do_point( draw, \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) )
+#define LINE(flags,i0,i1) \
+ do_line( draw, flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1) )
-#define FUNC pipe_run_linear
-#define ARGS \
- struct draw_context *draw, \
- unsigned prim, \
- struct vertex_header *vertices, \
- unsigned stride
+#define POINT(i0) \
+ do_point( draw, verts + stride * (i0) )
-#define LOCAL_VARS \
- char *verts = (char *)vertices; \
- boolean flatfirst = (draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
- unsigned i; \
- ushort flags
-#define FLUSH
+#define GET_ELT(idx) (idx)
+
+#define FUNC pipe_run_linear
+#define FUNC_VARS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ struct vertex_header *vertices, \
+ unsigned stride, \
+ unsigned count
#include "draw_pt_decompose.h"
@@ -378,6 +326,8 @@ void draw_pipeline_run_linear( struct draw_context *draw,
draw->pipeline.vertex_stride = vert_info->stride;
draw->pipeline.vertex_count = count;
+ assert(count <= vert_info->count);
+
pipe_run_linear(draw,
prim_info->prim,
(struct vertex_header*)verts,
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 1cf6ee7a7f..8a3d499feb 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -68,8 +68,7 @@ struct clip_stage {
};
-/* This is a bit confusing:
- */
+/** Cast wrapper */
static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
{
return (struct clip_stage *)stage;
@@ -81,18 +80,22 @@ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
/* All attributes are float[4], so this is easy:
*/
-static void interp_attr( float *fdst,
+static void interp_attr( float dst[4],
float t,
- const float *fin,
- const float *fout )
+ const float in[4],
+ const float out[4] )
{
- fdst[0] = LINTERP( t, fout[0], fin[0] );
- fdst[1] = LINTERP( t, fout[1], fin[1] );
- fdst[2] = LINTERP( t, fout[2], fin[2] );
- fdst[3] = LINTERP( t, fout[3], fin[3] );
+ dst[0] = LINTERP( t, out[0], in[0] );
+ dst[1] = LINTERP( t, out[1], in[1] );
+ dst[2] = LINTERP( t, out[2], in[2] );
+ dst[3] = LINTERP( t, out[3], in[3] );
}
+/**
+ * Copy front/back, primary/secondary colors from src vertex to dst vertex.
+ * Used when flat shading.
+ */
static void copy_colors( struct draw_stage *stage,
struct vertex_header *dst,
const struct vertex_header *src )
@@ -121,20 +124,17 @@ static void interp( const struct clip_stage *clip,
/* Vertex header.
*/
- {
- dst->clipmask = 0;
- dst->edgeflag = 0; /* will get overwritten later */
- dst->pad = 0;
- dst->vertex_id = UNDEFINED_VERTEX_ID;
- }
+ dst->clipmask = 0;
+ dst->edgeflag = 0; /* will get overwritten later */
+ dst->pad = 0;
+ dst->vertex_id = UNDEFINED_VERTEX_ID;
- /* Clip coordinates: interpolate normally
+ /* Interpolate the clip-space coords.
*/
- {
- interp_attr(dst->clip, t, in->clip, out->clip);
- }
+ interp_attr(dst->clip, t, in->clip, out->clip);
- /* Do the projective divide and insert window coordinates:
+ /* Do the projective divide and viewport transformation to get
+ * new window coordinates:
*/
{
const float *pos = dst->clip;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 058aeedc17..397d4bf653 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -163,9 +163,11 @@ struct draw_context
/** vertex arrays */
const void *vbuffer[PIPE_MAX_ATTRIBS];
- /** constant buffer (for vertex/geometry shader) */
+ /** constant buffers (for vertex/geometry shader) */
const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS];
const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS];
} user;
boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
@@ -198,6 +200,7 @@ struct draw_context
struct pipe_viewport_state viewport;
boolean identity_viewport;
+ /** Vertex shader state */
struct {
struct draw_vertex_shader *vertex_shader;
uint num_vs_outputs; /**< convenience, from vertex_shader */
@@ -227,6 +230,7 @@ struct draw_context
struct translate_cache *emit_cache;
} vs;
+ /** Geometry shader state */
struct {
struct draw_geometry_shader *geometry_shader;
uint num_gs_outputs; /**< convenience, from geometry_shader */
@@ -239,6 +243,7 @@ struct draw_context
struct tgsi_sampler **samplers;
} gs;
+ /** Stream output (vertex feedback) state */
struct {
struct pipe_stream_output_state state;
void *buffers[PIPE_MAX_SO_BUFFERS];
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 92d4113b4c..248927505d 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -259,6 +259,12 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
+
+ if (draw->pt.vertex_element[j].instance_divisor) {
+ ii = draw->instance_id / draw->pt.vertex_element[j].instance_divisor;
+ }
+
+ ptr += draw->pt.vertex_buffer[buf].buffer_offset;
ptr += draw->pt.vertex_buffer[buf].stride * ii;
ptr += draw->pt.vertex_element[j].src_offset;
@@ -341,19 +347,22 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned reduced_prim = u_reduced_prim(mode);
unsigned instance;
+ assert(instanceCount > 0);
+
if (reduced_prim != draw->reduced_prim) {
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
draw->reduced_prim = reduced_prim;
}
if (0)
- draw_print_arrays(draw, mode, start, MIN2(count, 20));
-
- if (0) {
- unsigned int i;
debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
mode, start, count);
+
+ if (0)
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
+
+ if (0) {
+ unsigned int i;
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n",
@@ -374,6 +383,9 @@ draw_arrays_instanced(struct draw_context *draw,
}
}
+ if (0)
+ draw_print_arrays(draw, mode, start, MIN2(count, 20));
+
for (instance = 0; instance < instanceCount; instance++) {
draw->instance_id = instance + startInstance;
draw_pt_arrays(draw, mode, start, count);
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
index 52f9593d46..3127aad731 100644
--- a/src/gallium/auxiliary/draw/draw_pt_decompose.h
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -1,194 +1,7 @@
+#define LOCAL_VARS \
+ char *verts = (char *) vertices; \
+ const boolean last_vertex_last = \
+ !(draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first);
-
-static void FUNC( ARGS,
- unsigned count )
-{
- LOCAL_VARS;
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT( (i + 0) );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( DRAW_PIPE_RESET_STIPPLE,
- (i + 0),
- (i + 1));
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (i = 1; i < count; i++, flags = 0) {
- LINE( flags,
- (i - 1),
- (i ));
- }
-
- LINE( flags,
- (i - 1),
- (0 ));
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE( flags,
- (i - 1),
- (i ));
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0),
- (i + 1),
- (i + 2 ));
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- /* Emit first triangle vertex as first triangle vertex */
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0),
- (i + 1 + (i&1)),
- (i + 2 - (i&1)) );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- /* Emit last triangle vertex as last triangle vertex */
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0 + (i&1)),
- (i + 1 - (i&1)),
- (i + 2 ));
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 1),
- (i + 2),
- 0 );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (0),
- (i + 1),
- (i + 2 ));
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- /* GL quads don't follow provoking vertex convention */
- if (flatfirst) {
- for (i = 0; i+3 < count; i += 4) {
- /* emit last quad vertex as first triangle vertex */
- QUAD_FIRST_PV( (i + 3),
- (i + 0),
- (i + 1),
- (i + 2) );
- }
- }
- else {
- for (i = 0; i+3 < count; i += 4) {
- /* emit last quad vertex as last triangle vertex */
- QUAD_LAST_PV( (i + 0),
- (i + 1),
- (i + 2),
- (i + 3) );
- }
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- /* GL quad strips don't follow provoking vertex convention */
- if (flatfirst) {
- for (i = 0; i+3 < count; i += 2) {
- /* emit last quad vertex as first triangle vertex */
- QUAD_FIRST_PV( (i + 3),
- (i + 2),
- (i + 0),
- (i + 1) );
-
- }
- }
- else {
- for (i = 0; i+3 < count; i += 2) {
- /* emit last quad vertex as last triangle vertex */
- QUAD_LAST_PV( (i + 2),
- (i + 0),
- (i + 1),
- (i + 3) );
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- /* GL polygons don't follow provoking vertex convention */
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
-
- for (i = 0; i+2 < count; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- if (flatfirst) {
- /* emit first polygon vertex as first triangle vertex */
- TRIANGLE( flags,
- (0),
- (i + 1),
- (i + 2) );
- }
- else {
- /* emit first polygon vertex as last triangle vertex */
- TRIANGLE( flags,
- (i + 1),
- (i + 2),
- (0));
- }
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- FLUSH;
-}
-
-
-#undef TRIANGLE
-#undef QUAD_FIRST_PV
-#undef QUAD_LAST_PV
-#undef POINT
-#undef LINE
-#undef FUNC
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 0229bcc7fe..5568fbb9f8 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -182,6 +182,7 @@ void draw_pt_emit( struct pt_emit *emit,
0,
~0);
+ /* fetch/translate vertex attribs to fill hw_verts[] */
translate->run( translate,
0,
vertex_count,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 121dfc414a..5b16c3788e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -176,6 +176,7 @@ static void emit(struct pt_emit *emit,
static void draw_vertex_shader_run(struct draw_vertex_shader *vshader,
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
const struct draw_vertex_info *input_verts,
struct draw_vertex_info *output_verts )
{
@@ -190,6 +191,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader,
(const float (*)[4])input_verts->verts->data,
( float (*)[4])output_verts->verts->data,
constants,
+ const_size,
input_verts->count,
input_verts->vertex_size,
input_verts->vertex_size);
@@ -236,6 +238,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle,
if (fpme->opt & PT_SHADE) {
draw_vertex_shader_run(vshader,
draw->pt.user.vs_constants,
+ draw->pt.user.vs_constants_size,
vert_info,
&vs_vert_info);
@@ -246,6 +249,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle,
if ((fpme->opt & PT_SHADE) && gshader) {
draw_geometry_shader_run(gshader,
draw->pt.user.gs_constants,
+ draw->pt.user.gs_constants_size,
vert_info,
prim_info,
&gs_vert_info,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index bc074df8c2..4b99bee86a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2010 VMWare, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -254,6 +254,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle,
if ((opt & PT_SHADE) && gshader) {
draw_geometry_shader_run(gshader,
draw->pt.user.gs_constants,
+ draw->pt.user.gs_constants_size,
vert_info,
prim_info,
&gs_vert_info,
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index 5d82934889..f7f4f24d35 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -218,25 +218,15 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
}
-#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2)
-#define LINE(gs,i0,i1) so_line(so,i0,i1)
-#define POINT(gs,i0) so_point(so,i0)
-#define FUNC so_run_linear
-#define LOCAL_VARS
+#define FUNC so_run_linear
+#define GET_ELT(idx) (start + (idx))
#include "draw_so_emit_tmp.h"
-#undef LOCAL_VARS
-#undef FUNC
-#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2])
-#define LINE(gs,i0,i1) so_line(gs,elts[i0],elts[i1])
-#define POINT(gs,i0) so_point(gs,elts[i0])
-#define FUNC so_run_elts
-#define LOCAL_VARS \
- const ushort *elts = input_prims->elts;
+#define FUNC so_run_elts
+#define LOCAL_VARS const ushort *elts = input_prims->elts;
+#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK)
#include "draw_so_emit_tmp.h"
-#undef LOCAL_VARS
-#undef FUNC
void draw_pt_so_emit( struct pt_so_emit *emit,
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 3236d38e6a..182a597cca 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -53,7 +53,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_LINES_ADJACENCY:
*first = 4;
- *incr = 2;
+ *incr = 4;
break;
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
*first = 4;
@@ -65,7 +65,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLES_ADJACENCY:
*first = 6;
- *incr = 3;
+ *incr = 6;
break;
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_TRIANGLE_FAN:
@@ -75,7 +75,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
*first = 6;
- *incr = 1;
+ *incr = 2;
break;
case PIPE_PRIM_QUADS:
*first = 4;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index a292346be9..55e43b2a71 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -1,6 +1,11 @@
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
- return count - (count - first) % incr;
+ /*
+ * count either has been trimmed in draw_pt_arrays or is set to
+ * (driver)_fetch_max which is hopefully always larger than first.
+ */
+ assert(count >= first);
+ return count - (count - first) % incr;
}
static void FUNC(struct draw_pt_front_end *frontend,
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 8ef94c3163..a848b54f7d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -95,7 +95,7 @@ static INLINE void
vcache_check_flush( struct vcache_frontend *vcache )
{
if (vcache->draw_count + 6 >= DRAW_MAX ||
- vcache->fetch_count + 4 >= FETCH_MAX) {
+ vcache->fetch_count + 6 >= FETCH_MAX) {
vcache_flush( vcache );
}
}
@@ -180,59 +180,61 @@ vcache_point( struct vcache_frontend *vcache,
}
-static INLINE void
-vcache_quad( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
+static INLINE void
+vcache_line_adj_flags( struct vcache_frontend *vcache,
+ unsigned flags,
+ unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
{
- if (vcache->draw->rasterizer->flatshade_first) {
- /* pass last quad vertex as first triangle vertex */
- vcache_triangle( vcache, i3, i0, i1 );
- vcache_triangle( vcache, i3, i1, i2 );
- }
- else {
- /* pass last quad vertex as last triangle vertex */
- vcache_triangle( vcache, i0, i1, i3 );
- vcache_triangle( vcache, i1, i2, i3 );
- }
+ vcache_elt(vcache, a0, 0);
+ vcache_elt(vcache, i0, flags);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, a1, 0);
+ vcache_check_flush(vcache);
}
-static INLINE void
-vcache_ef_quad( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
+static INLINE void
+vcache_line_adj( struct vcache_frontend *vcache,
+ unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
{
- if (vcache->draw->rasterizer->flatshade_first) {
- /* pass last quad vertex as first triangle vertex */
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i3, i0, i1 );
-
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_EDGE_FLAG_1 |
- DRAW_PIPE_EDGE_FLAG_2 ),
- i3, i1, i2 );
- }
- else {
- /* pass last quad vertex as last triangle vertex */
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_2 ),
- i0, i1, i3 );
-
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i1, i2, i3 );
- }
+ vcache_elt(vcache, a0, 0);
+ vcache_elt(vcache, i0, 0);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, a1, 0);
+ vcache_check_flush(vcache);
+}
+
+
+static INLINE void
+vcache_triangle_adj_flags( struct vcache_frontend *vcache,
+ unsigned flags,
+ unsigned i0, unsigned a0,
+ unsigned i1, unsigned a1,
+ unsigned i2, unsigned a2 )
+{
+ vcache_elt(vcache, i0, flags);
+ vcache_elt(vcache, a0, 0);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, a1, 0);
+ vcache_elt(vcache, i2, 0);
+ vcache_elt(vcache, a2, 0);
+ vcache_check_flush(vcache);
+}
+
+
+static INLINE void
+vcache_triangle_adj( struct vcache_frontend *vcache,
+ unsigned i0, unsigned a0,
+ unsigned i1, unsigned a1,
+ unsigned i2, unsigned a2 )
+{
+ vcache_elt(vcache, i0, 0);
+ vcache_elt(vcache, a0, 0);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, a1, 0);
+ vcache_elt(vcache, i2, 0);
+ vcache_elt(vcache, a2, 0);
+ vcache_check_flush(vcache);
}
@@ -240,17 +242,23 @@ vcache_ef_quad( struct vcache_frontend *vcache,
* this. The two paths aren't too different though - it may be
* possible to reunify them.
*/
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
+#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2)
+#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1)
+#define POINT(i0) vcache_point(vcache,i0)
+#define LINE_ADJ(flags,a0,i0,i1,a1) \
+ vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1)
+#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
+ vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2)
#define FUNC vcache_run_extras
#include "draw_pt_vcache_tmp.h"
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
+#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2)
+#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1)
+#define POINT(i0) vcache_point(vcache,i0)
+#define LINE_ADJ(flags,a0,i0,i1,a1) \
+ vcache_line_adj(vcache,a0,i0,i1,a1)
+#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
+ vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2)
#define FUNC vcache_run
#include "draw_pt_vcache_tmp.h"
@@ -339,6 +347,25 @@ format_from_get_elt( pt_elt_func get_elt )
#endif
+/**
+ * Check if any vertex attributes use instance divisors.
+ * Note that instance divisors complicate vertex fetching so we need
+ * to take the vcache path when they're in use.
+ */
+static boolean
+any_instance_divisors(const struct draw_context *draw)
+{
+ uint i;
+
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ uint div = draw->pt.vertex_element[i].instance_divisor;
+ if (div)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
static INLINE void
vcache_check_run( struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
@@ -382,6 +409,9 @@ vcache_check_run( struct draw_pt_front_end *frontend,
if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
goto fail;
+ if (any_instance_divisors(draw))
+ goto fail;
+
fetch_count = max_index + 1 - min_index;
if (0)
@@ -518,7 +548,18 @@ vcache_prepare( struct draw_pt_front_end *frontend,
* which is a separate issue.
*/
vcache->input_prim = in_prim;
- vcache->output_prim = u_reduced_prim(in_prim);
+ switch (in_prim) {
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY;
+ break;
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY;
+ break;
+ default:
+ vcache->output_prim = u_reduced_prim(in_prim);
+ }
vcache->middle = middle;
vcache->opt = opt;
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index dac68ad439..1a3748d5f0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -1,198 +1,19 @@
+#define FUNC_VARS \
+ struct draw_pt_front_end *frontend, \
+ pt_elt_func get_elt, \
+ const void *elts, \
+ int elt_bias, \
+ unsigned count
+#define LOCAL_VARS \
+ struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \
+ struct draw_context *draw = vcache->draw; \
+ const unsigned prim = vcache->input_prim; \
+ const boolean last_vertex_last = !(draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first);
-static void FUNC( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
+#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias)
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i;
- ushort flags;
+#define FUNC_EXIT do { vcache_flush(vcache); } while (0)
- if (0) debug_printf("%s %d\n", __FUNCTION__, count);
-
-
- switch (vcache->input_prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT( vcache,
- get_elt(elts, i + 0) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( vcache,
- DRAW_PIPE_RESET_STIPPLE,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (i = 1; i < count; i++, flags = 0) {
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, i ) + elt_bias);
- }
-
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, 0 ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, i ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1 + (i&1)) + elt_bias,
- get_elt(elts, i + 2 - (i&1)) + elt_bias);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0 + (i&1)) + elt_bias,
- get_elt(elts, i + 1 - (i&1)) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, 0 ) + elt_bias);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD( vcache,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, i + 3) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD( vcache,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 3) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- ushort edge_next, edge_finish;
-
- if (flatfirst) {
- flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_1 |
- DRAW_PIPE_EDGE_FLAG_2);
- edge_next = DRAW_PIPE_EDGE_FLAG_2;
- edge_finish = DRAW_PIPE_EDGE_FLAG_0;
- }
- else {
- flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_2 |
- DRAW_PIPE_EDGE_FLAG_0);
- edge_next = DRAW_PIPE_EDGE_FLAG_0;
- edge_finish = DRAW_PIPE_EDGE_FLAG_1;
- }
-
- for (i = 0; i+2 < count; i++, flags = edge_next) {
-
- if (i + 3 == count)
- flags |= edge_finish;
-
- if (flatfirst) {
- TRIANGLE( vcache,
- flags,
- get_elt(elts, 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias );
- }
- else {
- TRIANGLE( vcache,
- flags,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, 0) + elt_bias);
- }
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- vcache_flush( vcache );
-}
-
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
index 01212a8e53..6d8937a0b4 100644
--- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -1,123 +1,33 @@
-
-static void FUNC( struct pt_so_emit *so,
- const struct draw_prim_info *input_prims,
- const struct draw_vertex_info *input_verts,
- unsigned start,
- unsigned count)
-{
- struct draw_context *draw = so->draw;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i;
- LOCAL_VARS
-
- if (0) debug_printf("%s %d\n", __FUNCTION__, count);
-
- debug_assert(input_prims->primitive_count == 1);
-
- switch (input_prims->prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i++) {
- POINT( so, start + i + 0 );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( so , start + i + 0 , start + i + 1 );
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
-
- for (i = 1; i < count; i++) {
- LINE( so, start + i - 1, start + i );
- }
-
- LINE( so, start + i - 1, start );
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- for (i = 1; i < count; i++) {
- LINE( so, start + i - 1, start + i );
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE( so, start + i + 0, start + i + 1, start + i + 2 );
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( so,
- start + i + 0,
- start + i + 1 + (i&1),
- start + i + 2 - (i&1) );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( so,
- start + i + 0 + (i&1),
- start + i + 1 - (i&1),
- start + i + 2 );
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( so,
- start + i + 1,
- start + i + 2,
- start );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( so,
- start,
- start + i + 1,
- start + i + 2 );
- }
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
-
- for (i = 0; i+2 < count; i++) {
-
- if (flatfirst) {
- TRIANGLE( so, start + 0, start + i + 1, start + i + 2 );
- }
- else {
- TRIANGLE( so, start + i + 1, start + i + 2, start + 0 );
- }
- }
- }
- break;
-
- default:
- debug_assert(!"Unsupported primitive in stream output");
- break;
- }
-}
-
-
-#undef TRIANGLE
-#undef POINT
-#undef LINE
-#undef FUNC
+#define FUNC_VARS \
+ struct pt_so_emit *so, \
+ const struct draw_prim_info *input_prims, \
+ const struct draw_vertex_info *input_verts, \
+ unsigned start, \
+ unsigned count
+
+#define FUNC_ENTER \
+ /* declare more local vars */ \
+ struct draw_context *draw = so->draw; \
+ const unsigned prim = input_prims->prim; \
+ const boolean last_vertex_last = \
+ !(draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ do { \
+ debug_assert(input_prims->primitive_count == 1); \
+ switch (prim) { \
+ case PIPE_PRIM_LINES_ADJACENCY: \
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY: \
+ case PIPE_PRIM_TRIANGLES_ADJACENCY: \
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: \
+ debug_assert(!"unexpected primitive type in stream output"); \
+ return; \
+ default: \
+ break; \
+ } \
+ } while (0) \
+
+#define POINT(i0) so_point(so,i0)
+#define LINE(flags,i0,i1) so_line(so,i0,i1)
+#define TRIANGLE(flags,i0,i1,i2) so_tri(so,i0,i1,i2)
+
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 3af31ffe12..e63cf5f4f9 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -166,7 +166,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit
}
}
-static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit)
+static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit)
{
switch (emit) {
case EMIT_OMIT:
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 57ea63fc06..fb665b08ff 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -48,18 +48,30 @@
DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE)
+
+/**
+ * Set a vertex shader constant buffer.
+ * \param slot which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1]
+ * \param constants the mapped buffer
+ * \param size size of buffer in bytes
+ */
void
draw_vs_set_constants(struct draw_context *draw,
unsigned slot,
const void *constants,
unsigned size)
{
- if (((uintptr_t)constants) & 0xf) {
+ const int alignment = 16;
+
+ /* check if buffer is 16-byte aligned */
+ if (((uintptr_t)constants) & (alignment - 1)) {
+ /* if not, copy the constants into a new, 16-byte aligned buffer */
if (size > draw->vs.const_storage_size[slot]) {
if (draw->vs.aligned_constant_storage[slot]) {
align_free((void *)draw->vs.aligned_constant_storage[slot]);
}
- draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16);
+ draw->vs.aligned_constant_storage[slot] =
+ align_malloc(size, alignment);
}
assert(constants);
memcpy((void *)draw->vs.aligned_constant_storage[slot],
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index a731994523..f9a038788f 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -133,7 +133,8 @@ struct draw_vertex_shader {
void (*run_linear)( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride );
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index bc34d390da..dab3eb1ca8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -85,7 +85,8 @@ static void
vs_exec_run_linear( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
@@ -95,9 +96,8 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
unsigned int i, j;
unsigned slot;
- for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- machine->Consts[i] = constants[i];
- }
+ tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
+ constants, const_size);
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 6c13df7913..d13ad24fff 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -49,6 +49,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 14c95082a9..0b0c6077c6 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -84,6 +84,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
const float (*input)[4],
float (*output)[4],
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 6eb26927f2..eacd160187 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -149,7 +149,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants_size,
count,
temp_vertex_stride,
temp_vertex_stride);
@@ -214,7 +215,8 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants_size,
count,
temp_vertex_stride,
temp_vertex_stride);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index f5f2623e46..7b35dd4bb4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -59,6 +59,19 @@
#include "lp_bld_arit.h"
+/*
+ * XXX: Increasing the number of Newton steps eliminates some artifacts, but
+ * adds others, most noticeably corruption in the Earth halo in Google Earth.
+ */
+#define RCP_NEWTON_STEPS 0
+
+#define RSQRT_NEWTON_STEPS 0
+
+#define EXP_POLY_DEGREE 3
+
+#define LOG_POLY_DEGREE 5
+
+
/**
* Generate min(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
@@ -72,6 +85,9 @@ lp_build_min_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -118,6 +134,9 @@ lp_build_max_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -160,6 +179,8 @@ lp_build_comp(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->one)
return bld->zero;
if(a == bld->zero)
@@ -173,9 +194,15 @@ lp_build_comp(struct lp_build_context *bld,
}
if(LLVMIsConstant(a))
- return LLVMConstSub(bld->one, a);
+ if (type.floating)
+ return LLVMConstFSub(bld->one, a);
+ else
+ return LLVMConstSub(bld->one, a);
else
- return LLVMBuildSub(bld->builder, bld->one, a, "");
+ if (type.floating)
+ return LLVMBuildFSub(bld->builder, bld->one, a, "");
+ else
+ return LLVMBuildSub(bld->builder, bld->one, a, "");
}
@@ -190,6 +217,9 @@ lp_build_add(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return b;
if(b == bld->zero)
@@ -217,9 +247,15 @@ lp_build_add(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstAdd(a, b);
+ if (type.floating)
+ res = LLVMConstFAdd(a, b);
+ else
+ res = LLVMConstAdd(a, b);
else
- res = LLVMBuildAdd(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, a, b, "");
+ else
+ res = LLVMBuildAdd(bld->builder, a, b, "");
/* clamp to ceiling of 1.0 */
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
@@ -240,6 +276,8 @@ lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef index, res;
unsigned i;
+ assert(lp_check_value(type, a));
+
if (a == bld->zero)
return bld->zero;
if (a == bld->undef)
@@ -253,9 +291,16 @@ lp_build_sum_vector(struct lp_build_context *bld,
for (i = 1; i < type.length; i++) {
index = LLVMConstInt(LLVMInt32Type(), i, 0);
- res = LLVMBuildAdd(bld->builder, res,
- LLVMBuildExtractElement(bld->builder, a, index, ""),
- "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
+ else
+ res = LLVMBuildAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
}
return res;
@@ -273,6 +318,9 @@ lp_build_sub(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(b == bld->zero)
return a;
if(a == bld->undef || b == bld->undef)
@@ -300,9 +348,15 @@ lp_build_sub(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstSub(a, b);
+ if (type.floating)
+ res = LLVMConstFSub(a, b);
+ else
+ res = LLVMConstSub(a, b);
else
- res = LLVMBuildSub(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFSub(bld->builder, a, b, "");
+ else
+ res = LLVMBuildSub(bld->builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
res = lp_build_max_simple(bld, res, bld->zero);
@@ -360,6 +414,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
LLVMValueRef c8;
LLVMValueRef ab;
+ assert(!i16_type.floating);
+ assert(lp_check_value(i16_type, a));
+ assert(lp_check_value(i16_type, b));
+
c8 = lp_build_const_int_vec(i16_type, 8);
#if 0
@@ -395,6 +453,9 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef shift;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -433,7 +494,10 @@ lp_build_mul(struct lp_build_context *bld,
shift = NULL;
if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
- res = LLVMConstMul(a, b);
+ if (type.floating)
+ res = LLVMConstFMul(a, b);
+ else
+ res = LLVMConstMul(a, b);
if(shift) {
if(type.sign)
res = LLVMConstAShr(res, shift);
@@ -442,7 +506,10 @@ lp_build_mul(struct lp_build_context *bld,
}
}
else {
- res = LLVMBuildMul(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFMul(bld->builder, a, b, "");
+ else
+ res = LLVMBuildMul(bld->builder, a, b, "");
if(shift) {
if(type.sign)
res = LLVMBuildAShr(bld->builder, res, shift, "");
@@ -465,6 +532,8 @@ lp_build_mul_imm(struct lp_build_context *bld,
{
LLVMValueRef factor;
+ assert(lp_check_value(bld->type, a));
+
if(b == 0)
return bld->zero;
@@ -472,7 +541,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
return a;
if(b == -1)
- return LLVMBuildNeg(bld->builder, a, "");
+ return lp_build_negate(bld, a);
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
@@ -518,6 +587,9 @@ lp_build_div(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -529,13 +601,24 @@ lp_build_div(struct lp_build_context *bld,
if(a == bld->undef || b == bld->undef)
return bld->undef;
- if(LLVMIsConstant(a) && LLVMIsConstant(b))
- return LLVMConstFDiv(a, b);
+ if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+ if (type.floating)
+ return LLVMConstFDiv(a, b);
+ else if (type.sign)
+ return LLVMConstSDiv(a, b);
+ else
+ return LLVMConstUDiv(a, b);
+ }
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_mul(bld, a, lp_build_rcp(bld, b));
- return LLVMBuildFDiv(bld->builder, a, b, "");
+ if (type.floating)
+ return LLVMBuildFDiv(bld->builder, a, b, "");
+ else if (type.sign)
+ return LLVMBuildSDiv(bld->builder, a, b, "");
+ else
+ return LLVMBuildUDiv(bld->builder, a, b, "");
}
@@ -555,6 +638,10 @@ lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef delta;
LLVMValueRef res;
+ assert(lp_check_value(bld->type, x));
+ assert(lp_check_value(bld->type, v0));
+ assert(lp_check_value(bld->type, v1));
+
delta = lp_build_sub(bld, v1, v0);
res = lp_build_mul(bld, x, delta);
@@ -596,6 +683,9 @@ lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -624,6 +714,9 @@ lp_build_max(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -653,6 +746,10 @@ lp_build_clamp(struct lp_build_context *bld,
LLVMValueRef min,
LLVMValueRef max)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, min));
+ assert(lp_check_value(bld->type, max));
+
a = lp_build_min(bld, a, max);
a = lp_build_max(bld, a, min);
return a;
@@ -669,6 +766,8 @@ lp_build_abs(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
+ assert(lp_check_value(type, a));
+
if(!type.sign)
return a;
@@ -702,7 +801,16 @@ LLVMValueRef
lp_build_negate(struct lp_build_context *bld,
LLVMValueRef a)
{
- return LLVMBuildNeg(bld->builder, a, "");
+ assert(lp_check_value(bld->type, a));
+
+#if HAVE_LLVM >= 0x0207
+ if (bld->type.floating)
+ a = LLVMBuildFNeg(bld->builder, a, "");
+ else
+#endif
+ a = LLVMBuildNeg(bld->builder, a, "");
+
+ return a;
}
@@ -715,6 +823,8 @@ lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef cond;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+
/* Handle non-zero case */
if(!type.sign) {
/* if not zero then sign must be positive */
@@ -773,6 +883,7 @@ lp_build_set_sign(struct lp_build_context *bld,
LLVMValueRef val, res;
assert(type.floating);
+ assert(lp_check_value(type, a));
/* val = reinterpret_cast<int>(a) */
val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
@@ -1021,7 +1132,7 @@ lp_build_iround(struct lp_build_context *bld,
half = LLVMBuildOr(bld->builder, sign, half, "");
half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
- res = LLVMBuildAdd(bld->builder, a, half, "");
+ res = LLVMBuildFAdd(bld->builder, a, half, "");
}
res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
@@ -1070,7 +1181,7 @@ lp_build_ifloor(struct lp_build_context *bld,
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res");
+ res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
}
/* round to nearest (toward zero) */
@@ -1120,7 +1231,7 @@ lp_build_iceil(struct lp_build_context *bld,
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res");
+ res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
/* round to nearest (toward zero) */
@@ -1138,6 +1249,8 @@ lp_build_sqrt(struct lp_build_context *bld,
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
+ assert(lp_check_value(type, a));
+
/* TODO: optimize the constant case */
/* TODO: optimize the constant case */
@@ -1148,12 +1261,39 @@ lp_build_sqrt(struct lp_build_context *bld,
}
+/**
+ * Do one Newton-Raphson step to improve reciprocal precision:
+ *
+ * x_{i+1} = x_i * (2 - a * x_i)
+ *
+ * See also:
+ * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rcp_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rcp_a)
+{
+ LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, a, rcp_a, "");
+ res = LLVMBuildFSub(bld->builder, two, res, "");
+ res = LLVMBuildFMul(bld->builder, rcp_a, res, "");
+
+ return res;
+}
+
+
LLVMValueRef
lp_build_rcp(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->zero)
return bld->undef;
if(a == bld->one)
@@ -1167,32 +1307,16 @@ lp_build_rcp(struct lp_build_context *bld,
return LLVMConstFDiv(bld->one, a);
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
- /*
- * XXX: Added precision is not always necessary, so only enable this
- * when we have a better system in place to track minimum precision.
- */
-
-#if 0
- /*
- * Do one Newton-Raphson step to improve precision:
- *
- * x1 = (2 - a * rcp(a)) * rcp(a)
- */
-
- LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
- LLVMValueRef rcp_a;
LLVMValueRef res;
+ unsigned i;
- rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
- res = LLVMBuildMul(bld->builder, a, rcp_a, "");
- res = LLVMBuildSub(bld->builder, two, res, "");
- res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+ for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+ res = lp_build_rcp_refine(bld, a, res);
+ }
- return rcp_a;
-#else
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
-#endif
+ return res;
}
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
@@ -1200,6 +1324,33 @@ lp_build_rcp(struct lp_build_context *bld,
/**
+ * Do one Newton-Raphson step to improve rsqrt precision:
+ *
+ * x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i)
+ *
+ * See also:
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rsqrt_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rsqrt_a)
+{
+ LLVMValueRef half = lp_build_const_vec(bld->type, 0.5);
+ LLVMValueRef three = lp_build_const_vec(bld->type, 3.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, "");
+ res = LLVMBuildFMul(bld->builder, a, res, "");
+ res = LLVMBuildFSub(bld->builder, three, res, "");
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, res, "");
+ res = LLVMBuildFMul(bld->builder, half, res, "");
+
+ return res;
+}
+
+
+/**
* Generate 1/sqrt(a)
*/
LLVMValueRef
@@ -1208,10 +1359,22 @@ lp_build_rsqrt(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
assert(type.floating);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ LLVMValueRef res;
+ unsigned i;
+
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
+
+ for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+ res = lp_build_rsqrt_refine(bld, a, res);
+ }
+
+ return res;
+ }
return lp_build_rcp(bld, lp_build_sqrt(bld, a));
}
@@ -1270,7 +1433,7 @@ lp_build_sin(struct lp_build_context *bld,
*/
LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
- LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
/*
* store the integer part of y in mm0
@@ -1344,9 +1507,9 @@ lp_build_sin(struct lp_build_context *bld,
* xmm2 = _mm_mul_ps(y, xmm2);
* xmm3 = _mm_mul_ps(y, xmm3);
*/
- LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
- LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
- LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
/*
* x = _mm_add_ps(x, xmm1);
@@ -1354,16 +1517,16 @@ lp_build_sin(struct lp_build_context *bld,
* x = _mm_add_ps(x, xmm3);
*/
- LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
- LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
- LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
/*
* Evaluate the first polynom (0 <= x <= Pi/4)
*
* z = _mm_mul_ps(x,x);
*/
- LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
/*
* _PS_CONST(coscof_p0, 2.443315711809948E-005);
@@ -1378,12 +1541,12 @@ lp_build_sin(struct lp_build_context *bld,
* y = *(v4sf*)_ps_coscof_p0;
* y = _mm_mul_ps(y, z);
*/
- LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
- LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
- LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
- LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
- LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
- LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
/*
@@ -1392,10 +1555,10 @@ lp_build_sin(struct lp_build_context *bld,
* y = _mm_add_ps(y, *(v4sf*)_ps_1);
*/
LLVMValueRef half = lp_build_const_v4sf(0.5);
- LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
- LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
LLVMValueRef one = lp_build_const_v4sf(1.0);
- LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
/*
* _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1419,13 +1582,13 @@ lp_build_sin(struct lp_build_context *bld,
* y2 = _mm_add_ps(y2, x);
*/
- LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
- LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
- LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
- LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
- LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
- LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
- LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
/*
* select the correct result from the two polynoms
@@ -1481,7 +1644,7 @@ lp_build_cos(struct lp_build_context *bld,
*/
LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
- LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
/*
* store the integer part of y in mm0
@@ -1561,9 +1724,9 @@ lp_build_cos(struct lp_build_context *bld,
* xmm2 = _mm_mul_ps(y, xmm2);
* xmm3 = _mm_mul_ps(y, xmm3);
*/
- LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
- LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
- LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
/*
* x = _mm_add_ps(x, xmm1);
@@ -1571,16 +1734,16 @@ lp_build_cos(struct lp_build_context *bld,
* x = _mm_add_ps(x, xmm3);
*/
- LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
- LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
- LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
/*
* Evaluate the first polynom (0 <= x <= Pi/4)
*
* z = _mm_mul_ps(x,x);
*/
- LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
/*
* _PS_CONST(coscof_p0, 2.443315711809948E-005);
@@ -1595,12 +1758,12 @@ lp_build_cos(struct lp_build_context *bld,
* y = *(v4sf*)_ps_coscof_p0;
* y = _mm_mul_ps(y, z);
*/
- LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
- LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
- LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
- LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
- LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
- LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
/*
@@ -1609,10 +1772,10 @@ lp_build_cos(struct lp_build_context *bld,
* y = _mm_add_ps(y, *(v4sf*)_ps_1);
*/
LLVMValueRef half = lp_build_const_v4sf(0.5);
- LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
- LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
LLVMValueRef one = lp_build_const_v4sf(1.0);
- LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
/*
* _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1636,13 +1799,13 @@ lp_build_cos(struct lp_build_context *bld,
* y2 = _mm_add_ps(y2, x);
*/
- LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
- LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
- LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
- LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
- LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
- LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
- LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
/*
* select the correct result from the two polynoms
@@ -1695,6 +1858,8 @@ lp_build_exp(struct lp_build_context *bld,
/* log2(e) = 1/log(2) */
LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
}
@@ -1709,14 +1874,12 @@ lp_build_log(struct lp_build_context *bld,
/* log(2) */
LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
}
-#define EXP_POLY_DEGREE 3
-#define LOG_POLY_DEGREE 5
-
-
/**
* Generate polynomial.
* Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
@@ -1731,6 +1894,8 @@ lp_build_polynomial(struct lp_build_context *bld,
LLVMValueRef res = NULL;
unsigned i;
+ assert(lp_check_value(bld->type, x));
+
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
@@ -1802,6 +1967,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef expfpart = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
@@ -1817,7 +1984,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
ipart = lp_build_floor(bld, x);
/* fpart = x - ipart */
- fpart = LLVMBuildSub(bld->builder, x, ipart, "");
+ fpart = LLVMBuildFSub(bld->builder, x, ipart, "");
}
if(p_exp2_int_part || p_exp2) {
@@ -1832,7 +1999,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
Elements(lp_build_exp2_polynomial));
- res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+ res = LLVMBuildFMul(bld->builder, expipart, expfpart, "");
}
if(p_exp2_int_part)
@@ -1915,6 +2082,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMValueRef logmant = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
@@ -1945,9 +2114,9 @@ lp_build_log2_approx(struct lp_build_context *bld,
Elements(lp_build_log2_polynomial));
/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+ logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), "");
- res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ res = LLVMBuildFAdd(bld->builder, logmant, logexp, "");
}
if(p_exp) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
new file mode 100644
index 0000000000..f2ebd868a8
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "lp_bld_assert.h"
+#include "lp_bld_init.h"
+#include "lp_bld_printf.h"
+
+
+/**
+ * A call to lp_build_assert() will build a function call to this function.
+ */
+static void
+lp_assert(int condition, const char *msg)
+{
+ if (!condition) {
+ debug_printf("LLVM assertion '%s' failed!\n", msg);
+ assert(condition);
+ }
+}
+
+
+
+/**
+ * lp_build_assert.
+ *
+ * Build an assertion in LLVM IR by building a function call to the
+ * lp_assert() function above.
+ *
+ * \param condition should be an 'i1' or 'i32' value
+ * \param msg a string to print if the assertion fails.
+ */
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+ const char *msg)
+{
+ LLVMModuleRef module;
+ LLVMTypeRef arg_types[2];
+ LLVMValueRef msg_string, assert_func, params[2], r;
+
+ module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
+ LLVMGetInsertBlock(builder)));
+
+ msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1);
+
+ arg_types[0] = LLVMInt32Type();
+ arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ /* lookup the lp_assert function */
+ assert_func = LLVMGetNamedFunction(module, "lp_assert");
+
+ /* Create the assertion function if not found */
+ if (!assert_func) {
+ LLVMTypeRef func_type =
+ LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0);
+
+ assert_func = LLVMAddFunction(module, "lp_assert", func_type);
+ LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
+ LLVMSetLinkage(assert_func, LLVMExternalLinkage);
+ LLVMAddGlobalMapping(lp_build_engine, assert_func,
+ func_to_pointer((func_pointer)lp_assert));
+ }
+ assert(assert_func);
+
+ /* build function call param list */
+ params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
+ params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], "");
+
+ /* check arg types */
+ assert(LLVMTypeOf(params[0]) == arg_types[0]);
+ assert(LLVMTypeOf(params[1]) == arg_types[1]);
+
+ r = LLVMBuildCall(builder, assert_func, params, 2, "");
+
+ return r;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
new file mode 100644
index 0000000000..ddd879dc2c
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_ASSERT_H
+#define LP_BLD_ASSERT_H
+
+
+#include "lp_bld.h"
+
+
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+ const char *msg);
+
+
+#endif
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 77012f1fac..8b477313d4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -117,8 +117,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
scale = (double)mask/ubound;
bias = (double)((unsigned long long)1 << (mantissa - n));
- res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), "");
- res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), "");
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
if(dst_width > n) {
@@ -175,6 +175,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
double scale;
double bias;
+ assert(dst_type.floating);
+
mantissa = lp_mantissa(dst_type);
n = MIN2(mantissa, src_width);
@@ -199,8 +201,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
res = LLVMBuildBitCast(builder, res, vec_type, "");
- res = LLVMBuildSub(builder, res, bias_, "");
- res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), "");
+ res = LLVMBuildFSub(builder, res, bias_, "");
+ res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
return res;
}
@@ -296,7 +298,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (dst_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
/* Use an equally sized integer for intermediate computations */
@@ -391,7 +393,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (src_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 0f01fc1d75..247cb83ce6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -240,7 +240,7 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
*/
if (normalized)
- scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
else
scaled = casted;
@@ -322,7 +322,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
}
if (normalized)
- scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
else
scaled = unswizzled;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 9f405921b0..c724a4453e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -197,7 +197,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
if (format_desc->channel[chan].normalized) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
}
else {
@@ -227,7 +227,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
else {
/* FIXME */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 69353dea09..60d8bcfa55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -45,6 +45,8 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
{ "nopt", GALLIVM_DEBUG_NO_OPT, NULL },
DEBUG_NAMED_VALUE_END
};
+
+DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0)
#endif
@@ -89,7 +91,7 @@ void
lp_build_init(void)
{
#ifdef DEBUG
- gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 );
+ gallivm_debug = debug_get_option_gallivm_debug();
#endif
lp_set_target_options();
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index a32ced9b4c..f26fdac466 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -44,5 +44,7 @@ extern LLVMPassManagerRef lp_build_pass;
void
lp_build_init(void);
+extern void
+lp_func_delete_body(LLVMValueRef func);
#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 39854e43b1..7d7db3b0d9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -83,6 +83,8 @@ lp_build_compare(LLVMBuilderRef builder,
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
if(func == PIPE_FUNC_NEVER)
return zeros;
@@ -363,9 +365,55 @@ lp_build_cmp(struct lp_build_context *bld,
/**
+ * Return (mask & a) | (~mask & b);
+ */
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+ LLVMValueRef mask,
+ LLVMValueRef a,
+ LLVMValueRef b)
+{
+ struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ if (a == b) {
+ return a;
+ }
+
+ if(type.floating) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ }
+
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+
+ /* This often gets translated to PANDN, but sometimes the NOT is
+ * pre-computed and stored in another constant. The best strategy depends
+ * on available registers, so it is not a big deal -- hopefully LLVM makes
+ * the right decision given the rest of the program.
+ */
+ b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+
+ res = LLVMBuildOr(bld->builder, a, b, "");
+
+ if(type.floating) {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
+
+ return res;
+}
+
+
+/**
* Return mask ? a : b;
*
- * mask is a bitwise mask, composed of 0 or ~0 for each element.
+ * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
+ * will yield unpredictable results.
*/
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
@@ -376,6 +424,9 @@ lp_build_select(struct lp_build_context *bld,
struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
@@ -424,27 +475,7 @@ lp_build_select(struct lp_build_context *bld,
}
}
else {
- if(type.floating) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
- }
-
- a = LLVMBuildAnd(bld->builder, a, mask, "");
-
- /* This often gets translated to PANDN, but sometimes the NOT is
- * pre-computed and stored in another constant. The best strategy depends
- * on available registers, so it is not a big deal -- hopefully LLVM does
- * the right decision attending the rest of the program.
- */
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
-
- res = LLVMBuildOr(bld->builder, a, b, "");
-
- if(type.floating) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
- }
+ res = lp_build_select_bitwise(bld, mask, a, b);
}
return res;
@@ -461,6 +492,9 @@ lp_build_select_aos(struct lp_build_context *bld,
const unsigned n = type.length;
unsigned i, j;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
if(cond[0] && cond[1] && cond[2] && cond[3])
@@ -516,7 +550,22 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
{
+ const struct lp_type type = bld->type;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ /* can't do bitwise ops on floating-point values */
+ if(type.floating) {
+ a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
+ }
+
b = LLVMBuildNot(bld->builder, b, "");
b = LLVMBuildAnd(bld->builder, a, b, "");
+
+ if(type.floating) {
+ b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+ }
return b;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 29f9fc3b20..4e7b4c9938 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -63,6 +63,11 @@ lp_build_cmp(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+ LLVMValueRef mask,
+ LLVMValueRef a,
+ LLVMValueRef b);
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 5a9488b5f7..6d5410d970 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -39,6 +39,7 @@
#include <llvm/Target/TargetOptions.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITEventListener.h>
+#include <llvm/Support/CommandLine.h>
#include "pipe/p_config.h"
#include "util/u_debug.h"
@@ -141,4 +142,35 @@ lp_set_target_options(void)
#if 0
llvm::UnsafeFPMath = true;
#endif
+
+#if 0
+ /*
+ * LLVM will generate MMX instructions for vectors <= 64 bits, leading to
+ * inefficient code, and in 32bit systems, to the corruption of the FPU
+ * stack given that it expects the user to generate the EMMS instructions.
+ *
+ * See also:
+ * - http://llvm.org/bugs/show_bug.cgi?id=3287
+ * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
+ *
+ * XXX: Unfortunately this is not working.
+ */
+ static boolean first = FALSE;
+ if (first) {
+ static const char* options[] = {
+ "prog",
+ "-disable-mmx"
+ };
+ llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
+ first = FALSE;
+ }
+#endif
+}
+
+
+extern "C" void
+lp_func_delete_body(LLVMValueRef FF)
+{
+ llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
+ func->deleteBody();
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 7748f8f099..b7b630f2e8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -171,14 +171,13 @@ lp_build_unpack2(LLVMBuilderRef builder,
msb = lp_build_zero(src_type);
/* Interleave bits */
- if(util_cpu_caps.little_endian) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
*dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
*dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
- }
- else {
+#else
*dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
*dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
- }
+#endif
/* Cast the result into the new type (twice as wide) */
@@ -261,13 +260,14 @@ lp_build_pack2(LLVMBuilderRef builder,
#endif
LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
LLVMValueRef shuffle;
- LLVMValueRef res;
+ LLVMValueRef res = NULL;
assert(!src_type.floating);
assert(!dst_type.floating);
assert(src_type.width == dst_type.width * 2);
assert(src_type.length * 2 == dst_type.length);
+ /* Check for special cases first */
if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
switch(src_type.width) {
case 32:
@@ -283,8 +283,8 @@ lp_build_pack2(LLVMBuilderRef builder,
return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
}
else {
- assert(0);
- return LLVMGetUndef(dst_vec_type);
+ /* use generic shuffle below */
+ res = NULL;
}
}
break;
@@ -310,10 +310,13 @@ lp_build_pack2(LLVMBuilderRef builder,
break;
}
- res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
- return res;
+ if (res) {
+ res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+ return res;
+ }
}
+ /* generic shuffle */
lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index ca36046d22..7b1088939b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -85,7 +85,7 @@ lp_build_scalar_ddx(struct lp_build_context *bld,
LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
- return LLVMBuildSub(bld->builder, a_right, a_left, "");
+ return lp_build_sub(bld, a_right, a_left);
}
@@ -97,5 +97,5 @@ lp_build_scalar_ddy(struct lp_build_context *bld,
LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
- return LLVMBuildSub(bld->builder, a_bottom, a_top, "");
+ return lp_build_sub(bld, a_bottom, a_top);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1a20d74cac..806c7d56a8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -40,7 +40,6 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
-#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -811,7 +810,7 @@ lp_build_minify(struct lp_build_sample_context *bld,
LLVMValueRef base_size,
LLVMValueRef level)
{
- LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+ LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
return size;
}
@@ -888,17 +887,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
/* Compute rho = max of all partial derivatives scaled by texture size.
* XXX this could be vectorized somewhat
*/
- rho = LLVMBuildMul(bld->builder,
+ rho = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dsdx, dsdy),
lp_build_int_to_float(float_bld, width), "");
if (dims > 1) {
LLVMValueRef max;
- max = LLVMBuildMul(bld->builder,
+ max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dtdx, dtdy),
lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
if (dims > 2) {
- max = LLVMBuildMul(bld->builder,
+ max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, drdx, drdy),
lp_build_int_to_float(float_bld, depth), "");
rho = lp_build_max(float_bld, rho, max);
@@ -912,12 +911,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
if (lod_bias) {
lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
index0, "");
- lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+ lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
}
}
/* add sampler lod bias */
- lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
/* clamp lod */
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -1219,8 +1218,7 @@ lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
/* ima = -0.5 / abs(coord); */
LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
- lp_build_rcp(coord_bld, absCoord));
+ LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
return ima;
}
@@ -1841,7 +1839,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
- unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned subindex = 0;
+#else
+ unsigned subindex = 1;
+#endif
LLVMValueRef index;
index = LLVMConstInt(elem_type, j/2 + subindex, 0);
@@ -2029,6 +2031,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
debug_printf("Sample from %s\n", util_format_name(fmt));
}
+ assert(type.floating);
+
/* Setup our build context */
memset(&bld, 0, sizeof bld);
bld.builder = builder;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 21236839fb..0aa64affac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -489,7 +489,7 @@ get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
int_vec_type, "");
/* addr_vec = addr_vec * 4 */
- addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
+ addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);
return addr_vec;
}
@@ -533,7 +533,7 @@ emit_fetch(
reg->Register.Index * 4 + swizzle);
/* index_vec = index_vec + addr_vec */
- index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
+ index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
/* Gather values from the constant buffer */
res = build_gather(bld, bld->consts_ptr, index_vec);
@@ -612,11 +612,9 @@ emit_fetch(
case TGSI_UTIL_SIGN_SET:
/* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
- res = LLVMBuildNeg( bld->base.builder, res, "" );
- break;
-
+ /* fall through */
case TGSI_UTIL_SIGN_TOGGLE:
- res = LLVMBuildNeg( bld->base.builder, res, "" );
+ res = lp_build_negate( &bld->base, res );
break;
case TGSI_UTIL_SIGN_KEEP:
@@ -773,7 +771,9 @@ emit_store(
addr = LLVMBuildExtractElement(bld->base.builder,
addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
"");
- addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ addr = LLVMBuildMul(bld->base.builder,
+ addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
+ "");
}
switch( reg->Register.File ) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 5275faa5e2..298f3d0a8b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -557,6 +557,23 @@ print_temp(const struct tgsi_exec_machine *mach, uint index)
#endif
+void
+tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
+ unsigned num_bufs,
+ const void **bufs,
+ const unsigned *buf_sizes)
+{
+ unsigned i;
+
+ for (i = 0; i < num_bufs; i++) {
+ mach->Consts[i] = bufs[i];
+ mach->ConstsSize[i] = buf_sizes[i];
+ }
+}
+
+
+
+
/**
* Check if there's a potential src/dst register data dependency when
* using SOA execution.
@@ -632,6 +649,10 @@ tgsi_exec_machine_bind_shader(
util_init_math();
+ if (numSamplers) {
+ assert(samplers);
+ }
+
mach->Tokens = tokens;
mach->Samplers = samplers;
@@ -1040,6 +1061,8 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
{
uint i;
+ assert(swizzle < 4);
+
switch (file) {
case TGSI_FILE_CONSTANT:
for (i = 0; i < QUAD_SIZE; i++) {
@@ -1049,9 +1072,23 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
if (index->i[i] < 0) {
chan->u[i] = 0;
} else {
- const uint *p = (const uint *)mach->Consts[index2D->i[i]];
-
- chan->u[i] = p[index->i[i] * 4 + swizzle];
+ /* NOTE: copying the const value as a uint instead of float */
+ const uint constbuf = index2D->i[i];
+ const uint *buf = (const uint *)mach->Consts[constbuf];
+ const int pos = index->i[i] * 4 + swizzle;
+ /* const buffer bounds check */
+ if (pos < 0 || pos >= mach->ConstsSize[constbuf]) {
+ if (0) {
+ /* Debug: print warning */
+ static int count = 0;
+ if (count++ < 100)
+ debug_printf("TGSI Exec: const buffer index %d"
+ " out of bounds\n", pos);
+ }
+ chan->u[i] = 0;
+ }
+ else
+ chan->u[i] = buf[pos];
}
}
break;
@@ -1065,9 +1102,10 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
index2D->i[i], index->i[i]);
}*/
- chan->u[i] = mach->Inputs[index2D->i[i] *
- TGSI_EXEC_MAX_INPUT_ATTRIBS +
- index->i[i]].xyzw[swizzle].u[i];
+ int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
+ assert(pos >= 0);
+ assert(pos < Elements(mach->Inputs));
+ chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
}
break;
@@ -1187,7 +1225,7 @@ fetch_source(const struct tgsi_exec_machine *mach,
index2.i[1] =
index2.i[2] =
index2.i[3] = reg->Indirect.Index;
-
+ assert(reg->Indirect.File == TGSI_FILE_ADDRESS);
/* get current value of address register[swizzle] */
swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
fetch_src_file_channel(mach,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index ccf80ca6fd..6dee362d58 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -253,7 +253,10 @@ struct tgsi_exec_machine
struct tgsi_sampler **Samplers;
unsigned ImmLimit;
+
const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS];
+
const struct tgsi_token *Tokens; /**< Declarations, instructions */
unsigned Processor; /**< TGSI_PROCESSOR_x */
@@ -367,6 +370,13 @@ tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
}
+extern void
+tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
+ unsigned num_bufs,
+ const void **bufs,
+ const unsigned *buf_sizes);
+
+
#if defined __cplusplus
} /* extern "C" */
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 97148dbe23..acbff103ef 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -33,6 +33,10 @@
#include "tgsi_info.h"
#include "tgsi_iterate.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE)
+
+
typedef struct {
uint file : 28;
/* max 2 dimensions */
@@ -54,6 +58,8 @@ struct sanity_check_ctx
uint errors;
uint warnings;
uint implied_array_size;
+
+ boolean print;
};
static INLINE unsigned
@@ -148,6 +154,9 @@ report_error(
{
va_list args;
+ if (!ctx->print)
+ return;
+
debug_printf( "Error : " );
va_start( args, format );
_debug_vprintf( format, args );
@@ -164,6 +173,9 @@ report_warning(
{
va_list args;
+ if (!ctx->print)
+ return;
+
debug_printf( "Warning: " );
va_start( args, format );
_debug_vprintf( format, args );
@@ -539,6 +551,7 @@ tgsi_sanity_check(
ctx.errors = 0;
ctx.warnings = 0;
ctx.implied_array_size = 0;
+ ctx.print = debug_get_option_print_sanity();
if (!tgsi_iterate_shader( tokens, &ctx.iter ))
return FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
index 52263ff883..73f0f414e3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
@@ -35,7 +35,8 @@ extern "C" {
#endif
/* Check the given token stream for errors and common mistakes.
- * Diagnostic messages are printed out to the debug output.
+ * Diagnostic messages are printed out to the debug output; printing is
+ * controlled by the debug option TGSI_PRINT_SANITY (default false).
* Returns TRUE if there are no errors, even though there could be some warnings.
*/
boolean
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index a9b7253bf4..fe638e211f 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -48,3 +48,8 @@ struct translate *translate_create( const struct translate_key *key )
return translate_generic_create( key );
}
+
+boolean translate_is_output_format_supported(enum pipe_format format)
+{
+ return translate_generic_is_output_format_supported(format);
+}
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index edd95e0788..eb6f2cc486 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -105,6 +105,8 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx,
struct translate *translate_create( const struct translate_key *key );
+boolean translate_is_output_format_supported(enum pipe_format format);
+
static INLINE int translate_keysize( const struct translate_key *key )
{
return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
@@ -138,5 +140,6 @@ struct translate *translate_sse2_create( const struct translate_key *key );
struct translate *translate_generic_create( const struct translate_key *key );
+boolean translate_generic_is_output_format_supported(enum pipe_format format);
#endif
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 0e43a512ee..42cfd763e9 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -187,9 +187,15 @@ ATTRIB( R8G8B8_SNORM, 3, char, TO_8_SNORM )
ATTRIB( R8G8_SNORM, 2, char, TO_8_SNORM )
ATTRIB( R8_SNORM, 1, char, TO_8_SNORM )
-ATTRIB( A8R8G8B8_UNORM, 4, ubyte, TO_8_UNORM )
-/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM )*/
-
+static void
+emit_A8R8G8B8_UNORM( const float *attrib, void *ptr)
+{
+ ubyte *out = (ubyte *)ptr;
+ out[0] = TO_8_UNORM(attrib[3]);
+ out[1] = TO_8_UNORM(attrib[0]);
+ out[2] = TO_8_UNORM(attrib[1]);
+ out[3] = TO_8_UNORM(attrib[2]);
+}
static void
emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
@@ -368,23 +374,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
/* loop over vertex attributes (vertex shader inputs)
*/
for (i = 0; i < count; i++) {
- unsigned elt = *elts++;
+ const unsigned elt = *elts++;
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
- const uint8_t *src;
- unsigned index;
-
- char *dst = (vert +
- tg->attrib[attr].output_offset);
+ char *dst = vert + tg->attrib[attr].output_offset;
if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ const uint8_t *src;
+ unsigned index;
+
if (tg->attrib[attr].instance_divisor) {
index = instance_id / tg->attrib[attr].instance_divisor;
} else {
index = elt;
}
+ /* clamp to avoid going out of bounds */
index = MIN2(index, tg->attrib[attr].max_index);
src = tg->attrib[attr].input_ptr +
@@ -392,11 +398,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
tg->attrib[attr].fetch( data, src, 0, 0 );
+ if (0)
+ debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
+ " %f, %f, %f, %f \n",
+ attr,
+ tg->attrib[attr].input_ptr,
+ tg->attrib[attr].input_stride,
+ tg->attrib[attr].instance_divisor,
+ tg->attrib[attr].max_index,
+ index,
+ data[0], data[1],data[2], data[3]);
} else {
data[0] = (float)instance_id;
}
- if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
- i, elt, attr, data[0], data[1], data[2], data[3]);
+
+ if (0)
+ debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
+ i, elt, attr, data[0], data[1], data[2], data[3]);
tg->attrib[attr].emit( data, dst );
}
@@ -425,29 +443,42 @@ static void PIPE_CDECL generic_run( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
-
- char *dst = (vert +
- tg->attrib[attr].output_offset);
+ char *dst = vert + tg->attrib[attr].output_offset;
if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
const uint8_t *src;
+ unsigned index;
if (tg->attrib[attr].instance_divisor) {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride *
- (instance_id / tg->attrib[attr].instance_divisor);
- } else {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt;
+ index = instance_id / tg->attrib[attr].instance_divisor;
}
+ else {
+ index = elt;
+ }
+
+ /* clamp to avoid going out of bounds */
+ index = MIN2(index, tg->attrib[attr].max_index);
+
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * index;
tg->attrib[attr].fetch( data, src, 0, 0 );
+
+ if (0)
+ debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+ " %f, %f, %f, %f \n",
+ attr,
+ tg->attrib[attr].input_ptr,
+ tg->attrib[attr].input_stride,
+ index,
+ data[0], data[1],data[2], data[3]);
} else {
data[0] = (float)instance_id;
}
- if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
- i, attr, data[0], data[1], data[2], data[3]);
+ if (0)
+ debug_printf("vert %d attr %d: %f %f %f %f\n",
+ i, attr, data[0], data[1], data[2], data[3]);
tg->attrib[attr].emit( data, dst );
}
@@ -523,3 +554,83 @@ struct translate *translate_generic_create( const struct translate_key *key )
return &tg->translate;
}
+
+boolean translate_generic_is_output_format_supported(enum pipe_format format)
+{
+ switch(format)
+ {
+ case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64G64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64_FLOAT: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32G32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32_FLOAT: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32G32B32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32G32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32G32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32G32B32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32G32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32G32B32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32G32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32_SNORM: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16G16B16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16G16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16G16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16G16B16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16G16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16G16B16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16G16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16_SNORM: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8G8B8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8G8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8G8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8G8B8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8G8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8G8B8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8G8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8_SNORM: return TRUE;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE;
+ case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE;
+ default: return FALSE;
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 0d94aaae95..b5b86b7214 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -87,6 +87,7 @@ struct blitter_context_priv
void *dsa_write_depth_keep_stencil;
void *dsa_keep_depth_stencil;
void *dsa_keep_depth_write_stencil;
+ void *dsa_flush_depth_stencil;
void *velem_state;
@@ -156,6 +157,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->dsa_keep_depth_stencil =
pipe->create_depth_stencil_alpha_state(pipe, &dsa);
+ dsa.depth.writemask = 1;
+ ctx->dsa_flush_depth_stencil =
+ pipe->create_depth_stencil_alpha_state(pipe, &dsa);
+
dsa.depth.enabled = 1;
dsa.depth.writemask = 1;
dsa.depth.func = PIPE_FUNC_ALWAYS;
@@ -940,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
+
+/* Flush a depth stencil surface: draw a rectangle over the whole surface with the dsa_flush_depth_stencil state bound and no color buffers. */
+void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ struct pipe_context *pipe = ctx->base.pipe;
+ struct pipe_framebuffer_state fb_state;
+
+ assert(dstsurf->texture);
+ if (!dstsurf->texture)
+ return;
+
+ /* check the saved state */
+ blitter_check_saved_CSOs(ctx);
+ assert(blitter->saved_fb_state.nr_cbufs != ~0);
+
+ /* bind CSOs */
+ pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
+
+ pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0));
+ pipe->bind_vs_state(pipe, ctx->vs_col);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
+
+ /* set a framebuffer state */
+ fb_state.width = dstsurf->width;
+ fb_state.height = dstsurf->height;
+ fb_state.nr_cbufs = 0;
+ fb_state.cbufs[0] = 0;
+ fb_state.zsbuf = dstsurf;
+ pipe->set_framebuffer_state(pipe, &fb_state);
+
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0,
+ UTIL_BLITTER_ATTRIB_NONE, NULL);
+ blitter_restore_CSOs(ctx);
+}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index ba3f92eca8..f316587dea 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height);
+void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf);
/* The functions below should be used to save currently bound constant state
* objects inside a driver. The objects are automatically restored at the end
* of the util_blitter_{clear, copy_region, fill_region} functions and then
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index a08241971c..5056351307 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -38,7 +38,7 @@
#include "u_cpu_detect.h"
#if defined(PIPE_ARCH_PPC)
-#if defined(PIPE_OS_DARWIN)
+#if defined(PIPE_OS_APPLE)
#include <sys/sysctl.h>
#else
#include <signal.h>
@@ -73,9 +73,15 @@
#endif
+DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
+
+
struct util_cpu_caps util_cpu_caps;
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
static int has_cpuid(void);
+#endif
+
#if defined(PIPE_ARCH_X86)
@@ -132,7 +138,7 @@ win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
#endif /* PIPE_ARCH_X86 */
-#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN)
+#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
static jmp_buf __lv_powerpc_jmpbuf;
static volatile sig_atomic_t __lv_powerpc_canjump = 0;
@@ -153,7 +159,7 @@ sigill_handler(int sig)
static void
check_os_altivec_support(void)
{
-#if defined(PIPE_OS_DARWIN)
+#if defined(PIPE_OS_APPLE)
int sels[2] = {CTL_HW, HW_VECTORUNIT};
int has_vu = 0;
int len = sizeof (has_vu);
@@ -166,8 +172,8 @@ check_os_altivec_support(void)
util_cpu_caps.has_altivec = 1;
}
}
-#else /* !PIPE_OS_DARWIN */
- /* no Darwin, do it the brute-force way */
+#else /* !PIPE_OS_APPLE */
+ /* not on Apple/Darwin, do it the brute-force way */
/* this is borrowed from the libmpeg2 library */
signal(SIGILL, sigill_handler);
if (setjmp(__lv_powerpc_jmpbuf)) {
@@ -184,7 +190,7 @@ check_os_altivec_support(void)
signal(SIGILL, SIG_DFL);
util_cpu_caps.has_altivec = 1;
}
-#endif /* PIPE_OS_DARWIN */
+#endif /* !PIPE_OS_APPLE */
}
#endif /* PIPE_ARCH_PPC */
@@ -385,23 +391,6 @@ util_cpu_detect(void)
memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
- /* Check for arch type */
-#if defined(PIPE_ARCH_MIPS)
- util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS;
-#elif defined(PIPE_ARCH_ALPHA)
- util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA;
-#elif defined(PIPE_ARCH_SPARC)
- util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC;
-#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- util_cpu_caps.arch = UTIL_CPU_ARCH_X86;
- util_cpu_caps.little_endian = 1;
-#elif defined(PIPE_ARCH_PPC)
- util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC;
- util_cpu_caps.little_endian = 0;
-#else
- util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN;
-#endif
-
/* Count the number of CPUs in system */
#if defined(PIPE_OS_WINDOWS)
{
@@ -497,23 +486,24 @@ util_cpu_detect(void)
#endif /* PIPE_ARCH_PPC */
#ifdef DEBUG
- debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch);
- debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
-
- debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
- debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
-
- debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
- debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
- debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
- debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
- debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
- debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
- debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
- debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
- debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
- debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
- debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
+ if (debug_get_option_dump_cpu()) {
+ debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
+
+ debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
+ debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
+
+ debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
+ debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
+ debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
+ debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
+ debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
+ debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
+ debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
+ debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
+ debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
+ debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
+ debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
+ }
#endif
util_cpu_detect_initialized = TRUE;
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index 4b3dc39c34..f3bef0993c 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -36,26 +36,15 @@
#define _UTIL_CPU_DETECT_H
#include "pipe/p_compiler.h"
-
-enum util_cpu_arch {
- UTIL_CPU_ARCH_UNKNOWN = 0,
- UTIL_CPU_ARCH_MIPS,
- UTIL_CPU_ARCH_ALPHA,
- UTIL_CPU_ARCH_SPARC,
- UTIL_CPU_ARCH_X86,
- UTIL_CPU_ARCH_POWERPC
-};
+#include "pipe/p_config.h"
struct util_cpu_caps {
- enum util_cpu_arch arch;
unsigned nr_cpus;
/* Feature flags */
int x86_cpu_type;
unsigned cacheline;
- unsigned little_endian:1;
-
unsigned has_tsc:1;
unsigned has_mmx:1;
unsigned has_mmx2:1;
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index ad162558bc..504e6d2a18 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -88,7 +88,7 @@ debug_get_option_should_print(void)
* but its cool since we set first to false
*/
first = FALSE;
- value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE);
+ value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", FALSE);
/* XXX should we print this option? Currently it wont */
return value;
}
diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h
new file mode 100644
index 0000000000..2a91ea0f9a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -0,0 +1,138 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DRAW_H
+#define U_DRAW_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+
+
+static INLINE void
+util_draw_init_info(struct pipe_draw_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->instance_count = 1;
+ info->max_index = 0xffffffff;
+}
+
+
+static INLINE void
+util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.min_index = start;
+ info.max_index = start + count - 1;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_elements(struct pipe_context *pipe, int index_bias,
+ uint mode, uint start, uint count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.indexed = TRUE;
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.index_bias = index_bias;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_arrays_instanced(struct pipe_context *pipe,
+ uint mode, uint start, uint count,
+ uint start_instance,
+ uint instance_count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.start_instance = start_instance;
+ info.instance_count = instance_count;
+ info.min_index = start;
+ info.max_index = start + count - 1;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_elements_instanced(struct pipe_context *pipe,
+ int index_bias,
+ uint mode, uint start, uint count,
+ uint start_instance,
+ uint instance_count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.indexed = TRUE;
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.index_bias = index_bias;
+ info.start_instance = start_instance;
+ info.instance_count = instance_count;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_range_elements(struct pipe_context *pipe,
+ int index_bias,
+ uint min_index,
+ uint max_index,
+ uint mode, uint start, uint count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.indexed = TRUE;
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.index_bias = index_bias;
+ info.min_index = min_index;
+ info.max_index = max_index;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index b37b48b5ae..0b6dc5880f 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -60,7 +60,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
/* note: vertex elements already set by caller */
/* draw */
- pipe->draw_arrays(pipe, prim_type, 0, num_verts);
+ util_draw_arrays(pipe, prim_type, 0, num_verts);
}
diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h
index 42eb184428..52994fe05c 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.h
+++ b/src/gallium/auxiliary/util/u_draw_quad.h
@@ -29,12 +29,18 @@
#define U_DRAWQUAD_H
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
struct pipe_resource;
+#include "util/u_draw.h"
+
extern void
util_draw_vertex_buffer(struct pipe_context *pipe,
struct pipe_resource *vbuf, uint offset,
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 38254b1096..8e786a390a 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -631,6 +631,44 @@ util_format_has_alpha(enum pipe_format format)
}
/**
+ * Return the matching SRGB format, or PIPE_FORMAT_NONE if none.
+ */
+static INLINE enum pipe_format
+util_format_srgb(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_L8_UNORM:
+ return PIPE_FORMAT_L8_SRGB;
+ case PIPE_FORMAT_L8A8_UNORM:
+ return PIPE_FORMAT_L8A8_SRGB;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return PIPE_FORMAT_R8G8B8_SRGB;
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ return PIPE_FORMAT_A8B8G8R8_SRGB;
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ return PIPE_FORMAT_X8B8G8R8_SRGB;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return PIPE_FORMAT_B8G8R8A8_SRGB;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ return PIPE_FORMAT_B8G8R8X8_SRGB;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return PIPE_FORMAT_A8R8G8B8_SRGB;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return PIPE_FORMAT_X8R8G8B8_SRGB;
+ case PIPE_FORMAT_DXT1_RGB:
+ return PIPE_FORMAT_DXT1_SRGB;
+ case PIPE_FORMAT_DXT1_RGBA:
+ return PIPE_FORMAT_DXT1_SRGBA;
+ case PIPE_FORMAT_DXT3_RGBA:
+ return PIPE_FORMAT_DXT3_SRGBA;
+ case PIPE_FORMAT_DXT5_RGBA:
+ return PIPE_FORMAT_DXT5_SRGBA;
+ default:
+ return PIPE_FORMAT_NONE;
+ }
+}
+
+/**
* Return the number of components stored.
* Formats with block size != 1x1 will always have 1 component (the block).
*/
diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c
index 723fa8c3bf..fa42ec3713 100644
--- a/src/gallium/auxiliary/util/u_format_other.c
+++ b/src/gallium/auxiliary/util/u_format_other.c
@@ -121,6 +121,15 @@ util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
* A.k.a. D3DFMT_CxV8U8
*/
+static uint8_t
+r8g8bx_derive(int16_t r, int16_t g)
+{
+ /* Derive blue from red and green components.
+ * The square root is truncated to an integer before it is rescaled to
+ * [0, 0xff]; apparently these integer steps are required, otherwise the
+ * results won't match D3D's CxV8U8 definition.
+ */
+ int b2 = 0x7f * 0x7f - r * r - g * g;
+
+ /* r*r + g*g may exceed 0x7f*0x7f for non-normalized input.  sqrtf() of a
+ * negative value is NaN, and converting NaN to uint8_t is undefined, so
+ * treat that case as "no blue".
+ */
+ if (b2 <= 0)
+ return 0;
+
+ return (uint8_t)sqrtf((float)b2) * 0xff / 0x7f;
+}
void
util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
@@ -145,7 +154,7 @@ util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
dst[0] = (float)(r * (1.0f/0x7f)); /* r */
dst[1] = (float)(g * (1.0f/0x7f)); /* g */
- dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */
+ dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
dst[3] = 1.0f; /* a */
dst += 4;
}
@@ -177,7 +186,7 @@ util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_strid
dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */
dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */
- dst[2] = (uint8_t)sqrtf(0x7f*0x7f - r * r - g * g) * 0xff / 0x7f; /* b */
+ dst[2] = r8g8bx_derive(r, g); /* b */
dst[3] = 255; /* a */
dst += 4;
}
@@ -262,6 +271,6 @@ util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src,
dst[0] = r * (1.0f/0x7f); /* r */
dst[1] = g * (1.0f/0x7f); /* g */
- dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */
+ dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
dst[3] = 1.0f; /* a */
}
diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c
index 768ae9ceb5..7803ec6a8b 100644
--- a/src/gallium/auxiliary/util/u_framebuffer.c
+++ b/src/gallium/auxiliary/util/u_framebuffer.c
@@ -85,9 +85,11 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst,
dst->width = src->width;
dst->height = src->height;
- for (i = 0; i < Elements(src->cbufs); i++) {
+ for (i = 0; i < src->nr_cbufs; i++)
pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
- }
+
+ for (i = src->nr_cbufs; i < dst->nr_cbufs; i++)
+ pipe_surface_reference(&dst->cbufs[i], NULL);
dst->nr_cbufs = src->nr_cbufs;
diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c
index 84e2a34acc..1f336b39a1 100644
--- a/src/gallium/auxiliary/util/u_mempool.c
+++ b/src/gallium/auxiliary/util/u_mempool.c
@@ -126,7 +126,6 @@ void util_mempool_set_thread_safety(struct util_mempool *pool,
pool->threading = threading;
if (threading) {
- pipe_mutex_init(pool->mutex);
pool->malloc = util_mempool_malloc_mt;
pool->free = util_mempool_free_mt;
} else {
@@ -152,6 +151,8 @@ void util_mempool_create(struct util_mempool *pool,
make_empty_list(&pool->list);
+ pipe_mutex_init(pool->mutex);
+
util_mempool_set_thread_safety(pool, threading);
}
@@ -164,6 +165,5 @@ void util_mempool_destroy(struct util_mempool *pool)
FREE(page);
}
- if (pool->threading)
- pipe_mutex_destroy(pool->mutex);
+ pipe_mutex_destroy(pool->mutex);
}
diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c
index 87ee0e4768..77f2c5fc7d 100644
--- a/src/gallium/auxiliary/util/u_network.c
+++ b/src/gallium/auxiliary/util/u_network.c
@@ -6,7 +6,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# include <winsock2.h>
# include <windows.h>
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN)
# include <sys/socket.h>
# include <netinet/in.h>
# include <unistd.h>
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 3ebef9fb74..5f113f742b 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -425,6 +425,53 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
}
}
+/* Integer versions of util_pack_z and util_pack_z_stencil - useful for
+ * constructing clear masks.
+ */
+static INLINE uint
+util_pack_uint_z(enum pipe_format format, unsigned z)
+{
+ /* Returns z masked to the depth width of the format and shifted to the
+ * position of the depth bits; any stencil bits are left zero.
+ */
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return z & 0xffff;
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return z;
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return z & 0xffffff;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return (z & 0xffffff) << 8;
+ case PIPE_FORMAT_S8_USCALED:
+ /* stencil-only format: no depth bits to fill in */
+ return 0;
+ default:
+ debug_print_format("gallium: unhandled format in util_pack_uint_z()", format);
+ assert(0);
+ return 0;
+ }
+}
+
+/* Combine an integer depth value and a stencil value into one packed word
+ * for the given format.  Note that z is declared double but is handed
+ * straight to util_pack_uint_z(), whose parameter is unsigned, so it is
+ * implicitly converted to an integer at that call.
+ */
+static INLINE uint
+util_pack_uint_z_stencil(enum pipe_format format, double z, uint s)
+{
+ unsigned packed = util_pack_uint_z(format, z);
+
+ /* only the low 8 bits of the stencil value are used */
+ s &= 0xff;
+
+ switch (format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ /* stencil occupies the top byte */
+ return packed | (s << 24);
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ /* stencil occupies the low byte */
+ return packed | s;
+ case PIPE_FORMAT_S8_USCALED:
+ return packed | s;
+ default:
+ /* for every other format the stencil value is not packed */
+ return packed;
+ }
+}
+
+
/**
* Note: it's assumed that z is in [0,1]
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index 606b9b5c6b..3c851f7340 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -108,6 +108,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
ok = (*nr >= 4);
*nr -= (*nr % 2);
break;
+ case PIPE_PRIM_LINES_ADJACENCY:
+ /* four vertices per line-with-adjacency primitive */
+ ok = (*nr >= 4);
+ *nr -= (*nr % 4);
+ break;
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ ok = (*nr >= 4);
+ break;
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ /* six vertices per triangle-with-adjacency primitive, so trim to a
+ * multiple of 6 (matching the minimum count checked just below)
+ */
+ ok = (*nr >= 6);
+ *nr -= (*nr % 6);
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ ok = (*nr >= 4);
+ break;
default:
ok = 0;
break;
diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
new file mode 100644
index 0000000000..206e1ec311
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -0,0 +1,105 @@
+/* Originally written by Ben Skeggs for the nv50 driver*/
+#include <pipe/p_defines.h>
+
+struct util_split_prim {
+ void *priv;
+ void (*emit)(void *priv, unsigned start, unsigned count);
+ void (*edge)(void *priv, boolean enabled);
+
+ unsigned mode;
+ unsigned start;
+ unsigned p_start;
+ unsigned p_end;
+
+ uint repeat_first:1;
+ uint close_first:1;
+ uint edgeflag_off:1;
+};
+
+static INLINE void
+util_split_prim_init(struct util_split_prim *s,
+ unsigned mode, unsigned start, unsigned count)
+{
+ if (mode == PIPE_PRIM_LINE_LOOP) {
+ s->mode = PIPE_PRIM_LINE_STRIP;
+ s->close_first = 1;
+ } else {
+ s->mode = mode;
+ s->close_first = 0;
+ }
+ s->start = start;
+ s->p_start = start;
+ s->p_end = start + count;
+ s->edgeflag_off = 0;
+ s->repeat_first = 0;
+}
+
+static INLINE boolean
+util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
+{
+ int repeat = 0;
+
+ if (s->repeat_first) {
+ s->emit(s->priv, s->start, 1);
+ max_verts--;
+ if (s->edgeflag_off) {
+ s->edge(s->priv, TRUE);
+ s->edgeflag_off = FALSE;
+ }
+ }
+
+ if (s->p_start + s->close_first + max_verts >= s->p_end) {
+ s->emit(s->priv, s->p_start, s->p_end - s->p_start);
+ if (s->close_first)
+ s->emit(s->priv, s->start, 1);
+ return TRUE;
+ }
+
+ switch (s->mode) {
+ case PIPE_PRIM_LINES:
+ max_verts &= ~1;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ repeat = 1;
+ break;
+ case PIPE_PRIM_POLYGON:
+ max_verts--;
+ s->emit(s->priv, s->p_start, max_verts);
+ s->edge(s->priv, FALSE);
+ s->emit(s->priv, s->p_start + max_verts, 1);
+ s->p_start += max_verts;
+ s->repeat_first = TRUE;
+ s->edgeflag_off = TRUE;
+ return FALSE;
+ case PIPE_PRIM_TRIANGLES:
+ max_verts = max_verts - (max_verts % 3);
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* to ensure winding stays correct, always split
+ * on an even number of generated triangles
+ */
+ max_verts = max_verts & ~1;
+ repeat = 2;
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ s->repeat_first = TRUE;
+ repeat = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ max_verts &= ~3;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ max_verts &= ~1;
+ repeat = 2;
+ break;
+ case PIPE_PRIM_POINTS:
+ break;
+ default:
+ /* TODO: implement adjacency primitives */
+ assert(0);
+ }
+
+ s->emit (s->priv, s->p_start, max_verts);
+ s->p_start += (max_verts - repeat);
+ return FALSE;
+}
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index e2a8491e62..87959ab0aa 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -41,7 +41,6 @@
#if defined(PIPE_ARCH_SSE)
-#include <xmmintrin.h>
#include <emmintrin.h>
@@ -72,6 +71,35 @@ _mm_castps_si128(__m128 a)
#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+
+#if defined(PIPE_ARCH_SSSE3)
+
+#include <tmmintrin.h>
+
+#else /* !PIPE_ARCH_SSSE3 */
+
+#include <emmintrin.h>
+
+/**
+ * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
+ * where -mssse3 is not supported/enabled.
+ *
+ * MSVC will never get in here as its intrinsics support do not rely on
+ * compiler command line options.
+ */
+static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8(__m128i a, __m128i mask)
+{
+ __m128i result;
+ __asm__("pshufb %1, %0"
+ : "=x" (result)
+ : "xm" (mask), "0" (a));
+ return result;
+}
+
+#endif /* !PIPE_ARCH_SSSE3 */
+
+
#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
new file mode 100644
index 0000000000..607c31f5ee
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -0,0 +1,95 @@
+#include "util/u_staging.h"
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+static void
+util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template)
+{
+ memset(template, 0, sizeof(struct pipe_resource));
+ if(pt->target != PIPE_BUFFER && depth <= 1)
+ template->target = PIPE_TEXTURE_2D;
+ else
+ template->target = pt->target;
+ template->format = pt->format;
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = depth;
+ template->last_level = 0;
+ template->nr_samples = pt->nr_samples;
+ template->bind = 0;
+ template->usage = PIPE_USAGE_STAGING;
+ template->flags = 0;
+}
+
+struct util_staging_transfer *
+util_staging_transfer_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ bool direct)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+ struct util_staging_transfer *tx;
+ struct pipe_resource staging_resource_template;
+
+ tx = CALLOC_STRUCT(util_staging_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->base.resource, pt);
+ tx->base.sr = sr;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ if (direct)
+ {
+ tx->staging_resource = pt;
+ return tx;
+ }
+
+ util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template);
+ tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template);
+ if (!tx->staging_resource)
+ {
+ pipe_resource_reference(&tx->base.resource, NULL);
+ FREE(tx);
+ return NULL;
+ }
+
+ if (usage & PIPE_TRANSFER_READ)
+ {
+ struct pipe_subresource dstsr;
+ unsigned zi;
+ dstsr.face = 0;
+ dstsr.level = 0;
+ for(zi = 0; zi < box->depth; ++zi)
+ pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height);
+ }
+
+ return tx;
+}
+
+void
+util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx;
+
+ if (tx->staging_resource != tx->base.resource)
+ {
+ if(tx->base.usage & PIPE_TRANSFER_WRITE) {
+ struct pipe_subresource srcsr;
+ unsigned zi;
+ srcsr.face = 0;
+ srcsr.level = 0;
+ for(zi = 0; zi < tx->base.box.depth; ++zi)
+ pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height);
+ }
+
+ pipe_resource_reference(&tx->staging_resource, NULL);
+ }
+
+ pipe_resource_reference(&ptx->resource, NULL);
+ FREE(ptx);
+}
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
new file mode 100644
index 0000000000..602faa2971
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -0,0 +1,37 @@
+/* Direct3D 10/11 has no concept of transfers. Applications instead
+ * create resources with a STAGING or DYNAMIC usage, copy between them
+ * and the real resource and use Map to map the STAGING/DYNAMIC resource.
+ *
+ * This util module allows Gallium drivers to be implemented the way a
+ * Direct3D driver would be: transfers allocate a resource with
+ * PIPE_USAGE_STAGING, and copy the data between it and the real resource
+ * with resource_copy_region.
+ */
+
+#ifndef U_STAGING_H
+#define U_STAGING_H
+
+#include "pipe/p_state.h"
+
+struct util_staging_transfer {
+ struct pipe_transfer base; /* first member, so a pipe_transfer pointer can be cast back to this struct */
+
+ /* if direct, same as base.resource; otherwise the temporary staging resource */
+ struct pipe_resource *staging_resource;
+};
+
+/* user must fill in stride, slice_stride and offset (this function does not set them) */
+/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */
+/* staging resource is currently created with PIPE_USAGE_DYNAMIC */
+struct util_staging_transfer * /* NULL if the transfer struct or the staging resource cannot be allocated */
+util_staging_transfer_new(struct pipe_context *pipe,
+ struct pipe_resource *pt, /* the real resource; a reference to it is held by the transfer */
+ struct pipe_subresource sr,
+ unsigned usage, /* with PIPE_TRANSFER_READ, the staging resource is filled from pt before returning */
+ const struct pipe_box *box, /* region of pt; the staging resource is sized from its width, height and depth */
+ bool direct); /* true: use pt itself as the staging resource, with no allocation and no copies */
+
+void
+util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx); /* for non-direct PIPE_TRANSFER_WRITE transfers, copies the staging data back first; always frees ptx */
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
index b5d21570d5..7733ad24d0 100644
--- a/src/gallium/auxiliary/util/u_surfaces.c
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -3,40 +3,22 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
-/* TODO: ouch, util_hash_table should do these by default when passed a null function pointer
- * this indirect function call is quite bad
- */
-static unsigned
-hash(void *key)
-{
- return (unsigned)(uintptr_t)key;
-}
-
-static int
-compare(void *key1, void *key2)
-{
- return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2;
-}
-
struct pipe_surface *
util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
{
struct pipe_surface *ps;
- void *key = NULL;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- if(!us->u.table)
- us->u.table = util_hash_table_create(hash, compare);
- key = (void *)(uintptr_t)(((zslice + face) << 8) | level);
- /* TODO: ouch, should have a get-reference function...
- * also, shouldn't allocate a two-pointer structure for each item... */
- ps = util_hash_table_get(us->u.table, key);
+ { /* or 2D array */
+ if(!us->u.hash)
+ us->u.hash = cso_hash_create();
+
+ ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level));
}
else
{
if(!us->u.array)
- us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
+ us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
ps = us->u.array[level];
}
@@ -54,7 +36,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str
ps->offset = ~0;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- util_hash_table_set(us->u.table, key, ps);
+ cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps);
else
us->u.array[level] = ps;
@@ -66,47 +48,44 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps)
{
struct pipe_resource *pt = ps->texture;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level);
- util_hash_table_remove(us->u.table, key);
+ { /* or 2D array */
+ cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, ((ps->zslice + ps->face) << 8) | ps->level));
}
else
us->u.array[ps->level] = 0;
}
-static enum pipe_error
-util_surfaces_destroy_callback(void *key, void *value, void *data)
-{
- void (*destroy_surface) (struct pipe_surface * ps) = data;
- destroy_surface((struct pipe_surface *)value);
- return PIPE_OK;
-}
-
void
util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *)) /* calls destroy_surface on every cached surface, then frees the container */
{
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- if(us->u.table)
+ { /* or 2D array */
+ if(us->u.hash)
{
- util_hash_table_foreach(us->u.table, util_surfaces_destroy_callback, destroy_surface);
- util_hash_table_destroy(us->u.table);
- us->u.table = NULL;
+ struct cso_hash_iter iter;
+ iter = cso_hash_first_node(us->u.hash);
+ while (!cso_hash_iter_is_null(iter)) { /* walk every entry stored in the hash */
+ destroy_surface(cso_hash_iter_data(iter));
+ iter = cso_hash_iter_next(iter);
+ }
+
+ cso_hash_delete(us->u.hash);
+ us->u.hash = NULL;
}
}
else
{
if(us->u.array)
{
- unsigned i;
- for(i = 0; i < pt->last_level; ++i)
- {
- struct pipe_surface *ps = us->u.array[i];
- if(ps)
- destroy_surface(ps);
- }
- FREE(us->u.array);
- us->u.array = NULL;
+ unsigned i;
+ for(i = 0; i <= pt->last_level; ++i) /* inclusive bound: the array is allocated with last_level + 1 slots */
+ {
+ struct pipe_surface *ps = us->u.array[i];
+ if(ps)
+ destroy_surface(ps);
+ }
+ FREE(us->u.array);
+ us->u.array = NULL;
}
}
}
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index 0195bf5afb..af978c7057 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -4,15 +4,15 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "util/u_atomic.h"
-
-struct util_hash_table;
+#include "cso_cache/cso_hash.h"
struct util_surfaces
{
union
{
- struct util_hash_table *table;
+ struct cso_hash *hash; /* used when the resource target is PIPE_TEXTURE_3D or PIPE_TEXTURE_CUBE */
struct pipe_surface **array; /* used for all other targets; indexed by level */
+ void* pv; /* untyped alias of the members above, for testing whether either has been allocated */
} u;
};
@@ -35,6 +35,18 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct
return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags);
}
+static INLINE struct pipe_surface *
+util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice) /* looks up an already-cached surface; never creates one */
+{
+ if(!us->u.pv) /* pv aliases hash and array in the union: null means nothing has been allocated yet */
+ return 0;
+
+ if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE))
+ return cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); /* same key layout as the insert in util_surfaces_do_get */
+ else
+ return us->u.array[level]; /* array is indexed directly by level */
+}
+
void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
static INLINE void