diff options
Diffstat (limited to 'src/gallium/auxiliary')
72 files changed, 2265 insertions, 1248 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index dcebab7c0f..9544e90a96 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -131,6 +131,7 @@ C_SOURCES = \ util/u_sampler.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ + util/u_staging.c \ util/u_surface.c \ util/u_surfaces.c \ util/u_texture.c \ @@ -149,6 +150,7 @@ C_SOURCES = \ GALLIVM_SOURCES = \ gallivm/lp_bld_arit.c \ + gallivm/lp_bld_assert.c \ gallivm/lp_bld_const.c \ gallivm/lp_bld_conv.c \ gallivm/lp_bld_debug.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 72a16617db..3124e20ce8 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -34,14 +34,14 @@ env.CodeGenerate( target = 'util/u_format_table.c', script = '#src/gallium/auxiliary/util/u_format_table.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) env.CodeGenerate( target = 'util/u_half.c', script = 'util/u_half.py', source = [], - command = 'python $SCRIPT > $TARGET' + command = python_cmd + ' $SCRIPT > $TARGET' ) env.Depends('util/u_format_table.c', [ @@ -180,6 +180,7 @@ source = [ 'util/u_sampler.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', + 'util/u_staging.c', 'util/u_surface.c', 'util/u_surfaces.c', 'util/u_texture.c', @@ -198,6 +199,7 @@ source = [ if env['llvm']: source += [ 'gallivm/lp_bld_arit.c', + 'gallivm/lp_bld_assert.c', 'gallivm/lp_bld_const.c', 'gallivm/lp_bld_conv.c', 'gallivm/lp_bld_debug.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c127f74188..995b675b9a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -288,12 +288,19 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, shader_type == PIPE_SHADER_GEOMETRY); debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS); - if (shader_type == PIPE_SHADER_VERTEX) { + switch (shader_type) { + case PIPE_SHADER_VERTEX: draw->pt.user.vs_constants[slot] = buffer; + draw->pt.user.vs_constants_size[slot] = size; draw_vs_set_constants(draw, slot, buffer, size); - } else if (shader_type == PIPE_SHADER_GEOMETRY) { + break; + case PIPE_SHADER_GEOMETRY: draw->pt.user.gs_constants[slot] = buffer; + draw->pt.user.gs_constants_size[slot] = size; draw_gs_set_constants(draw, slot, buffer, size); + break; + default: + assert(0 && "invalid shader type in draw_set_mapped_constant_buffer"); } } diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h new file mode 100644 index 0000000000..a52d2b5058 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h @@ -0,0 +1,425 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Chia-I Wu <olv@lunarg.com> + */ + +/* these macros are optional */ +#ifndef LOCAL_VARS +#define LOCAL_VARS +#endif +#ifndef FUNC_ENTER +#define FUNC_ENTER do {} while (0) +#endif +#ifndef FUNC_EXIT +#define FUNC_EXIT do {} while (0) +#endif +#ifndef LINE_ADJ +#define LINE_ADJ(flags, a0, i0, i1, a1) LINE(flags, i0, i1) +#endif +#ifndef TRIANGLE_ADJ +#define TRIANGLE_ADJ(flags, i0, a0, i1, a1, i2, a2) TRIANGLE(flags, i0, i1, i2) +#endif + +static void +FUNC(FUNC_VARS) +{ + unsigned idx[6], i; + ushort flags; + LOCAL_VARS + + FUNC_ENTER; + + /* prim, count, and last_vertex_last should have been defined */ + if (0) { + debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n", + __FUNCTION__, prim, count, last_vertex_last); + } + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) { + idx[0] = GET_ELT(i); + POINT(idx[0]); + } + break; + + case PIPE_PRIM_LINES: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 0; i + 1 < count; i += 2) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + LINE(flags, idx[0], idx[1]); + } + break; + + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + idx[1] = GET_ELT(0); + idx[2] = idx[1]; + + for (i = 1; i < count; i++, flags = 0) { + idx[0] = idx[1]; + idx[1] = GET_ELT(i); + LINE(flags, idx[0], idx[1]); + } + /* close the loop */ + if (prim == PIPE_PRIM_LINE_LOOP) + LINE(flags, idx[1], idx[2]); + } + break; + + case PIPE_PRIM_TRIANGLES: + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i + 2 < count; i += 3) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (count >= 3) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[1] = GET_ELT(0); + idx[2] = GET_ELT(1); + + if (last_vertex_last) { + for (i = 0; i + 2 < count; i++) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[2] last */ + if (i & 1) + TRIANGLE(flags, idx[1], idx[0], idx[2]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + else { + for (i = 0; i + 2 < count; i++) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[0] first */ + if (i & 1) + TRIANGLE(flags, idx[0], idx[2], idx[1]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[0] = GET_ELT(0); + idx[2] = GET_ELT(1); + + /* idx[0] is neither the first nor the last vertex */ + if (last_vertex_last) { + for (i = 0; i + 2 < count; i++) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[2] last */ + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + else { + for (i = 0; i + 2 < count; i++) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[1] first */ + TRIANGLE(flags, idx[1], idx[2], idx[0]); + } + } + } + break; + + case PIPE_PRIM_QUADS: + if (last_vertex_last) { + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2; + /* always emit idx[3] last */ + TRIANGLE(flags, idx[0], idx[1], idx[3]); + + flags = DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + TRIANGLE(flags, idx[1], idx[2], idx[3]); + } + } + else { + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + /* XXX should always emit idx[0] first */ + /* always emit idx[3] first */ + TRIANGLE(flags, idx[3], idx[0], idx[1]); + + flags = DRAW_PIPE_EDGE_FLAG_1 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[3], idx[1], idx[2]); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (count >= 4) { + idx[2] = GET_ELT(0); + idx[3] = GET_ELT(1); + + if (last_vertex_last) { + for (i = 0; i + 3 < count; i += 2) { + idx[0] = idx[2]; + idx[1] = idx[3]; + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + /* always emit idx[3] last */ + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[2], idx[0], idx[3]); + + flags = DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + TRIANGLE(flags, idx[0], idx[1], idx[3]); + } + } + else { + for (i = 0; i + 3 < count; i += 2) { + idx[0] = idx[2]; + idx[1] = idx[3]; + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + /* XXX should always emit idx[0] first */ + /* always emit idx[3] first */ + TRIANGLE(flags, idx[3], idx[2], idx[0]); + + flags = DRAW_PIPE_EDGE_FLAG_1 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[3], idx[0], idx[1]); + } + } + } + break; + + case PIPE_PRIM_POLYGON: + if (count >= 3) { + ushort edge_next, edge_finish; + + if (last_vertex_last) { + flags = (DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_2 | + DRAW_PIPE_EDGE_FLAG_0); + edge_next = DRAW_PIPE_EDGE_FLAG_0; + edge_finish = DRAW_PIPE_EDGE_FLAG_1; + } + else { + flags = (DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1); + edge_next = DRAW_PIPE_EDGE_FLAG_1; + edge_finish = DRAW_PIPE_EDGE_FLAG_2; + } + + idx[0] = GET_ELT(0); + idx[2] = GET_ELT(1); + + for (i = 0; i + 2 < count; i++, flags = edge_next) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + + if (i + 3 == count) + flags |= edge_finish; + + /* idx[0] is both the first and the last vertex */ + if (last_vertex_last) + TRIANGLE(flags, idx[1], idx[2], idx[0]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + break; + + case PIPE_PRIM_LINES_ADJACENCY: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]); + } + break; + + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + if (count >= 4) { + flags = DRAW_PIPE_RESET_STIPPLE; + idx[1] = GET_ELT(0); + idx[2] = GET_ELT(1); + idx[3] = GET_ELT(2); + + for (i = 1; i + 2 < count; i++, flags = 0) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = idx[3]; + idx[3] = GET_ELT(i + 2); + LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]); + } + } + break; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i + 5 < count; i += 6) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + idx[4] = GET_ELT(i + 4); + idx[5] = GET_ELT(i + 5); + TRIANGLE_ADJ(flags, idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + if (count >= 6) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[0] = GET_ELT(1); + idx[2] = GET_ELT(0); + idx[4] = GET_ELT(2); + idx[3] = GET_ELT(4); + + /* + * The vertices of the i-th triangle are stored in + * idx[0,2,4] = { 2*i, 2*i+2, 2*i+4 }; + * + * The adjacent vertices are stored in + * idx[1,3,5] = { 2*i-2, 2*i+6, 2*i+3 }. + * + * However, there are two exceptions: + * + * For the first triangle, idx[1] = 1; + * For the last triangle, idx[3] = 2*i+5. + */ + if (last_vertex_last) { + for (i = 0; i + 5 < count; i += 2) { + idx[1] = idx[0]; + + idx[0] = idx[2]; + idx[2] = idx[4]; + idx[4] = idx[3]; + + idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5)); + idx[5] = GET_ELT(i + 3); + + /* + * alternate the first two vertices (idx[0] and idx[2]) and the + * corresponding adjacent vertices (idx[3] and idx[5]) to have + * the correct orientation + */ + if (i & 2) { + TRIANGLE_ADJ(flags, + idx[2], idx[1], idx[0], idx[5], idx[4], idx[3]); + } + else { + TRIANGLE_ADJ(flags, + idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + } + } + else { + for (i = 0; i + 5 < count; i += 2) { + idx[1] = idx[0]; + + idx[0] = idx[2]; + idx[2] = idx[4]; + idx[4] = idx[3]; + + idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5)); + idx[5] = GET_ELT(i + 3); + + /* + * alternate the last two vertices (idx[2] and idx[4]) and the + * corresponding adjacent vertices (idx[1] and idx[5]) to have + * the correct orientation + */ + if (i & 2) { + TRIANGLE_ADJ(flags, + idx[0], idx[5], idx[4], idx[3], idx[2], idx[1]); + } + else { + TRIANGLE_ADJ(flags, + idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + } + } + } + break; + + default: + assert(0); + break; + } + + FUNC_EXIT; +} + +#undef LOCAL_VARS +#undef FUNC_ENTER +#undef FUNC_EXIT +#undef LINE_ADJ +#undef TRIANGLE_ADJ + +#undef FUNC +#undef FUNC_VARS +#undef GET_ELT +#undef POINT +#undef LINE +#undef TRIANGLE diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 79a57a67f3..4a1013e79a 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMWare Inc. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -75,7 +75,10 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { - /* noop */ + /* noop. added here for symmetry with the VS + * code and in case we'll ever want to allign + * the constants, e.g. when we'll change to a + * different interpreter */ } @@ -370,32 +373,23 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, gs_flush(shader, 1); } -#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,i0,i1,i2) -#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) -#define LINE(gs,i0,i1) gs_line(gs,i0,i1) -#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) -#define POINT(gs,i0) gs_point(gs,i0) -#define FUNC gs_run -#define LOCAL_VARS +#define FUNC gs_run +#define GET_ELT(idx) (idx) #include "draw_gs_tmp.h" -#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,elts[i0],elts[i1],elts[i2]) -#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) \ - gs_tri_adj(gs,elts[i0],elts[i1],elts[i2],elts[i3], \ - elts[i4],elts[i5]) -#define LINE(gs,i0,i1) gs_line(gs,elts[i0],elts[i1]) -#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,elts[i0], \ - elts[i1], \ - elts[i2],elts[i3]) -#define POINT(gs,i0) gs_point(gs,elts[i0]) -#define FUNC gs_run_elts -#define LOCAL_VARS \ - const ushort *elts = input_prims->elts; +#define FUNC gs_run_elts +#define LOCAL_VARS const ushort *elts = input_prims->elts; +#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK) #include "draw_gs_tmp.h" + +/** + * Execute geometry shader using TGSI interpreter. + */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, @@ -405,7 +399,6 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned input_stride = input_verts->vertex_size; unsigned vertex_size = input_verts->vertex_size; struct tgsi_exec_machine *machine = shader->machine; - unsigned int i; unsigned num_input_verts = input_prim->linear ? input_verts->count : input_prim->count; @@ -447,9 +440,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, } shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, constants_size); if (input_prim->linear) gs_run(shader, input_prim, input_verts, diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 2cb634818c..67bc1aa73f 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMWare Inc. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -73,6 +73,7 @@ struct draw_geometry_shader { */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h index 7a8683cf7c..4a17af0dea 100644 --- a/src/gallium/auxiliary/draw/draw_gs_tmp.h +++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h @@ -1,152 +1,34 @@ - -static void FUNC( struct draw_geometry_shader *shader, - const struct draw_prim_info *input_prims, - const struct draw_vertex_info *input_verts, - struct draw_prim_info *output_prims, - struct draw_vertex_info *output_verts) -{ - struct draw_context *draw = shader->draw; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i, j; - unsigned count = input_prims->count; - LOCAL_VARS - - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - debug_assert(input_prims->primitive_count == 1); - - switch (input_prims->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) { - POINT( shader, i + 0 ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( shader , i + 0 , i + 1 ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - - for (i = 1; i < count; i++) { - LINE( shader, i - 1, i ); - } - - LINE( shader, i - 1, 0 ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - LINE( shader, i - 1, i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( shader, i + 0, i + 1, i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 0, - i + 1 + (i&1), - i + 2 - (i&1) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 0 + (i&1), - i + 1 - (i&1), - i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 1, - i + 2, - 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - 0, - i + 1, - i + 2 ); - } - } - } - break; - - case PIPE_PRIM_POLYGON: - { - for (i = 0; i+2 < count; i++) { - - if (flatfirst) { - TRIANGLE( shader, 0, i + 1, i + 2 ); - } - else { - TRIANGLE( shader, i + 1, i + 2, 0 ); - } - } - } - break; - - case PIPE_PRIM_LINES_ADJACENCY: - for (i = 0; i+3 < count; i += 4) { - LINE_ADJ( shader , i + 0 , i + 1, i + 2, i + 3 ); - } - break; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - for (i = 1; i + 2 < count; i++) { - LINE_ADJ( shader, i - 1, i, i + 1, i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLES_ADJACENCY: - for (i = 0; i+5 < count; i += 5) { - TRI_ADJ( shader, i + 0, i + 1, i + 2, - i + 3, i + 4, i + 5); - } - break; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - for (i = 0, j = 0; i+5 < count; i += 2, ++j) { - TRI_ADJ( shader, - i + 0, - i + 1 + 2*(j&1), - i + 2 + 2*(j&1), - i + 3 - 2*(j&1), - i + 4 - 2*(j&1), - i + 5); - } - break; - - default: - debug_assert(!"Unsupported primitive in geometry shader"); - break; - } -} - - -#undef TRIANGLE -#undef TRI_ADJ -#undef POINT -#undef LINE -#undef LINE_ADJ -#undef FUNC -#undef LOCAL_VARS +#define FUNC_VARS struct draw_geometry_shader *gs, \ + const struct draw_prim_info *input_prims, \ + const struct draw_vertex_info *input_verts, \ + struct draw_prim_info *output_prims, \ + struct draw_vertex_info *output_verts + +#define FUNC_ENTER \ + /* declare more local vars */ \ + struct draw_context *draw = gs->draw; \ + const unsigned prim = input_prims->prim; \ + const unsigned count = input_prims->count; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + do { \ + debug_assert(input_prims->primitive_count == 1); \ + switch (prim) { \ + case PIPE_PRIM_QUADS: \ + case PIPE_PRIM_QUAD_STRIP: \ + case PIPE_PRIM_POLYGON: \ + debug_assert(!"unexpected primitive type in GS"); \ + return; \ + default: \ + break; \ + } \ + } while (0) \ + +#define POINT(i0) gs_point(gs,i0) +#define LINE(flags,i0,i1) gs_line(gs,i0,i1) +#define TRIANGLE(flags,i0,i1,i2) gs_tri(gs,i0,i1,i2) +#define LINE_ADJ(flags,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) +#define TRIANGLE_ADJ(flags,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) + +#include "draw_decompose_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 19f96c37ab..8d53601d19 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -37,6 +37,8 @@ #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_init.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" @@ -681,7 +683,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; @@ -730,7 +731,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld, builder, lp_type_int(32)); end = lp_build_add(&bld, start, count); @@ -793,6 +794,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) sampler->destroy(sampler); +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -820,6 +826,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function); } @@ -837,9 +844,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; - struct lp_build_context bld_int; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef fetch_max; @@ -891,8 +896,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); - lp_build_context_init(&bld_int, builder, lp_type_int(32)); + lp_build_context_init(&bld, builder, lp_type_int(32)); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); @@ -927,7 +931,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian /* make sure we're not out of bounds which can happen * if fetch_count % 4 != 0, because on the last iteration * a few of the 4 vertex fetches will be out of bounds */ - true_index = lp_build_min(&bld_int, true_index, fetch_max); + true_index = lp_build_min(&bld, true_index, fetch_max); fetch_ptr = LLVMBuildGEP(builder, fetch_elts, &true_index, 1, ""); @@ -963,6 +967,11 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian sampler->destroy(sampler); +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -990,6 +999,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function_elts); } void diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 8cd75ecf9a..58995e0724 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -169,77 +169,50 @@ static void do_triangle( struct draw_context *draw, /* * Set up macros for draw_pt_decompose.h template code. * This code uses vertex indexes / elements. + * + * Flags are needed by the stipple and unfilled stages. When the two stages + * are active, vcache_run_extras is called and the flags are stored in the + * higher bits of i0. Otherwise, flags do not matter. */ -/* emit first quad vertex as first vertex in triangles */ -#define QUAD_FIRST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_1 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) - -/* emit last quad vertex as last vertex in triangles */ -#define QUAD_LAST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) - -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - elts[i0], /* flags */ \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) ); - -#define LINE(flags,i0,i1) \ - do_line( draw, \ - elts[i0], \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) ); +#define TRIANGLE(flags,i0,i1,i2) \ + do { \ + assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ + assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \ + do_triangle( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1), \ + verts + stride * (i2) ); \ + } while (0) + +#define LINE(flags,i0,i1) \ + do { \ + assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ + do_line( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1) ); \ + } while (0) #define POINT(i0) \ - do_point( draw, \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) + do { \ + assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \ + do_point( draw, verts + stride * (i0) ); \ + } while (0) + +#define GET_ELT(idx) (elts[idx]) -#define FUNC pipe_run -#define ARGS \ +#define FUNC pipe_run_elts +#define FUNC_VARS \ struct draw_context *draw, \ unsigned prim, \ struct vertex_header *vertices, \ unsigned stride, \ - const ushort *elts - -#define LOCAL_VARS \ - char *verts = (char *)vertices; \ - boolean flatfirst = (draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ - unsigned i; \ - ushort flags - -#define FLUSH + const ushort *elts, \ + unsigned count #include "draw_pt_decompose.h" -#undef ARGS -#undef LOCAL_VARS @@ -269,14 +242,29 @@ void draw_pipeline_run( struct draw_context *draw, i < prim_info->primitive_count; start += prim_info->primitive_lengths[i], i++) { - unsigned count = prim_info->primitive_lengths[i]; - - pipe_run(draw, - prim_info->prim, - vert_info->verts, - vert_info->stride, - prim_info->elts + start, - count); + const unsigned count = prim_info->primitive_lengths[i]; + +#if DEBUG + /* make sure none of the element indexes go outside the vertex buffer */ + { + unsigned max_index = 0x0, i; + /* find the largest element index */ + for (i = 0; i < count; i++) { + unsigned int index = (prim_info->elts[start + i] + & ~DRAW_PIPE_FLAG_MASK); + if (index > max_index) + max_index = index; + } + assert(max_index <= vert_info->count); + } +#endif + + pipe_run_elts(draw, + prim_info->prim, + vert_info->verts, + vert_info->stride, + prim_info->elts + start, + count); } draw->pipeline.verts = NULL; @@ -289,70 +277,30 @@ void draw_pipeline_run( struct draw_context *draw, * This code is for non-indexed (aka linear) rendering (no elts). */ -/* emit first quad vertex as first vertex in triangles */ -#define QUAD_FIRST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_1 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) - -/* emit last quad vertex as last vertex in triangles */ -#define QUAD_LAST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) - -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - flags, /* flags */ \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) - -#define LINE(flags,i0,i1) \ - do_line( draw, \ - flags, \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) +#define TRIANGLE(flags,i0,i1,i2) \ + do_triangle( draw, flags, \ + verts + stride * (i0), \ + verts + stride * (i1), \ + verts + stride * (i2) ) -#define POINT(i0) \ - do_point( draw, \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) +#define LINE(flags,i0,i1) \ + do_line( draw, flags, \ + verts + stride * (i0), \ + verts + stride * (i1) ) -#define FUNC pipe_run_linear -#define ARGS \ - struct draw_context *draw, \ - unsigned prim, \ - struct vertex_header *vertices, \ - unsigned stride +#define POINT(i0) \ + do_point( draw, verts + stride * (i0) ) -#define LOCAL_VARS \ - char *verts = (char *)vertices; \ - boolean flatfirst = (draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ - unsigned i; \ - ushort flags -#define FLUSH +#define GET_ELT(idx) (idx) + +#define FUNC pipe_run_linear +#define FUNC_VARS \ + struct draw_context *draw, \ + unsigned prim, \ + struct vertex_header *vertices, \ + unsigned stride, \ + unsigned count #include "draw_pt_decompose.h" @@ -378,6 +326,8 @@ void draw_pipeline_run_linear( struct draw_context *draw, draw->pipeline.vertex_stride = vert_info->stride; draw->pipeline.vertex_count = count; + assert(count <= vert_info->count); + pipe_run_linear(draw, prim_info->prim, (struct vertex_header*)verts, diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 1cf6ee7a7f..8a3d499feb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -68,8 +68,7 @@ struct clip_stage { }; -/* This is a bit confusing: - */ +/** Cast wrapper */ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { return (struct clip_stage *)stage; @@ -81,18 +80,22 @@ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) /* All attributes are float[4], so this is easy: */ -static void interp_attr( float *fdst, +static void interp_attr( float dst[4], float t, - const float *fin, - const float *fout ) + const float in[4], + const float out[4] ) { - fdst[0] = LINTERP( t, fout[0], fin[0] ); - fdst[1] = LINTERP( t, fout[1], fin[1] ); - fdst[2] = LINTERP( t, fout[2], fin[2] ); - fdst[3] = LINTERP( t, fout[3], fin[3] ); + dst[0] = LINTERP( t, out[0], in[0] ); + dst[1] = LINTERP( t, out[1], in[1] ); + dst[2] = LINTERP( t, out[2], in[2] ); + dst[3] = LINTERP( t, out[3], in[3] ); } +/** + * Copy front/back, primary/secondary colors from src vertex to dst vertex. + * Used when flat shading. + */ static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) @@ -121,20 +124,17 @@ static void interp( const struct clip_stage *clip, /* Vertex header. */ - { - dst->clipmask = 0; - dst->edgeflag = 0; /* will get overwritten later */ - dst->pad = 0; - dst->vertex_id = UNDEFINED_VERTEX_ID; - } + dst->clipmask = 0; + dst->edgeflag = 0; /* will get overwritten later */ + dst->pad = 0; + dst->vertex_id = UNDEFINED_VERTEX_ID; - /* Clip coordinates: interpolate normally + /* Interpolate the clip-space coords. */ - { - interp_attr(dst->clip, t, in->clip, out->clip); - } + interp_attr(dst->clip, t, in->clip, out->clip); - /* Do the projective divide and insert window coordinates: + /* Do the projective divide and viewport transformation to get + * new window coordinates: */ { const float *pos = dst->clip; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 058aeedc17..397d4bf653 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -163,9 +163,11 @@ struct draw_context /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; - /** constant buffer (for vertex/geometry shader) */ + /** constant buffers (for vertex/geometry shader) */ const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -198,6 +200,7 @@ struct draw_context struct pipe_viewport_state viewport; boolean identity_viewport; + /** Vertex shader state */ struct { struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ @@ -227,6 +230,7 @@ struct draw_context struct translate_cache *emit_cache; } vs; + /** Geometry shader state */ struct { struct draw_geometry_shader *geometry_shader; uint num_gs_outputs; /**< convenience, from geometry_shader */ @@ -239,6 +243,7 @@ struct draw_context struct tgsi_sampler **samplers; } gs; + /** Stream output (vertex feedback) state */ struct { struct pipe_stream_output_state state; void *buffers[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 92d4113b4c..248927505d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -259,6 +259,12 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; + + if (draw->pt.vertex_element[j].instance_divisor) { + ii = draw->instance_id / draw->pt.vertex_element[j].instance_divisor; + } + + ptr += draw->pt.vertex_buffer[buf].buffer_offset; ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; @@ -341,19 +347,22 @@ draw_arrays_instanced(struct draw_context *draw, unsigned reduced_prim = u_reduced_prim(mode); unsigned instance; + assert(instanceCount > 0); + if (reduced_prim != draw->reduced_prim) { draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, mode, start, MIN2(count, 20)); - - if (0) { - unsigned int i; debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", mode, start, count); + + if (0) tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); + + if (0) { + unsigned int i; debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n", @@ -374,6 +383,9 @@ draw_arrays_instanced(struct draw_context *draw, } } + if (0) + draw_print_arrays(draw, mode, start, MIN2(count, 20)); + for (instance = 0; instance < instanceCount; instance++) { draw->instance_id = instance + startInstance; draw_pt_arrays(draw, mode, start, count); diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index 52f9593d46..3127aad731 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -1,194 +1,7 @@ +#define LOCAL_VARS \ + char *verts = (char *) vertices; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); - -static void FUNC( ARGS, - unsigned count ) -{ - LOCAL_VARS; - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - POINT( (i + 0) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( DRAW_PIPE_RESET_STIPPLE, - (i + 0), - (i + 1)); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - - for (i = 1; i < count; i++, flags = 0) { - LINE( flags, - (i - 1), - (i )); - } - - LINE( flags, - (i - 1), - (0 )); - } - break; - - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE( flags, - (i - 1), - (i )); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0), - (i + 1), - (i + 2 )); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - /* Emit first triangle vertex as first triangle vertex */ - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0), - (i + 1 + (i&1)), - (i + 2 - (i&1)) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - /* Emit last triangle vertex as last triangle vertex */ - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0 + (i&1)), - (i + 1 - (i&1)), - (i + 2 )); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 1), - (i + 2), - 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (0), - (i + 1), - (i + 2 )); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - /* GL quads don't follow provoking vertex convention */ - if (flatfirst) { - for (i = 0; i+3 < count; i += 4) { - /* emit last quad vertex as first triangle vertex */ - QUAD_FIRST_PV( (i + 3), - (i + 0), - (i + 1), - (i + 2) ); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - /* emit last quad vertex as last triangle vertex */ - QUAD_LAST_PV( (i + 0), - (i + 1), - (i + 2), - (i + 3) ); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - /* GL quad strips don't follow provoking vertex convention */ - if (flatfirst) { - for (i = 0; i+3 < count; i += 2) { - /* emit last quad vertex as first triangle vertex */ - QUAD_FIRST_PV( (i + 3), - (i + 2), - (i + 0), - (i + 1) ); - - } - } - else { - for (i = 0; i+3 < count; i += 2) { - /* emit last quad vertex as last triangle vertex */ - QUAD_LAST_PV( (i + 2), - (i + 0), - (i + 1), - (i + 3) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* GL polygons don't follow provoking vertex convention */ - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; - const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - - for (i = 0; i+2 < count; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - if (flatfirst) { - /* emit first polygon vertex as first triangle vertex */ - TRIANGLE( flags, - (0), - (i + 1), - (i + 2) ); - } - else { - /* emit first polygon vertex as last triangle vertex */ - TRIANGLE( flags, - (i + 1), - (i + 2), - (0)); - } - } - } - break; - - default: - assert(0); - break; - } - - FLUSH; -} - - -#undef TRIANGLE -#undef QUAD_FIRST_PV -#undef QUAD_LAST_PV -#undef POINT -#undef LINE -#undef FUNC +#include "draw_decompose_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 0229bcc7fe..5568fbb9f8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -182,6 +182,7 @@ void draw_pt_emit( struct pt_emit *emit, 0, ~0); + /* fetch/translate vertex attribs to fill hw_verts[] */ translate->run( translate, 0, vertex_count, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 121dfc414a..5b16c3788e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -176,6 +176,7 @@ static void emit(struct pt_emit *emit, static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, struct draw_vertex_info *output_verts ) { @@ -190,6 +191,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, (const float (*)[4])input_verts->verts->data, ( float (*)[4])output_verts->verts->data, constants, + const_size, input_verts->count, input_verts->vertex_size, input_verts->vertex_size); @@ -236,6 +238,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if (fpme->opt & PT_SHADE) { draw_vertex_shader_run(vshader, draw->pt.user.vs_constants, + draw->pt.user.vs_constants_size, vert_info, &vs_vert_info); @@ -246,6 +249,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if ((fpme->opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index bc074df8c2..4b99bee86a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2010 VMWare, Inc. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -254,6 +254,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, if ((opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index 5d82934889..f7f4f24d35 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -218,25 +218,15 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) } -#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2) -#define LINE(gs,i0,i1) so_line(so,i0,i1) -#define POINT(gs,i0) so_point(so,i0) -#define FUNC so_run_linear -#define LOCAL_VARS +#define FUNC so_run_linear +#define GET_ELT(idx) (start + (idx)) #include "draw_so_emit_tmp.h" -#undef LOCAL_VARS -#undef FUNC -#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2]) -#define LINE(gs,i0,i1) so_line(gs,elts[i0],elts[i1]) -#define POINT(gs,i0) so_point(gs,elts[i0]) -#define FUNC so_run_elts -#define LOCAL_VARS \ - const ushort *elts = input_prims->elts; +#define FUNC so_run_elts +#define LOCAL_VARS const ushort *elts = input_prims->elts; +#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK) #include "draw_so_emit_tmp.h" -#undef LOCAL_VARS -#undef FUNC void draw_pt_so_emit( struct pt_so_emit *emit, diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 3236d38e6a..182a597cca 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -53,7 +53,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_LINES_ADJACENCY: *first = 4; - *incr = 2; + *incr = 4; break; case PIPE_PRIM_LINE_STRIP_ADJACENCY: *first = 4; @@ -65,7 +65,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_TRIANGLES_ADJACENCY: *first = 6; - *incr = 3; + *incr = 6; break; case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_TRIANGLE_FAN: @@ -75,7 +75,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: *first = 6; - *incr = 1; + *incr = 2; break; case PIPE_PRIM_QUADS: *first = 4; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index a292346be9..55e43b2a71 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -1,6 +1,11 @@ static unsigned trim( unsigned count, unsigned first, unsigned incr ) { - return count - (count - first) % incr; + /* + * count either has been trimmed in draw_pt_arrays or is set to + * (driver)_fetch_max which is hopefully always larger than first. + */ + assert(count >= first); + return count - (count - first) % incr; } static void FUNC(struct draw_pt_front_end *frontend, diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 8ef94c3163..a848b54f7d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -95,7 +95,7 @@ static INLINE void vcache_check_flush( struct vcache_frontend *vcache ) { if (vcache->draw_count + 6 >= DRAW_MAX || - vcache->fetch_count + 4 >= FETCH_MAX) { + vcache->fetch_count + 6 >= FETCH_MAX) { vcache_flush( vcache ); } } @@ -180,59 +180,61 @@ vcache_point( struct vcache_frontend *vcache, } -static INLINE void -vcache_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_line_adj_flags( struct vcache_frontend *vcache, + unsigned flags, + unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) { - if (vcache->draw->rasterizer->flatshade_first) { - /* pass last quad vertex as first triangle vertex */ - vcache_triangle( vcache, i3, i0, i1 ); - vcache_triangle( vcache, i3, i1, i2 ); - } - else { - /* pass last quad vertex as last triangle vertex */ - vcache_triangle( vcache, i0, i1, i3 ); - vcache_triangle( vcache, i1, i2, i3 ); - } + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_check_flush(vcache); } -static INLINE void -vcache_ef_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_line_adj( struct vcache_frontend *vcache, + unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) { - if (vcache->draw->rasterizer->flatshade_first) { - /* pass last quad vertex as first triangle vertex */ - vcache_triangle_flags( vcache, - ( DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1 ), - i3, i0, i1 ); - - vcache_triangle_flags( vcache, - ( DRAW_PIPE_EDGE_FLAG_1 | - DRAW_PIPE_EDGE_FLAG_2 ), - i3, i1, i2 ); - } - else { - /* pass last quad vertex as last triangle vertex */ - vcache_triangle_flags( vcache, - ( DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_2 ), - i0, i1, i3 ); - - vcache_triangle_flags( vcache, - ( DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1 ), - i1, i2, i3 ); - } + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_triangle_adj_flags( struct vcache_frontend *vcache, + unsigned flags, + unsigned i0, unsigned a0, + unsigned i1, unsigned a1, + unsigned i2, unsigned a2 ) +{ + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_elt(vcache, i2, 0); + vcache_elt(vcache, a2, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_triangle_adj( struct vcache_frontend *vcache, + unsigned i0, unsigned a0, + unsigned i1, unsigned a1, + unsigned i2, unsigned a2 ) +{ + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_elt(vcache, i2, 0); + vcache_elt(vcache, a2, 0); + vcache_check_flush(vcache); } @@ -240,17 +242,23 @@ vcache_ef_quad( struct vcache_frontend *vcache, * this. The two paths aren't too different though - it may be * possible to reunify them. */ -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) +#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2) +#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1) +#define POINT(i0) vcache_point(vcache,i0) +#define LINE_ADJ(flags,a0,i0,i1,a1) \ + vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1) +#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ + vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2) #define FUNC vcache_run_extras #include "draw_pt_vcache_tmp.h" -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) +#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2) +#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1) +#define POINT(i0) vcache_point(vcache,i0) +#define LINE_ADJ(flags,a0,i0,i1,a1) \ + vcache_line_adj(vcache,a0,i0,i1,a1) +#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ + vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2) #define FUNC vcache_run #include "draw_pt_vcache_tmp.h" @@ -339,6 +347,25 @@ format_from_get_elt( pt_elt_func get_elt ) #endif +/** + * Check if any vertex attributes use instance divisors. + * Note that instance divisors complicate vertex fetching so we need + * to take the vcache path when they're in use. + */ +static boolean +any_instance_divisors(const struct draw_context *draw) +{ + uint i; + + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + uint div = draw->pt.vertex_element[i].instance_divisor; + if (div) + return TRUE; + } + return FALSE; +} + + static INLINE void vcache_check_run( struct draw_pt_front_end *frontend, pt_elt_func get_elt, @@ -382,6 +409,9 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES) goto fail; + if (any_instance_divisors(draw)) + goto fail; + fetch_count = max_index + 1 - min_index; if (0) @@ -518,7 +548,18 @@ vcache_prepare( struct draw_pt_front_end *frontend, * which is a separate issue. */ vcache->input_prim = in_prim; - vcache->output_prim = u_reduced_prim(in_prim); + switch (in_prim) { + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY; + break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY; + break; + default: + vcache->output_prim = u_reduced_prim(in_prim); + } vcache->middle = middle; vcache->opt = opt; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index dac68ad439..1a3748d5f0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -1,198 +1,19 @@ +#define FUNC_VARS \ + struct draw_pt_front_end *frontend, \ + pt_elt_func get_elt, \ + const void *elts, \ + int elt_bias, \ + unsigned count +#define LOCAL_VARS \ + struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \ + struct draw_context *draw = vcache->draw; \ + const unsigned prim = vcache->input_prim; \ + const boolean last_vertex_last = !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); -static void FUNC( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - int elt_bias, - unsigned count ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - struct draw_context *draw = vcache->draw; +#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias) - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - ushort flags; +#define FUNC_EXIT do { vcache_flush(vcache); } while (0) - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - - switch (vcache->input_prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - POINT( vcache, - get_elt(elts, i + 0) + elt_bias ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( vcache, - DRAW_PIPE_RESET_STIPPLE, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - - for (i = 1; i < count; i++, flags = 0) { - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, i ) + elt_bias); - } - - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, 0 ) + elt_bias); - } - break; - - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, i ) + elt_bias); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1 + (i&1)) + elt_bias, - get_elt(elts, i + 2 - (i&1)) + elt_bias); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0 + (i&1)) + elt_bias, - get_elt(elts, i + 1 - (i&1)) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, 0 ) + elt_bias); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - for (i = 0; i+3 < count; i += 4) { - QUAD( vcache, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, i + 3) + elt_bias ); - } - break; - - case PIPE_PRIM_QUAD_STRIP: - for (i = 0; i+3 < count; i += 2) { - QUAD( vcache, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 3) + elt_bias ); - } - break; - - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - ushort edge_next, edge_finish; - - if (flatfirst) { - flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_1 | - DRAW_PIPE_EDGE_FLAG_2); - edge_next = DRAW_PIPE_EDGE_FLAG_2; - edge_finish = DRAW_PIPE_EDGE_FLAG_0; - } - else { - flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_2 | - DRAW_PIPE_EDGE_FLAG_0); - edge_next = DRAW_PIPE_EDGE_FLAG_0; - edge_finish = DRAW_PIPE_EDGE_FLAG_1; - } - - for (i = 0; i+2 < count; i++, flags = edge_next) { - - if (i + 3 == count) - flags |= edge_finish; - - if (flatfirst) { - TRIANGLE( vcache, - flags, - get_elt(elts, 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias ); - } - else { - TRIANGLE( vcache, - flags, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, 0) + elt_bias); - } - } - } - break; - - default: - assert(0); - break; - } - - vcache_flush( vcache ); -} - - -#undef TRIANGLE -#undef QUAD -#undef POINT -#undef LINE -#undef FUNC +#include "draw_decompose_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h index 01212a8e53..6d8937a0b4 100644 --- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h @@ -1,123 +1,33 @@ - -static void FUNC( struct pt_so_emit *so, - const struct draw_prim_info *input_prims, - const struct draw_vertex_info *input_verts, - unsigned start, - unsigned count) -{ - struct draw_context *draw = so->draw; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - LOCAL_VARS - - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - debug_assert(input_prims->primitive_count == 1); - - switch (input_prims->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) { - POINT( so, start + i + 0 ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( so , start + i + 0 , start + i + 1 ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - - for (i = 1; i < count; i++) { - LINE( so, start + i - 1, start + i ); - } - - LINE( so, start + i - 1, start ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - LINE( so, start + i - 1, start + i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( so, start + i + 0, start + i + 1, start + i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 0, - start + i + 1 + (i&1), - start + i + 2 - (i&1) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 0 + (i&1), - start + i + 1 - (i&1), - start + i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 1, - start + i + 2, - start ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - - for (i = 0; i+2 < count; i++) { - - if (flatfirst) { - TRIANGLE( so, start + 0, start + i + 1, start + i + 2 ); - } - else { - TRIANGLE( so, start + i + 1, start + i + 2, start + 0 ); - } - } - } - break; - - default: - debug_assert(!"Unsupported primitive in stream output"); - break; - } -} - - -#undef TRIANGLE -#undef POINT -#undef LINE -#undef FUNC +#define FUNC_VARS \ + struct pt_so_emit *so, \ + const struct draw_prim_info *input_prims, \ + const struct draw_vertex_info *input_verts, \ + unsigned start, \ + unsigned count + +#define FUNC_ENTER \ + /* declare more local vars */ \ + struct draw_context *draw = so->draw; \ + const unsigned prim = input_prims->prim; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + do { \ + debug_assert(input_prims->primitive_count == 1); \ + switch (prim) { \ + case PIPE_PRIM_LINES_ADJACENCY: \ + case PIPE_PRIM_LINE_STRIP_ADJACENCY: \ + case PIPE_PRIM_TRIANGLES_ADJACENCY: \ + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: \ + debug_assert(!"unexpected primitive type in stream output"); \ + return; \ + default: \ + break; \ + } \ + } while (0) \ + +#define POINT(i0) so_point(so,i0) +#define LINE(flags,i0,i1) so_line(so,i0,i1) +#define TRIANGLE(flags,i0,i1,i2) so_tri(so,i0,i1,i2) + +#include "draw_decompose_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 3af31ffe12..e63cf5f4f9 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -166,7 +166,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit } } -static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit) +static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit) { switch (emit) { case EMIT_OMIT: diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 57ea63fc06..fb665b08ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -48,18 +48,30 @@ DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) + +/** + * Set a vertex shader constant buffer. + * \param slot which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1] + * \param constants the mapped buffer + * \param size size of buffer in bytes + */ void draw_vs_set_constants(struct draw_context *draw, unsigned slot, const void *constants, unsigned size) { - if (((uintptr_t)constants) & 0xf) { + const int alignment = 16; + + /* check if buffer is 16-byte aligned */ + if (((uintptr_t)constants) & (alignment - 1)) { + /* if not, copy the constants into a new, 16-byte aligned buffer */ if (size > draw->vs.const_storage_size[slot]) { if (draw->vs.aligned_constant_storage[slot]) { align_free((void *)draw->vs.aligned_constant_storage[slot]); } - draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16); + draw->vs.aligned_constant_storage[slot] = + align_malloc(size, alignment); } assert(constants); memcpy((void *)draw->vs.aligned_constant_storage[slot], diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index a731994523..f9a038788f 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -133,7 +133,8 @@ struct draw_vertex_shader { void (*run_linear)( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index bc34d390da..dab3eb1ca8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -85,7 +85,8 @@ static void vs_exec_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -95,9 +96,8 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, unsigned int i, j; unsigned slot; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, const_size); for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 6c13df7913..d13ad24fff 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -49,6 +49,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 14c95082a9..0b0c6077c6 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -84,6 +84,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 6eb26927f2..eacd160187 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -149,7 +149,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); @@ -214,7 +215,8 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index f5f2623e46..7b35dd4bb4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -59,6 +59,19 @@ #include "lp_bld_arit.h" +/* + * XXX: Increasing eliminates some artifacts, but adds others, most + * noticeably corruption in the Earth halo in Google Earth. + */ +#define RCP_NEWTON_STEPS 0 + +#define RSQRT_NEWTON_STEPS 0 + +#define EXP_POLY_DEGREE 3 + +#define LOG_POLY_DEGREE 5 + + /** * Generate min(a, b) * No checks for special case values of a or b = 1 or 0 are done. @@ -72,6 +85,9 @@ lp_build_min_simple(struct lp_build_context *bld, const char *intrinsic = NULL; LLVMValueRef cond; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + /* TODO: optimize the constant case */ if(type.width * type.length == 128) { @@ -118,6 +134,9 @@ lp_build_max_simple(struct lp_build_context *bld, const char *intrinsic = NULL; LLVMValueRef cond; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + /* TODO: optimize the constant case */ if(type.width * type.length == 128) { @@ -160,6 +179,8 @@ lp_build_comp(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + if(a == bld->one) return bld->zero; if(a == bld->zero) @@ -173,9 +194,15 @@ lp_build_comp(struct lp_build_context *bld, } if(LLVMIsConstant(a)) - return LLVMConstSub(bld->one, a); + if (type.floating) + return LLVMConstFSub(bld->one, a); + else + return LLVMConstSub(bld->one, a); else - return LLVMBuildSub(bld->builder, bld->one, a, ""); + if (type.floating) + return LLVMBuildFSub(bld->builder, bld->one, a, ""); + else + return LLVMBuildSub(bld->builder, bld->one, a, ""); } @@ -190,6 +217,9 @@ lp_build_add(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return b; if(b == bld->zero) @@ -217,9 +247,15 @@ lp_build_add(struct lp_build_context *bld, } if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstAdd(a, b); + if (type.floating) + res = LLVMConstFAdd(a, b); + else + res = LLVMConstAdd(a, b); else - res = LLVMBuildAdd(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFAdd(bld->builder, a, b, ""); + else + res = LLVMBuildAdd(bld->builder, a, b, ""); /* clamp to ceiling of 1.0 */ if(bld->type.norm && (bld->type.floating || bld->type.fixed)) @@ -240,6 +276,8 @@ lp_build_sum_vector(struct lp_build_context *bld, LLVMValueRef index, res; unsigned i; + assert(lp_check_value(type, a)); + if (a == bld->zero) return bld->zero; if (a == bld->undef) @@ -253,9 +291,16 @@ lp_build_sum_vector(struct lp_build_context *bld, for (i = 1; i < type.length; i++) { index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildAdd(bld->builder, res, - LLVMBuildExtractElement(bld->builder, a, index, ""), - ""); + if (type.floating) + res = LLVMBuildFAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, + a, index, ""), + ""); + else + res = LLVMBuildAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, + a, index, ""), + ""); } return res; @@ -273,6 +318,9 @@ lp_build_sub(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(b == bld->zero) return a; if(a == bld->undef || b == bld->undef) @@ -300,9 +348,15 @@ lp_build_sub(struct lp_build_context *bld, } if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstSub(a, b); + if (type.floating) + res = LLVMConstFSub(a, b); + else + res = LLVMConstSub(a, b); else - res = LLVMBuildSub(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFSub(bld->builder, a, b, ""); + else + res = LLVMBuildSub(bld->builder, a, b, ""); if(bld->type.norm && (bld->type.floating || bld->type.fixed)) res = lp_build_max_simple(bld, res, bld->zero); @@ -360,6 +414,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder, LLVMValueRef c8; LLVMValueRef ab; + assert(!i16_type.floating); + assert(lp_check_value(i16_type, a)); + assert(lp_check_value(i16_type, b)); + c8 = lp_build_const_int_vec(i16_type, 8); #if 0 @@ -395,6 +453,9 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef shift; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return bld->zero; if(a == bld->one) @@ -433,7 +494,10 @@ lp_build_mul(struct lp_build_context *bld, shift = NULL; if(LLVMIsConstant(a) && LLVMIsConstant(b)) { - res = LLVMConstMul(a, b); + if (type.floating) + res = LLVMConstFMul(a, b); + else + res = LLVMConstMul(a, b); if(shift) { if(type.sign) res = LLVMConstAShr(res, shift); @@ -442,7 +506,10 @@ lp_build_mul(struct lp_build_context *bld, } } else { - res = LLVMBuildMul(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFMul(bld->builder, a, b, ""); + else + res = LLVMBuildMul(bld->builder, a, b, ""); if(shift) { if(type.sign) res = LLVMBuildAShr(bld->builder, res, shift, ""); @@ -465,6 +532,8 @@ lp_build_mul_imm(struct lp_build_context *bld, { LLVMValueRef factor; + assert(lp_check_value(bld->type, a)); + if(b == 0) return bld->zero; @@ -472,7 +541,7 @@ lp_build_mul_imm(struct lp_build_context *bld, return a; if(b == -1) - return LLVMBuildNeg(bld->builder, a, ""); + return lp_build_negate(bld, a); if(b == 2 && bld->type.floating) return lp_build_add(bld, a, a); @@ -518,6 +587,9 @@ lp_build_div(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return bld->zero; if(a == bld->one) @@ -529,13 +601,24 @@ lp_build_div(struct lp_build_context *bld, if(a == bld->undef || b == bld->undef) return bld->undef; - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstFDiv(a, b); + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + if (type.floating) + return LLVMConstFDiv(a, b); + else if (type.sign) + return LLVMConstSDiv(a, b); + else + return LLVMConstUDiv(a, b); + } if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_mul(bld, a, lp_build_rcp(bld, b)); - return LLVMBuildFDiv(bld->builder, a, b, ""); + if (type.floating) + return LLVMBuildFDiv(bld->builder, a, b, ""); + else if (type.sign) + return LLVMBuildSDiv(bld->builder, a, b, ""); + else + return LLVMBuildUDiv(bld->builder, a, b, ""); } @@ -555,6 +638,10 @@ lp_build_lerp(struct lp_build_context *bld, LLVMValueRef delta; LLVMValueRef res; + assert(lp_check_value(bld->type, x)); + assert(lp_check_value(bld->type, v0)); + assert(lp_check_value(bld->type, v1)); + delta = lp_build_sub(bld, v1, v0); res = lp_build_mul(bld, x, delta); @@ -596,6 +683,9 @@ lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + if(a == bld->undef || b == bld->undef) return bld->undef; @@ -624,6 +714,9 @@ lp_build_max(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + if(a == bld->undef || b == bld->undef) return bld->undef; @@ -653,6 +746,10 @@ lp_build_clamp(struct lp_build_context *bld, LLVMValueRef min, LLVMValueRef max) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, min)); + assert(lp_check_value(bld->type, max)); + a = lp_build_min(bld, a, max); a = lp_build_max(bld, a, min); return a; @@ -669,6 +766,8 @@ lp_build_abs(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); + assert(lp_check_value(type, a)); + if(!type.sign) return a; @@ -702,7 +801,16 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { - return LLVMBuildNeg(bld->builder, a, ""); + assert(lp_check_value(bld->type, a)); + +#if HAVE_LLVM >= 0x0207 + if (bld->type.floating) + a = LLVMBuildFNeg(bld->builder, a, ""); + else +#endif + a = LLVMBuildNeg(bld->builder, a, ""); + + return a; } @@ -715,6 +823,8 @@ lp_build_sgn(struct lp_build_context *bld, LLVMValueRef cond; LLVMValueRef res; + assert(lp_check_value(type, a)); + /* Handle non-zero case */ if(!type.sign) { /* if not zero then sign must be positive */ @@ -773,6 +883,7 @@ lp_build_set_sign(struct lp_build_context *bld, LLVMValueRef val, res; assert(type.floating); + assert(lp_check_value(type, a)); /* val = reinterpret_cast<int>(a) */ val = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); @@ -1021,7 +1132,7 @@ lp_build_iround(struct lp_build_context *bld, half = LLVMBuildOr(bld->builder, sign, half, ""); half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); - res = LLVMBuildAdd(bld->builder, a, half, ""); + res = LLVMBuildFAdd(bld->builder, a, half, ""); } res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); @@ -1070,7 +1181,7 @@ lp_build_ifloor(struct lp_build_context *bld, offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res"); + res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res"); } /* round to nearest (toward zero) */ @@ -1120,7 +1231,7 @@ lp_build_iceil(struct lp_build_context *bld, offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res"); + res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res"); } /* round to nearest (toward zero) */ @@ -1138,6 +1249,8 @@ lp_build_sqrt(struct lp_build_context *bld, LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; + assert(lp_check_value(type, a)); + /* TODO: optimize the constant case */ /* TODO: optimize the constant case */ @@ -1148,12 +1261,39 @@ lp_build_sqrt(struct lp_build_context *bld, } +/** + * Do one Newton-Raphson step to improve reciprocate precision: + * + * x_{i+1} = x_i * (2 - a * x_i) + * + * See also: + * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division + * - http://softwarecommunity.intel.com/articles/eng/1818.htm + */ +static INLINE LLVMValueRef +lp_build_rcp_refine(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef rcp_a) +{ + LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); + LLVMValueRef res; + + res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildFSub(bld->builder, two, res, ""); + res = LLVMBuildFMul(bld->builder, rcp_a, res, ""); + + return res; +} + + LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + if(a == bld->zero) return bld->undef; if(a == bld->one) @@ -1167,32 +1307,16 @@ lp_build_rcp(struct lp_build_context *bld, return LLVMConstFDiv(bld->one, a); if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { - /* - * XXX: Added precision is not always necessary, so only enable this - * when we have a better system in place to track minimum precision. - */ - -#if 0 - /* - * Do one Newton-Raphson step to improve precision: - * - * x1 = (2 - a * rcp(a)) * rcp(a) - */ - - LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); - LLVMValueRef rcp_a; LLVMValueRef res; + unsigned i; - rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); + res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a); - res = LLVMBuildMul(bld->builder, a, rcp_a, ""); - res = LLVMBuildSub(bld->builder, two, res, ""); - res = LLVMBuildMul(bld->builder, res, rcp_a, ""); + for (i = 0; i < RCP_NEWTON_STEPS; ++i) { + res = lp_build_rcp_refine(bld, a, res); + } - return rcp_a; -#else - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); -#endif + return res; } return LLVMBuildFDiv(bld->builder, bld->one, a, ""); @@ -1200,6 +1324,33 @@ lp_build_rcp(struct lp_build_context *bld, /** + * Do one Newton-Raphson step to improve rsqrt precision: + * + * x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i) + * + * See also: + * - http://softwarecommunity.intel.com/articles/eng/1818.htm + */ +static INLINE LLVMValueRef +lp_build_rsqrt_refine(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef rsqrt_a) +{ + LLVMValueRef half = lp_build_const_vec(bld->type, 0.5); + LLVMValueRef three = lp_build_const_vec(bld->type, 3.0); + LLVMValueRef res; + + res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, ""); + res = LLVMBuildFMul(bld->builder, a, res, ""); + res = LLVMBuildFSub(bld->builder, three, res, ""); + res = LLVMBuildFMul(bld->builder, rsqrt_a, res, ""); + res = LLVMBuildFMul(bld->builder, half, res, ""); + + return res; +} + + +/** * Generate 1/sqrt(a) */ LLVMValueRef @@ -1208,10 +1359,22 @@ lp_build_rsqrt(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + assert(type.floating); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + LLVMValueRef res; + unsigned i; + + res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a); + + for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) { + res = lp_build_rsqrt_refine(bld, a, res); + } + + return res; + } return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } @@ -1270,7 +1433,7 @@ lp_build_sin(struct lp_build_context *bld, */ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); - LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y"); + LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* * store the integer part of y in mm0 @@ -1344,9 +1507,9 @@ lp_build_sin(struct lp_build_context *bld, * xmm2 = _mm_mul_ps(y, xmm2); * xmm3 = _mm_mul_ps(y, xmm3); */ - LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1"); - LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2"); - LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3"); + LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1"); + LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2"); + LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3"); /* * x = _mm_add_ps(x, xmm1); @@ -1354,16 +1517,16 @@ lp_build_sin(struct lp_build_context *bld, * x = _mm_add_ps(x, xmm3); */ - LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1"); - LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2"); - LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3"); + LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1"); + LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2"); + LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3"); /* * Evaluate the first polynom (0 <= x <= Pi/4) * * z = _mm_mul_ps(x,x); */ - LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z"); + LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z"); /* * _PS_CONST(coscof_p0, 2.443315711809948E-005); @@ -1378,12 +1541,12 @@ lp_build_sin(struct lp_build_context *bld, * y = *(v4sf*)_ps_coscof_p0; * y = _mm_mul_ps(y, z); */ - LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3"); - LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4"); - LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5"); - LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6"); - LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7"); - LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8"); + LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3"); + LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4"); + LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5"); + LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6"); + LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7"); + LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8"); /* @@ -1392,10 +1555,10 @@ lp_build_sin(struct lp_build_context *bld, * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ LLVMValueRef half = lp_build_const_v4sf(0.5); - LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp"); - LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8"); + LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp"); + LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); LLVMValueRef one = lp_build_const_v4sf(1.0); - LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9"); + LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* * _PS_CONST(sincof_p0, -1.9515295891E-4); @@ -1419,13 +1582,13 @@ lp_build_sin(struct lp_build_context *bld, * y2 = _mm_add_ps(y2, x); */ - LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3"); - LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4"); - LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5"); - LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6"); - LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7"); - LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8"); - LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9"); + LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3"); + LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4"); + LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5"); + LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6"); + LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7"); + LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8"); + LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9"); /* * select the correct result from the two polynoms @@ -1481,7 +1644,7 @@ lp_build_cos(struct lp_build_context *bld, */ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); - LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y"); + LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* * store the integer part of y in mm0 @@ -1561,9 +1724,9 @@ lp_build_cos(struct lp_build_context *bld, * xmm2 = _mm_mul_ps(y, xmm2); * xmm3 = _mm_mul_ps(y, xmm3); */ - LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1"); - LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2"); - LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3"); + LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1"); + LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2"); + LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3"); /* * x = _mm_add_ps(x, xmm1); @@ -1571,16 +1734,16 @@ lp_build_cos(struct lp_build_context *bld, * x = _mm_add_ps(x, xmm3); */ - LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1"); - LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2"); - LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3"); + LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1"); + LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2"); + LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3"); /* * Evaluate the first polynom (0 <= x <= Pi/4) * * z = _mm_mul_ps(x,x); */ - LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z"); + LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z"); /* * _PS_CONST(coscof_p0, 2.443315711809948E-005); @@ -1595,12 +1758,12 @@ lp_build_cos(struct lp_build_context *bld, * y = *(v4sf*)_ps_coscof_p0; * y = _mm_mul_ps(y, z); */ - LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3"); - LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4"); - LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5"); - LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6"); - LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7"); - LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8"); + LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3"); + LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4"); + LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5"); + LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6"); + LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7"); + LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8"); /* @@ -1609,10 +1772,10 @@ lp_build_cos(struct lp_build_context *bld, * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ LLVMValueRef half = lp_build_const_v4sf(0.5); - LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp"); - LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8"); + LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp"); + LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); LLVMValueRef one = lp_build_const_v4sf(1.0); - LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9"); + LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* * _PS_CONST(sincof_p0, -1.9515295891E-4); @@ -1636,13 +1799,13 @@ lp_build_cos(struct lp_build_context *bld, * y2 = _mm_add_ps(y2, x); */ - LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3"); - LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4"); - LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5"); - LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6"); - LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7"); - LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8"); - LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9"); + LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3"); + LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4"); + LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5"); + LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6"); + LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7"); + LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8"); + LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9"); /* * select the correct result from the two polynoms @@ -1695,6 +1858,8 @@ lp_build_exp(struct lp_build_context *bld, /* log2(e) = 1/log(2) */ LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634); + assert(lp_check_value(bld->type, x)); + return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); } @@ -1709,14 +1874,12 @@ lp_build_log(struct lp_build_context *bld, /* log(2) */ LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529); + assert(lp_check_value(bld->type, x)); + return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } -#define EXP_POLY_DEGREE 3 -#define LOG_POLY_DEGREE 5 - - /** * Generate polynomial. * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. @@ -1731,6 +1894,8 @@ lp_build_polynomial(struct lp_build_context *bld, LLVMValueRef res = NULL; unsigned i; + assert(lp_check_value(bld->type, x)); + /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", @@ -1802,6 +1967,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; + assert(lp_check_value(bld->type, x)); + if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) @@ -1817,7 +1984,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, ipart = lp_build_floor(bld, x); /* fpart = x - ipart */ - fpart = LLVMBuildSub(bld->builder, x, ipart, ""); + fpart = LLVMBuildFSub(bld->builder, x, ipart, ""); } if(p_exp2_int_part || p_exp2) { @@ -1832,7 +1999,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); - res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); + res = LLVMBuildFMul(bld->builder, expipart, expfpart, ""); } if(p_exp2_int_part) @@ -1915,6 +2082,8 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef logmant = NULL; LLVMValueRef res = NULL; + assert(lp_check_value(bld->type, x)); + if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) @@ -1945,9 +2114,9 @@ lp_build_log2_approx(struct lp_build_context *bld, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); + logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), ""); - res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); + res = LLVMBuildFAdd(bld->builder, logmant, logexp, ""); } if(p_exp) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c new file mode 100644 index 0000000000..f2ebd868a8 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "lp_bld_assert.h" +#include "lp_bld_init.h" +#include "lp_bld_printf.h" + + +/** + * A call to lp_build_assert() will build a function call to this function. + */ +static void +lp_assert(int condition, const char *msg) +{ + if (!condition) { + debug_printf("LLVM assertion '%s' failed!\n", msg); + assert(condition); + } +} + + + +/** + * lp_build_assert. + * + * Build an assertion in LLVM IR by building a function call to the + * lp_assert() function above. + * + * \param condition should be an 'i1' or 'i32' value + * \param msg a string to print if the assertion fails. + */ +LLVMValueRef +lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, + const char *msg) +{ + LLVMModuleRef module; + LLVMTypeRef arg_types[2]; + LLVMValueRef msg_string, assert_func, params[2], r; + + module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( + LLVMGetInsertBlock(builder))); + + msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1); + + arg_types[0] = LLVMInt32Type(); + arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); + + /* lookup the lp_assert function */ + assert_func = LLVMGetNamedFunction(module, "lp_assert"); + + /* Create the assertion function if not found */ + if (!assert_func) { + LLVMTypeRef func_type = + LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0); + + assert_func = LLVMAddFunction(module, "lp_assert", func_type); + LLVMSetFunctionCallConv(assert_func, LLVMCCallConv); + LLVMSetLinkage(assert_func, LLVMExternalLinkage); + LLVMAddGlobalMapping(lp_build_engine, assert_func, + func_to_pointer((func_pointer)lp_assert)); + } + assert(assert_func); + + /* build function call param list */ + params[0] = LLVMBuildZExt(builder, condition, arg_types[0], ""); + params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], ""); + + /* check arg types */ + assert(LLVMTypeOf(params[0]) == arg_types[0]); + assert(LLVMTypeOf(params[1]) == arg_types[1]); + + r = LLVMBuildCall(builder, assert_func, params, 2, ""); + + return r; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h new file mode 100644 index 0000000000..ddd879dc2c --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_ASSERT_H +#define LP_BLD_ASSERT_H + + +#include "lp_bld.h" + + +LLVMValueRef +lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, + const char *msg); + + +#endif + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 77012f1fac..8b477313d4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -117,8 +117,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); - res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), ""); - res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), ""); + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { @@ -175,6 +175,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, double scale; double bias; + assert(dst_type.floating); + mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); @@ -199,8 +201,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, vec_type, ""); - res = LLVMBuildSub(builder, res, bias_, ""); - res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + res = LLVMBuildFSub(builder, res, bias_, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; } @@ -296,7 +298,7 @@ lp_build_conv(LLVMBuilderRef builder, if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); + tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ @@ -391,7 +393,7 @@ lp_build_conv(LLVMBuilderRef builder, if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); + tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 0f01fc1d75..247cb83ce6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -240,7 +240,7 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, */ if (normalized) - scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); + scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); else scaled = casted; @@ -322,7 +322,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } if (normalized) - scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); + scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); else scaled = unswizzled; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 9f405921b0..c724a4453e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -197,7 +197,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, if (format_desc->channel[chan].normalized) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); LLVMValueRef scale_val = lp_build_const_vec(type, scale); - input = LLVMBuildMul(builder, input, scale_val, ""); + input = LLVMBuildFMul(builder, input, scale_val, ""); } } else { @@ -227,7 +227,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); LLVMValueRef scale_val = lp_build_const_vec(type, scale); input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); - input = LLVMBuildMul(builder, input, scale_val, ""); + input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 69353dea09..60d8bcfa55 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -45,6 +45,8 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, DEBUG_NAMED_VALUE_END }; + +DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0) #endif @@ -89,7 +91,7 @@ void lp_build_init(void) { #ifdef DEBUG - gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 ); + gallivm_debug = debug_get_option_gallivm_debug(); #endif lp_set_target_options(); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index a32ced9b4c..f26fdac466 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -44,5 +44,7 @@ extern LLVMPassManagerRef lp_build_pass; void lp_build_init(void); +extern void +lp_func_delete_body(LLVMValueRef func); #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 39854e43b1..7d7db3b0d9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -83,6 +83,8 @@ lp_build_compare(LLVMBuilderRef builder, assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); if(func == PIPE_FUNC_NEVER) return zeros; @@ -363,9 +365,55 @@ lp_build_cmp(struct lp_build_context *bld, /** + * Return (mask & a) | (~mask & b); + */ +LLVMValueRef +lp_build_select_bitwise(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b) +{ + struct lp_type type = bld->type; + LLVMValueRef res; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + if (a == b) { + return a; + } + + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + } + + a = LLVMBuildAnd(bld->builder, a, mask, ""); + + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. + */ + b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + + res = LLVMBuildOr(bld->builder, a, b, ""); + + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(type); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + + return res; +} + + +/** * Return mask ? a : b; * - * mask is a bitwise mask, composed of 0 or ~0 for each element. + * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value + * will yield unpredictable results. */ LLVMValueRef lp_build_select(struct lp_build_context *bld, @@ -376,6 +424,9 @@ lp_build_select(struct lp_build_context *bld, struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; @@ -424,27 +475,7 @@ lp_build_select(struct lp_build_context *bld, } } else { - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); - } - - a = LLVMBuildAnd(bld->builder, a, mask, ""); - - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. - */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); - - res = LLVMBuildOr(bld->builder, a, b, ""); - - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); - } + res = lp_build_select_bitwise(bld, mask, a, b); } return res; @@ -461,6 +492,9 @@ lp_build_select_aos(struct lp_build_context *bld, const unsigned n = type.length; unsigned i, j; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; if(cond[0] && cond[1] && cond[2] && cond[3]) @@ -516,7 +550,22 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + const struct lp_type type = bld->type; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + /* can't do bitwise ops on floating-point values */ + if(type.floating) { + a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, ""); + } + b = LLVMBuildNot(bld->builder, b, ""); b = LLVMBuildAnd(bld->builder, a, b, ""); + + if(type.floating) { + b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, ""); + } return b; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 29f9fc3b20..4e7b4c9938 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -63,6 +63,11 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef +lp_build_select_bitwise(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b); LLVMValueRef lp_build_select(struct lp_build_context *bld, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 5a9488b5f7..6d5410d970 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -39,6 +39,7 @@ #include <llvm/Target/TargetOptions.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ExecutionEngine/JITEventListener.h> +#include <llvm/Support/CommandLine.h> #include "pipe/p_config.h" #include "util/u_debug.h" @@ -141,4 +142,35 @@ lp_set_target_options(void) #if 0 llvm::UnsafeFPMath = true; #endif + +#if 0 + /* + * LLVM will generate MMX instructions for vectors <= 64 bits, leading to + * innefficient code, and in 32bit systems, to the corruption of the FPU + * stack given that it expects the user to generate the EMMS instructions. + * + * See also: + * - http://llvm.org/bugs/show_bug.cgi?id=3287 + * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/ + * + * XXX: Unfortunately this is not working. + */ + static boolean first = FALSE; + if (first) { + static const char* options[] = { + "prog", + "-disable-mmx" + }; + llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options)); + first = FALSE; + } +#endif +} + + +extern "C" void +lp_func_delete_body(LLVMValueRef FF) +{ + llvm::Function *func = llvm::unwrap<llvm::Function>(FF); + func->deleteBody(); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 7748f8f099..b7b630f2e8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -171,14 +171,13 @@ lp_build_unpack2(LLVMBuilderRef builder, msb = lp_build_zero(src_type); /* Interleave bits */ - if(util_cpu_caps.little_endian) { +#ifdef PIPE_ARCH_LITTLE_ENDIAN *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); - } - else { +#else *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); - } +#endif /* Cast the result into the new type (twice as wide) */ @@ -261,13 +260,14 @@ lp_build_pack2(LLVMBuilderRef builder, #endif LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); LLVMValueRef shuffle; - LLVMValueRef res; + LLVMValueRef res = NULL; assert(!src_type.floating); assert(!dst_type.floating); assert(src_type.width == dst_type.width * 2); assert(src_type.length * 2 == dst_type.length); + /* Check for special cases first */ if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { switch(src_type.width) { case 32: @@ -283,8 +283,8 @@ lp_build_pack2(LLVMBuilderRef builder, return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); } else { - assert(0); - return LLVMGetUndef(dst_vec_type); + /* use generic shuffle below */ + res = NULL; } } break; @@ -310,10 +310,13 @@ lp_build_pack2(LLVMBuilderRef builder, break; } - res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); - return res; + if (res) { + res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); + return res; + } } + /* generic shuffle */ lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index ca36046d22..7b1088939b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -85,7 +85,7 @@ lp_build_scalar_ddx(struct lp_build_context *bld, LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0); LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, ""); LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, ""); - return LLVMBuildSub(bld->builder, a_right, a_left, ""); + return lp_build_sub(bld, a_right, a_left); } @@ -97,5 +97,5 @@ lp_build_scalar_ddy(struct lp_build_context *bld, LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0); LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, ""); LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, ""); - return LLVMBuildSub(bld->builder, a_bottom, a_top, ""); + return lp_build_sub(bld, a_bottom, a_top); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 1a20d74cac..806c7d56a8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -40,7 +40,6 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" -#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -811,7 +810,7 @@ lp_build_minify(struct lp_build_sample_context *bld, LLVMValueRef base_size, LLVMValueRef level) { - LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); + LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); return size; } @@ -888,17 +887,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* Compute rho = max of all partial derivatives scaled by texture size. * XXX this could be vectorized somewhat */ - rho = LLVMBuildMul(bld->builder, + rho = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, dsdx, dsdy), lp_build_int_to_float(float_bld, width), ""); if (dims > 1) { LLVMValueRef max; - max = LLVMBuildMul(bld->builder, + max = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, dtdx, dtdy), lp_build_int_to_float(float_bld, height), ""); rho = lp_build_max(float_bld, rho, max); if (dims > 2) { - max = LLVMBuildMul(bld->builder, + max = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, drdx, drdy), lp_build_int_to_float(float_bld, depth), ""); rho = lp_build_max(float_bld, rho, max); @@ -912,12 +911,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (lod_bias) { lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, index0, ""); - lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); + lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); } } /* add sampler lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); /* clamp lod */ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); @@ -1219,8 +1218,7 @@ lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) /* ima = -0.5 / abs(coord); */ LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); - LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, - lp_build_rcp(coord_bld, absCoord)); + LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); return ima; } @@ -1841,7 +1839,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, unsigned i, j; for(j = 0; j < h16.type.length; j += 4) { - unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; +#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned subindex = 0; +#else + unsigned subindex = 1; +#endif LLVMValueRef index; index = LLVMConstInt(elem_type, j/2 + subindex, 0); @@ -2029,6 +2031,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, debug_printf("Sample from %s\n", util_format_name(fmt)); } + assert(type.floating); + /* Setup our build context */ memset(&bld, 0, sizeof bld); bld.builder = builder; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 21236839fb..0aa64affac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -489,7 +489,7 @@ get_indirect_offsets(struct lp_build_tgsi_soa_context *bld, int_vec_type, ""); /* addr_vec = addr_vec * 4 */ - addr_vec = lp_build_mul(&bld->base, addr_vec, vec4); + addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4); return addr_vec; } @@ -533,7 +533,7 @@ emit_fetch( reg->Register.Index * 4 + swizzle); /* index_vec = index_vec + addr_vec */ - index_vec = lp_build_add(&bld->base, index_vec, addr_vec); + index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec); /* Gather values from the constant buffer */ res = build_gather(bld, bld->consts_ptr, index_vec); @@ -612,11 +612,9 @@ emit_fetch( case TGSI_UTIL_SIGN_SET: /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); - res = LLVMBuildNeg( bld->base.builder, res, "" ); - break; - + /* fall through */ case TGSI_UTIL_SIGN_TOGGLE: - res = LLVMBuildNeg( bld->base.builder, res, "" ); + res = lp_build_negate( &bld->base, res ); break; case TGSI_UTIL_SIGN_KEEP: @@ -773,7 +771,9 @@ emit_store( addr = LLVMBuildExtractElement(bld->base.builder, addr, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); - addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + addr = LLVMBuildMul(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 4, 0), + ""); } switch( reg->Register.File ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 5275faa5e2..298f3d0a8b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -557,6 +557,23 @@ print_temp(const struct tgsi_exec_machine *mach, uint index) #endif +void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes) +{ + unsigned i; + + for (i = 0; i < num_bufs; i++) { + mach->Consts[i] = bufs[i]; + mach->ConstsSize[i] = buf_sizes[i]; + } +} + + + + /** * Check if there's a potential src/dst register data dependency when * using SOA execution. @@ -632,6 +649,10 @@ tgsi_exec_machine_bind_shader( util_init_math(); + if (numSamplers) { + assert(samplers); + } + mach->Tokens = tokens; mach->Samplers = samplers; @@ -1040,6 +1061,8 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, { uint i; + assert(swizzle < 4); + switch (file) { case TGSI_FILE_CONSTANT: for (i = 0; i < QUAD_SIZE; i++) { @@ -1049,9 +1072,23 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, if (index->i[i] < 0) { chan->u[i] = 0; } else { - const uint *p = (const uint *)mach->Consts[index2D->i[i]]; - - chan->u[i] = p[index->i[i] * 4 + swizzle]; + /* NOTE: copying the const value as a uint instead of float */ + const uint constbuf = index2D->i[i]; + const uint *buf = (const uint *)mach->Consts[constbuf]; + const int pos = index->i[i] * 4 + swizzle; + /* const buffer bounds check */ + if (pos < 0 || pos >= mach->ConstsSize[constbuf]) { + if (0) { + /* Debug: print warning */ + static int count = 0; + if (count++ < 100) + debug_printf("TGSI Exec: const buffer index %d" + " out of bounds\n", pos); + } + chan->u[i] = 0; + } + else + chan->u[i] = buf[pos]; } } break; @@ -1065,9 +1102,10 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], index2D->i[i], index->i[i]); }*/ - chan->u[i] = mach->Inputs[index2D->i[i] * - TGSI_EXEC_MAX_INPUT_ATTRIBS + - index->i[i]].xyzw[swizzle].u[i]; + int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; + assert(pos >= 0); + assert(pos < Elements(mach->Inputs)); + chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; } break; @@ -1187,7 +1225,7 @@ fetch_source(const struct tgsi_exec_machine *mach, index2.i[1] = index2.i[2] = index2.i[3] = reg->Indirect.Index; - + assert(reg->Indirect.File == TGSI_FILE_ADDRESS); /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel(mach, diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index ccf80ca6fd..6dee362d58 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -253,7 +253,10 @@ struct tgsi_exec_machine struct tgsi_sampler **Samplers; unsigned ImmLimit; + const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS]; + const struct tgsi_token *Tokens; /**< Declarations, instructions */ unsigned Processor; /**< TGSI_PROCESSOR_x */ @@ -367,6 +370,13 @@ tgsi_set_exec_mask(struct tgsi_exec_machine *mach, } +extern void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes); + + #if defined __cplusplus } /* extern "C" */ #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 97148dbe23..acbff103ef 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -33,6 +33,10 @@ #include "tgsi_info.h" #include "tgsi_iterate.h" + +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE) + + typedef struct { uint file : 28; /* max 2 dimensions */ @@ -54,6 +58,8 @@ struct sanity_check_ctx uint errors; uint warnings; uint implied_array_size; + + boolean print; }; static INLINE unsigned @@ -148,6 +154,9 @@ report_error( { va_list args; + if (!ctx->print) + return; + debug_printf( "Error : " ); va_start( args, format ); _debug_vprintf( format, args ); @@ -164,6 +173,9 @@ report_warning( { va_list args; + if (!ctx->print) + return; + debug_printf( "Warning: " ); va_start( args, format ); _debug_vprintf( format, args ); @@ -539,6 +551,7 @@ tgsi_sanity_check( ctx.errors = 0; ctx.warnings = 0; ctx.implied_array_size = 0; + ctx.print = debug_get_option_print_sanity(); if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index 52263ff883..73f0f414e3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -35,7 +35,8 @@ extern "C" { #endif /* Check the given token stream for errors and common mistakes. - * Diagnostic messages are printed out to the debug output. + * Diagnostic messages are printed out to the debug output, and is + * controlled by the debug option TGSI_PRINT_SANITY (default false). * Returns TRUE if there are no errors, even though there could be some warnings. */ boolean diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index a9b7253bf4..fe638e211f 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -48,3 +48,8 @@ struct translate *translate_create( const struct translate_key *key ) return translate_generic_create( key ); } + +boolean translate_is_output_format_supported(enum pipe_format format) +{ + return translate_generic_is_output_format_supported(format); +} diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index edd95e0788..eb6f2cc486 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -105,6 +105,8 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx, struct translate *translate_create( const struct translate_key *key ); +boolean translate_is_output_format_supported(enum pipe_format format); + static INLINE int translate_keysize( const struct translate_key *key ) { return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); @@ -138,5 +140,6 @@ struct translate *translate_sse2_create( const struct translate_key *key ); struct translate *translate_generic_create( const struct translate_key *key ); +boolean translate_generic_is_output_format_supported(enum pipe_format format); #endif diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 0e43a512ee..42cfd763e9 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -187,9 +187,15 @@ ATTRIB( R8G8B8_SNORM, 3, char, TO_8_SNORM ) ATTRIB( R8G8_SNORM, 2, char, TO_8_SNORM ) ATTRIB( R8_SNORM, 1, char, TO_8_SNORM ) -ATTRIB( A8R8G8B8_UNORM, 4, ubyte, TO_8_UNORM ) -/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM )*/ - +static void +emit_A8R8G8B8_UNORM( const float *attrib, void *ptr) +{ + ubyte *out = (ubyte *)ptr; + out[0] = TO_8_UNORM(attrib[3]); + out[1] = TO_8_UNORM(attrib[0]); + out[2] = TO_8_UNORM(attrib[1]); + out[3] = TO_8_UNORM(attrib[2]); +} static void emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) @@ -368,23 +374,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, /* loop over vertex attributes (vertex shader inputs) */ for (i = 0; i < count; i++) { - unsigned elt = *elts++; + const unsigned elt = *elts++; for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const uint8_t *src; - unsigned index; - - char *dst = (vert + - tg->attrib[attr].output_offset); + char *dst = vert + tg->attrib[attr].output_offset; if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const uint8_t *src; + unsigned index; + if (tg->attrib[attr].instance_divisor) { index = instance_id / tg->attrib[attr].instance_divisor; } else { index = elt; } + /* clamp to void going out of bounds */ index = MIN2(index, tg->attrib[attr].max_index); src = tg->attrib[attr].input_ptr + @@ -392,11 +398,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, tg->attrib[attr].fetch( data, src, 0, 0 ); + if (0) + debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: " + " %f, %f, %f, %f \n", + attr, + tg->attrib[attr].input_ptr, + tg->attrib[attr].input_stride, + tg->attrib[attr].instance_divisor, + tg->attrib[attr].max_index, + index, + data[0], data[1],data[2], data[3]); } else { data[0] = (float)instance_id; } - if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", - i, elt, attr, data[0], data[1], data[2], data[3]); + + if (0) + debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } @@ -425,29 +443,42 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - char *dst = (vert + - tg->attrib[attr].output_offset); + char *dst = vert + tg->attrib[attr].output_offset; if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { const uint8_t *src; + unsigned index; if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); - } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + index = instance_id / tg->attrib[attr].instance_divisor; } + else { + index = elt; + } + + /* clamp to void going out of bounds */ + index = MIN2(index, tg->attrib[attr].max_index); + + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; tg->attrib[attr].fetch( data, src, 0, 0 ); + + if (0) + debug_printf("Fetch linear attr %d from %p stride %d index %d: " + " %f, %f, %f, %f \n", + attr, + tg->attrib[attr].input_ptr, + tg->attrib[attr].input_stride, + index, + data[0], data[1],data[2], data[3]); } else { data[0] = (float)instance_id; } - if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", - i, attr, data[0], data[1], data[2], data[3]); + if (0) + debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } @@ -523,3 +554,83 @@ struct translate *translate_generic_create( const struct translate_key *key ) return &tg->translate; } + +boolean translate_generic_is_output_format_supported(enum pipe_format format) +{ + switch(format) + { + case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64_FLOAT: return TRUE; + case PIPE_FORMAT_R64_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32_FLOAT: return TRUE; + case PIPE_FORMAT_R32_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32B32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32_USCALED: return TRUE; + case PIPE_FORMAT_R32_USCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE; + case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE; + case PIPE_FORMAT_R32G32_SSCALED: return TRUE; + case PIPE_FORMAT_R32_SSCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32_UNORM: return TRUE; + case PIPE_FORMAT_R32_UNORM: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32_SNORM: return TRUE; + case PIPE_FORMAT_R32_SNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16_USCALED: return TRUE; + case PIPE_FORMAT_R16_USCALED: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16_SSCALED: return TRUE; + case PIPE_FORMAT_R16_SSCALED: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16_UNORM: return TRUE; + case PIPE_FORMAT_R16_UNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16_SNORM: return TRUE; + case PIPE_FORMAT_R16_SNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8_USCALED: return TRUE; + case PIPE_FORMAT_R8_USCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8_SSCALED: return TRUE; + case PIPE_FORMAT_R8_SSCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8_UNORM: return TRUE; + case PIPE_FORMAT_R8_UNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8_SNORM: return TRUE; + case PIPE_FORMAT_R8_SNORM: return TRUE; + + case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE; + default: return FALSE; + } +} diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 0d94aaae95..b5b86b7214 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,6 +87,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_flush_depth_stencil; void *velem_state; @@ -156,6 +157,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.writemask = 1; + ctx->dsa_flush_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.enabled = 1; dsa.depth.writemask = 1; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -940,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* Clear a region of a depth stencil surface. */ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->base.pipe; + struct pipe_framebuffer_state fb_state; + + assert(dstsurf->texture); + if (!dstsurf->texture) + return; + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); + pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + + /* set a framebuffer state */ + fb_state.width = dstsurf->width; + fb_state.height = dstsurf->height; + fb_state.nr_cbufs = 0; + fb_state.cbufs[0] = 0; + fb_state.zsbuf = dstsurf; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + UTIL_BLITTER_ATTRIB_NONE, NULL); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index ba3f92eca8..f316587dea 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index a08241971c..5056351307 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -38,7 +38,7 @@ #include "u_cpu_detect.h" #if defined(PIPE_ARCH_PPC) -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) #include <sys/sysctl.h> #else #include <signal.h> @@ -73,9 +73,15 @@ #endif +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE) + + struct util_cpu_caps util_cpu_caps; +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) static int has_cpuid(void); +#endif + #if defined(PIPE_ARCH_X86) @@ -132,7 +138,7 @@ win32_sig_handler_sse(EXCEPTION_POINTERS* ep) #endif /* PIPE_ARCH_X86 */ -#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN) +#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) static jmp_buf __lv_powerpc_jmpbuf; static volatile sig_atomic_t __lv_powerpc_canjump = 0; @@ -153,7 +159,7 @@ sigill_handler(int sig) static void check_os_altivec_support(void) { -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) int sels[2] = {CTL_HW, HW_VECTORUNIT}; int has_vu = 0; int len = sizeof (has_vu); @@ -166,8 +172,8 @@ check_os_altivec_support(void) util_cpu_caps.has_altivec = 1; } } -#else /* !PIPE_OS_DARWIN */ - /* no Darwin, do it the brute-force way */ +#else /* !PIPE_OS_APPLE */ + /* not on Apple/Darwin, do it the brute-force way */ /* this is borrowed from the libmpeg2 library */ signal(SIGILL, sigill_handler); if (setjmp(__lv_powerpc_jmpbuf)) { @@ -184,7 +190,7 @@ check_os_altivec_support(void) signal(SIGILL, SIG_DFL); util_cpu_caps.has_altivec = 1; } -#endif /* PIPE_OS_DARWIN */ +#endif /* !PIPE_OS_APPLE */ } #endif /* PIPE_ARCH_PPC */ @@ -385,23 +391,6 @@ util_cpu_detect(void) memset(&util_cpu_caps, 0, sizeof util_cpu_caps); - /* Check for arch type */ -#if defined(PIPE_ARCH_MIPS) - util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS; -#elif defined(PIPE_ARCH_ALPHA) - util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA; -#elif defined(PIPE_ARCH_SPARC) - util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC; -#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - util_cpu_caps.arch = UTIL_CPU_ARCH_X86; - util_cpu_caps.little_endian = 1; -#elif defined(PIPE_ARCH_PPC) - util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC; - util_cpu_caps.little_endian = 0; -#else - util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN; -#endif - /* Count the number of CPUs in system */ #if defined(PIPE_OS_WINDOWS) { @@ -497,23 +486,24 @@ util_cpu_detect(void) #endif /* PIPE_ARCH_PPC */ #ifdef DEBUG - debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); - debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); - - debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); - debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); - - debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); - debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); - debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); - debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); - debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); - debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); - debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); - debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); - debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); - debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); - debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); + if (debug_get_option_dump_cpu()) { + debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); + + debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); + debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); + + debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); + debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); + debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); + debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); + debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); + debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); + debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); + debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); + debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); + debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); + debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); + } #endif util_cpu_detect_initialized = TRUE; diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h index 4b3dc39c34..f3bef0993c 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.h +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -36,26 +36,15 @@ #define _UTIL_CPU_DETECT_H #include "pipe/p_compiler.h" - -enum util_cpu_arch { - UTIL_CPU_ARCH_UNKNOWN = 0, - UTIL_CPU_ARCH_MIPS, - UTIL_CPU_ARCH_ALPHA, - UTIL_CPU_ARCH_SPARC, - UTIL_CPU_ARCH_X86, - UTIL_CPU_ARCH_POWERPC -}; +#include "pipe/p_config.h" struct util_cpu_caps { - enum util_cpu_arch arch; unsigned nr_cpus; /* Feature flags */ int x86_cpu_type; unsigned cacheline; - unsigned little_endian:1; - unsigned has_tsc:1; unsigned has_mmx:1; unsigned has_mmx2:1; diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index ad162558bc..504e6d2a18 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -88,7 +88,7 @@ debug_get_option_should_print(void) * but its cool since we set first to false */ first = FALSE; - value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE); + value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", FALSE); /* XXX should we print this option? Currently it wont */ return value; } diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h new file mode 100644 index 0000000000..2a91ea0f9a --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw.h @@ -0,0 +1,138 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_DRAW_H +#define U_DRAW_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" + + +static INLINE void +util_draw_init_info(struct pipe_draw_info *info) +{ + memset(info, 0, sizeof(*info)); + info->instance_count = 1; + info->max_index = 0xffffffff; +} + + +static INLINE void +util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_elements(struct pipe_context *pipe, int index_bias, + uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_arrays_instanced(struct pipe_context *pipe, + uint mode, uint start, uint count, + uint start_instance, + uint instance_count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = start_instance; + info.instance_count = instance_count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_elements_instanced(struct pipe_context *pipe, + int index_bias, + uint mode, uint start, uint count, + uint start_instance, + uint instance_count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.start_instance = start_instance; + info.instance_count = instance_count; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_range_elements(struct pipe_context *pipe, + int index_bias, + uint min_index, + uint max_index, + uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.min_index = min_index; + info.max_index = max_index; + + pipe->draw_vbo(pipe, &info); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index b37b48b5ae..0b6dc5880f 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -60,7 +60,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* note: vertex elements already set by caller */ /* draw */ - pipe->draw_arrays(pipe, prim_type, 0, num_verts); + util_draw_arrays(pipe, prim_type, 0, num_verts); } diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 42eb184428..52994fe05c 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -29,12 +29,18 @@ #define U_DRAWQUAD_H +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" + + #ifdef __cplusplus extern "C" { #endif struct pipe_resource; +#include "util/u_draw.h" + extern void util_draw_vertex_buffer(struct pipe_context *pipe, struct pipe_resource *vbuf, uint offset, diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 38254b1096..8e786a390a 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -631,6 +631,44 @@ util_format_has_alpha(enum pipe_format format) } /** + * Return the matching SRGB format, or PIPE_FORMAT_NONE if none. + */ +static INLINE enum pipe_format +util_format_srgb(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_UNORM: + return PIPE_FORMAT_L8_SRGB; + case PIPE_FORMAT_L8A8_UNORM: + return PIPE_FORMAT_L8A8_SRGB; + case PIPE_FORMAT_R8G8B8_UNORM: + return PIPE_FORMAT_R8G8B8_SRGB; + case PIPE_FORMAT_A8B8G8R8_UNORM: + return PIPE_FORMAT_A8B8G8R8_SRGB; + case PIPE_FORMAT_X8B8G8R8_UNORM: + return PIPE_FORMAT_X8B8G8R8_SRGB; + case PIPE_FORMAT_B8G8R8A8_UNORM: + return PIPE_FORMAT_B8G8R8A8_SRGB; + case PIPE_FORMAT_B8G8R8X8_UNORM: + return PIPE_FORMAT_B8G8R8X8_SRGB; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return PIPE_FORMAT_A8R8G8B8_SRGB; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return PIPE_FORMAT_X8R8G8B8_SRGB; + case PIPE_FORMAT_DXT1_RGB: + return PIPE_FORMAT_DXT1_SRGB; + case PIPE_FORMAT_DXT1_RGBA: + return PIPE_FORMAT_DXT1_SRGBA; + case PIPE_FORMAT_DXT3_RGBA: + return PIPE_FORMAT_DXT3_SRGBA; + case PIPE_FORMAT_DXT5_RGBA: + return PIPE_FORMAT_DXT5_SRGBA; + default: + return PIPE_FORMAT_NONE; + } +} + +/** * Return the number of components stored. * Formats with block size != 1x1 will always have 1 component (the block). */ diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c index 723fa8c3bf..fa42ec3713 100644 --- a/src/gallium/auxiliary/util/u_format_other.c +++ b/src/gallium/auxiliary/util/u_format_other.c @@ -121,6 +121,15 @@ util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, * A.k.a. D3DFMT_CxV8U8 */ +static uint8_t +r8g8bx_derive(int16_t r, int16_t g) +{ + /* Derive blue from red and green components. + * Apparently, we must always use integers to perform calculations, + * otherwise the results won't match D3D's CxV8U8 definition. + */ + return (uint8_t)sqrtf(0x7f * 0x7f - r * r - g * g) * 0xff / 0x7f; +} void util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, @@ -145,7 +154,7 @@ util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, dst[0] = (float)(r * (1.0f/0x7f)); /* r */ dst[1] = (float)(g * (1.0f/0x7f)); /* g */ - dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ dst[3] = 1.0f; /* a */ dst += 4; } @@ -177,7 +186,7 @@ util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_strid dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ - dst[2] = (uint8_t)sqrtf(0x7f*0x7f - r * r - g * g) * 0xff / 0x7f; /* b */ + dst[2] = r8g8bx_derive(r, g); /* b */ dst[3] = 255; /* a */ dst += 4; } @@ -262,6 +271,6 @@ util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src, dst[0] = r * (1.0f/0x7f); /* r */ dst[1] = g * (1.0f/0x7f); /* g */ - dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ dst[3] = 1.0f; /* a */ } diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c index 768ae9ceb5..7803ec6a8b 100644 --- a/src/gallium/auxiliary/util/u_framebuffer.c +++ b/src/gallium/auxiliary/util/u_framebuffer.c @@ -85,9 +85,11 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = src->width; dst->height = src->height; - for (i = 0; i < Elements(src->cbufs); i++) { + for (i = 0; i < src->nr_cbufs; i++) pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); - } + + for (i = src->nr_cbufs; i < dst->nr_cbufs; i++) + pipe_surface_reference(&dst->cbufs[i], NULL); dst->nr_cbufs = src->nr_cbufs; diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c index 84e2a34acc..1f336b39a1 100644 --- a/src/gallium/auxiliary/util/u_mempool.c +++ b/src/gallium/auxiliary/util/u_mempool.c @@ -126,7 +126,6 @@ void util_mempool_set_thread_safety(struct util_mempool *pool, pool->threading = threading; if (threading) { - pipe_mutex_init(pool->mutex); pool->malloc = util_mempool_malloc_mt; pool->free = util_mempool_free_mt; } else { @@ -152,6 +151,8 @@ void util_mempool_create(struct util_mempool *pool, make_empty_list(&pool->list); + pipe_mutex_init(pool->mutex); + util_mempool_set_thread_safety(pool, threading); } @@ -164,6 +165,5 @@ void util_mempool_destroy(struct util_mempool *pool) FREE(page); } - if (pool->threading) - pipe_mutex_destroy(pool->mutex); + pipe_mutex_destroy(pool->mutex); } diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 87ee0e4768..77f2c5fc7d 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 3ebef9fb74..5f113f742b 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -425,6 +425,53 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * } } +/* Integer versions of util_pack_z and util_pack_z_stencil - useful for + * constructing clear masks. + */ +static INLINE uint +util_pack_uint_z(enum pipe_format format, unsigned z) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return z & 0xffff; + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return z; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_Z24X8_UNORM: + return z & 0xffffff; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return (z & 0xffffff) << 8; + case PIPE_FORMAT_S8_USCALED: + return 0; + default: + debug_print_format("gallium: unhandled format in util_pack_z()", format); + assert(0); + return 0; + } +} + +static INLINE uint +util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) +{ + unsigned packed = util_pack_uint_z(format, z); + + s &= 0xff; + + switch (format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return packed | (s << 24); + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return packed | s; + case PIPE_FORMAT_S8_USCALED: + return packed | s; + default: + return packed; + } +} + + /** * Note: it's assumed that z is in [0,1] diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 606b9b5c6b..3c851f7340 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -108,6 +108,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) ok = (*nr >= 4); *nr -= (*nr % 2); break; + case PIPE_PRIM_LINES_ADJACENCY: + ok = (*nr >= 4); + *nr -= (*nr % 4); + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + ok = (*nr >= 4); + break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + ok = (*nr >= 6); + *nr -= (*nr % 5); + break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + ok = (*nr >= 4); + break; default: ok = 0; break; diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h new file mode 100644 index 0000000000..206e1ec311 --- /dev/null +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -0,0 +1,105 @@ +/* Originally written by Ben Skeggs for the nv50 driver*/ +#include <pipe/p_defines.h> + +struct util_split_prim { + void *priv; + void (*emit)(void *priv, unsigned start, unsigned count); + void (*edge)(void *priv, boolean enabled); + + unsigned mode; + unsigned start; + unsigned p_start; + unsigned p_end; + + uint repeat_first:1; + uint close_first:1; + uint edgeflag_off:1; +}; + +static INLINE void +util_split_prim_init(struct util_split_prim *s, + unsigned mode, unsigned start, unsigned count) +{ + if (mode == PIPE_PRIM_LINE_LOOP) { + s->mode = PIPE_PRIM_LINE_STRIP; + s->close_first = 1; + } else { + s->mode = mode; + s->close_first = 0; + } + s->start = start; + s->p_start = start; + s->p_end = start + count; + s->edgeflag_off = 0; + s->repeat_first = 0; +} + +static INLINE boolean +util_split_prim_next(struct util_split_prim *s, unsigned max_verts) +{ + int repeat = 0; + + if (s->repeat_first) { + s->emit(s->priv, s->start, 1); + max_verts--; + if (s->edgeflag_off) { + s->edge(s->priv, TRUE); + s->edgeflag_off = FALSE; + } + } + + if (s->p_start + s->close_first + max_verts >= s->p_end) { + s->emit(s->priv, s->p_start, s->p_end - s->p_start); + if (s->close_first) + s->emit(s->priv, s->start, 1); + return TRUE; + } + + switch (s->mode) { + case PIPE_PRIM_LINES: + max_verts &= ~1; + break; + case PIPE_PRIM_LINE_STRIP: + repeat = 1; + break; + case PIPE_PRIM_POLYGON: + max_verts--; + s->emit(s->priv, s->p_start, max_verts); + s->edge(s->priv, FALSE); + s->emit(s->priv, s->p_start + max_verts, 1); + s->p_start += max_verts; + s->repeat_first = TRUE; + s->edgeflag_off = TRUE; + return FALSE; + case PIPE_PRIM_TRIANGLES: + max_verts = max_verts - (max_verts % 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + /* to ensure winding stays correct, always split + * on an even number of generated triangles + */ + max_verts = max_verts & ~1; + repeat = 2; + break; + case PIPE_PRIM_TRIANGLE_FAN: + s->repeat_first = TRUE; + repeat = 1; + break; + case PIPE_PRIM_QUADS: + max_verts &= ~3; + break; + case PIPE_PRIM_QUAD_STRIP: + max_verts &= ~1; + repeat = 2; + break; + case PIPE_PRIM_POINTS: + break; + default: + /* TODO: implement adjacency primitives */ + assert(0); + } + + s->emit (s->priv, s->p_start, max_verts); + s->p_start += (max_verts - repeat); + return FALSE; +} diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index e2a8491e62..87959ab0aa 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -41,7 +41,6 @@ #if defined(PIPE_ARCH_SSE) -#include <xmmintrin.h> #include <emmintrin.h> @@ -72,6 +71,35 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ + +#if defined(PIPE_ARCH_SSSE3) + +#include <tmmintrin.h> + +#else /* !PIPE_ARCH_SSSE3 */ + +#include <emmintrin.h> + +/** + * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases + * where -mssse3 is not supported/enabled. + * + * MSVC will never get in here as its intrinsics support do not rely on + * compiler command line options. + */ +static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi8(__m128i a, __m128i mask) +{ + __m128i result; + __asm__("pshufb %1, %0" + : "=x" (result) + : "xm" (mask), "0" (a)); + return result; +} + +#endif /* !PIPE_ARCH_SSSE3 */ + + #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c new file mode 100644 index 0000000000..607c31f5ee --- /dev/null +++ b/src/gallium/auxiliary/util/u_staging.c @@ -0,0 +1,95 @@ +#include "util/u_staging.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +static void +util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template) +{ + memset(template, 0, sizeof(struct pipe_resource)); + if(pt->target != PIPE_BUFFER && depth <= 1) + template->target = PIPE_TEXTURE_2D; + else + template->target = pt->target; + template->format = pt->format; + template->width0 = width; + template->height0 = height; + template->depth0 = depth; + template->last_level = 0; + template->nr_samples = pt->nr_samples; + template->bind = 0; + template->usage = PIPE_USAGE_STAGING; + template->flags = 0; +} + +struct util_staging_transfer * +util_staging_transfer_new(struct pipe_context *pipe, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + bool direct) +{ + struct pipe_screen *pscreen = pipe->screen; + struct util_staging_transfer *tx; + struct pipe_resource staging_resource_template; + + tx = CALLOC_STRUCT(util_staging_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, pt); + tx->base.sr = sr; + tx->base.usage = usage; + tx->base.box = *box; + + if (direct) + { + tx->staging_resource = pt; + return tx; + } + + util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template); + tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template); + if (!tx->staging_resource) + { + pipe_resource_reference(&tx->base.resource, NULL); + FREE(tx); + return NULL; + } + + if (usage & PIPE_TRANSFER_READ) + { + struct pipe_subresource dstsr; + unsigned zi; + dstsr.face = 0; + dstsr.level = 0; + for(zi = 0; zi < box->depth; ++zi) + pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height); + } + + return tx; +} + +void +util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx) +{ + struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx; + + if (tx->staging_resource != tx->base.resource) + { + if(tx->base.usage & PIPE_TRANSFER_WRITE) { + struct pipe_subresource srcsr; + unsigned zi; + srcsr.face = 0; + srcsr.level = 0; + for(zi = 0; zi < tx->base.box.depth; ++zi) + pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height); + } + + pipe_resource_reference(&tx->staging_resource, NULL); + } + + pipe_resource_reference(&ptx->resource, NULL); + FREE(ptx); +} diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h new file mode 100644 index 0000000000..602faa2971 --- /dev/null +++ b/src/gallium/auxiliary/util/u_staging.h @@ -0,0 +1,37 @@ +/* Direct3D 10/11 has no concept of transfers. Applications instead + * create resources with a STAGING or DYNAMIC usage, copy between them + * and the real resource and use Map to map the STAGING/DYNAMIC resource. + * + * This util module allows to implement Gallium drivers as a Direct3D + * driver would be implemented: transfers allocate a resource with + * PIPE_USAGE_STAGING, and copy the data between it and the real resource + * with resource_copy_region. + */ + +#ifndef U_STAGING_H +#define U_STAGING_H + +#include "pipe/p_state.h" + +struct util_staging_transfer { + struct pipe_transfer base; + + /* if direct, same as base.resource, otherwise the temporary staging resource */ + struct pipe_resource *staging_resource; +}; + +/* user must be stride, slice_stride and offset */ +/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */ +/* staging resource is currently created with PIPE_USAGE_DYNAMIC */ +struct util_staging_transfer * +util_staging_transfer_new(struct pipe_context *pipe, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + bool direct); + +void +util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx); + +#endif diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index b5d21570d5..7733ad24d0 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -3,40 +3,22 @@ #include "util/u_inlines.h" #include "util/u_memory.h" -/* TODO: ouch, util_hash_table should do these by default when passed a null function pointer - * this indirect function call is quite bad - */ -static unsigned -hash(void *key) -{ - return (unsigned)(uintptr_t)key; -} - -static int -compare(void *key1, void *key2) -{ - return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2; -} - struct pipe_surface * util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { struct pipe_surface *ps; - void *key = NULL; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(!us->u.table) - us->u.table = util_hash_table_create(hash, compare); - key = (void *)(uintptr_t)(((zslice + face) << 8) | level); - /* TODO: ouch, should have a get-reference function... - * also, shouldn't allocate a two-pointer structure for each item... */ - ps = util_hash_table_get(us->u.table, key); + { /* or 2D array */ + if(!us->u.hash) + us->u.hash = cso_hash_create(); + + ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); } else { if(!us->u.array) - us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *)); + us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *)); ps = us->u.array[level]; } @@ -54,7 +36,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str ps->offset = ~0; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - util_hash_table_set(us->u.table, key, ps); + cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps); else us->u.array[level] = ps; @@ -66,47 +48,44 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps) { struct pipe_resource *pt = ps->texture; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level); - util_hash_table_remove(us->u.table, key); + { /* or 2D array */ + cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, ((ps->zslice + ps->face) << 8) | ps->level)); } else us->u.array[ps->level] = 0; } -static enum pipe_error -util_surfaces_destroy_callback(void *key, void *value, void *data) -{ - void (*destroy_surface) (struct pipe_surface * ps) = data; - destroy_surface((struct pipe_surface *)value); - return PIPE_OK; -} - void util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *)) { if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(us->u.table) + { /* or 2D array */ + if(us->u.hash) { - util_hash_table_foreach(us->u.table, util_surfaces_destroy_callback, destroy_surface); - util_hash_table_destroy(us->u.table); - us->u.table = NULL; + struct cso_hash_iter iter; + iter = cso_hash_first_node(us->u.hash); + while (!cso_hash_iter_is_null(iter)) { + destroy_surface(cso_hash_iter_data(iter)); + iter = cso_hash_iter_next(iter); + } + + cso_hash_delete(us->u.hash); + us->u.hash = NULL; } } else { if(us->u.array) { - unsigned i; - for(i = 0; i < pt->last_level; ++i) - { - struct pipe_surface *ps = us->u.array[i]; - if(ps) - destroy_surface(ps); - } - FREE(us->u.array); - us->u.array = NULL; + unsigned i; + for(i = 0; i <= pt->last_level; ++i) + { + struct pipe_surface *ps = us->u.array[i]; + if(ps) + destroy_surface(ps); + } + FREE(us->u.array); + us->u.array = NULL; } } } diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h index 0195bf5afb..af978c7057 100644 --- a/src/gallium/auxiliary/util/u_surfaces.h +++ b/src/gallium/auxiliary/util/u_surfaces.h @@ -4,15 +4,15 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "util/u_atomic.h" - -struct util_hash_table; +#include "cso_cache/cso_hash.h" struct util_surfaces { union { - struct util_hash_table *table; + struct cso_hash *hash; struct pipe_surface **array; + void* pv; } u; }; @@ -35,6 +35,18 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags); } +static INLINE struct pipe_surface * +util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice) +{ + if(!us->u.pv) + return 0; + + if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)) + return cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); + else + return us->u.array[level]; +} + void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps); static INLINE void |